Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 42 additions & 22 deletions auto_tune_vllm/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,18 @@ class ObjectiveConfig:
direction: str # "maximize" or "minimize"

valid_metrics = {
"output_tokens_per_second",
"request_latency",
"time_to_first_token_ms",
"inter_token_latency_ms",
"requests_per_second",
}
"output_tokens_per_second",
"request_latency",
"time_to_first_token_ms",
"inter_token_latency_ms",
"requests_per_second",
}
valid_directions = {"maximize", "minimize"}
valid_percentiles = {"median", "p50", "p95", "p90", "p99", "mean"}
valid_metrics_combined = {f"{metric}_{percentile}" for metric, percentile in product(valid_metrics, valid_percentiles)}
valid_metrics_combined = {
f"{metric}_{percentile}"
for metric, percentile in product(valid_metrics, valid_percentiles)
}

def _break_down_objectives(self) -> list[str]:
"""
Expand All @@ -66,9 +69,7 @@ def _break_down_objectives(self) -> list[str]:
try:
tree = ast.parse(self.metric, mode="eval")
except SyntaxError as e:
raise ValueError(
f"Invalid metric expression {self.metric!r}: {e}"
) from e
raise ValueError(f"Invalid metric expression {self.metric!r}: {e}") from e

metrics: list[str] = []
seen: set[str] = set()
Expand Down Expand Up @@ -122,26 +123,45 @@ class OptimizationConfig:
approach: Optional[str] = None # "single_objective" or "multi_objective"
objectives: Optional[List[ObjectiveConfig]] = None # For multi-objective
preset: Optional[str] = None # "high_throughput", "low_latency", "balanced"
log_metrics: Optional[List[str]] = (
None # Optional metrics copied to Optuna trial user attrs (dashboard)
)

def __post_init__(self):
"""Process and validate optimization configuration."""
# Handle preset configurations
if self.preset:
self._apply_preset()
return

# Handle new structured format
if self.approach:
elif self.approach:
self._validate_structured_format()
return

# Handle backward compatibility (old format)
if self.objective:
elif self.objective:
self._convert_old_format()
return
else:
self._apply_default_config()
self._validate_log_metrics()

# Default fallback
self._apply_default_config()
def _validate_log_metrics(self) -> None:
"""Normalize and validate log_metrics (independent of objective setup)."""
if self.log_metrics is None:
self.log_metrics = []
return
if not isinstance(self.log_metrics, list):
raise ValueError(
"log_metrics must be a list of metric identifier strings, "
f"got {type(self.log_metrics).__name__}"
)
valid = ObjectiveConfig.valid_metrics_combined
for name in self.log_metrics:
if not isinstance(name, str):
raise ValueError(
"log_metrics entries must be strings, "
f"got {type(name).__name__}: {name!r}"
)
if name not in valid:
raise ValueError(
f"Unknown metric {name!r} in log_metrics. "
f"Each entry must be a single identifier from "
f"{sorted(valid)}"
)

def _apply_preset(self):
"""Apply preset optimization configurations."""
Expand Down
38 changes: 38 additions & 0 deletions auto_tune_vllm/core/study_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,8 @@ def _set_trial_user_attributes(self, trial_number: int, result: TrialResult):
f"Stored error attributes for trial {trial_number}: {result.error_type}"
)

self._set_log_metric_user_attrs(trial, result)

# Log timing attributes stored
if result.execution_info:
logger.debug(
Expand All @@ -926,6 +928,42 @@ def _set_trial_user_attributes(self, trial_number: int, result: TrialResult):
f"status={result.execution_info.trial_status}"
)

def _set_log_metric_user_attrs(
self, trial: optuna.Trial, result: TrialResult
) -> None:
"""
Copy selected benchmark scalars onto the Optuna trial as user attributes
for dashboard visibility (not objectives; not passed to study.tell).
Applies to optimization trials and baseline reference trials alike.
"""
if result.trial_type not in ("optimization", "baseline"):
return
names = self.config.optimization.log_metrics
if not names or not result.success or not result.detailed_metrics:
return
for name in names:
if name not in result.detailed_metrics:
logger.warning(
"log_metrics: metric %r not found in detailed_metrics for trial %s; "
"skipping user attr",
name,
result.trial_number,
)
continue
raw = result.detailed_metrics[name]
try:
value = float(raw)
except (TypeError, ValueError):
logger.warning(
"log_metrics: cannot coerce metric %r value %r to float for "
"trial %s; skipping user attr",
name,
raw,
result.trial_number,
)
continue
trial.set_user_attr(f"metric_{name}", value)

def get_best_baseline_result(self) -> list[float] | None:
"""Get the best baseline result for comparison."""
if not self.baseline_results:
Expand Down
19 changes: 19 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,25 @@ Number of optimization trials to run. Each trial tests one parameter combination
#### `n_startup_trials` (integer, optional)
Number of random trials to run before starting the main sampler algorithm. Only supported by some samplers (TPE, BoTorch). Helps initialize the sampler with diverse data points.

#### `log_metrics` (list of strings, optional)
Extra benchmark scalars to copy onto each **Optuna trial** as [user attributes](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.set_user_attr), mainly so tools like **Optuna Dashboard** can plot or filter on them alongside objectives.

- **Semantics**: This does **not** change the optimization objective. It only stores additional numbers on the trial record after a successful benchmark.
- **Identifiers**: Each list entry must be a single metric id in the same `<metric>_<percentile>` form as in objective expressions (see **`objectives`** above), e.g. `request_latency_p95`, `output_tokens_per_second_median`. Allowed names are exactly the combined identifiers derived from the base metrics and percentiles documented for objectives.
- **Storage**: For each configured name, the runner writes `trial.set_user_attr("metric_<name>", float_value)` using the value from the trial’s `detailed_metrics`. If a name is missing from `detailed_metrics`, or the value cannot be converted to a float, a warning is logged and that attribute is skipped.
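- **Trials**: Applied to **optimization** and **baseline** trials, and only when the trial succeeds and its detailed metrics are present; failed trials receive no `metric_<name>` attributes. An omitted or `null` `log_metrics` is treated as an empty list, so no extra attributes are written.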

Example:

```yaml
optimization:
preset: "balanced"
n_trials: 50
log_metrics:
- "inter_token_latency_ms_p95"
- "time_to_first_token_ms_median"
```

### Preset Configurations Explained

#### High Throughput Preset
Expand Down
12 changes: 6 additions & 6 deletions examples/study_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ optimization:
direction: "maximize"
- metric: "time_to_first_token_ms_p95" # Worst-case TTFT
direction: "minimize"
# Optional: copy extra benchmark scalars to Optuna user attrs (metric_<name>)
# for the dashboard; entries must be single combined metric keys matching
# ObjectiveConfig metric identifiers (for example, "inter_token_latency_ms_p95"),
# not arithmetic expressions.
# log_metrics:
# - "inter_token_latency_ms_p95"
sampler: "nsga2" # Best for multi-objective optimization
n_trials: 11

Expand All @@ -32,9 +38,3 @@ parameters:
max_num_batched_tokens:
enabled: true
options: [1024, 2048, 10000]

gpu_memory_utilization:
enabled: true
min: 0.9
max: 0.92
step: 0.01
Binary file modified optuna_dashboard/study.db
Binary file not shown.
65 changes: 65 additions & 0 deletions tests/core/test_optimization_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Unit tests for OptimizationConfig.log_metrics validation."""

from __future__ import annotations

import pytest

from auto_tune_vllm.core.config import ObjectiveConfig, OptimizationConfig


def test_log_metrics_default_normalized_to_empty_list():
    """Leaving log_metrics unset yields an empty list on the config."""
    throughput = ObjectiveConfig(
        metric="output_tokens_per_second_mean",
        direction="maximize",
    )
    config = OptimizationConfig(
        approach="single_objective",
        objectives=[throughput],
    )
    assert config.log_metrics == []


def test_log_metrics_valid_entries():
    """Known metric identifiers are accepted and kept in their given order."""
    throughput = ObjectiveConfig(
        metric="output_tokens_per_second_mean",
        direction="maximize",
    )
    config = OptimizationConfig(
        approach="single_objective",
        objectives=[throughput],
        log_metrics=["time_to_first_token_ms_p95", "request_latency_median"],
    )
    expected = ["time_to_first_token_ms_p95", "request_latency_median"]
    assert config.log_metrics == expected


def test_log_metrics_invalid_metric_raises():
    """An identifier outside the valid combined set raises ValueError."""
    with pytest.raises(ValueError, match="Unknown metric"):
        # Built inside the context manager, exactly as the config is.
        throughput = ObjectiveConfig(
            metric="output_tokens_per_second_mean",
            direction="maximize",
        )
        OptimizationConfig(
            approach="single_objective",
            objectives=[throughput],
            log_metrics=["not_a_valid_metric_p95"],
        )


def test_log_metrics_wrong_container_type_raises():
    """A bare string instead of a list of strings raises ValueError."""
    with pytest.raises(ValueError, match="log_metrics must be a list"):
        # Built inside the context manager, exactly as the config is.
        throughput = ObjectiveConfig(
            metric="output_tokens_per_second_mean",
            direction="maximize",
        )
        OptimizationConfig(
            approach="single_objective",
            objectives=[throughput],
            log_metrics="time_to_first_token_ms_p95",  # type: ignore[arg-type]
        )
Loading