InseeFrLab · VincentG1234 · May 19, 2026 · May 14, 2026 · May 18, 2026 · May 18, 2026
diff --git a/auto_tune_vllm/core/config.py b/auto_tune_vllm/core/config.py
@@ -40,15 +40,18 @@ class ObjectiveConfig:
     direction: str  # "maximize" or "minimize"
 
     valid_metrics = {
-            "output_tokens_per_second",
-            "request_latency",
-            "time_to_first_token_ms",
-            "inter_token_latency_ms",
-            "requests_per_second",
-        }
+        "output_tokens_per_second",
+        "request_latency",
+        "time_to_first_token_ms",
+        "inter_token_latency_ms",
+        "requests_per_second",
+    }
     valid_directions = {"maximize", "minimize"}
     valid_percentiles = {"median", "p50", "p95", "p90", "p99", "mean"}
-    valid_metrics_combined = {f"{metric}_{percentile}" for metric, percentile in product(valid_metrics, valid_percentiles)}
+    valid_metrics_combined = {
+        f"{metric}_{percentile}"
+        for metric, percentile in product(valid_metrics, valid_percentiles)
+    }
 
     def _break_down_objectives(self) -> list[str]:
         """
@@ -66,9 +69,7 @@ def _break_down_objectives(self) -> list[str]:
         try:
             tree = ast.parse(self.metric, mode="eval")
         except SyntaxError as e:
-            raise ValueError(
-                f"Invalid metric expression {self.metric!r}: {e}"
-            ) from e
+            raise ValueError(f"Invalid metric expression {self.metric!r}: {e}") from e
 
         metrics: list[str] = []
         seen: set[str] = set()
@@ -122,26 +123,45 @@ class OptimizationConfig:
     approach: Optional[str] = None  # "single_objective" or "multi_objective"
     objectives: Optional[List[ObjectiveConfig]] = None  # For multi-objective
     preset: Optional[str] = None  # "high_throughput", "low_latency", "balanced"
+    # Optional metric identifiers copied onto each Optuna trial as user
+    # attributes (dashboard visibility only).
+    log_metrics: Optional[List[str]] = None
 
     def __post_init__(self):
         """Process and validate optimization configuration."""
-        # Handle preset configurations
         if self.preset:
             self._apply_preset()
-            return
-
-        # Handle new structured format
-        if self.approach:
+        elif self.approach:
             self._validate_structured_format()
-            return
-
-        # Handle backward compatibility (old format)
-        if self.objective:
+        elif self.objective:
             self._convert_old_format()
-            return
+        else:
+            self._apply_default_config()
+        self._validate_log_metrics()
 
-        # Default fallback
-        self._apply_default_config()
+    def _validate_log_metrics(self) -> None:
+        """Normalize log_metrics to a list and check every entry is known."""
+        entries = self.log_metrics
+        if entries is None:
+            self.log_metrics = []
+            return
+        if not isinstance(entries, list):
+            raise ValueError(
+                "log_metrics must be a list of metric identifier strings, "
+                f"got {type(entries).__name__}"
+            )
+        known = ObjectiveConfig.valid_metrics_combined
+        for entry in entries:
+            if not isinstance(entry, str):
+                raise ValueError(
+                    "log_metrics entries must be strings, "
+                    f"got {type(entry).__name__}: {entry!r}"
+                )
+            if entry not in known:
+                raise ValueError(
+                    f"Unknown metric {entry!r} in log_metrics. "
+                    f"Each entry must be a single identifier from {sorted(known)}"
+                )
 
     def _apply_preset(self):
         """Apply preset optimization configurations."""

diff --git a/auto_tune_vllm/core/study_controller.py b/auto_tune_vllm/core/study_controller.py
@@ -916,6 +916,8 @@ def _set_trial_user_attributes(self, trial_number: int, result: TrialResult):
                 f"Stored error attributes for trial {trial_number}: {result.error_type}"
             )
 
+        self._set_log_metric_user_attrs(trial, result)
+
         # Log timing attributes stored
         if result.execution_info:
             logger.debug(
@@ -926,6 +928,42 @@ def _set_trial_user_attributes(self, trial_number: int, result: TrialResult):
                 f"status={result.execution_info.trial_status}"
             )
 
+    def _set_log_metric_user_attrs(
+        self, trial: optuna.Trial, result: TrialResult
+    ) -> None:
+        """
+        Copy each configured ``log_metrics`` value onto the Optuna trial as a
+        ``metric_<name>`` user attribute (dashboard only; not an objective).
+        Values that are missing or not float-convertible are skipped with a warning.
+        """
+        if result.trial_type not in ("optimization", "baseline"):
+            return
+        wanted = self.config.optimization.log_metrics
+        if not (wanted and result.success and result.detailed_metrics):
+            return
+        for name in wanted:
+            if name not in result.detailed_metrics:
+                logger.warning(
+                    "log_metrics: metric %r not found in detailed_metrics "
+                    "for trial %s; skipping user attr",
+                    name,
+                    result.trial_number,
+                )
+                continue
+            raw = result.detailed_metrics[name]
+            try:
+                value = float(raw)
+            except (TypeError, ValueError):
+                logger.warning(
+                    "log_metrics: cannot coerce metric %r value %r to float "
+                    "for trial %s; skipping user attr",
+                    name,
+                    raw,
+                    result.trial_number,
+                )
+            else:
+                trial.set_user_attr(f"metric_{name}", value)
+
     def get_best_baseline_result(self) -> list[float] | None:
         """Get the best baseline result for comparison."""
         if not self.baseline_results:

diff --git a/docs/configuration.md b/docs/configuration.md
@@ -180,6 +180,25 @@ Number of optimization trials to run. Each trial tests one parameter combination
 #### `n_startup_trials` (integer, optional)
 Number of random trials to run before starting the main sampler algorithm. Only supported by some samplers (TPE, BoTorch). Helps initialize the sampler with diverse data points.
 
+#### `log_metrics` (list of strings, optional)
+Extra benchmark scalars to copy onto each **Optuna trial** as [user attributes](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.set_user_attr), mainly so tools like **Optuna Dashboard** can plot or filter on them alongside objectives.
+
+- **Semantics**: This does **not** change the optimization objective. It only stores additional numbers on the trial record after a successful benchmark.
+- **Identifiers**: Each list entry must be a single metric id in the same `<metric>_<percentile>` form used in objective expressions (see **`objectives`** above), e.g. `request_latency_p95` or `output_tokens_per_second_median`. The allowed names are exactly the combinations of the base metrics and percentiles documented for objectives. Arithmetic expressions are not accepted here; an unknown name, a non-string entry, or a non-list value raises a `ValueError` when the configuration is validated.
+- **Storage**: For each configured name, the runner writes `trial.set_user_attr("metric_<name>", float_value)` using the value from the trial’s `detailed_metrics`. If a name is missing from `detailed_metrics`, or the value cannot be converted to a float, a warning is logged and that attribute is skipped.
+- **Trials**: Applied to **optimization** and **baseline** trials when the run succeeds and detailed metrics are present. If `log_metrics` is omitted or `null`, it is treated as an empty list and no extra attributes are written.
+
+Example:
+
+```yaml
+optimization:
+  preset: "balanced"
+  n_trials: 50
+  log_metrics:
+    - "inter_token_latency_ms_p95"
+    - "time_to_first_token_ms_median"
+```
+
 ### Preset Configurations Explained
 
 #### High Throughput Preset

diff --git a/examples/study_config.yaml b/examples/study_config.yaml
@@ -12,6 +12,12 @@ optimization:
       direction: "maximize"
     - metric: "time_to_first_token_ms_p95"  # Worst-case TTFT
       direction: "minimize"
+  # Optional: copy extra benchmark scalars to Optuna user attrs (metric_<name>)
+  # for the dashboard; entries must be single combined metric keys matching
+  # ObjectiveConfig metric identifiers (for example, "inter_token_latency_ms_p95"),
+  # not arithmetic expressions.
+  # log_metrics:
+  #   - "inter_token_latency_ms_p95"
   sampler: "nsga2"  # Best for multi-objective optimization
   n_trials: 11
 
@@ -32,9 +38,3 @@ parameters:
   max_num_batched_tokens:
     enabled: true
     options: [1024, 2048, 10000]
-
-  gpu_memory_utilization:
-    enabled: true
-    min: 0.9
-    max: 0.92
-    step: 0.01
diff --git a/optuna_dashboard/study.db b/optuna_dashboard/study.db
diff --git a/tests/core/test_optimization_config.py b/tests/core/test_optimization_config.py
@@ -0,0 +1,65 @@
+"""Unit tests for OptimizationConfig.log_metrics validation."""
+
+from __future__ import annotations
+
+import pytest
+
+from auto_tune_vllm.core.config import ObjectiveConfig, OptimizationConfig
+
+
+def _make_config(**overrides):
+    """Build a minimal valid single-objective config, applying overrides."""
+    return OptimizationConfig(
+        approach="single_objective",
+        objectives=[
+            ObjectiveConfig(
+                metric="output_tokens_per_second_mean",
+                direction="maximize",
+            )
+        ],
+        **overrides,
+    )
+
+
+def test_log_metrics_default_normalized_to_empty_list():
+    # The dataclass default (None) is normalized to a list in __post_init__.
+    assert _make_config().log_metrics == []
+
+
+def test_log_metrics_explicit_empty_list_kept():
+    assert _make_config(log_metrics=[]).log_metrics == []
+
+
+def test_log_metrics_valid_entries():
+    names = ["time_to_first_token_ms_p95", "request_latency_median"]
+    assert _make_config(log_metrics=list(names)).log_metrics == names
+
+
+@pytest.mark.parametrize("percentile", sorted(ObjectiveConfig.valid_percentiles))
+def test_log_metrics_accepts_every_percentile(percentile):
+    # Every <metric>_<percentile> combination is a valid identifier.
+    name = f"request_latency_{percentile}"
+    assert _make_config(log_metrics=[name]).log_metrics == [name]
+
+
+def test_log_metrics_invalid_metric_raises():
+    with pytest.raises(ValueError, match="Unknown metric"):
+        _make_config(log_metrics=["not_a_valid_metric_p95"])
+
+
+def test_log_metrics_expression_raises():
+    # Entries are plain identifiers; arithmetic expressions are not accepted.
+    with pytest.raises(ValueError, match="Unknown metric"):
+        _make_config(log_metrics=["request_latency_p95 + request_latency_p50"])
+
+
+def test_log_metrics_non_string_entry_raises():
+    # Covers the per-entry type check, which runs before the name lookup.
+    with pytest.raises(ValueError, match="entries must be strings"):
+        _make_config(log_metrics=[95])
+
+
+def test_log_metrics_wrong_container_type_raises():
+    # A bare string must not be silently treated as a list of characters.
+    with pytest.raises(ValueError, match="log_metrics must be a list"):
+        _make_config(log_metrics="time_to_first_token_ms_p95")