Enhance plotting functionality in BaseDatasetScoreEvaluator to accept additional keyword arguments

sergioburdisso · sergioburdisso · commit 02b63429ae0e · 2026-01-29T10:36:04.000+01:00
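For instance, `plot(zoom=True)` is now forwarded to `__plot__` of KDEDistanceEvaluator, which accepts a new `zoom` argument (default `zoom=False`); when enabled, the x-axis is limited to the 2nd-98th percentile range of all reference and candidate scores, with 5% padding on each side.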
diff --git a/src/sdialog/evaluation/__init__.py b/src/sdialog/evaluation/__init__.py
@@ -117,7 +117,7 @@ def _kl_divergence(p1, p2, resolution=100, bw_method=1e-1):
     """
     Estimate KL divergence KL(p1 || p2) between two 1D distributions via KDE.
 
-    KL(p1||p2) is non‑symmetric and >= 0 (0 means identical).
+    KL(p1||p2) is non-symmetric and >= 0 (0 means identical).
 
     :param p1: First sample (treat as true distribution).
     :type p1: array-like
@@ -2009,7 +2009,7 @@ def __init__(self,
                                                       leave=verbose)]
         self.reference_scores = np.array([s for s in self.reference_scores if s is not None])
 
-    def __plot__(self, dialog_scores: Dict[str, np.ndarray], plot: Optional[plt.Axes] = None):
+    def __plot__(self, dialog_scores: Dict[str, np.ndarray], plot: Optional[plt.Axes] = None, zoom: bool = False):
         """
         Plot KDE curves of reference and candidate score distributions.
 
@@ -2038,6 +2038,21 @@ def __plot__(self, dialog_scores: Dict[str, np.ndarray], plot: Optional[plt.Axes
                 color_idx += 1
             except ValueError as e:
                 logger.error(f"Error plotting KDE for {dataset_name}: {e}")
+
+        if zoom:
+            # Percentile-based zoom
+            all_scores = []
+            if self.reference_scores is not None:
+                all_scores.append(self.reference_scores)
+            for scores in dialog_scores.values():
+                all_scores.append(scores)
+
+            if all_scores:
+                all_scores = np.concatenate(all_scores)
+                low, high = np.percentile(all_scores, [2, 98])  # tweak if needed
+                pad = 0.05 * (high - low)
+                plt.gca().set_xlim(low - pad, high + pad)
+
         plot.xlabel(self.plot_xlabel if self.plot_xlabel else self.dialog_score.name)
         plot.ylabel(self.plot_ylabel if self.plot_ylabel else "Density")
         plot.legend(loc='best', frameon=True, fancybox=False, edgecolor='black', framealpha=1.0)
diff --git a/src/sdialog/evaluation/base.py b/src/sdialog/evaluation/base.py
@@ -569,14 +569,17 @@ def clear(self):
 
     def plot(self,
              show: bool = True,
-             save_path: str = None):
+             save_path: str = None,
+             **kwargs):
         """
         Generate plots for stored dataset scores.
 
         :param show: Whether to display the plot(s).
         :type show: bool
         :param save_path: If provided, save figure(s) to this path (metric name appended when multi-metric).
         :type save_path: Optional[str]
+        :param kwargs: Additional keyword arguments for plotting.
+        :type kwargs: dict
         :return: None
         :rtype: None
         """
@@ -587,7 +590,7 @@ def plot(self,
         if self.datasets_scores and isinstance(next(iter(self.datasets_scores.values())), dict):
             for metric in self.datasets_scores:
                 plt.figure(figsize=(8, 5))
-                self.__plot__(self.datasets_scores[metric], plot=plt, metric=metric)
+                self.__plot__(self.datasets_scores[metric], plot=plt, metric=metric, **kwargs)
                 if save_path:
                     # Append metric name to filename before saving
                     if "." in save_path.split("/")[-1]:
@@ -601,7 +604,7 @@ def plot(self,
                     plt.show()
         else:
             plt.figure(figsize=(8, 5))
-            self.__plot__(self.datasets_scores, plot=plt)
+            self.__plot__(self.datasets_scores, plot=plt, **kwargs)
             if save_path:
                 os.makedirs(os.path.dirname(save_path), exist_ok=True)
                 plt.savefig(save_path, dpi=300)