From f1ba89a4538d58a2fe743710eb7866f770a8e36e Mon Sep 17 00:00:00 2001
From: karamouche <hugo4ibt@gmail.com>
Date: Thu, 4 Jun 2026 17:10:20 +0200
Subject: [PATCH] fix: ensure audio array is mono and maintain float32 dtype in
 extract_audio_and_text function

---
 noisekit/dataset.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/noisekit/dataset.py b/noisekit/dataset.py
index edeb7a6..5386e9a 100644
--- a/noisekit/dataset.py
+++ b/noisekit/dataset.py
@@ -69,6 +69,10 @@ def extract_audio_and_text(sample: dict) -> tuple[np.ndarray, int, str]:
     else:
         raise ValueError("Audio sample has neither 'bytes' nor 'path'.")
 
+    array = np.asarray(array, dtype=np.float32)
+    if array.ndim == 2:
+        array = array.mean(axis=1)  # (samples, channels) → (samples,) for mono-only metrics
+
     text = (
         sample.get("text")
         or sample.get("sentence")
@@ -76,4 +80,4 @@ def extract_audio_and_text(sample: dict) -> tuple[np.ndarray, int, str]:
         or sample.get("normalized_text")
         or ""
     )
-    return np.asarray(array, dtype=np.float32), int(sr), str(text).strip()
+    return array, int(sr), str(text).strip()