@@ -94,7 +94,7 @@ class Client:
9494
9595 Usage
9696 -----
97-
97+
9898 # instantiate client for pyannoteAI web API
9999 >>> from pyannoteai.sdk import Client
100100 >>> client = Client(token="{PYANNOTEAI_API_KEY}")
@@ -251,16 +251,17 @@ def _hash_md5(self, file: Union[str, Path]) -> str:
251251
252252 def upload (
253253 self ,
254- audio : str | Path ,
254+ audio : str | Path | dict [ str , str | Path ] ,
255255 media_url : Optional [str ] = None ,
256256 callback : Optional [Callable ] = None ,
257257 ) -> str :
258258 """Upload audio file to pyannoteAI platform
259259
260260 Parameters
261261 ----------
262- audio : str or Path
263- Audio file to be uploaded. Can be a "str" or "Path" instance: "audio.wav" or Path("audio.wav")
262+ audio : str or Path or dict
263+ Path to audio file to be uploaded. Can be a "str" or "Path" instance, or a dict with an
264+ "audio" key (e.g. {"audio": "/path/to/audio.wav"}).
264265 media_url : str, optional
265266 Unique identifier used to retrieve the uploaded audio file on the pyannoteAI platform.
266267 Any combination of letters {a-z, A-Z}, digits {0-9}, and {-./} characters prefixed
@@ -278,6 +279,13 @@ def upload(
278279 or "media://{md5-hash-of-audio-file}" otherwise.
279280 """
280281
282+ if isinstance (audio , dict ):
283+ if "audio" not in audio :
284+ raise ValueError (
285+ "When `audio` is a dict, it must provide the path to the audio file in 'audio' key."
286+ )
287+ audio = audio ["audio" ]
288+
281289 # get the total size of the file to upload
282290 # to provide progress information to the hook
283291 total_size = os .path .getsize (audio )
@@ -324,10 +332,13 @@ def upload(
324332 def diarize (
325333 self ,
326334 media_url : str ,
327- num_speakers : Optional [ int ] = None ,
328- min_speakers : Optional [ int ] = None ,
329- max_speakers : Optional [ int ] = None ,
335+ num_speakers : int | None = None ,
336+ min_speakers : int | None = None ,
337+ max_speakers : int | None = None ,
330338 confidence : bool = False ,
339+ turn_level_confidence : bool = False ,
340+ exclusive : bool = False ,
341+ model : str = "precision-2" ,
331342 ** kwargs ,
332343 ) -> str :
333344 """Initiate a diarization job on the pyannoteAI web API
@@ -341,11 +352,17 @@ def diarize(
341352 Force number of speakers to diarize. If not provided, the
342353 number of speakers will be determined automatically.
343354 min_speakers : int, optional
344- Not supported yet. Minimum number of speakers. Has no effect when `num_speakers` is provided .
355+ Minimum number of speakers.
345356 max_speakers : int, optional
346- Not supported yet. Maximum number of speakers. Has no effect when `num_speakers` is provided .
357+ Maximum number of speakers.
347358 confidence : bool, optional
348- Defaults to False
359+ Enable confidence scores.
360+ turn_level_confidence : bool, optional
361+ Enable turn-level confidence scores. Defaults to False.
362+ exclusive : bool, optional
363+ Enable exclusive speaker diarization. Defaults to False.
364+ model : str, optional
365+ Name of the model, sent as "model" in the request body. Defaults to "precision-2".
349366 **kwargs : optional
350367 Extra arguments to send in the body of the request.
351368
@@ -359,10 +376,16 @@ def diarize(
359376 If something else went wrong
360377 """
361378
362- assert min_speakers is None , "`min_speakers` is not supported yet"
363- assert max_speakers is None , "`max_speakers` is not supported yet"
364-
365- json = {"url" : media_url , "numSpeakers" : num_speakers , "confidence" : confidence }
379+ json = {
380+ "url" : media_url ,
381+ "model" : model ,
382+ "numSpeakers" : num_speakers ,
383+ "minSpeakers" : min_speakers ,
384+ "maxSpeakers" : max_speakers ,
385+ "confidence" : confidence ,
386+ "turnLevelConfidence" : turn_level_confidence ,
387+ "exclusive" : exclusive ,
388+ }
366389 # add extra arguments to the request body
367390 json .update (kwargs )
368391
@@ -373,6 +396,7 @@ def diarize(
373396 def voiceprint (
374397 self ,
375398 media_url : str ,
399+ model : str = "precision-2" ,
376400 ** kwargs ,
377401 ) -> str :
378402 """Initiate a voiceprint job on the pyannoteAI web API
@@ -382,6 +406,8 @@ def voiceprint(
382406 media_url : str
383407 media://{...} URL created with the `upload` method or
384408 any other public URL pointing to an audio file.
409+ model : str, optional
410+ Name of the model, sent as "model" in the request body. Defaults to "precision-2".
385411 **kwargs : optional
386412 Extra arguments to send in the body of the request.
387413
@@ -395,7 +421,7 @@ def voiceprint(
395421 If something else went wrong
396422 """
397423
398- json = {"url" : media_url }
424+ json = {"url" : media_url , "model" : model }
399425 # add extra arguments to the request body
400426 json .update (kwargs )
401427
@@ -409,10 +435,13 @@ def identify(
409435 voiceprints : dict [str , str ],
410436 exclusive_matching : bool = True ,
411437 matching_threshold : float = 0.0 ,
412- num_speakers : Optional [ int ] = None ,
413- min_speakers : Optional [ int ] = None ,
414- max_speakers : Optional [ int ] = None ,
438+ num_speakers : int | None = None ,
439+ min_speakers : int | None = None ,
440+ max_speakers : int | None = None ,
415441 confidence : bool = False ,
442+ turn_level_confidence : bool = False ,
443+ exclusive : bool = False ,
444+ model : str = "precision-2" ,
416445 ** kwargs ,
417446 ) -> str :
418447 """Initiate an identification job on the pyannoteAI web API
@@ -423,6 +452,7 @@ def identify(
423452 media://{...} URL created with the `upload` method or
424453 any other public URL pointing to an audio file.
425454 voiceprints : dict
455+ Mapping from speaker label to the corresponding voiceprint.
426456 exclusive_matching : bool, optional
427457 Prevent multiple speakers from being matched to the same voiceprint.
428458 Defaults to True.
@@ -433,11 +463,17 @@ def identify(
433463 Force number of speakers to diarize. If not provided, the
434464 number of speakers will be determined automatically.
435465 min_speakers : int, optional
436- Not supported yet. Minimum number of speakers. Has no effect when `num_speakers` is provided .
466+ Minimum number of speakers.
437467 max_speakers : int, optional
438- Not supported yet. Maximum number of speakers. Has no effect when `num_speakers` is provided .
468+ Maximum number of speakers.
439469 confidence : bool, optional
440- Defaults to False
470+ Enable confidence scores.
471+ turn_level_confidence : bool, optional
472+ Enable turn-level confidence scores. Defaults to False.
473+ exclusive : bool, optional
474+ Enable exclusive speaker diarization. Defaults to False.
475+ model : str, optional
476+ Name of the model, sent as "model" in the request body. Defaults to "precision-2".
441477 **kwargs : optional
442478 Extra arguments to send in the body of the request.
443479
@@ -451,17 +487,19 @@ def identify(
451487 If something else went wrong
452488 """
453489
454- assert min_speakers is None , "`min_speakers` is not supported yet"
455- assert max_speakers is None , "`max_speakers` is not supported yet"
456-
457490 json = {
458491 "url" : media_url ,
492+ "model" : model ,
493+ "numSpeakers" : num_speakers ,
494+ "minSpeakers" : min_speakers ,
495+ "maxSpeakers" : max_speakers ,
496+ "confidence" : confidence ,
497+ "turnLevelConfidence" : turn_level_confidence ,
498+ "exclusive" : exclusive ,
459499 "voiceprints" : [
460500 {"label" : speaker , "voiceprint" : voiceprint }
461501 for speaker , voiceprint in voiceprints .items ()
462502 ],
463- "numSpeakers" : num_speakers ,
464- # "confidence": confidence,
465503 "matching" : {
466504 "exclusive" : exclusive_matching ,
467505 "threshold" : matching_threshold ,
0 commit comments