-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
93 lines (69 loc) · 3.28 KB
/
main.py
File metadata and controls
93 lines (69 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""
- This audio processing script uses input loaded with the pydub library
- Audio must be in WAV format before preprocessing
"""
import os
import pydub
import argparse
import numpy as np
from tqdm import tqdm
from pydub import AudioSegment
def set_loudness(sound=pydub.audio_segment.AudioSegment, target_dBFS=None) -> pydub.audio_segment.AudioSegment:
    """Apply gain to ``sound`` so that its loudness matches ``target_dBFS``.

    Args:
        sound: The pydub audio segment to adjust.
        target_dBFS: Desired loudness. When ``None``, a target is derived
            from the RMS of the segment's own samples
            (``20 * log10(rms) - 10``).

    Returns:
        The result of ``sound.apply_gain`` with the gain needed to move
        ``sound.dBFS`` to the target. If no target is given and the segment
        has no samples or is pure digital silence, ``sound`` is returned
        unchanged because no finite target can be derived.
    """
    if target_dBFS is None:
        # Read the samples of the segment passed in (not a module-level
        # variable) and promote to float64 so the dot product below cannot
        # overflow the integer sample type.
        samples = np.asarray(sound.get_array_of_samples(), dtype=np.float64)
        if samples.size == 0:
            return sound
        rms = np.sqrt(np.dot(samples, samples) / samples.size)
        if rms == 0:
            # log10(0) is -inf; there is nothing meaningful to normalise.
            return sound
        # NOTE(review): rms is in raw sample units, not relative to full
        # scale, so this value is not a true dBFS figure - confirm the
        # intended formula before relying on the automatic target.
        target_dBFS = float(20 * np.log10(rms) - 10)
    loudness_difference = target_dBFS - sound.dBFS
    return sound.apply_gain(loudness_difference)
def set_channel(sound=pydub.audio_segment.AudioSegment, target_channels=1) -> pydub.audio_segment.AudioSegment:
    """Return ``sound`` converted to ``target_channels`` channels.

    Thin wrapper around ``sound.set_channels``.
    """
    converted = sound.set_channels(target_channels)
    return converted
def set_sample_rate(sound=pydub.audio_segment.AudioSegment, target_sr=22050) -> pydub.audio_segment.AudioSegment:
    """Return ``sound`` resampled to a frame rate of ``target_sr``.

    Thin wrapper around ``sound.set_frame_rate``.
    """
    resampled = sound.set_frame_rate(target_sr)
    return resampled
def remove_silent(sound=pydub.audio_segment.AudioSegment, thresh_hold=-50) -> pydub.audio_segment.AudioSegment:
    """Trim leading and trailing silence from ``sound``.

    Args:
        sound: The pydub audio segment to trim.
        thresh_hold: Value passed to ``pydub.silence.detect_silence`` as
            ``silence_thresh``.

    Returns:
        The slice of ``sound`` between the end of a silent range that starts
        at position 0 and the start of a silent range that reaches the end
        of the segment. Silence in the middle of the audio is kept. If no
        silence is detected, ``sound`` is returned unchanged; if the silent
        ranges cover the whole segment, an empty slice is returned.
    """
    silent = pydub.silence.detect_silence(
        sound, silence_thresh=thresh_hold, min_silence_len=100)
    if not silent:
        return sound

    total_len = len(sound)
    first_start, first_end = silent[0][0], silent[0][-1]
    last_start, last_end = silent[-1][0], silent[-1][-1]

    # Leading silence only counts if the first silent range starts at 0.
    start = first_end if first_start == 0 else 0
    # Trailing silence only counts if the last silent range reaches the end
    # of the audio (the original compared against 0, so it never matched).
    end = last_start if last_end >= total_len else total_len

    if start >= end:
        # The whole segment is silent: return an explicit empty slice
        # instead of slicing with start > end.
        return sound[:0]
    return sound[start:end]
def normalize_signal(signal: pydub.audio_segment.AudioSegment, channels: int = None, sr: int = None, silence_threshold: int = None, loudness_dBFS: int = None) -> pydub.audio_segment.AudioSegment:
    """Run the optional normalisation steps on ``signal`` in a fixed order.

    Steps, each skipped when its argument is not supplied:
      1. ``set_channel`` when ``channels`` is truthy.
      2. ``set_sample_rate`` when ``sr`` is truthy.
      3. ``remove_silent`` when ``silence_threshold`` is not ``None``.
      4. ``set_loudness`` when ``loudness_dBFS`` is not ``None``.

    Args:
        signal: The pydub audio segment to process.
        channels: Target channel count.
        sr: Target sample rate.
        silence_threshold: Threshold forwarded to ``remove_silent``.
        loudness_dBFS: Target loudness forwarded to ``set_loudness``.

    Returns:
        The processed audio segment.
    """
    # 0 is not a usable channel count or sample rate, so truthiness is the
    # right check for these two.
    if channels:
        signal = set_channel(signal, channels)
    if sr:
        signal = set_sample_rate(signal, sr)
    # 0 is a legitimate value for the two settings below, so test against
    # None explicitly. The caller's threshold is forwarded instead of a
    # hard-coded -50.
    if silence_threshold is not None:
        signal = remove_silent(signal, silence_threshold)
    if loudness_dBFS is not None:
        signal = set_loudness(signal, loudness_dBFS)
    return signal
def collapse_whitespace(text: str) -> str:
    """Separate commas and periods into tokens and collapse whitespace.

    Every ``,`` and ``.`` is surrounded by spaces, the text is split on
    whitespace, and the tokens are re-joined with single spaces. If the last
    token is a period it is dropped.

    Args:
        text: The input string.

    Returns:
        The normalised string; ``""`` when ``text`` contains no tokens.
    """
    tokens = text.replace(",", " , ").replace(".", " . ").split()
    # Guard against empty / whitespace-only input before inspecting the
    # last token.
    if tokens and tokens[-1] == ".":
        tokens = tokens[:-1]
    return " ".join(tokens)
if __name__ == "__main__":
    # Command-line entry point: normalise every WAV file found in
    # --input_folder and write the result under the same file name to
    # --output_folder (or back into the input folder when none is given).
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_folder", type=str, required=True,
                        help="directly to dataset folder")
    parser.add_argument("--output_folder", type=str, default=None)
    parser.add_argument("-sr", type=int, default=22050)
    parser.add_argument("-silence_threshold", type=int, default=None)
    parser.add_argument("-loudness", type=int, default=-20)
    args = parser.parse_args()

    input_folder = args.input_folder
    if args.output_folder is not None:
        output_folder = args.output_folder
        os.makedirs(output_folder, exist_ok=True)
    else:
        # No output folder given: files are overwritten in place.
        output_folder = args.input_folder

    # os.listdir returns every entry; keep only regular files with a .wav
    # extension so sub-directories and other files do not crash from_wav.
    # Sorting gives a deterministic processing order.
    list_audio = sorted(
        name for name in os.listdir(input_folder)
        if name.lower().endswith(".wav")
        and os.path.isfile(os.path.join(input_folder, name))
    )

    for wav in tqdm(list_audio):
        signal = AudioSegment.from_wav(os.path.join(input_folder, wav))
        signal = normalize_signal(
            signal=signal,
            channels=1,
            sr=args.sr,
            silence_threshold=args.silence_threshold,
            loudness_dBFS=args.loudness
        )
        signal.export(os.path.join(output_folder, wav), format="wav")