6
6
import numpy as np
7
7
import argparse
8
8
9
- sr = 22050
9
+ sr = 16000
10
10
max_wav_value = 32768.0
11
11
trim_fft_size = 1024
12
12
trim_hop_size = 256
15
15
trim_top_db = 23
16
16
skip_len = 14848
17
17
18
- def preprocess_audio (file_list , silence_audio_size , pre_emphasis = True ):
18
+ def preprocess_audio (file_list , silence_audio_size , pre_emphasis = False ):
19
19
for F in file_list :
20
20
f = open (F , encoding = 'utf-8' )
21
21
R = f .readlines ()
@@ -27,10 +27,10 @@ def preprocess_audio(file_list, silence_audio_size, pre_emphasis=True):
27
27
data , sampling_rate = librosa .core .load (wav_file , sr )
28
28
data = data / np .abs (data ).max () * 0.999
29
29
data_ = librosa .effects .trim (data , top_db = trim_top_db , frame_length = trim_fft_size , hop_length = trim_hop_size )[0 ]
30
- data_ = data_ * max_wav_value
31
30
if (pre_emphasis ):
32
31
data_ = np .append (data_ [0 ], data_ [1 :] - 0.97 * data_ [:- 1 ])
33
32
data_ = data_ / np .abs (data_ ).max () * 0.999
33
+ data_ = data_ * max_wav_value
34
34
data_ = np .append (data_ , [0. ]* silence_audio_size )
35
35
data_ = data_ .astype (dtype = np .int16 )
36
36
write (wav_file , sr , data_ )
@@ -58,23 +58,26 @@ def remove_short_audios(file_name):
58
58
if __name__ == "__main__" :
59
59
"""
60
60
usage
61
- python preprocess_audio.py -f=filelists/ljs_audio_text_test_filelist.txt,filelists/ljs_audio_text_train_filelist.txt,filelists/ljs_audio_text_val_filelist.txt -s=5 -p -r
61
+ python preprocess_dataset.py -f=metadata.csv -s=5 -t -p -r
62
+ python preprocess_dataset.py -f=metadata.csv
62
63
"""
63
64
parser = argparse .ArgumentParser ()
64
65
parser .add_argument ('-f' , '--file_list' , type = str ,
65
- help = 'file list to preprocess' )
66
+ help = 'Metadata file list to preprocess' )
66
67
parser .add_argument ('-s' , '--silence_padding' , type = int , default = 0 ,
67
68
help = 'Adding silence padding at the end of each audio, silence audio size is hop_length * silence padding' )
68
69
parser .add_argument ('-p' , '--pre_emphasis' , action = 'store_true' ,
69
- help = "do or don't do pre_emphasis" )
70
+ help = "Doing pre_emphasis" )
71
+ parser .add_argument ('-t' , '--trimming' , action = 'store_true' ,
72
+ help = "Doing trimming audios" )
70
73
parser .add_argument ('-r' , '--remove_short_audios' ,action = 'store_true' ,
71
- help = "do or don't remove short audios " )
74
+ help = "Removing short audios in metadata file " )
72
75
args = parser .parse_args ()
73
76
file_list = args .file_list .split (',' )
74
77
silence_audio_size = trim_hop_size * args .silence_padding
75
78
remove_short_audios = args .remove_short_audios
76
79
77
- preprocess_audio (file_list , silence_audio_size )
80
+ preprocess_audio (file_list , silence_audio_size , args . pre_emphasis )
78
81
79
82
if (remove_short_audios ):
80
83
for f in file_list :
0 commit comments