Skip to content

Commit 0f6e1b4

Browse files
committed
Dataset preprocessing
ㄴ debugging
1 parent 9a8ad53 commit 0f6e1b4

File tree

1 file changed

+11
-8
lines changed

1 file changed

+11
-8
lines changed

preprocess_dataset.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77
import argparse
88

9-
sr = 22050
9+
sr = 16000
1010
max_wav_value=32768.0
1111
trim_fft_size = 1024
1212
trim_hop_size = 256
@@ -15,7 +15,7 @@
1515
trim_top_db = 23
1616
skip_len = 14848
1717

18-
def preprocess_audio(file_list, silence_audio_size, pre_emphasis=True):
18+
def preprocess_audio(file_list, silence_audio_size, pre_emphasis=False):
1919
for F in file_list:
2020
f = open(F, encoding='utf-8')
2121
R = f.readlines()
@@ -27,10 +27,10 @@ def preprocess_audio(file_list, silence_audio_size, pre_emphasis=True):
2727
data, sampling_rate = librosa.core.load(wav_file, sr)
2828
data = data / np.abs(data).max() *0.999
2929
data_= librosa.effects.trim(data, top_db= trim_top_db, frame_length=trim_fft_size, hop_length=trim_hop_size)[0]
30-
data_ = data_*max_wav_value
3130
if (pre_emphasis):
3231
data_ = np.append(data_[0], data_[1:] - 0.97 * data_[:-1])
3332
data_ = data_ / np.abs(data_).max() * 0.999
33+
data_ = data_ * max_wav_value
3434
data_ = np.append(data_, [0.]*silence_audio_size)
3535
data_ = data_.astype(dtype=np.int16)
3636
write(wav_file, sr, data_)
@@ -58,23 +58,26 @@ def remove_short_audios(file_name):
5858
if __name__ == "__main__":
5959
"""
6060
usage
61-
python preprocess_audio.py -f=filelists/ljs_audio_text_test_filelist.txt,filelists/ljs_audio_text_train_filelist.txt,filelists/ljs_audio_text_val_filelist.txt -s=5 -p -r
61+
python preprocess_dataset.py -f=metadata.csv -s=5 -t -p -r
62+
python preprocess_dataset.py -f=metadata.csv
6263
"""
6364
parser = argparse.ArgumentParser()
6465
parser.add_argument('-f', '--file_list', type=str,
65-
help='file list to preprocess')
66+
help='Metadata file list to preprocess')
6667
parser.add_argument('-s', '--silence_padding', type=int, default=0,
6768
help='Adding silence padding at the end of each audio, silence audio size is hop_length * silence padding')
6869
parser.add_argument('-p', '--pre_emphasis', action='store_true',
69-
help="do or don't do pre_emphasis")
70+
help="Doing pre_emphasis")
71+
parser.add_argument('-t', '--trimming', action='store_true',
72+
help="Doing trimming audios")
7073
parser.add_argument('-r', '--remove_short_audios',action='store_true',
71-
help="do or don't remove short audios")
74+
help="Removing short audios in metadata file")
7275
args = parser.parse_args()
7376
file_list = args.file_list.split(',')
7477
silence_audio_size = trim_hop_size * args.silence_padding
7578
remove_short_audios = args.remove_short_audios
7679

77-
preprocess_audio(file_list, silence_audio_size)
80+
preprocess_audio(file_list, silence_audio_size, args.pre_emphasis)
7881

7982
if(remove_short_audios):
8083
for f in file_list:

0 commit comments

Comments
 (0)