-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfindA_withkdtree.py
209 lines (159 loc) · 6.92 KB
/
findA_withkdtree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import os
import numpy as np
from tqdm import tqdm
from audio import hparams as audio_hparams
from audio import load_wav, wav2unnormalized_mfcc, wav2normalized_db_mel, wav2normalized_db_spec
from audio import write_wav, normalized_db_mel2wav, normalized_db_spec2wav
# Small offset added before truncating looked-up indices to int in
# ppg_project (presumably guards against values stored as floats that sit
# marginally below the intended integer -- confirm against the .npy writer).
eps = 1e-6

# Audio hyper-parameters (16 entries). This is a local copy that must stay
# identical to the settings exported by the audio module.
hparams = {
    'sample_rate': 16000,
    'preemphasis': 0.97,
    'n_fft': 400,
    'hop_length': 80,
    'win_length': 400,
    'num_mels': 80,
    'n_mfcc': 13,
    'window': 'hann',
    'fmin': 30.,
    'fmax': 7600.,
    'ref_db': 20,
    'min_db': -80.0,
    'griffin_lim_power': 1.5,
    'griffin_lim_iterations': 60,
    'silence_db': -28.0,
    'center': True,
}
# Fail at import time if the local copy has drifted from audio.hparams.
assert hparams == audio_hparams

# Inputs, Chinese corpus: list of utterance names, directory of per-utterance
# PPG .npy files, directory of per-utterance linear-spectrogram .npy files.
cn_raw_list_path = '/datapool/home/hujk17/chenxueyuan/DataBaker_Bilingual_CN/meta_good.txt'
cn_raw_ppg_path = '/datapool/home/hujk17/chenxueyuan/DataBaker_Bilingual_CN/ppg_from_generate_batch'
cn_raw_linear_dir = '/datapool/home/hujk17/chenxueyuan/DataBaker_Bilingual_CN/spec_5ms_by_audio_2'

# Inputs, English corpus: same three kinds of paths.
en_raw_list_path = '/datapool/home/hujk17/chenxueyuan/LJSpeech-1.1/meta_good.txt'
en_raw_ppg_path = '/datapool/home/hujk17/chenxueyuan/LJSpeech-1.1/ppg_from_generate_batch'
en_raw_linear_dir = '/datapool/home/hujk17/chenxueyuan/LJSpeech-1.1/spec_5ms_by_audio_2'

# Input: array loaded in main() and used as the lookup table that maps a
# global English frame index to a Chinese frame index.
en_final_cn_log_path = '/datapool/home/hujk17/chenxueyuan/en_final_cn_log_withkdtree'
en_final_cn_idx_path = os.path.join(en_final_cn_log_path, 'en_final_cn_idx_withkdtree.npy')

# Output: directory the synthesized wav files are written to.
projected_wav_dir = '/datapool/home/hujk17/chenxueyuan/projected_wavs_16000_withkdtree'
# exist_ok avoids the check-then-create race of testing os.path.exists first.
os.makedirs(projected_wav_dir, exist_ok=True)

# Expected second dimension of every linear-spectrogram array
# (checked in get_single_data_pair).
Linear_DIM = 201
# Expected second dimension of every PPG array, i.e. the size of one PPG frame.
PPG_DIM = 345

# Default number of entries read from the English / Chinese list files.
en_all_cnt = 1
cn_all_cnt = 5000
def en_text2list(file, max_count=None):
    """Read the leading entries of the English utterance list file.

    Every line of ``file`` is stripped of surrounding whitespace and used
    unchanged as one entry (callers treat the entries as the base names of
    the per-utterance ``.npy`` files).

    Args:
        file: Path of a text file with one entry per line.
        max_count: Maximum number of entries to return. When ``None`` the
            module-level ``en_all_cnt`` is used. A value of 0 or less yields
            an empty list.

    Returns:
        A list with at most ``max_count`` stripped lines, in file order.
    """
    limit = en_all_cnt if max_count is None else max_count
    en_file_list = []
    with open(file, 'r') as f:
        # Iterate the file lazily: only the needed prefix is read, instead of
        # loading every line with readlines() first.
        for i, line in enumerate(f):
            if i >= limit:
                break
            en_file_list.append(line.strip())
    # The list never holds more than ``limit`` items, so [:3] is the preview.
    print('en len:', len(en_file_list), 'en:', en_file_list[:3])
    return en_file_list
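# Example entry kept for reference: an utterance id with prosody-annotated
# text, followed by its pinyin transcription (presumably the raw DataBaker
# metadata layout; cn_text2list itself treats every line as one file name).
# 000001 那些#1庄稼#1田园#2在#1果果#1眼里#2感觉#1太亲切了#4
# na4 xie1 zhuang1 jia5 tian2 yuan2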
def cn_text2list(file, max_count=None):
    """Read the leading entries of the Chinese utterance list file.

    Every line of ``file`` is stripped of surrounding whitespace and used
    unchanged as one entry (callers treat the entries as the base names of
    the per-utterance ``.npy`` files).

    Args:
        file: Path of a text file with one entry per line.
        max_count: Maximum number of entries to return. When ``None`` the
            module-level ``cn_all_cnt`` is used. A value of 0 or less yields
            an empty list.

    Returns:
        A list with at most ``max_count`` stripped lines, in file order.
    """
    limit = cn_all_cnt if max_count is None else max_count
    cn_file_list = []
    with open(file, 'r') as f:
        # Iterate the file lazily: only the needed prefix is read and
        # stripped, instead of materializing every line up front.
        for i, line in enumerate(f):
            if i >= limit:
                break
            cn_file_list.append(line.strip())
    # The list never holds more than ``limit`` items, so [:3] is the preview.
    print('cn len:', len(cn_file_list), 'cn:', cn_file_list[:3])
    return cn_file_list
def get_single_data_pair(fname, ppgs_dir, linears_dir):
    """Load the PPG and linear-spectrogram arrays stored for one utterance.

    Args:
        fname: Base name of the utterance; ``fname + '.npy'`` is looked up in
            both directories.
        ppgs_dir: Existing directory containing the PPG ``.npy`` files.
        linears_dir: Existing directory containing the linear-spectrogram
            ``.npy`` files.

    Returns:
        Tuple ``(ppg, linear)`` of the two arrays as returned by ``np.load``.

    Raises:
        AssertionError: If either directory does not exist, if the two arrays
            differ in their first dimension, or if their second dimensions
            are not ``PPG_DIM`` and ``Linear_DIM`` respectively.
    """
    for data_dir in (ppgs_dir, linears_dir):
        assert os.path.isdir(data_dir)

    npy_name = fname + '.npy'
    ppg = np.load(os.path.join(ppgs_dir, npy_name))
    linear = np.load(os.path.join(linears_dir, npy_name))

    # Both arrays must describe the same number of frames ...
    assert ppg.shape[0] == linear.shape[0], fname + ' 维度不相等'
    # ... and each frame must have the configured feature width.
    assert ppg.shape[1] == PPG_DIM
    assert linear.shape[1] == Linear_DIM
    return ppg, linear
# def for_loop_en(): #得到每一帧的英文ppg列表
# en_file_list = en_text2list(file=en_raw_list_path)
# en_ppgs_ls = []
# for f in tqdm(en_file_list):
# wav_ppgs, linears = get_single_data_pair(f, ppgs_dir=en_raw_ppg_path, linears_dir=en_raw_linear_dir)
# # 需要确认下
# en_ppgs_ls.extend(list(wav_ppgs))
# # 或者
# # for i in range(wav_ppgs.shape[0]):
# # # ppg[i]
# # en_ppgs_ls.append(wav_ppgs[i])
# # # find_jin(ppg[i])
# # 只考虑第一句话
# print('en now only sentence:', f)
# # break
# # shuffule
# # wav_id, frame_id
# return en_ppgs_ls
def for_loop_cn():
    """Collect the per-frame PPG and linear rows of the Chinese utterances.

    The utterance names come from ``cn_text2list(cn_raw_list_path)``; for each
    one the array pair is loaded with ``get_single_data_pair`` and its rows
    are appended, in order, to two flat lists.

    Returns:
        Tuple ``(ppg_frames, linear_frames)``. Both lists are filled in
        lockstep, so the same position in each refers to the same frame.
    """
    ppg_frames, linear_frames = [], []
    for utt_name in tqdm(cn_text2list(file=cn_raw_list_path)):
        utt_ppg, utt_linear = get_single_data_pair(
            utt_name,
            ppgs_dir=cn_raw_ppg_path,
            linears_dir=cn_raw_linear_dir,
        )
        # Extending with the array iterates its first axis: one row per frame.
        ppg_frames.extend(utt_ppg)
        linear_frames.extend(utt_linear)
    return ppg_frames, linear_frames
# id list
def ppg_project(e_ppg_id, project_array):
    """Translate frame ids through a lookup array.

    Args:
        e_ppg_id: Iterable of indices into ``project_array`` (in main() these
            are global English frame ids counted from 0).
        project_array: Indexable lookup table; ``project_array[i]`` is the
            numeric target id for source id ``i``.

    Returns:
        List of Python ints, one per input id, in input order.
    """
    # eps is added before truncation; presumably this protects ids stored as
    # floats that sit marginally below the intended integer.
    projected = [int(project_array[src] + eps) for src in e_ppg_id]
    print('cn num from 0:', projected[:10])
    return projected
def main():
    """Resynthesize each English utterance from projected Chinese frames.

    For every listed English utterance, each of its frames is mapped through
    the array loaded from ``en_final_cn_idx_path`` to a Chinese frame index.
    The Chinese linear-spectrogram rows at those indices are stacked and
    converted to a wav, and the utterance's own linear spectrogram is
    converted as well. Both wavs are written to ``projected_wav_dir``.
    """
    print('start')
    en_file_list = en_text2list(file=en_raw_list_path)
    print('en_file_list is:', en_file_list)

    # Only the Chinese linear frames are needed here; the PPG frames that
    # for_loop_cn also returns are not used.
    _, cn_linear_l = for_loop_cn()
    en_final_cn_idx = np.load(en_final_cn_idx_path)

    # English frame ids are global: they keep counting across utterances.
    frame_offset = 0
    for utt_name in tqdm(en_file_list):
        wav_ppgs, linears = get_single_data_pair(
            utt_name,
            ppgs_dir=en_raw_ppg_path,
            linears_dir=en_raw_linear_dir,
        )
        n_frames = wav_ppgs.shape[0]
        e_ppg_id = list(range(frame_offset, frame_offset + n_frames))
        frame_offset += n_frames
        print('en id from 0:', e_ppg_id[:10])

        # Chinese frame ids, also counted from 0.
        c_ppg_id_projected = ppg_project(e_ppg_id, en_final_cn_idx)

        # Gather the linear-spectrogram row of every projected Chinese frame.
        projected_linears = np.asarray(
            [cn_linear_l[cn_idx] for cn_idx in c_ppg_id_projected]
        )

        projected_wav_path = os.path.join(
            projected_wav_dir, utt_name + '_cn_linear_projected.wav'
        )
        write_wav(projected_wav_path, normalized_db_spec2wav(projected_linears))

        original_wav_path = os.path.join(
            projected_wav_dir, utt_name + '_en_linear_original.wav'
        )
        write_wav(original_wav_path, normalized_db_spec2wav(linears))


if __name__ == '__main__':
    main()