-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrecorder.py
338 lines (286 loc) · 11.3 KB
/
recorder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# coding:utf-8
from expy import shared
# http://stackoverflow.com/questions/892199/detect-record-audio-in-python
from sys import byteorder
from array import array
from struct import pack
np = shared.np
def addSilence(snd_data, seconds):
    "Return 'snd_data' padded with 'seconds' (float) of silence at both the start and the end"
    # Number of zero samples to put on each side
    pad_len = int(seconds * shared.setting['sample_rate'])
    padded = array('h', [0]) * pad_len
    padded.extend(snd_data)
    padded.extend(array('h', [0]) * pad_len)
    return padded
# def trim(snd_data, threshold, side='both'):
# "Trim the blank spots at the start and end"
# def _trim(snd_data):
# snd_data0 = np.array([int(abs(i) > threshold*3) for i in snd_data])
# snd_started = False
# r = array('h')
# lasting = 4000
# for i in range(len(snd_data)):
# if not snd_started and snd_data0[i - lasting:i].sum() < (lasting / 20) and snd_data0[i:i + lasting].sum() > (lasting / 3):
# snd_started = True
# r.append(snd_data[i])
# elif snd_started:
# r.append(snd_data[i])
# return r
# if side == 'left':
# # Trim to the left
# snd_data = _trim(snd_data)
# elif side == 'right':
# # Trim to the right
# snd_data.reverse()
# snd_data = _trim(snd_data)
# snd_data.reverse()
# elif side == 'both':
# # Trim to the left
# snd_data = _trim(snd_data)
# # Trim to the right
# snd_data.reverse()
# snd_data = _trim(snd_data)
# snd_data.reverse()
# elif side == 'none':
# pass
# return snd_data
def trim(snd_data, onset_frame, end_frame, side='both'):
    '''
    Cut 'snd_data' down to the span between the given sample indexes (Python slice semantics).

    Parameters
    ----------
    snd_data: sequence
        The samples to trim (anything that supports slicing)
    onset_frame: int
        The index of the first sample to keep
    end_frame: int, or None
        The index one past the last sample to keep
    side: str (default: 'both')
        'left':  drop the samples before onset_frame
        'right': drop the samples from end_frame on
        'both':  keep snd_data[onset_frame:end_frame]
        'none' (or any other value): return snd_data unchanged

    Returns
    -------
    The requested slice of snd_data
    '''
    if side == 'left':
        # Open-ended slice so the final sample is kept (a ':-1' bound would drop it)
        return snd_data[onset_frame:]
    if side == 'right':
        return snd_data[:end_frame]
    if side == 'both':
        return snd_data[onset_frame:end_frame]
    # 'none' and unrecognised values leave the data untouched
    return snd_data
def measure(curFrame):
    '''
    Compute the zero-crossing count and the summed absolute amplitude of one frame.

    Parameters
    ----------
    curFrame: array-like
        The samples of a single frame

    Returns
    -------
    zcr: number
        The count of adjacent sample pairs whose product is <= 0 while the value falls by more than 0.02
    amp: number
        The sum of the absolute sample values
    '''
    frame = np.asarray(curFrame)
    if np.issubdtype(frame.dtype, np.integer):
        # Widen first: int16 products, differences and abs() wrap around silently,
        # which fakes or hides zero crossings and can make the amplitude negative.
        frame = frame.astype(np.int64)

    # Zero-crossing rate
    earlier = frame[:-1]
    later = frame[1:]
    sign_changes = (earlier * later) <= 0
    drops = (earlier - later) > 0.02
    zcr = np.sum(sign_changes & drops)

    # Short-time energy
    amp = np.sum(np.abs(frame))
    return zcr, amp
def environmentNoise(sampling_time, weights=(1.1,3,5,1.1,2,3), chunk=1024):
    '''
    Record the sound for a certain duration as the environment noise, and calculate its amplitude and zero-crossing rate.

    Parameters
    ----------
    sampling_time: number
        The duration of noise sampling (at least one frame is always sampled)
    weights: tuple (default: (1.1,3,5,1.1,2,3))
        (The weight of noise threshold of zero-crossing rate,
         The weight of low threshold of zero-crossing rate,
         The weight of high threshold of zero-crossing rate,
         The weight of noise threshold of sound amplitude,
         The weight of low threshold of sound amplitude,
         The weight of high threshold of sound amplitude)
    chunk: int (default: 1024)
        The frame size

    Returns
    -------
    zcr0: number
        The noise threshold of zero-crossing rate
    zcr1: number
        The low threshold of zero-crossing rate
    zcr2: number
        The high threshold of zero-crossing rate
    amp0: number
        The noise threshold of sound amplitude
    amp1: number
        The low threshold of sound amplitude
    amp2: number
        The high threshold of sound amplitude
    '''
    def calc_threshold():
        # Average the per-frame measures over the whole sampling period
        sr = shared.setting['sample_rate']
        # Always sample at least one frame: averaging zero frames would yield NaN thresholds
        frame_count = max(1, int(sampling_time * sr // chunk))

        zcrs = []
        amps = []
        stream = shared.pa.open(format=shared.pyaudio.paInt16, channels=1, rate=sr,
                                input=True,
                                frames_per_buffer=chunk)
        try:
            for _ in range(frame_count):
                # little endian, signed short
                snd_data = array('h', stream.read(chunk))
                if byteorder == 'big':
                    snd_data.byteswap()
                zcr, amp = measure(np.array(snd_data))
                zcrs.append(zcr)
                amps.append(amp)
        finally:
            # Release the input device even if a read fails part-way through
            stream.stop_stream()
            stream.close()
        return np.mean(zcrs), np.mean(amps)

    zcr_noise, amp_noise = calc_threshold()

    zcr0 = zcr_noise*weights[0]  # The noise threshold of zero-crossing rate
    zcr1 = zcr_noise*weights[1]  # The low threshold of zero-crossing rate
    zcr2 = zcr_noise*weights[2]  # The high threshold of zero-crossing rate
    amp0 = amp_noise*weights[3]  # The noise threshold of sound amplitude
    amp1 = amp_noise*weights[4]  # The low threshold of sound amplitude
    amp2 = amp_noise*weights[5]  # The high threshold of sound amplitude

    print("environment's noise level: (zcr %.3f, amp %.3f)" %(zcr_noise, amp_noise))
    return zcr0, zcr1, zcr2, amp0, amp1, amp2
'''https://github.com/halleytl/pyvad/blob/master/vad.py'''
def recordSound(vad_levels, rec_length_min=0, rec_length_max=None, sound_length_max=None, trim_side='both', feedback=False, chunk=1024, playing_track=None, blocking=True, path=''):
    '''
    Record sound from the microphone.

    Parameters
    ----------
    vad_levels: tuple
        (The noise threshold of zero-crossing rate,
         The low threshold of zero-crossing rate,
         The high threshold of zero-crossing rate,
         The noise threshold of sound amplitude,
         The low threshold of sound amplitude,
         The high threshold of sound amplitude)
    rec_length_min: number(s) (default: 0)
        The second count of minimal recording time
    rec_length_max: number(s), or None (default)
        The second count of maximal recording time
    sound_length_max: number(s), or None (default)
        The second count of maximal sound length
    trim_side: str (default: 'both')
        The trimming way of recorded sound
        Options: 'both', 'left', 'right', 'none'
    feedback: True, or False(default)
        Whether the sound feedbacks while recording
    chunk: int (default: 1024)
        The frame size
    playing_track: int, str, or None(default)
        The name of current track
    blocking: True(default), or False
        Whether the experiment procedure would be blocked by the current function
    path: str (default: '')
        The file path of target sound. If the path is undefined(''), the sound won't be recorded.

    Returns
    -------
    If recorded:
        rec_data: np.array
            The recorded sound array in stereo
    If recorded nothing:
        []
    If not blocking:
        None
    '''
    # NOTE(review): every non-str track (an int included) is replaced by a random id here,
    # although the docstring lists int as accepted - confirm this is intended.
    if not isinstance(playing_track, str):
        playing_track = np.random.randint(99999)
    shared.changeState(playing_track, True)

    sr = shared.setting['sample_rate']
    # Convert the limits given in seconds to sample counts
    rec_length_min = rec_length_min * sr
    if rec_length_max:
        rec_length_max *= sr
    if sound_length_max:
        sound_length_max *= sr

    zcr0, zcr1, zcr2, amp0, amp1, amp2 = vad_levels

    maxsilence = 1.5 * sr // chunk  # The longest silence (in frames) tolerated inside a sound
    minlen = 0.2 * sr // chunk  # The shortest sound (in frames); anything shorter is treated as noise

    def captureFrames():
        # Read frames from the microphone until a stop condition is met.
        # Returns (samples, onset_frame, end_frame); end_frame is None when no end was detected.
        count = 0  # frames counted toward the current sound
        silence = 0  # quiet frames seen since the sound was entered
        sound_status = 0  # label of the current frame: 0 silence, 1 possible sound, 2 sound, 3 sound ended
        speech_status = 0  # detector state: starts at 0 (silence) and is only ever switched to 2 (in a sound)
        onset_detected = False
        onset_frame = 0
        # None (not -1) so the trimming slice stays open-ended and keeps the last sample
        end_frame = None
        status_record = []
        rec_data = array('h')

        stream = shared.pa.open(format=shared.pyaudio.paInt16, channels=1, rate=sr,
                                input=True, output=True)
        try:
            while True:
                if blocking:
                    shared.win.dispatch_events()

                # When to stop recording
                if (rec_length_max and len(rec_data) > rec_length_max) or \
                        (sound_length_max and (len(rec_data)-onset_frame) > sound_length_max) or \
                        (not onset_detected and len(rec_data) > rec_length_min) or \
                        shared.states[playing_track] == False:
                    break

                # little endian, signed short
                s = stream.read(chunk)
                if feedback:
                    stream.write(s, chunk)
                snd_data = array('h', s)
                if byteorder == 'big':
                    snd_data.byteswap()
                rec_data.extend(snd_data)

                zcr, amp = measure(np.array(snd_data))

                # 0 = silence, 1 = possibly starting
                if speech_status in [0, 1]:
                    # Definitely entered a sound
                    if amp > amp2 or zcr > zcr2:
                        sound_status = 2
                        speech_status = 2
                        silence = 0
                        count += 1
                        onset_detected = True
                        # Walk back to the most recent silent frame and start the sound there.
                        # NOTE(review): the scan begins at the second-to-last recorded frame, so the
                        # frame right before this one is never inspected - confirm intended.
                        for idx in range(len(status_record)-1, 0, -1):
                            if status_record[idx-1] == 0:
                                onset_frame = (idx-1)*chunk
                                break
                    # Possibly inside a sound
                    elif (amp > amp0 and zcr > zcr0) or (amp > amp1 or zcr > zcr1):
                        sound_status = 1
                        count += 1
                    # Silence
                    else:
                        sound_status = 0
                        count = 0
                # 2 = inside a sound
                elif speech_status == 2:
                    # Still inside the sound
                    if (amp > amp0 and zcr > zcr0) or (amp > amp1 or zcr > zcr1):
                        count += 1
                        sound_status = 2
                    # The sound may be ending
                    else:
                        silence += 1
                        # The silence is not long enough yet; the sound has not ended
                        if silence < maxsilence:
                            count += 1
                            sound_status = 2
                        # The sound is too short and is treated as noise
                        elif count < minlen:
                            sound_status = 0
                            silence = 0
                            count = 0
                        # The sound has ended
                        else:
                            end_frame = (len(status_record) - silence)*chunk
                            sound_status = 3

                status_record.append(sound_status)
                if sound_status == 3:
                    break
        finally:
            # Release the audio device even if reading or writing fails
            stream.stop_stream()
            stream.close()
        return rec_data, onset_frame, end_frame

    def recordSoundSub():
        try:
            rec_data, onset_frame, end_frame = captureFrames()
        finally:
            # Clear the track flag on every exit path so it never stays stuck on True
            shared.changeState(playing_track, False)

        rec_data = trim(rec_data, onset_frame, end_frame, trim_side)
        if sound_length_max:
            rec_data = rec_data[:int(sound_length_max)]

        if len(rec_data)==0:
            print('Recorded nothing')
            return []

        # Duplicate the mono samples into two identical columns
        rec_data = np.require(np.tile(rec_data, (2, 1)).T, requirements='C')

        if path!='':
            shared.librosa.output.write_wav(path, rec_data, sr, norm=False)
        return rec_data

    if blocking:
        return recordSoundSub()
    # Non-blocking: record on a background thread and return None right away
    td = shared.threading.Thread(target=recordSoundSub)
    td.start()