-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinch_dataset_gen.py
129 lines (109 loc) · 4.66 KB
/
finch_dataset_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import sys;
import os;
import glob;
import numpy as np;
import librosa as lb;
import soundfile as sf;
import pandas as pd;
import random as rand;
import math;
from sklearn.utils import shuffle;
def GetBackgroundDistribution(n_files, count):
bg = np.array(np.zeros(n_files)).astype(int);
i = 0;
while i < 3000:
idx = rand.randint(0, 49-1);
bg[idx] += 1;
i += 1;
return bg;
def prepare_sample(sample, input_length):
if len(sample) >= input_length:
sample = sample[0:input_length];
else:
sample = np.pad(sample, (0, input_length - len(sample)), 'constant');
return sample;
if __name__ == '__main__':
bg_dist = GetBackgroundDistribution(49, 3000);
mainDir = os.path.dirname(os.path.abspath('.'));
finch_path = os.path.join(mainDir, 'datasets/finch/');
label_path = finch_path+'btf_labels/';
audio_path = "/Users/mmoh0027/Desktop/Finch/OriginalRecordings/{}.wav";
input_length = 13440; #0.672 * 20000
btf_samples = [];
other_samples = [];
resample_at = 20000;
slice_length = 0.672;
i = 0;
for txt_file_path in sorted(glob.glob(os.path.join(label_path, '*.txt'))):
df = pd.read_csv(txt_file_path, sep='\t', usecols=[3,4,5,6], header=0, names=['start', 'end', 'low', 'high']);
df.dropna(axis=0, how='any', inplace=True);
# print(file);
file_name = os.path.split(txt_file_path)[1].split('.Table')[0];
# print(name.split('.Table')[0]);
# print(file_name);
f = sf.SoundFile(audio_path.format(file_name));
duration = len(f)/f.samplerate;
full_record_slices = np.arange(0, duration, slice_length);
# print(len(slices));
if len(df) > 0:
for idx, r in df.iterrows():
sound = lb.load(path = audio_path.format(file_name), sr = resample_at, offset= r['start'], duration = slice_length);
btf_samples.append(prepare_sample(sound[0], input_length));
#Removing this slice location from the full record slices
rms = math.floor(r['start']);
rme = math.ceil(r['end']);
full_record_slices = [s for s in full_record_slices if s<rms or s>rme];
bg_count = bg_dist[i];
bg_slices = rand.sample(list(full_record_slices), bg_count);
for bs in bg_slices:
sound = lb.load(path = audio_path.format(file_name), sr = resample_at, offset= bs, duration = slice_length);
other_samples.append(prepare_sample(sound[0], input_length));
i += 1;
print(len(btf_samples));
print(btf_samples[0]);
print(len(other_samples));
print(other_samples[0]);
#shuffle the Samples
btf_samples = shuffle(btf_samples, random_state=0);
other_samples = shuffle(other_samples, random_state=0);
#normalize the samples with 32768.0
btf_samples = np.array(btf_samples) / 32768.0;
other_samples = np.array(other_samples) / 32768.0;
# print(btf_samples[0]);
# exit();
#Create folds
btf_per_fold = len(btf_samples)//5;
btf_folds = [0, btf_per_fold, btf_per_fold*2, btf_per_fold*3, btf_per_fold*4];
print(btf_folds);
others_per_fold = len(other_samples)//5;
others_folds = [0, others_per_fold, others_per_fold*2, others_per_fold*3, others_per_fold*4];
print(others_folds);
finch_dataset = {};
for fold in range(1,6):
finch_dataset['fold{}'.format(fold)] = {};
finch_sounds = [];
finch_labels = [];
#BTF
start_index = btf_folds[fold-1];
end_index = len(btf_samples) if fold==5 else btf_folds[fold];
finch_sounds.extend(btf_samples[start_index:end_index]);
print(len(finch_sounds))
finch_labels.extend([0] * (end_index-start_index));
print(len(finch_labels))
print('BTF Indexes: {} - {}'.format(start_index, end_index));
#Others
start_index = others_folds[fold-1];
end_index = len(other_samples) if fold==5 else others_folds[fold];
finch_sounds.extend(other_samples[start_index:end_index]);
print(len(finch_sounds))
finch_labels.extend([1] * (end_index-start_index));
print(len(finch_labels))
print('Other Indexes: {} - {}'.format(start_index, end_index));
print('Finished Fold {}'.format(fold));
print('Samples: {}'.format(len(finch_sounds)));
print('Labels: {}'.format(len(finch_labels)));
finch_sounds, finch_labels = shuffle(finch_sounds,finch_labels, random_state=0);
finch_dataset['fold{}'.format(fold)]['sounds'] = finch_sounds;
finch_dataset['fold{}'.format(fold)]['labels'] = finch_labels;
# exit();
np.savez(finch_path+'wav20.npz', **finch_dataset);