diff --git a/acdc.py b/acdc.py
new file mode 100644
index 0000000..535beb2
--- /dev/null
+++ b/acdc.py
@@ -0,0 +1,91 @@
+import glob, os
+from variables import *
+
+# Reads .wav files in the training_data directory, processes them, and outputs a .tdata file
+# with the aggregated training data that can be used by the model for training later on
+def prepare_training_data():
+    print("==PREPARE TRAINING DATA==")
+    from training_data import TrainingData
+    training_data = TrainingData()
+    training_data.prepare()
+    training_data.save(in_models_dir=True)
+
+# Trains the Keras/TensorFlow model by loading the .tdata file from disk, fitting, saving,
+# and finally evaluating the resulting model's accuracy
+def train():
+    print("==TRAIN ALL MODELS==")
+    from model import Model
+    model = Model()
+    model.train()
+    model.save(in_models_dir=True)
+    Model.evaluate()
+
+def evaluate():
+    print("==EVALUATE MODELS==")
+    from model import Model
+    Model.evaluate()
+
+# Processes recordings in the recordings folder using the saved Keras model and outputs
+# call labels in .csv format and .txt format (tab-delimited, Audacity-readable) into the
+# results directory. A new results sub-directory is created within 'results' on each run,
+# named according to the date and time of the run, like this:
+#     [YYYYMMDD]_[HHMMSS]_[recording filename]
+# It will process .wav files in the recordings directory regardless of whether they have
+# been processed before, but will store unique results files for each run
+def process():
+    print("==PROCESS RECORDINGS==")
+    from scanner import Scanner
+    from exporter import Exporter
+    from recording import Recording
+    home_dir = os.getcwd()
+    os.chdir(Vars.RECORDINGS_DIR)
+    recordings = []
+    wavefiles = glob.glob('*.wav')
+    for wavefile in wavefiles:
+        recordings.append(Recording(wavefile))
+    os.chdir(home_dir)
+    print('processing the following recordings: ' + str(wavefiles))
+    # load the model once and share it across the per-recording Scanner instances
+    model = Scanner().model
+    for recording in recordings:
+        print(' ')
+        print('processing ' + recording.file)
+        scanner = Scanner(preload=False)
+        scanner.model = model
+        exporter = Exporter()
+        scanner.process(recording)
+        exporter.process(recording)
+        print(' ')
+
+# List all command line interface options
+def shortcuts_all():
+    sc = [
+        ('prepare training data', prepare_training_data),
+        ('train models', train),
+        ('process recordings', process),
+        ('evaluate models', evaluate),
+        ('exit', None)
+    ]
+    return sc
+
+# Controller function for the command line interface
+def controller():
+    shortcuts = shortcuts_all()
+
+    while True:
+        print(' ')
+        print("==ANIMAL CALL DETECTION AND CLASSIFICATION (ACDC)==")
+        for i in range(len(shortcuts)):
+            print(str(i) + ') ' + shortcuts[i][0])
+        selection = input('enter number for the action you would like to perform: ')
+        print(' ')
+        # guard against non-numeric or out-of-range input
+        try:
+            selection = int(selection)
+        except ValueError:
+            print('invalid selection')
+            continue
+        if not 0 <= selection < len(shortcuts):
+            print('invalid selection')
+            continue
+
+        if shortcuts[selection][0] == 'exit':
+            break
+
+        shortcuts[selection][1]()
+
+if __name__ == "__main__":
+    controller()
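The same pipeline stages can also be driven without the interactive menu. A minimal sketch, assuming the directories from variables.py are in place; the call order mirrors the menu options and the script itself is illustrative, not part of this patch:

```python
# hypothetical batch driver; runs the three stages acdc.py exposes via its menu
from acdc import prepare_training_data, train, process

prepare_training_data()   # aggregate training .wav files into the .tdata file
train()                   # fit, save, and evaluate the Keras model
process()                 # scan recordings/ and export labels to results/
```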
diff --git a/exporter.py b/exporter.py
new file mode 100644
index 0000000..3c05b63
--- /dev/null
+++ b/exporter.py
@@ -0,0 +1,30 @@
+import os, datetime
+import numpy as np
+
+from variables import *
+from results import *
+
+class Exporter:
+    def __init__(self):
+        self.recording = None
+        self.folder_name = None
+
+    # Takes in a recording, creates a new timestamped folder in the results directory to
+    # hold all results files, then creates a Results object and calls on it to export the
+    # corresponding .csv and Audacity label files
+    def process(self, recording):
+        print('exporting results for ' + recording.file)
+        self.recording = recording
+        self.create_folder()
+        results = Results(self.folder_name, self.recording)
+        results.export_csv(to_results_dir=True)
+        results.export_Audacity(to_results_dir=True)
+
+    def create_folder(self):
+        home_dir = os.getcwd()
+        os.chdir(Vars.RESULTS_DIR)
+        timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
+        self.folder_name = timestamp + '_' + self.recording.file
+        os.makedirs(self.folder_name)
+        print('created folder ' + self.folder_name)
+        os.chdir(home_dir)
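For reference, the per-run folder naming produced by create_folder can be previewed on its own; a small sketch, where 'site7.wav' is a placeholder recording name:

```python
import datetime

# yields e.g. '20240311_142501_site7.wav' - the name of the per-run results folder
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
print(timestamp + '_' + 'site7.wav')
```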
diff --git a/filters.py b/filters.py
new file mode 100644
index 0000000..74e59c5
--- /dev/null
+++ b/filters.py
@@ -0,0 +1,211 @@
+import cv2, copy, random
+import numpy as np
+from scipy.signal import spectrogram
+from tqdm import tqdm
+
+from variables import *
+
+# Helper functions and filters that are used commonly in the rest of the codebase
+class Filters:
+    # Given raw data and a sample rate, creates a spectrogram image and time series
+    # information for the spectrogram columns
+    def create_spectrogram(data, sample_rate):
+        data = np.float32(data) * Vars.VOLUME_AMP_MULTIPLE
+        data = np.int16(np.clip(data, -32768, 32767))
+        if len(data) / float(sample_rate) > 60:
+            return None
+        f, t, spec = spectrogram(data,
+                                 fs=float(sample_rate),
+                                 window=Vars.WINDOW,
+                                 nperseg=Vars.NPERSEG,
+                                 noverlap=Vars.NOVERLAP)
+        lowcut_index = np.searchsorted(f, Vars.LOWCUT)
+        highcut_index = np.searchsorted(f, Vars.HIGHCUT)
+        spec = spec[lowcut_index:highcut_index, :]
+
+        # floor zero power values to avoid log10(0)
+        if np.min(spec) == 0.0:
+            spec[spec == 0.0] = 0.0001
+        spec = np.log10(spec)
+
+        # clip the log-power range, apply a power curve, and rescale to 0..1
+        spec = np.clip(spec, Vars.SPECTROGRAM_RAW_LOW, Vars.SPECTROGRAM_RAW_HIGH)
+        spec = np.add(spec, -1*Vars.SPECTROGRAM_RAW_LOW)
+        spec = np.power(spec, Vars.SPECTROGRAM_POWER_FACTOR)
+        spec = np.divide(spec, (Vars.SPECTROGRAM_RAW_HIGH-Vars.SPECTROGRAM_RAW_LOW)**Vars.SPECTROGRAM_POWER_FACTOR)
+        spec = np.flipud(spec)
+        spec = cv2.resize(spec, (len(spec[0]), Vars.SPECTROGRAM_HEIGHT))
+        return spec
+
+    # Splits data from a recording object into overlapping spectrogram segments
+    def segmentize_data(rec):
+        segment_size = int(round(rec.sample_rate * Vars.SEGMENT_LENGTH))
+        step_size = int(round(rec.sample_rate * Vars.SEGMENT_STEP))
+
+        data = np.pad(rec.data, pad_width=(segment_size-step_size), mode='constant', constant_values=0)
+
+        segments = []
+        s = 0
+        e = segment_size
+        while e <= len(data):
+            segment_data = data[s:e]
+            spec = Filters.create_spectrogram(segment_data, rec.sample_rate)
+            if Filters.simple_check(spec):
+                segments.append(spec)
+            s += step_size
+            e += step_size
+
+        return segments
+
+    # Resizes the input spectrogram to be a square
+    def squarify(x):
+        return cv2.resize(x, (Vars.SQUARIFY_SIZE, Vars.SQUARIFY_SIZE))
+
+    # Rescales spectrogram image data from the range 0..1 to -1..1 to better fit the neural network
+    def rescale(x):
+        return (x*2)-1
+
+    # Adds a border around the spectrogram for cleaner visualization
+    def border(x):
+        x = np.pad(x, pad_width=1, mode='constant', constant_values=1)
+        return x
+
+    # Morphological operation to clean the spectrogram image
+    # def morph_clean(x):
+    #     x = cv2.morphologyEx(x, cv2.MORPH_OPEN, Vars.MORPH_CLEAN_KERNEL)
+    #     return x
+
+    # Center spectrogram image by centroid
+    # def center(x):
+    #     if np.sum(x) == 0:
+    #         return x
+    #     M = cv2.moments(x)
+    #     cx = int(M['m10']/M['m00'])
+    #     cy = int(M['m01']/M['m00'])
+    #     (height, width) = np.shape(x)
+    #     shiftx = round(width/2.0) - cx
+    #     shifty = round(height/2.0) - cy
+    #     t = np.float32([[1,0,shiftx],[0,1,shifty]])
+    #     return cv2.warpAffine(x, t, (width, height))
+
+    # Rotate the input spectrogram image by theta degrees
+    def rotate(x, theta):
+        rows, cols = np.shape(x)
+        midrow = round(rows/2.0)
+        midcol = round(cols/2.0)
+        M = cv2.getRotationMatrix2D((midcol,midrow), -1*theta, 1)
+        return cv2.warpAffine(x, M, (cols, rows))
+
+    # Shear the input spectrogram image by the given pixels in each direction
+    def shear(x, horiz=0, vert=0):
+        rows, cols = np.shape(x)
+        pts1 = np.float32([[round(cols*0.33), round(rows*0.67)],
+                           [round(cols*0.67), round(rows*0.67)],
+                           [round(cols*0.67), round(rows*0.33)]])
+        pts2 = np.float32([[round(cols*0.33), round(rows*0.67)+vert],
+                           [round(cols*0.67), round(rows*0.67)],
+                           [round(cols*0.67)+horiz, round(rows*0.33)]])
+        M = cv2.getAffineTransform(pts1, pts2)
+        return cv2.warpAffine(x, M, (cols, rows))
+
+    # Stretch the input spectrogram by the given pixels in each direction
+    def stretch(x, horiz=0, vert=0):
+        rows, cols = np.shape(x)
+        vert_up = int(np.ceil(vert/2.0))
+        vert_down = int(np.floor(vert/2.0))
+        horiz_left = int(np.ceil(horiz/2.0))
+        horiz_right = int(np.floor(horiz/2.0))
+        pts1 = np.float32([[round(cols*0.33), round(rows*0.67)],
+                           [round(cols*0.67), round(rows*0.67)],
+                           [round(cols*0.67), round(rows*0.33)]])
+        pts2 = np.float32([[round(cols*0.33)-horiz_left, round(rows*0.67)+vert_down],
+                           [round(cols*0.67)+horiz_right, round(rows*0.67)+vert_down],
+                           [round(cols*0.67)+horiz_right, round(rows*0.33)-vert_up]])
+        M = cv2.getAffineTransform(pts1, pts2)
+        return cv2.warpAffine(x, M, (cols, rows))
+
+    # Tilt the image horizontally and/or vertically with a perspective warp
+    def tilt(x, horiz=0, vert=0):
+        rows, cols = np.shape(x)
+        pts1 = np.float32([[0, 0],
+                           [cols-1, 0],
+                           [0, rows-1],
+                           [cols-1, rows-1]])
+        pts2 = copy.copy(pts1)
+        if horiz > 0:
+            pts2[1,1] = pts2[1,1] + horiz
+            pts2[3,1] = pts2[3,1] - horiz
+        if horiz < 0:
+            horiz = -1*horiz
+            pts2[0,1] = pts2[0,1] + horiz
+            pts2[2,1] = pts2[2,1] - horiz
+        if vert > 0:
+            pts2[2,0] = pts2[2,0] + vert
+            pts2[3,0] = pts2[3,0] - vert
+        if vert < 0:
+            vert = -1*vert
+            pts2[0,0] = pts2[0,0] + vert
+            pts2[1,0] = pts2[1,0] - vert
+        M = cv2.getPerspectiveTransform(pts1, pts2)
+        return cv2.warpPerspective(x, M, (cols, rows))
+
+    # Adjust image brightness
+    def adjust_brightness(x, delta):
+        return np.clip(x * delta, 0.0, 1.0)
+
+    # Simple check to determine whether there is anything of value in an image, or if it is
+    # mostly blank or mostly noise
+    def simple_check(x):
+        if np.max(x) < Vars.MINIMUM_VALUE:
+            return False
+        if np.mean(x) < Vars.MINIMUM_AVG_VALUE:
+            return False
+        if np.mean(x) > Vars.MAXIMUM_AVG_VALUE:
+            return False
+        return True
+
+    # Train-test-validation split.
+    # Assumes files have been shuffled and then segmented; hence segments are in order, but
+    # the sections of segments corresponding to files are randomized
+    def split_data(data):
+        num_validation = int(np.ceil(len(data) * Vars.VALIDATION_RATIO))
+        num_test = int(np.ceil(len(data) * Vars.TEST_RATIO))
+
+        validation = data[0:num_validation]
+        test = data[num_validation:(num_validation + num_test)]
+        train = data[(num_validation + num_test):]
+
+        return (train, test, validation)
+
+    # Given an input image, apply random image transforms to the spectrogram
+    def create_synthetic_segment(segment):
+        rotation = random.randint(Vars.ROTATIONS[0], Vars.ROTATIONS[1])
+        shear = (random.randint(Vars.SHEARS_HORIZ[0], Vars.SHEARS_HORIZ[1]), random.randint(Vars.SHEARS_VERT[0], Vars.SHEARS_VERT[1]))
+        tilt = (random.randint(Vars.TILTS_HORIZ[0], Vars.TILTS_HORIZ[1]), random.randint(Vars.TILTS_VERT[0], Vars.TILTS_VERT[1]))
+        stretch = (0, random.randint(Vars.STRETCHES_VERT[0], Vars.STRETCHES_VERT[1]))
+        adjust_brightness = random.uniform(Vars.ADJUST_BRIGHTNESS[0], Vars.ADJUST_BRIGHTNESS[1])
+        segment = Filters.rotate(segment, rotation)
+        segment = Filters.shear(segment, horiz=shear[0], vert=shear[1])
+        segment = Filters.tilt(segment, horiz=tilt[0], vert=tilt[1])
+        segment = Filters.stretch(segment, horiz=stretch[0], vert=stretch[1])
+        segment = Filters.adjust_brightness(segment, adjust_brightness)
+        return segment
+
+    # Runs create_synthetic_segment as many times as necessary to bring the number of
+    # segments up to the target number
+    def augment_with_synthetic_data(data, target_number):
+        synthetic_segments = []
+        num_original_segments = len(data)
+        num_segments_to_fill = target_number - num_original_segments
+
+        print(' augmenting ' + str(num_original_segments) + ' segments to ' + str(target_number) + ' segments')
+
+        if num_original_segments == target_number:
+            return data
+        elif num_original_segments > target_number:
+            return data[0:target_number]
+
+        for i in tqdm(range(num_segments_to_fill)):
+            segment = data[i%num_original_segments]
+            segment = Filters.create_synthetic_segment(segment)
+            synthetic_segments.append(segment)
+
+        data.extend(synthetic_segments)
+        return data
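The normalization chain in create_spectrogram (clip the log power, shift, apply a power curve, rescale) maps log10 values into 0..1. A worked sketch of just that arithmetic, using the constants from variables.py:

```python
import numpy as np

LOW, HIGH, POWER = 1, 4, 4                        # SPECTROGRAM_RAW_LOW/_HIGH/_POWER_FACTOR
log_power = np.array([0.5, 1.0, 2.5, 4.0, 5.0])   # example log10 spectrogram values

x = np.clip(log_power, LOW, HIGH)                 # limit the dynamic range
x = (x - LOW) ** POWER / (HIGH - LOW) ** POWER    # emphasize loud components, scale to 0..1
print(x)                                          # [0. 0. 0.0625 1. 1.]
```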
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..5925b6e
--- /dev/null
+++ b/model.py
@@ -0,0 +1,216 @@
+import os
+import numpy as np
+import joblib
+
+import keras
+from keras.models import Sequential
+from keras.models import load_model
+from keras.layers import Dense, Dropout, Flatten, BatchNormalization
+from keras.layers import Conv2D, MaxPooling2D
+from keras.layers.advanced_activations import LeakyReLU
+from keras.callbacks import EarlyStopping, ModelCheckpoint
+from keras import backend as K
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.utils import to_categorical
+from sklearn.metrics import confusion_matrix
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+from training_data import TrainingData
+from variables import *
+from filters import *
+
+class Model:
+    # This class method loads a model from disk in two parts:
+    # 1) the raw savefile for the neural network and its weights
+    # 2) additional attributes for the Model class (e.g. call names, score)
+    def load():
+        home_dir = os.getcwd()
+        os.chdir(Vars.MODELS_DIR)
+        model = Model()
+        filename = Vars.MODEL_ATTR_FILENAME
+        attr = joblib.load(filename)
+        print('loaded model attributes from ' + filename)
+        model.calls = attr[0]
+        model.score = attr[1]
+        model.cmatrix = attr[2]
+        filename = Vars.MODEL_FILENAME
+        model.classifier = load_model(filename)
+        print('loaded model classifier from ' + filename)
+        os.chdir(home_dir)
+        return model
+
+    # Run the evaluation script and save the confusion matrix to disk
+    def evaluate():
+        model = Model.load()
+
+        tdata = TrainingData.load()
+        (X_train, y_train), (X_test, y_test), (X_validation, y_validation) = model.combine_and_add_targets(tdata)
+
+        X_test = model.prefilter(X_test)
+        X_validation = model.prefilter(X_validation)
+
+        y_test = to_categorical(y_test)
+        y_validation = to_categorical(y_validation)
+
+        model.score = model.classifier.evaluate(X_test, y_test, verbose=1)
+        print('Test loss:', model.score[0])
+        print('Test accuracy:', model.score[1])
+
+        # build the confusion matrix over the combined test and validation sets
+        X = np.concatenate((X_test, X_validation))
+        y = np.concatenate((y_test, y_validation))
+
+        y_pred = model.classifier.predict(X)
+        cmatrix = confusion_matrix(np.argmax(y, axis=1), np.argmax(y_pred, axis=1))
+
+        print(model.calls)
+        print(cmatrix)
+        ax = sns.heatmap(cmatrix, annot=True, fmt='d', xticklabels=model.calls, yticklabels=model.calls)
+        ax.xaxis.tick_top()
+        ax.xaxis.set_label_position('top')
+        plt.xlabel('predicted')
+        plt.ylabel('actual')
+        fig = ax.get_figure()
+        fig.savefig(Vars.MODELS_DIR + '/' + Vars.MODEL_CMATRIX_FILENAME)
+        plt.show()
+
+    def __init__(self):
+        self.classifier = None
+        self.calls = None
+        self.score = None
+        self.cmatrix = None
+
+    # Loads the training data pertaining to the call types, then creates and trains the model
+    def train(self):
+        print('gathering training data...')
+        tdata = TrainingData.load()
+        (X_train, y_train), (X_test, y_test), (X_validation, y_validation) = self.combine_and_add_targets(tdata)
+
+        X_train = self.prefilter(X_train)
+        X_test = self.prefilter(X_test)
+        X_validation = self.prefilter(X_validation)
+
+        y_train = to_categorical(y_train)
+        y_test = to_categorical(y_test)
+        y_validation = to_categorical(y_validation)
+
+        num_classes = len(self.calls)
+
+        print('commencing training...')
+        input_shape = (Vars.SQUARIFY_SIZE, Vars.SQUARIFY_SIZE, 1)
+        model = Sequential()
+
+        model.add(Conv2D(64, kernel_size=(3,3),
+                         padding='same',
+                         input_shape=input_shape))
+        model.add(BatchNormalization(momentum=0.9))
+        model.add(LeakyReLU(alpha=0.1))
+
+        model.add(Conv2D(64, kernel_size=(3,3),
+                         padding='same',
+                         strides=2))
+        model.add(BatchNormalization(momentum=0.9))
+        model.add(LeakyReLU(alpha=0.1))
+
+        model.add(Conv2D(64, kernel_size=(3,3),
+                         padding='same',
+                         strides=2))
+        model.add(BatchNormalization(momentum=0.9))
+        model.add(LeakyReLU(alpha=0.1))
+
+        model.add(Conv2D(64, kernel_size=(3,3),
+                         padding='same',
+                         strides=2))
+        model.add(BatchNormalization(momentum=0.9))
+        model.add(LeakyReLU(alpha=0.1))
+
+        model.add(Flatten())
+        model.add(Dropout(0.4))
+        model.add(Dense(2048))
+        model.add(LeakyReLU(alpha=0.2))
+        model.add(Dropout(0.3))
+        model.add(Dense(512))
+        model.add(LeakyReLU(alpha=0.2))
+        model.add(Dropout(0.2))
+        model.add(Dense(num_classes, activation='softmax'))
+
+        model.compile(loss=keras.losses.categorical_crossentropy,
+                      optimizer=Adam(0.0002, 0.5),  # learning rate 2e-4, beta_1 0.5
+                      metrics=['accuracy'])
+        model.summary()
+
+        temp_filepath = Vars.MODELS_DIR+'/'+Vars.MODEL_FILENAME
+
+        # keep only the weights with the best validation loss seen during training
+        save_best_model = ModelCheckpoint(filepath=temp_filepath,
+                                          monitor='val_loss',
+                                          verbose=1,
+                                          save_best_only=True)
+
+        model.fit(X_train, y_train,
+                  batch_size=Vars.TRAINING_BATCH_SIZE,
+                  epochs=Vars.TRAINING_EPOCHS,
+                  verbose=1,
+                  callbacks=[save_best_model],
+                  validation_data=(X_validation, y_validation))
+
+        self.classifier = model
+        print('training complete!')
+
+    # Uses the trained classifier to make a prediction on a single input
+    def predict_single(self, x):
+        if not Filters.simple_check(x):
+            return [0]*len(self.calls)
+        X = self.prefilter([x])
+        result = self.classifier.predict(X)[0]
+        return result
+
+    # Prefilter images: squarify, rescale to -1..1, and add a channel dimension
+    def prefilter(self, X):
+        Xp = []
+        for i in range(len(X)):
+            x = Filters.squarify(X[i])
+            x = Filters.rescale(x)
+            x = np.expand_dims(x, 2)
+            Xp.append(x)
+        Xp = np.array(Xp)
+        return Xp
+
+    # Combine the different call training data and add the targets (i.e. the correct labels)
+    def combine_and_add_targets(self, tdata):
+        X_train, y_train = self.get_X_y(tdata.training_data)
+        X_test, y_test = self.get_X_y(tdata.testing_data)
+        X_validation, y_validation = self.get_X_y(tdata.validation_data)
+        return (X_train, y_train), (X_test, y_test), (X_validation, y_validation)
+
+    # Helper function for the above; sorts the call names and keeps the noise class last
+    def get_X_y(self, data):
+        calls = list(data.keys())
+        calls.sort()
+        calls.remove(Vars.NOISE_STRING)
+        calls.append(Vars.NOISE_STRING)
+
+        if self.calls is None:
+            self.calls = calls
+
+        X = []
+        y = []
+        for i in range(len(calls)):
+            call_data = data[calls[i]]
+            X.extend(call_data)
+            y.extend([i]*len(call_data))
+        return X, y
+
+    # Save the trained model to the models directory in the aforementioned two parts
+    def save(self, in_models_dir=False):
+        home_dir = os.getcwd()
+        if in_models_dir:
+            os.chdir(Vars.MODELS_DIR)
+        print('saving model...')
+        filename = Vars.MODEL_FILENAME
+        self.classifier.save(filename)
+        attr = [self.calls, self.score, self.cmatrix]
+        filename = Vars.MODEL_ATTR_FILENAME
+        joblib.dump(attr, filename)
+        print(' complete')
+        os.chdir(home_dir)
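Once a model has been trained and saved, single-segment inference follows the pattern below. A sketch assuming a trained model already exists in the models directory; 'example.wav' is a placeholder filename:

```python
from model import Model
from recording import Recording

model = Model.load()
rec = Recording('recordings/example.wav')
spec = rec.get_spectrogram_between_timestamps(0.0, 0.45)   # one SEGMENT_LENGTH window
probs = model.predict_single(spec)                         # one probability per call class
print(dict(zip(model.calls, probs)))
```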
diff --git a/process.py b/process.py
new file mode 100644
index 0000000..72b15d1
--- /dev/null
+++ b/process.py
@@ -0,0 +1,25 @@
+import glob, os
+from variables import *
+
+# Standalone batch version of acdc.process(); note that this constructs a new Scanner
+# (and therefore reloads the model) for every recording
+def process():
+    print("==PROCESS RECORDINGS==")
+    from scanner import Scanner
+    from exporter import Exporter
+    from recording import Recording
+    home_dir = os.getcwd()
+    os.chdir(Vars.RECORDINGS_DIR)
+    recordings = []
+    wavefiles = glob.glob('*.wav')
+    for wavefile in wavefiles:
+        recordings.append(Recording(wavefile))
+    os.chdir(home_dir)
+    print('processing the following recordings: ' + str(wavefiles))
+    for recording in recordings:
+        print('processing ' + recording.file)
+        scanner = Scanner()
+        exporter = Exporter()
+        scanner.process(recording)
+        exporter.process(recording)
+        print(' ')
+
+if __name__ == "__main__":
+    process()
diff --git a/recording.py b/recording.py
new file mode 100644
index 0000000..0d7f3bb
--- /dev/null
+++ b/recording.py
@@ -0,0 +1,49 @@
+import cv2
+import numpy as np
+# from scipy.signal import butter, lfilter, freqz
+from scipy.io import wavfile
+
+from variables import *
+from filters import *
+
+# The Recording class exists so that very long .wav files (e.g. field recordings) can be
+# handled in memory. It loads the raw audio data into memory, but only computes
+# spectrograms between timestamps or indices when called upon to do so
+class Recording:
+    def __init__(self, filename=None):
+        self.sample_rate = None
+        self.file = None
+        self.data = None
+        self.raw_results = None
+        self.smoothed_results = None
+        self.final_results = None
+        self.labels = None
+        self.length = None
+
+        if filename is not None:
+            self.read(filename)
+
+    def read(self, filename):
+        data = wavfile.read(filename)
+        self.file = filename[0:-4]  # strip the '.wav' extension
+        self.sample_rate = data[0]
+        # keep only the first channel of stereo recordings
+        if len(np.shape(data[1])) == 1:
+            self.data = data[1]
+        else:
+            self.data = data[1][:,0]
+        self.length = len(self.data) / float(self.sample_rate)
+
+    def get_spectrogram_between_indices(self, s, e):
+        data = self.data[s:e+1]
+        spec = Filters.create_spectrogram(data, self.sample_rate)
+        return spec
+
+    def get_spectrogram_between_timestamps(self, s, e):
+        s = self.timestamp_to_data_index(s)
+        e = self.timestamp_to_data_index(e)
+        data = self.data[s:e+1]
+        spec = Filters.create_spectrogram(data, self.sample_rate)
+        return spec
+
+    def timestamp_to_data_index(self, timestamp):
+        return min(int(round(self.sample_rate * timestamp)), len(self.data)-1)
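How Recording treats audio on read can be checked in isolation: scipy's wavfile.read returns (sample_rate, data), and stereo data is reduced to its first channel. A self-contained sketch using a synthetic 7 kHz tone, which sits inside the LOWCUT..HIGHCUT band:

```python
import numpy as np
from scipy.io import wavfile

sr = 22050
t = np.linspace(0, 1.0, sr, endpoint=False)
tone = np.int16(0.3 * 32767 * np.sin(2 * np.pi * 7000 * t))   # 1 s, 7 kHz test tone
wavfile.write('tone.wav', sr, np.column_stack([tone, tone]))  # write a stereo file

rate, data = wavfile.read('tone.wav')
mono = data if data.ndim == 1 else data[:, 0]                 # keep channel 0, as read() does
print(rate, mono.shape, len(mono) / float(rate))              # 22050 (22050,) 1.0
```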
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e09ed0c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,61 @@
+absl-py==1.0.0
+astunparse==1.6.3
+cached-property==1.5.2
+cachetools==4.2.4
+certifi==2021.10.8
+charset-normalizer==2.0.7
+cycler==0.11.0
+flatbuffers==2.0
+fonttools==4.28.1
+gast==0.4.0
+google-auth==2.3.3
+google-auth-oauthlib==0.4.6
+google-pasta==0.2.0
+grpcio==1.42.0
+h5py==3.6.0
+idna==3.3
+importlib-metadata==4.8.2
+joblib==1.1.0
+keras==2.7.0
+Keras-Preprocessing==1.1.2
+kiwisolver==1.3.2
+libclang==12.0.0
+Markdown==3.3.6
+matplotlib==3.5.0
+numpy==1.21.4
+oauthlib==3.1.1
+opencv-python==4.5.1.48
+opt-einsum==3.3.0
+packaging==21.3
+pandas==1.3.5
+Pillow==8.4.0
+protobuf==3.19.1
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pyparsing==3.0.6
+python-dateutil==2.8.2
+pytz==2021.3
+requests==2.26.0
+requests-oauthlib==1.3.0
+rsa==4.7.2
+scikit-learn==1.0.1
+scipy==1.7.2
+seaborn==0.11.2
+setuptools-scm==6.3.2
+six==1.16.0
+sklearn==0.0
+tensorboard==2.7.0
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.0
+tensorflow==2.7.0
+tensorflow-estimator==2.7.0
+tensorflow-io-gcs-filesystem==0.22.0
+termcolor==1.1.0
+threadpoolctl==3.0.0
+tomli==1.2.2
+tqdm==4.62.3
+typing_extensions==4.0.0
+urllib3==1.26.7
+Werkzeug==2.0.2
+wrapt==1.13.3
+zipp==3.6.0
diff --git a/results.py b/results.py
new file mode 100644
index 0000000..bb405be
--- /dev/null
+++ b/results.py
@@ -0,0 +1,81 @@
+import csv, cv2, os
+import joblib
+import numpy as np
+
+from variables import *
+
+# The Results class saves all the raw results that were stored in a recording object and
+# can export CSVs, Audacity labels, and images of the detected calls to the appropriate
+# results directory
+class Results:
+    def load(folder_name):
+        home_dir = os.getcwd()
+        os.chdir(Vars.RESULTS_DIR)
+        os.chdir(folder_name)
+        filename = folder_name + '.results'
+        results = joblib.load(filename)
+        print('loaded results from ' + filename)
+        results.__init__(results.name, results.recording)
+        os.chdir(home_dir)
+        return results
+
+    def __init__(self, name, recording):
+        self.name = name
+        self.recording = recording
+
+    def export_csv(self, to_results_dir=False):
+        print('exporting csv for ' + self.name)
+        home_dir = os.getcwd()
+        if to_results_dir:
+            os.chdir(Vars.RESULTS_DIR)
+        os.chdir(self.name)
+        labels_file = open(self.recording.file+'.csv', 'w')
+        labels_file_writer = csv.writer(labels_file)
+        labels_file_writer.writerow(['call','start','end'])
+        for label in self.recording.labels:
+            labels_file_writer.writerow(label)
+        labels_file.close()
+        os.chdir(home_dir)
+
+    def export_Audacity(self, to_results_dir=False):
+        print('exporting Audacity labels for ' + self.name)
+        home_dir = os.getcwd()
+        if to_results_dir:
+            os.chdir(Vars.RESULTS_DIR)
+        os.chdir(self.name)
+        labels_file = open(self.recording.file+'.txt', 'w', newline='')
+        labels_file_writer = csv.writer(labels_file, delimiter='\t')
+        for label in self.recording.labels:
+            # labels are stored as (call, start, end); Audacity expects start, end, label
+            row = [label[1], label[2], label[0]]
+            labels_file_writer.writerow(row)
+        labels_file.close()
+        os.chdir(home_dir)
+
+    def export_images(self, to_results_dir=False):
+        print('exporting images for ' + self.name)
+        home_dir = os.getcwd()
+        if to_results_dir:
+            os.chdir(Vars.RESULTS_DIR)
+        os.chdir(self.name)
+        for label in self.recording.labels:
+            (call, s, e) = label
+            spec = np.uint8(self.recording.get_spectrogram_between_timestamps(s, e) * 255)
+            filename = str(int(round(s))) + '_' + call + '.png'
+            cv2.imwrite(filename, spec)
+        os.chdir(home_dir)
+
+    def save(self, to_results_dir=False):
+        home_dir = os.getcwd()
+        if to_results_dir:
+            os.chdir(Vars.RESULTS_DIR)
+        os.chdir(self.name)
+        filename = self.name + '.results'
+        joblib.dump(self, filename)
+        print('saved results to ' + filename)
+        os.chdir(home_dir)
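The Audacity label format written by export_Audacity is plain tab-delimited text, one 'start<TAB>end<TAB>label' row per detection, with times in seconds. A minimal sketch with made-up detections:

```python
import csv

labels = [('Chi', 1.20, 1.48), ('Ph', 3.05, 3.51)]   # (call, start, end) as Scanner stores them
with open('example_labels.txt', 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    for call, start, end in labels:
        writer.writerow([start, end, call])           # reorder to Audacity's column order
```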
diff --git a/scanner.py b/scanner.py
new file mode 100644
index 0000000..d83e08a
--- /dev/null
+++ b/scanner.py
@@ -0,0 +1,110 @@
+import copy
+import numpy as np
+from tqdm import tqdm
+
+from variables import *
+from model import *
+
+# This class takes in a recording, scans through it with the trained model, and then
+# stores the results
+class Scanner:
+    def __init__(self, preload=True, disable_pbar=False):
+        self.recording = None
+        self.raw_results = {}
+        self.smoothed_results = {}
+        self.final_results = {}
+        self.labels = []
+        self.model = None
+        self.disable_pbar = disable_pbar
+        if preload:
+            self.model = Model.load()
+
+    # Scan and label a recording with the trained model
+    def process(self, recording):
+        if not self.disable_pbar:
+            print(' scanning, smoothing, and labeling...')
+        self.recording = recording
+        self.initial_scan()
+        self.smoothing()
+        self.label()
+
+    # Step through the recording, grab slices of the configured segment size, classify
+    # each slice with the model, and store the raw prediction values
+    def initial_scan(self):  # todo: add decay
+        for call in self.model.calls:
+            self.raw_results[call] = []
+        self.raw_results['timestamp'] = []
+
+        segment_size = int(round(self.recording.sample_rate * Vars.SEGMENT_LENGTH))
+        step_size = int(round(self.recording.sample_rate * Vars.SEGMENT_STEP))
+        recording_size = len(self.recording.data)
+
+        s = 0
+        e = segment_size
+        with tqdm(total=recording_size, unit="recording_secs", unit_scale=True, unit_divisor=self.recording.sample_rate, disable=self.disable_pbar) as pbar:
+            while e <= recording_size:
+                spec = self.recording.get_spectrogram_between_indices(s, e)
+                timestamp = np.mean((s,e)) / float(self.recording.sample_rate)
+
+                model_prediction = self.model.predict_single(spec)
+                self.raw_results['timestamp'].append(timestamp)
+                for i in range(len(self.model.calls)):
+                    call = self.model.calls[i]
+                    self.raw_results[call].append(model_prediction[i])
+
+                pbar.update(step_size)
+                s += step_size
+                e += step_size
+        self.recording.raw_results = copy.copy(self.raw_results)
+
+    # Smooth the raw results with a sliding window: half window mean, half trailing edge
+    def smoothing(self, kernel_size=Vars.SMOOTHING_KERNEL_SIZE):
+        for call in self.model.calls:
+            self.smoothed_results[call] = []
+        self.smoothed_results['timestamp'] = copy.copy(self.raw_results['timestamp'])
+        half_kernel_size = int(np.floor(kernel_size/2.0))
+
+        calls_minus_noise = copy.copy(self.model.calls)
+        calls_minus_noise.remove(Vars.NOISE_STRING)
+        for i in tqdm(range(len(self.smoothed_results['timestamp'])), disable=self.disable_pbar):
+            s = max(0, i-half_kernel_size)
+            e = min(i+half_kernel_size, len(self.smoothed_results['timestamp'])-1)
+            for call in calls_minus_noise:
+                prev = self.raw_results[call][s:e+1]
+                prev = np.mean(prev)
+                smoothed = prev * 0.5 + self.raw_results[call][e] * 0.5
+                self.smoothed_results[call].append(smoothed)
+        self.recording.smoothed_results = copy.copy(self.smoothed_results)
+
+    # Add a label for a continuous group of smoothed results higher than a specified
+    # threshold value, as long as the length between start and end time is above a
+    # proportion of the call length
+    def label(self):
+        self.final_results['timestamp'] = copy.copy(self.smoothed_results['timestamp'])
+        for call in self.model.calls:
+            self.final_results[call] = [0] * len(self.final_results['timestamp'])
+
+        calls_minus_noise = copy.copy(self.model.calls)
+        calls_minus_noise.remove(Vars.NOISE_STRING)
+        for call in tqdm(calls_minus_noise, disable=self.disable_pbar):
+            self.detect_call(call)
+        self.labels.sort(key=lambda x: x[1])  # sort labels by start time
+
+        self.recording.final_results = copy.copy(self.final_results)
+        self.recording.labels = copy.copy(self.labels)
+
+    def detect_call(self, call):
+        above = False
+        call_start = None
+        for i in range(len(self.final_results['timestamp'])):
+            prediction = self.smoothed_results[call][i]
+            if not above and prediction >= Vars.CONFIDENCE_THRESHOLD:
+                above = True
+                call_start = self.smoothed_results['timestamp'][i]
+                call_start_index = i
+            elif above and (prediction < Vars.CONFIDENCE_THRESHOLD or i >= len(self.smoothed_results['timestamp'])-1):
+                above = False
+                call_end = self.smoothed_results['timestamp'][i]
+                call_end_index = i
+                detection_length = call_end - call_start
+                if detection_length >= (Vars.MIN_DETECTION_LENGTH_RATIO * Vars.WINDOW_LENGTHS[call]):
+                    self.labels.append((call, call_start, call_end))
+                    self.final_results[call][call_start_index:call_end_index] = [1] * (call_end_index-call_start_index)
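The labeling rule in detect_call reduces to: keep any continuous run of smoothed confidences at or above CONFIDENCE_THRESHOLD whose duration is at least MIN_DETECTION_LENGTH_RATIO times the call's expected length. A toy illustration with made-up confidences for the 'Chi' call:

```python
THRESHOLD = 0.95                     # Vars.CONFIDENCE_THRESHOLD
MIN_LENGTH = 0.2 * 0.25              # Vars.MIN_DETECTION_LENGTH_RATIO * WINDOW_LENGTHS['Chi']

timestamps = [0.00, 0.04, 0.08, 0.12, 0.16, 0.20]
confidence = [0.10, 0.97, 0.98, 0.96, 0.20, 0.10]    # made-up smoothed values

start = None
for t, c in zip(timestamps, confidence):
    if start is None and c >= THRESHOLD:
        start = t                                    # run begins
    elif start is not None and c < THRESHOLD:
        if t - start >= MIN_LENGTH:
            print('detected Chi from', start, 'to', t)   # 0.04 to 0.16
        start = None                                 # run ends
```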
diff --git a/training_data.py b/training_data.py
new file mode 100644
index 0000000..bf05ad9
--- /dev/null
+++ b/training_data.py
@@ -0,0 +1,65 @@
+import os, glob, cv2, copy, random
+import numpy as np
+import joblib
+from random import shuffle
+from tqdm import tqdm
+
+from variables import *
+from recording import *
+from filters import *
+
+class TrainingData:
+    # Loads a TrainingData instance from disk
+    def load():
+        home_dir = os.getcwd()
+        os.chdir(Vars.MODELS_DIR)
+        filename = Vars.TDATA_FILENAME
+        print('loading training data from ' + filename + '...')
+        tdata = joblib.load(filename)
+        print(' complete')
+        os.chdir(home_dir)
+        return tdata
+
+    def __init__(self):
+        self.training_data = {}
+        self.testing_data = {}
+        self.validation_data = {}
+
+    # Main script to call to prepare both the positive and negative training data
+    def prepare(self):
+        home_dir = os.getcwd()
+        os.chdir(Vars.TRAINING_DIR)
+        # each sub-directory of the training directory holds the .wav files for one call class
+        calls = list(filter(os.path.isdir, os.listdir(os.getcwd())))
+
+        for call in calls:
+            print(call)
+            self.training_data[call] = []
+            os.chdir(call)
+            wavefiles = glob.glob('*.wav')
+            shuffle(wavefiles)
+            for wavefile in tqdm(wavefiles):
+                rec = Recording(wavefile)
+                segments = Filters.segmentize_data(rec)
+                self.training_data[call].extend(segments)
+            # shuffle(self.training_data[call])
+
+            print('split data')
+            (self.training_data[call], self.testing_data[call], self.validation_data[call]) = Filters.split_data(self.training_data[call])
+
+            print('augment data')
+            self.training_data[call] = Filters.augment_with_synthetic_data(self.training_data[call], Vars.TRAINING_SEGMENTS_PER_CALL)
+            self.testing_data[call] = Filters.augment_with_synthetic_data(self.testing_data[call], Vars.TESTING_SEGMENTS_PER_CALL)
+            self.validation_data[call] = Filters.augment_with_synthetic_data(self.validation_data[call], Vars.VALIDATION_SEGMENTS_PER_CALL)
+            os.chdir('..')
+        os.chdir(home_dir)
+
+    # Saves the TrainingData instance
+    def save(self, in_models_dir=False):
+        home_dir = os.getcwd()
+        if in_models_dir:
+            os.chdir(Vars.MODELS_DIR)
+        filename = Vars.TDATA_FILENAME
+        print('saving training data to ' + filename + '...')
+        joblib.dump(self, filename)
+        print(' complete')
+        os.chdir(home_dir)
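prepare() hands each class's segment list to Filters.split_data, which carves the validation and test slices off the front before augmentation. A quick sketch of the arithmetic with the ratios from variables.py:

```python
import numpy as np

segments = list(range(100))                          # stand-in for 100 spectrogram segments
num_validation = int(np.ceil(len(segments) * 0.2))   # VALIDATION_RATIO
num_test = int(np.ceil(len(segments) * 0.1))         # TEST_RATIO

validation = segments[:num_validation]
test = segments[num_validation:num_validation + num_test]
train = segments[num_validation + num_test:]
print(len(train), len(test), len(validation))        # 70 10 20
```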
diff --git a/variables.py b/variables.py
new file mode 100644
index 0000000..3819e55
--- /dev/null
+++ b/variables.py
@@ -0,0 +1,50 @@
+import os
+import numpy as np
+
+class Vars:
+    # spectrogram parameters
+    NPERSEG = 256
+    NOVERLAP = int(NPERSEG * 0.25)
+    WINDOW = 'hanning'
+    SPECTROGRAM_RAW_LOW = 1
+    SPECTROGRAM_RAW_HIGH = 4
+    SPECTROGRAM_POWER_FACTOR = 4
+    LOWCUT = 4500
+    HIGHCUT = 9500
+    SPECTROGRAM_HEIGHT = 64
+    SQUARIFY_SIZE = 64
+    MORPH_CLEAN_KERNEL = np.ones((3,3))
+    # augmentation ranges
+    ROTATIONS = (-2, 2)
+    SHEARS_HORIZ = (-2, 2)
+    SHEARS_VERT = (-3, 3)
+    TILTS_HORIZ = (-8, 8)
+    TILTS_VERT = (-8, 8)
+    STRETCHES_VERT = (-16, 6)
+    ADJUST_BRIGHTNESS = (0.5, 2)
+    MINIMUM_VALUE = 0.01
+    MINIMUM_AVG_VALUE = 0.001
+    MAXIMUM_AVG_VALUE = 0.9
+    TRAINING_DIR = 'training_data'
+    RECORDINGS_DIR = 'recordings'
+    RESULTS_DIR = 'results'
+    MODELS_DIR = 'models'
+    CONFIDENCE_THRESHOLD = 0.95
+    TRAINING_BATCH_SIZE = 16
+    TRAINING_EPOCHS = 15
+    DETECTION_LENGTH_RATIO = 0.5
+    # expected call lengths in seconds, per call class
+    WINDOW_LENGTHS = {'Chi': 0.25, 'Tr': 0.25, 'Ph': 0.40, 'Tw': 0.5}
+    SEGMENT_LENGTH = 0.45
+    SEGMENT_STEP = 0.04
+    VALIDATION_RATIO = 0.2
+    TEST_RATIO = 0.1
+    TRAINING_SEGMENTS_PER_CALL = 35000
+    TESTING_SEGMENTS_PER_CALL = int(round(TRAINING_SEGMENTS_PER_CALL * TEST_RATIO))
+    VALIDATION_SEGMENTS_PER_CALL = int(round(TRAINING_SEGMENTS_PER_CALL * VALIDATION_RATIO))
+    TDATA_FILENAME = 'acdc.tdata'
+    NOISE_STRING = 'Noise'
+    MODEL_FILENAME = 'saved_model.pb'  # savefile for the Keras classifier
+    MODEL_ATTR_FILENAME = 'acdc.modelattr'
+    MODEL_CMATRIX_FILENAME = 'acdc_model.png'
+    MIN_DETECTION_LENGTH_RATIO = 0.2
+    SMOOTHING_KERNEL_SIZE = 5
+    VOLUME_AMP_MULTIPLE = 60
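As a sanity check on these settings, the hop between spectrogram columns is NPERSEG - NOVERLAP samples, so the time resolution of a scan depends on the recording's sample rate; a sketch assuming 44.1 kHz input:

```python
NPERSEG = 256
NOVERLAP = int(NPERSEG * 0.25)     # 64
hop = NPERSEG - NOVERLAP           # 192 samples between spectrogram columns
print(hop / 44100.0)               # ~0.00435 s per column at 44.1 kHz
```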