-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetNotes.py
66 lines (53 loc) · 2.34 KB
/
getNotes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import tempfile

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pydub import AudioSegment
# Presumably points pydub at a local ffmpeg install so non-WAV input can be
# decoded -- TODO confirm.
# NOTE(review): this is a hard-coded, Windows-style path and it looks like a
# directory rather than the ffmpeg executable itself. pydub's documentation
# describes `AudioSegment.converter` as the setting for the executable path;
# verify that this assignment actually takes effect on the target machines.
AudioSegment.ffmpeg = "C:/ffmpeg"
def getNotes(path, *, fmin="D4", fmax="B6", threshold=0.9):
    """Transcribe an audio file into the sequence of notes it contains.

    The input is decoded with pydub and converted to a temporary WAV file,
    loaded with librosa at 22050 Hz, reduced to its harmonic component and
    pitch-tracked with pYIN. Each analysis frame becomes a note name, or the
    placeholder "N/A" when the frame is not confidently voiced. Runs of
    identical consecutive frames are collapsed to one entry and the "N/A"
    placeholders are then dropped.

    The intermediate WAV lives in a private temporary directory that is
    removed before this function returns, so nothing is left behind in the
    working directory and concurrent calls cannot overwrite each other's data.

    Args:
        path: Audio file to analyse, in any format pydub can decode.
        fmin: Lowest note name searched for by the pitch tracker. The default
            D4 is the bottom note of a low D whistle.
        fmax: Highest note name searched for by the pitch tracker. The default
            B6 is the top note of a high D whistle.
        threshold: Voicing probability a frame must exceed (strictly) for its
            pitch estimate to be accepted.

    Returns:
        list[str]: Note names (e.g. "A4") in order of appearance. Because runs
        are collapsed before the placeholders are removed, a note that is
        repeated after a gap appears twice.
    """
    unvoiced = "N/A"

    # mp3 -> wav, written to a scratch directory instead of a fixed "raw.wav"
    # in the current working directory.
    with tempfile.TemporaryDirectory() as scratch_dir:
        wav_path = os.path.join(scratch_dir, "raw.wav")
        audio = AudioSegment.from_file(path)
        # export() returns the open output handle; close it so the data is
        # flushed and the directory can be deleted (an open file blocks
        # deletion on Windows).
        audio.export(wav_path, format="wav").close()
        # Load the audio file. y = time series, sr = sample rate
        y, sr = librosa.load(wav_path, sr=22050)

    # Only the harmonic portion of the audio is wanted; the percussive part
    # is discarded.
    y_harmonic, _ = librosa.effects.hpss(y)

    # Pitch estimation:
    #   f0           = array of estimated frequencies
    #   voiced_probs = probability (0 to 1) of each frame being voiced
    # The per-frame voiced flag is not needed because voicing is decided from
    # the probability below. sr is passed explicitly so the tracker always
    # agrees with the rate the audio was loaded at.
    f0, _, voiced_probs = librosa.pyin(
        y_harmonic,
        fmin=librosa.note_to_hz(fmin),
        fmax=librosa.note_to_hz(fmax),
        sr=sr,
    )

    def hz_to_note_with_threshold(frequency, voiced_prob, threshold=threshold):
        # Accept the frequency only if there is a high likelihood of a valid
        # pitch. A NaN frequency fails the `> 0` comparison, so it maps to
        # the placeholder as well.
        if frequency > 0 and voiced_prob > threshold:
            return librosa.hz_to_note(frequency)
        return unvoiced

    # One label per analysis frame.
    notes = [
        hz_to_note_with_threshold(freq, prob)
        for freq, prob in zip(f0, voiced_probs)
    ]

    # dtype=object keeps an empty result well-defined across pandas versions.
    notes_series = pd.Series(notes, dtype=object)
    # Remove consecutive duplicates first, then filter out "N/A".
    notes_series = notes_series[notes_series.shift() != notes_series]
    notes_series = notes_series[notes_series != unvoiced]

    return notes_series.tolist()
# fig, ax = plt.subplots(2, 1, figsize=(14, 10))
# # Plot the estimated pitch over time
# times = librosa.times_like(f0)
# ax[0].plot(times, f0, label='Estimated pitch', color='r')
# ax[0].set_xlabel('Time (s)')
# ax[0].set_ylabel('Frequency (Hz)')
# ax[0].set_title('Estimated pitch over time')
# ax[0].legend()
# # Compute STFT array
# D = np.abs(librosa.stft(y))
# # Plot the spectrogram
# img = librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max), sr=sr, x_axis='time', y_axis='log', ax=ax[1])
# fig.colorbar(img, ax=ax[1], format='%+2.0f dB')
# ax[1].set_title('Spectrogram (dB)')
# # Display the plots
# plt.tight_layout()
# plt.show()