inference.py
import torch
import torchaudio

from cnn import CNNNetwork
from vibvizdataset import VibVizDataset
from train import AUDIO_DIR, ANNOTATIONS_FILE, SAMPLE_RATE, NUM_SAMPLES

class_mapping = [
    'comfortable', 'lively', 'annoying', 'uncomfortable', 'natural',
    'urgent', 'predictable', 'boring', 'agitating', 'rhythmic', 'calm',
    'interesting', 'mechanical', 'unique', 'pleasant', 'funny', 'happy',
    'creepy', 'surprising', 'angry', 'sad', 'strange', 'familiar',
]


def predict(model, input, target, class_mapping):
    model.eval()
    with torch.no_grad():
        predictions = model(input)
        print(predictions)
        # Tensor (1, 23) -> [[0.1, 0.01, ..., 0.6]]
        predicted_index = predictions[0].argmax(0)
        predicted = class_mapping[predicted_index]
        expected = class_mapping[target]
    return predicted, expected


if __name__ == "__main__":
    # load back the trained model (either restore a state_dict into a fresh
    # network, or load the whole serialized model)
    cnn = CNNNetwork()
    # state_dict = torch.load("feedforwardnet.pth")
    # cnn.load_state_dict(state_dict)
    cnn = torch.load('New_cnn.pt')

    # load the VibViz dataset
    mel_spectrogram = torchaudio.transforms.MelSpectrogram(
        sample_rate=SAMPLE_RATE,
        n_fft=1024,
        hop_length=512,
        n_mels=64
    )
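    # The transform turns each fixed-length waveform (NUM_SAMPLES points) into
    # a 64-band mel spectrogram, so the network receives inputs of shape
    # [num_channels, n_mels, time], with roughly NUM_SAMPLES / hop_length
    # time frames.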
    vvd = VibVizDataset(ANNOTATIONS_FILE,
                        AUDIO_DIR,
                        mel_spectrogram,
                        SAMPLE_RATE,
                        NUM_SAMPLES,
                        "cpu")

    # get a sample from the VibViz dataset for inference
    input, target = vvd[0][0], vvd[0][1]  # [num_channels, n_mels, time]
    print("Target: ")
    print(target)
    input.unsqueeze_(0)  # add a batch dimension -> [batch_size, num_channels, n_mels, time]

    # make an inference
    predicted, expected = predict(cnn, input, target, class_mapping)
    print(f"Predicted: '{predicted}', expected: '{expected}'")