-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimpsons.py
116 lines (86 loc) · 3.28 KB
/
simpsons.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#pylint:disable=no-member (Removes linting problems with cv)
# Installing `caer` and `canaro` since they don't come pre-installed
# Uncomment the following line:
# !pip install --upgrade caer canaro
import os
import caer
import canaro
import numpy as np
import cv2 as cv
import gc
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import LearningRateScheduler
# Spatial size every image is resized to before entering the network (width, height).
IMG_SIZE = (80,80)
# Number of colour channels: 1 ==> images are converted to grayscale.
channels = 1
# Dataset root: one sub-folder per character, each containing that character's images.
char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'
# Build a {character name: image count} mapping from the dataset folders,
# then sort it by count in descending order (caer.sort_dict returns the
# entries as (name, count) pairs ordered by value).
char_dict = {
    name: len(os.listdir(os.path.join(char_path, name)))
    for name in os.listdir(char_path)
}
char_dict = caer.sort_dict(char_dict, descending=True)
char_dict
# Keep only the 10 characters with the most images; char_dict is already
# sorted descending, so the first 10 entries are the most frequent classes.
characters = []
for idx, entry in enumerate(char_dict):
    if idx == 10:
        break
    characters.append(entry[0])
characters
# Create the training data.
# caer walks char_path, keeps only folders listed in `characters`, loads each
# image as grayscale (channels=1) resized to IMG_SIZE, and shuffles the
# resulting (image, label) pairs.
train = caer.preprocess_from_dir(char_path, characters, channels=channels, IMG_SIZE=IMG_SIZE, isShuffle=True)
# Number of training samples (bare expression: displays in a notebook cell).
len(train)
# Visualizing the data (OpenCV doesn't display well in Jupyter notebooks),
# so matplotlib is used to show the first sample's image array.
plt.figure(figsize=(30,30))
plt.imshow(train[0][0], cmap='gray')
plt.show()
# Separating the (image, label) pairs into a feature array and a label array.
featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)
# Normalize the featureSet pixel values from [0, 255] into the (0, 1) range.
featureSet = caer.normalize(featureSet)
# Converting numerical labels to binary class vectors (one-hot encoding),
# one column per entry in `characters`.
labels = to_categorical(labels, len(characters))
# Creating train and validation data: 80% train / 20% validation.
x_train, x_val, y_train, y_val = caer.train_test_split(featureSet, labels, val_ratio=.2)
# Deleting the large intermediate arrays and forcing a garbage-collection
# pass to free memory before training starts.
del train
del featureSet
del labels
gc.collect()
# Useful variables when training.
BATCH_SIZE = 32
EPOCHS = 10
# Image data generator (introduces randomness in network ==> better accuracy);
# canaro supplies a pre-configured Keras ImageDataGenerator.
datagen = canaro.generators.imageDataGenerator()
# Yields augmented (images, labels) batches of size BATCH_SIZE for model.fit.
train_gen = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)
# Create our model (returns the compiled model).
# FIX: the original passed loss='binary_crossentropy', but this is a
# single-label 10-class problem whose labels were one-hot encoded with
# to_categorical above; the matching loss is 'categorical_crossentropy'.
# Binary cross-entropy here treats each of the 10 outputs as an independent
# binary decision, which inflates the reported accuracy metric.
model = canaro.models.createSimpsonsModel(IMG_SIZE=IMG_SIZE, channels=channels, output_dim=len(characters),
                                          loss='categorical_crossentropy', decay=1e-7, learning_rate=0.001,
                                          momentum=0.9, nesterov=True)
model.summary()
# Training the model.
# LearningRateScheduler adjusts the learning rate each epoch using canaro's
# built-in schedule function.
callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]
# NOTE(review): validation_steps is normally only meaningful when
# validation_data is a generator; with an array tuple like (x_val, y_val),
# recent Keras versions ignore or reject it — confirm against the TF version
# in use.
training = model.fit(train_gen,
steps_per_epoch=len(x_train)//BATCH_SIZE,
epochs=EPOCHS,
validation_data=(x_val,y_val),
validation_steps=len(y_val)//BATCH_SIZE,
callbacks = callbacks_list)
print(characters)
"""## Testing"""
test_path = r'../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/charles_montgomery_burns_0.jpg'
img = cv.imread(test_path)
plt.imshow(img)
plt.show()
def prepare(image):
    """Convert a BGR image into the form the trained model expects.

    Grayscales the image, resizes it to IMG_SIZE, and reshapes it via
    caer.reshape so it can be passed straight to model.predict.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    resized = cv.resize(gray, IMG_SIZE)
    return caer.reshape(resized, IMG_SIZE, 1)
# Run the model on the prepared test image; predict returns one probability
# vector per input image.
predictions = model.predict(prepare(img))
# Getting class with the highest probability and mapping it back to its name.
print(characters[np.argmax(predictions[0])])