-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathpreprocess.py
132 lines (109 loc) · 3.73 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
This Python script preprocesses the driving-log images.
It imports the images from the center, left, and right cameras
and turns them into numpy arrays. These numpy arrays are split
into train, validation, and test sets and saved as a pickle.
"""
import argparse
import os
import sys
import csv
import base64
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
### Paths to folder and label
# NOTE(review): hard-coded absolute path — consider wiring this to
# argparse (already imported at the top of the file) or an environment
# variable so the script is portable across machines.
folder_path = "/Users/wonjunlee/Downloads/udacity/Self-Driving-Car-Nanodegree/CarND-BehavioralCloning-P3"
label_path = os.path.join(folder_path, "driving_log.csv")
### Import data: one list per CSV row. Presumably each row is
### [center_img, left_img, right_img, steering, throttle, brake, speed]
### (columns 0-2 and 3 are the only ones used below) — TODO confirm.
data = []
# newline='' is the csv-module-recommended way to open CSV files,
# preventing universal-newline translation from corrupting quoted fields.
with open(label_path, newline='') as log_file:
    for row in csv.reader(log_file):
        data.append(row)
print("data imported")
### size of the data
data_size = len(data)
print("data size:", data_size)
### Empty accumulators for features and labels.
# NOTE(review): these were tuples grown with `+=`, which re-allocates the
# whole tuple on every append (quadratic overall). Lists append in
# amortized O(1) and remain drop-in compatible: `list += tuple` extends
# the list, and np.array() accepts either.
features = []
labels = []
### Extract a small single-channel crop from one camera image and
### flatten it to a plain Python list.
### Rows 65-134 (every 4th) and every 4th column of channel 0 are kept;
### with the expected 160x320 source frames that yields 18 x 80 = 1440
### values per image — TODO confirm the source frame size.
### j = 0, 1, 2 selects the center, left, or right camera column.
def load_image(data_line, j):
    cropped = plt.imread(data_line[j].strip())[65:135:4, 0:-1:4, 0]
    return cropped.flatten().tolist()
# Use only the first 100 log rows (3 camera images each) to keep the
# dataset small while iterating on the pipeline.
data = data[:100]
# For each log row, convert the three camera images to flat lists and
# collect each one as its own feature sample.
for row in tqdm(data, unit='items'):
    for cam in range(3):
        features += (load_image(row, cam),)
item_num = len(features)
print("features size", item_num)
# Reshape each flat 1440-value list back to image form: one
# 18 x 80 x 1 sample per camera image (so 3 samples per log row).
features = np.array(features).reshape(item_num, 18, 80, 1)
print("features shape", features.shape)
### Save labels: the steering angle (CSV column 3) is duplicated once
### for each of the row's three camera images, keeping labels aligned
### with the features built above.
for row in tqdm(data, unit='items'):
    labels += (float(row[3]),) * 3
labels = np.array(labels)
print("features:", features.shape)
print("labels:", labels.shape)
# train_test_split moved to sklearn.model_selection in scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20, so the
# previous import fails on any modern scikit-learn.
from sklearn.model_selection import train_test_split
# Get randomized datasets for training and test (10% held out for test)
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.10,
    random_state=832289)
# Get randomized datasets for training and validation
# (25% of the remaining training data becomes validation)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.25,
    random_state=832289)
# Summarize the shapes of the freshly split arrays.
train_size = X_train.shape[0]
valid_size = X_valid.shape[0]
test_size = X_test.shape[0]
input_shape = X_train.shape[1:]
# Total scalar values per sample: rows * cols * channels.
n_rows, n_cols, n_channels = input_shape
features_count = n_rows * n_cols * n_channels
for caption, value in (("train size:", train_size),
                       ("valid size:", valid_size),
                       ("test size:", test_size),
                       ("input_shape:", input_shape),
                       ("features count:", features_count)):
    print(caption, value)
import pickle
# Save the data for easy access.
pickle_file = 'camera.pickle'
# Keep prompting until we have a filename that does not already exist,
# then write all six arrays into one pickle. Re-raises on write failure
# after reporting, so a partial file is never silently accepted.
while os.path.isfile(pickle_file):
    print("Please use a different file name other than camera.pickle")
    pickle_file = input("Enter: ")
print('Saving data to pickle file...')
try:
    with open(pickle_file, 'wb') as pfile:
        pickle.dump(
            {
                'train_dataset': X_train,
                'train_labels': y_train,
                'valid_dataset': X_valid,
                'valid_labels': y_valid,
                'test_dataset': X_test,
                'test_labels': y_test,
            },
            pfile, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
    raise
print('Data cached in pickle file.')