Skip to content

Commit

Permalink
Chapter 04 section 03 completed.
Browse files: browse the repository at this point in the history
  • Loading branch information
liuyubobobo committed Dec 5, 2017
1 parent 23967fa commit 72fda63
Show file tree
Hide file tree
Showing 10 changed files with 1,657 additions and 223 deletions.
294 changes: 159 additions & 135 deletions 04-kNN/03-Test-Our-Algorithm/03-Test-Our-Algorithm.ipynb

Large diffs are not rendered by default.

13 changes: 7 additions & 6 deletions 04-kNN/03-Test-Our-Algorithm/playML/kNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def fit(self, X_train, y_train):

def predict(self, X_predict):
"""给定待预测数据集X_predict,返回表示X_predict的结果向量"""
assert self._X_train is not None and self._X_train is not None, \
assert self._X_train is not None and self._y_train is not None, \
"must fit before predict!"
assert X_predict.shape[1] == self._X_train.shape[1], \
"the feature number of X_predict must be equal to X_train"
Expand All @@ -34,16 +34,17 @@ def predict(self, X_predict):
return np.array(y_predict)

def _predict(self, x):
"""给定单个待预测数据x,返回x_predict的预测结果值"""
"""给定单个待预测数据x,返回x的预测结果值"""
assert x.shape[0] == self._X_train.shape[1], \
"the feature number of x must be equal to X_train"
distances = [(sqrt(((x_train - x) ** 2).sum()), self._y_train[i])
for i, x_train in enumerate(self._X_train)]
distances.sort()

topK_y = [pair[1] for pair in distances[:self.k]]
distances = [sqrt(np.sum((x_train - x) ** 2))
for x_train in self._X_train]
nearest = np.argsort(distances)

topK_y = [self._y_train[i] for i in nearest[:self.k]]
votes = Counter(topK_y)

return votes.most_common(1)[0][0]

def __repr__(self):
Expand Down
80 changes: 29 additions & 51 deletions 04-kNN/04-Hyper-Parameter-K/04-Hyper-Parameter-K.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 21,
"metadata": {
"collapsed": true
},
Expand All @@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 22,
"metadata": {},
"outputs": [
{
Expand All @@ -31,7 +31,7 @@
"dict_keys(['data', 'target', 'target_names', 'images', 'DESCR'])"
]
},
"execution_count": 2,
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 23,
"metadata": {},
"outputs": [
{
Expand All @@ -52,7 +52,7 @@
"(1797, 64)"
]
},
"execution_count": 3,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -64,7 +64,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 24,
"metadata": {},
"outputs": [
{
Expand All @@ -73,7 +73,7 @@
"(1797,)"
]
},
"execution_count": 4,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -85,7 +85,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 25,
"metadata": {
"collapsed": true
},
Expand All @@ -98,16 +98,16 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([6, 8, 5, 8, 7, 6, 4, 0, 2, 6])"
"array([1, 4, 9, 2, 0, 9, 0, 6, 1, 1])"
]
},
"execution_count": 6,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -118,7 +118,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 27,
"metadata": {
"collapsed": true
},
Expand All @@ -133,16 +133,16 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9916434540389972"
"0.9805013927576601"
]
},
"execution_count": 8,
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -160,16 +160,16 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9916434540389972"
"0.9805013927576601"
]
},
"execution_count": 9,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -180,28 +180,6 @@
"accuracy_score(y_test, y_predict)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.99164345403899723"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics import accuracy_score\n",
"\n",
"accuracy_score(y_test, y_predict)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -211,16 +189,16 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([4, 6, 5, 8, 9, 2, 7, 1, 6, 4])"
"array([3, 9, 5, 1, 2, 9, 2, 4, 1, 1])"
]
},
"execution_count": 16,
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -235,7 +213,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 31,
"metadata": {
"collapsed": true
},
Expand All @@ -250,7 +228,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 32,
"metadata": {},
"outputs": [
{
Expand All @@ -259,7 +237,7 @@
"0.98888888888888893"
]
},
"execution_count": 18,
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -272,7 +250,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 33,
"metadata": {},
"outputs": [
{
Expand All @@ -281,7 +259,7 @@
"0.98888888888888893"
]
},
"execution_count": 19,
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -299,15 +277,15 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"best_k = 1\n",
"best_score = 0.991666666667\n"
"best_k = 3\n",
"best_score = 0.988888888889\n"
]
}
],
Expand Down
13 changes: 7 additions & 6 deletions 04-kNN/04-Hyper-Parameter-K/playML/kNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def fit(self, X_train, y_train):

def predict(self, X_predict):
"""给定待预测数据集X_predict,返回表示X_predict的结果向量"""
assert self._X_train is not None and self._X_train is not None, \
assert self._X_train is not None and self._y_train is not None, \
"must fit before predict!"
assert X_predict.shape[1] == self._X_train.shape[1], \
"the feature number of X_predict must be equal to X_train"
Expand All @@ -34,16 +34,17 @@ def predict(self, X_predict):
return np.array(y_predict)

def _predict(self, x):
"""给定单个待预测数据x,返回x_predict的预测结果值"""
"""给定单个待预测数据x,返回x的预测结果值"""
assert x.shape[0] == self._X_train.shape[1], \
"the feature number of x must be equal to X_train"
distances = [(sqrt(((x_train - x) ** 2).sum()), self._y_train[i])
for i, x_train in enumerate(self._X_train)]
distances.sort()

topK_y = [pair[1] for pair in distances[:self.k]]
distances = [sqrt(np.sum((x_train - x) ** 2))
for x_train in self._X_train]
nearest = np.argsort(distances)

topK_y = [self._y_train[i] for i in nearest[:self.k]]
votes = Counter(topK_y)

return votes.most_common(1)[0][0]

def __repr__(self):
Expand Down
Loading

0 comments on commit 72fda63

Please sign in to comment.