Skip to content

Commit

Permalink
notebook title updated.
Browse files Browse the repository at this point in the history
  • Loading branch information
liuyubobobo committed Dec 8, 2017
1 parent 69c6ab4 commit 55451bd
Show file tree
Hide file tree
Showing 42 changed files with 559 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### ``matplotlib`` 基础"
"### 11 ``matplotlib`` 基础"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 读取数据和简单的数据探索"
"### 12 读取数据和简单的数据探索"
]
},
{
Expand All @@ -21,7 +21,9 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import matplotlib as mpl\n",
Expand Down
2 changes: 1 addition & 1 deletion 04-kNN/02-kNN-in-scikit-learn/02-kNN-in-scikit-learn.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## scikit-learn 中的 kNN "
"## 02 scikit-learn 中的 kNN "
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion 04-kNN/03-Test-Our-Algorithm/03-Test-Our-Algorithm.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 测试我们的算法"
"## 03 测试我们的算法"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion 04-kNN/04-Accuracy-Score/04-Accuracy-Score.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 分类准确度"
"## 04 分类准确度"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion 04-kNN/05-Hyper-Parameters/05-Hyper-Parameters.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 超参数"
"## 05 超参数"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 网格搜索和更多kNN中的超参数"
"## 06 网格搜索和更多kNN中的超参数"
]
},
{
Expand Down Expand Up @@ -35,7 +35,9 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
Expand Down
10 changes: 7 additions & 3 deletions 04-kNN/08-Scaler-in-Scikit-Learn/08-Scaler-in-Scikit-Learn.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 09 Scikit-learn中的Scaler"
"## 08 Scikit-learn中的Scaler"
]
},
{
Expand All @@ -22,7 +22,9 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"iris = datasets.load_iris()"
Expand All @@ -31,7 +33,9 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"X = iris.data\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 实现简单线性回归法"
"## 03 实现简单线性回归法"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
class SimpleLinearRegression1:

def __init__(self):
"""初始化kNN分类器"""
"""初始化Simple Linear Regression 模型"""
self.a_ = None
self.b_ = None

def fit(self, x_train, y_train):
"""根据训练数据集x_train训练Simple Linear Regression模型"""
"""根据训练数据集x_train,y_train训练Simple Linear Regression模型"""
assert x_train.ndim == 1, \
"Simple Linear Regressor can only solve single feature training data."
assert len(x_train) == len(y_train), \
Expand Down Expand Up @@ -44,42 +44,3 @@ def _predict(self, x_single):

def __repr__(self):
return "SimpleLinearRegression1()"


class SimpleLinearRegression2:
    """Single-feature linear regression fitted with vectorized NumPy operations.

    After ``fit`` the model predicts ``a_ * x + b_``, where ``a_`` is the
    slope and ``b_`` is the intercept.
    """

    def __init__(self):
        """Create an unfitted model; ``a_`` and ``b_`` stay None until ``fit``."""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Estimate slope and intercept from the training data and return self.

        The slope is sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))^2)
        (the closed-form least-squares estimate), computed with dot products;
        the intercept is mean(y) - slope * mean(x).

        Args:
            x_train: 1-D array of feature values.
            y_train: target values, same length as ``x_train``.

        Raises:
            AssertionError: if ``x_train`` is not 1-D or the lengths differ.
        """
        assert x_train.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # Centre both vectors once and reuse the centred x in both dot products.
        x_dev = x_train - x_mean
        y_dev = y_train - y_mean

        self.a_ = x_dev.dot(y_dev) / x_dev.dot(x_dev)
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Return an array with the predicted value for every entry of x_predict.

        Args:
            x_predict: 1-D array of feature values.

        Raises:
            AssertionError: if ``x_predict`` is not 1-D or the model is unfitted.
        """
        assert x_predict.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"

        y_hat = [self._predict(x_i) for x_i in x_predict]
        return np.array(y_hat)

    def _predict(self, x_single):
        """Return the prediction ``a_ * x_single + b_`` for one sample."""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return "SimpleLinearRegression2()"

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import numpy as np


class SimpleLinearRegression1:
    """Single-feature linear regression fitted with an explicit Python loop.

    After ``fit`` the model predicts ``a_ * x + b_``, where ``a_`` is the
    slope and ``b_`` is the intercept.
    """

    def __init__(self):
        """Create an unfitted model; ``a_`` and ``b_`` stay None until ``fit``."""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Estimate slope and intercept from the training data and return self.

        The slope is sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))^2)
        (the closed-form least-squares estimate), accumulated sample by
        sample; the intercept is mean(y) - slope * mean(x).

        Args:
            x_train: 1-D array of feature values.
            y_train: target values, same length as ``x_train``.

        Raises:
            AssertionError: if ``x_train`` is not 1-D or the lengths differ.
        """
        assert x_train.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        numerator = 0.0
        denominator = 0.0
        for x_i, y_i in zip(x_train, y_train):
            # Deviation of this sample's feature from the feature mean.
            x_dev = x_i - x_mean
            numerator += x_dev * (y_i - y_mean)
            denominator += x_dev ** 2

        self.a_ = numerator / denominator
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Return an array with the predicted value for every entry of x_predict.

        Args:
            x_predict: 1-D array of feature values.

        Raises:
            AssertionError: if ``x_predict`` is not 1-D or the model is unfitted.
        """
        assert x_predict.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"

        y_hat = [self._predict(x_i) for x_i in x_predict]
        return np.array(y_hat)

    def _predict(self, x_single):
        """Return the prediction ``a_ * x_single + b_`` for one sample."""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return "SimpleLinearRegression1()"


class SimpleLinearRegression2:
    """Single-feature linear regression fitted with vectorized NumPy operations.

    After ``fit`` the model predicts ``a_ * x + b_``, where ``a_`` is the
    slope and ``b_`` is the intercept.
    """

    def __init__(self):
        """Create an unfitted model; ``a_`` and ``b_`` stay None until ``fit``."""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Estimate slope and intercept from the training data and return self.

        The slope is sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))^2)
        (the closed-form least-squares estimate), computed with dot products;
        the intercept is mean(y) - slope * mean(x).

        Args:
            x_train: 1-D array of feature values.
            y_train: target values, same length as ``x_train``.

        Raises:
            AssertionError: if ``x_train`` is not 1-D or the lengths differ.
        """
        assert x_train.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # Centre both vectors once and reuse the centred x in both dot products.
        x_dev = x_train - x_mean
        y_dev = y_train - y_mean

        self.a_ = x_dev.dot(y_dev) / x_dev.dot(x_dev)
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Return an array with the predicted value for every entry of x_predict.

        Args:
            x_predict: 1-D array of feature values.

        Raises:
            AssertionError: if ``x_predict`` is not 1-D or the model is unfitted.
        """
        assert x_predict.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"

        y_hat = [self._predict(x_i) for x_i in x_predict]
        return np.array(y_hat)

    def _predict(self, x_single):
        """Return the prediction ``a_ * x_single + b_`` for one sample."""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return "SimpleLinearRegression2()"
9 changes: 9 additions & 0 deletions 05-Linear-Regression/04-Vectorization/playML/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import numpy as np


def accuracy_score(y_true, y_predict):
    """Return the fraction of positions where y_true equals y_predict.

    Args:
        y_true: array of reference labels (expected to be non-empty).
        y_predict: array of predicted labels with the same first dimension.

    Returns:
        The number of element-wise matches divided by ``len(y_true)``.

    Raises:
        AssertionError: if the two arrays differ in their first dimension.
    """
    assert y_true.shape[0] == y_predict.shape[0], \
        "the size of y_true must be equal to the size of y_predict"

    # np.sum counts the matches inside NumPy; the built-in sum would walk
    # the boolean array one element at a time in the interpreter.
    return np.sum(y_true == y_predict) / len(y_true)
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 评测回归算法的性能"
"## 05 评测回归算法的性能,MSE vs MAE"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## R Squared (R^2)"
"## 06 R Squared (R^2)"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## scikit-learn 中的 Linear Regression"
"## 08 scikit-learn 中的 Linear Regression"
]
},
{
Expand Down Expand Up @@ -60,7 +60,9 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from playML.model_selection import train_test_split\n",
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np
from math import sqrt
from collections import Counter
from .metrics import accuracy_score

class KNNClassifier:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote."""

    def __init__(self, k):
        """Create a classifier that votes among the k nearest training samples.

        Args:
            k: number of neighbours consulted per prediction; must be >= 1.

        Raises:
            AssertionError: if ``k`` is smaller than 1.
        """
        assert k >= 1, "k must be valid"
        self.k = k
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """Store the training data and return self.

        No model is computed here; the samples are kept as-is and searched
        at prediction time.

        Args:
            X_train: 2-D array with one training sample per row.
            y_train: array with one label per row of ``X_train``.

        Raises:
            AssertionError: if the row counts differ or there are fewer
                than ``k`` training samples.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert self.k <= X_train.shape[0], \
            "the size of X_train must be at least k."

        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
        """Return an array holding the predicted label for each row of X_predict.

        Args:
            X_predict: 2-D array with as many columns as the training data.

        Raises:
            AssertionError: if called before ``fit`` or the feature counts differ.
        """
        assert self._X_train is not None and self._y_train is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to X_train"

        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        """Return the most common label among the k training samples nearest to x.

        Args:
            x: 1-D feature vector with as many entries as the training data
                has columns.

        Raises:
            AssertionError: if the feature count differs from the training data.
        """
        assert x.shape[0] == self._X_train.shape[1], \
            "the feature number of x must be equal to X_train"

        # Euclidean distance from x to every training row in one vectorized
        # pass instead of a Python-level loop over the rows.
        distances = np.sqrt(np.sum((self._X_train - x) ** 2, axis=1))
        nearest = np.argsort(distances)

        # Labels are collected in nearest-first order, so Counter breaks a
        # tied vote in favour of the label encountered first.
        votes = Counter(self._y_train[i] for i in nearest[:self.k])

        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
        """Predict labels for X_test and return accuracy_score(y_test, predictions)."""
        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)

    def __repr__(self):
        return "KNN(k=%d)" % self.k


Loading

0 comments on commit 55451bd

Please sign in to comment.