notebook title updated.

liuyubobobo · Dec 8, 2017 · 55451bd · 55451bd
1 parent 69c6ab4
commit 55451bd
Show file tree

Hide file tree

Showing 42 changed files with 559 additions and 60 deletions.
diff --git a/03-Numpy-and-Matplotlib/11-Matplotlib-Basics/11-Matplotlib-Basics.ipynb b/03-Numpy-and-Matplotlib/11-Matplotlib-Basics/11-Matplotlib-Basics.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### ``matplotlib`` 基础"
+    "### 11 ``matplotlib`` 基础"
    ]
   },
   {

diff --git a/...12-Loading-Data-and-Simple-Data-Exploring/12-Loading-Data-and-Simple-Data-Exploring.ipynb b/...12-Loading-Data-and-Simple-Data-Exploring/12-Loading-Data-and-Simple-Data-Exploring.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 读取数据和简单的数据探索"
+    "### 12 读取数据和简单的数据探索"
    ]
   },
   {
@@ -21,7 +21,9 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "import matplotlib as mpl\n",

diff --git a/04-kNN/02-kNN-in-scikit-learn/02-kNN-in-scikit-learn.ipynb b/04-kNN/02-kNN-in-scikit-learn/02-kNN-in-scikit-learn.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## scikit-learn 中的 kNN "
+    "## 02 scikit-learn 中的 kNN "
    ]
   },
   {

diff --git a/04-kNN/03-Test-Our-Algorithm/03-Test-Our-Algorithm.ipynb b/04-kNN/03-Test-Our-Algorithm/03-Test-Our-Algorithm.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 测试我们的算法"
+    "## 03 测试我们的算法"
    ]
   },
   {

diff --git a/04-kNN/04-Accuracy-Score/04-Accuracy-Score.ipynb b/04-kNN/04-Accuracy-Score/04-Accuracy-Score.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 分类准确度"
+    "## 04 分类准确度"
    ]
   },
   {

diff --git a/04-kNN/05-Hyper-Parameters/05-Hyper-Parameters.ipynb b/04-kNN/05-Hyper-Parameters/05-Hyper-Parameters.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 超参数"
+    "## 05 超参数"
    ]
   },
   {

diff --git a/04-kNN/06-More-Hyper-Parameters-in-kNN-and-Grid-Search/06-More-Hyper-Parameters-in-kNN.ipynb b/04-kNN/06-More-Hyper-Parameters-in-kNN-and-Grid-Search/06-More-Hyper-Parameters-in-kNN.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 网格搜索和更多kNN中的超参数"
+    "## 06 网格搜索和更多kNN中的超参数"
    ]
   },
   {
@@ -35,7 +35,9 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "from sklearn.model_selection import train_test_split\n",

diff --git a/04-kNN/08-Scaler-in-Scikit-Learn/08-Scaler-in-Scikit-Learn.ipynb b/04-kNN/08-Scaler-in-Scikit-Learn/08-Scaler-in-Scikit-Learn.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 09 Scikit-learn中的Scaler"
+    "## 08 Scikit-learn中的Scaler"
    ]
   },
   {
@@ -22,7 +22,9 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "iris = datasets.load_iris()"
@@ -31,7 +33,9 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "X = iris.data\n",

diff --git a/...-Simple-Linear-Regression-Implementation/03-Simple-Linear-Regression-Implementation.ipynb b/...-Simple-Linear-Regression-Implementation/03-Simple-Linear-Regression-Implementation.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 实现简单线性回归法"
+    "## 03 实现简单线性回归法"
    ]
   },
   {

diff --git a/...ar-Regression/03-Simple-Linear-Regression-Implementation/playML/SimpleLinearRegression.py b/...ar-Regression/03-Simple-Linear-Regression-Implementation/playML/SimpleLinearRegression.py
@@ -4,12 +4,12 @@
 class SimpleLinearRegression1:
 
     def __init__(self):
-        """初始化kNN分类器"""
+        """初始化Simple Linear Regression 模型"""
         self.a_ = None
         self.b_ = None
 
     def fit(self, x_train, y_train):
-        """根据训练数据集x_train训练Simple Linear Regression模型"""
+        """根据训练数据集x_train,y_train训练Simple Linear Regression模型"""
         assert x_train.ndim == 1, \
             "Simple Linear Regressor can only solve single feature training data."
         assert len(x_train) == len(y_train), \
@@ -44,42 +44,3 @@ def _predict(self, x_single):
 
     def __repr__(self):
         return "SimpleLinearRegression1()"
-
-
-class SimpleLinearRegression2:
-
-    def __init__(self):
-        """初始化Simple Linear Regression模型"""
-        self.a_ = None
-        self.b_ = None
-
-    def fit(self, x_train, y_train):
-        """根据训练数据集x_train训练Simple Linear Regression模型"""
-        assert x_train.ndim == 1, \
-            "Simple Linear Regressor can only solve single feature training data."
-        assert len(x_train) == len(y_train), \
-            "the size of x_train must be equal to the size of y_train"
-
-        x_mean = np.mean(x_train)
-        y_mean = np.mean(y_train)
-
-        self.a_ = (x_train - x_mean).dot(y_train - y_mean) / (x_train - x_mean).dot(x_train - x_mean)
-        self.b_ = y_mean - self.a_ * x_mean
-
-        return self
-
-    def predict(self, x_predict):
-        """给定待预测数据集x_predict，返回表示x_predict的结果向量"""
-        assert x_predict.ndim == 1, \
-            "Simple Linear Regressor can only solve single feature training data."
-        assert self.a_ is not None and self.b_ is not None, \
-            "must fit before predict!"
-
-        return np.array([self._predict(x) for x in x_predict])
-
-    def _predict(self, x_single):
-        """给定单个待预测数据x，返回x的预测结果值"""
-        return self.a_ * x_single + self.b_
-
-    def __repr__(self):
-        return "SimpleLinearRegression2()"
diff --git a/05-Linear-Regression/04-Vectorization/04-Simple-Linear-Regression-Implementation.ipynb b/05-Linear-Regression/04-Vectorization/04-Simple-Linear-Regression-Implementation.ipynb
diff --git a/05-Linear-Regression/04-Vectorization/playML/SimpleLinearRegression.py b/05-Linear-Regression/04-Vectorization/playML/SimpleLinearRegression.py
@@ -0,0 +1,85 @@
+import numpy as np
+
+
+class SimpleLinearRegression1:
+    """Single-feature linear regression (y = a_ * x + b_) fitted with an explicit Python loop."""
+
+    def __init__(self):
+        """Initialize the Simple Linear Regression model with no fitted parameters."""
+        self.a_ = None  # slope; set by fit()
+        self.b_ = None  # intercept; set by fit()
+
+    def fit(self, x_train, y_train):
+        """Fit the Simple Linear Regression model on the training data x_train, y_train.
+
+        Computes the least-squares slope and intercept, stores them in
+        self.a_ and self.b_, and returns self.
+        """
+        # x_train must expose .ndim and be one-dimensional (one feature per sample).
+        assert x_train.ndim == 1, \
+            "Simple Linear Regressor can only solve single feature training data."
+        assert len(x_train) == len(y_train), \
+            "the size of x_train must be equal to the size of y_train"
+
+        x_mean = np.mean(x_train)
+        y_mean = np.mean(y_train)
+
+        # num accumulates sum((x - x_mean) * (y - y_mean));
+        # d accumulates sum((x - x_mean) ** 2).
+        num = 0.0
+        d = 0.0
+        for x, y in zip(x_train, y_train):
+            num += (x - x_mean) * (y - y_mean)
+            d += (x - x_mean) ** 2
+
+        # NOTE(review): d is 0 when every x equals x_mean (or x_train is empty);
+        # the division below is not guarded against that.
+        self.a_ = num / d
+        self.b_ = y_mean - self.a_ * x_mean
+
+        return self
+
+    def predict(self, x_predict):
+        """Given the data set x_predict, return an array holding the prediction for each element."""
+        assert x_predict.ndim == 1, \
+            "Simple Linear Regressor can only solve single feature training data."
+        # Both parameters are None until fit() has run.
+        assert self.a_ is not None and self.b_ is not None, \
+            "must fit before predict!"
+
+        return np.array([self._predict(x) for x in x_predict])
+
+    def _predict(self, x_single):
+        """Given a single value x_single, return its predicted value a_ * x_single + b_."""
+        return self.a_ * x_single + self.b_
+
+    def __repr__(self):
+        """Return the fixed string representation of this model."""
+        return "SimpleLinearRegression1()"
+
+
+class SimpleLinearRegression2:
+    """Single-feature linear regression (y = a_ * x + b_) fitted with dot products instead of a loop."""
+
+    def __init__(self):
+        """Initialize the Simple Linear Regression model with no fitted parameters."""
+        self.a_ = None  # slope; set by fit()
+        self.b_ = None  # intercept; set by fit()
+
+    def fit(self, x_train, y_train):
+        """Fit the Simple Linear Regression model on the training data x_train, y_train.
+
+        Computes the least-squares slope and intercept, stores them in
+        self.a_ and self.b_, and returns self.
+        """
+        # x_train must expose .ndim and be one-dimensional (one feature per sample).
+        assert x_train.ndim == 1, \
+            "Simple Linear Regressor can only solve single feature training data."
+        assert len(x_train) == len(y_train), \
+            "the size of x_train must be equal to the size of y_train"
+
+        x_mean = np.mean(x_train)
+        y_mean = np.mean(y_train)
+
+        # Slope = (x - x_mean) . (y - y_mean) / (x - x_mean) . (x - x_mean).
+        # NOTE(review): the denominator is 0 when every x equals x_mean;
+        # the division is not guarded against that.
+        self.a_ = (x_train - x_mean).dot(y_train - y_mean) / (x_train - x_mean).dot(x_train - x_mean)
+        self.b_ = y_mean - self.a_ * x_mean
+
+        return self
+
+    def predict(self, x_predict):
+        """Given the data set x_predict, return an array holding the prediction for each element."""
+        assert x_predict.ndim == 1, \
+            "Simple Linear Regressor can only solve single feature training data."
+        # Both parameters are None until fit() has run.
+        assert self.a_ is not None and self.b_ is not None, \
+            "must fit before predict!"
+
+        return np.array([self._predict(x) for x in x_predict])
+
+    def _predict(self, x_single):
+        """Given a single value x_single, return its predicted value a_ * x_single + b_."""
+        return self.a_ * x_single + self.b_
+
+    def __repr__(self):
+        """Return the fixed string representation of this model."""
+        return "SimpleLinearRegression2()"
diff --git a/...ion-Metrics-MSE-vs-MAE/playML/__init__.py → ...ssion/04-Vectorization/playML/__init__.py b/...ion-Metrics-MSE-vs-MAE/playML/__init__.py → ...ssion/04-Vectorization/playML/__init__.py
diff --git a/...gression-Metrics-MSE-vs-MAE/playML/kNN.py → ...Regression/04-Vectorization/playML/kNN.py b/...gression-Metrics-MSE-vs-MAE/playML/kNN.py → ...Regression/04-Vectorization/playML/kNN.py
diff --git a/05-Linear-Regression/04-Vectorization/playML/metrics.py b/05-Linear-Regression/04-Vectorization/playML/metrics.py
@@ -0,0 +1,9 @@
+import numpy as np
+
+
+def accuracy_score(y_true, y_predict):
+    '''Compute the accuracy between y_true and y_predict.
+
+    Returns the number of positions where the two are equal divided by
+    len(y_true). Both arguments must expose .shape (they are compared
+    on shape[0]).
+    '''
+    assert y_true.shape[0] == y_predict.shape[0], \
+        "the size of y_true must be equal to the size of y_predict"
+
+    # The element-wise comparison yields booleans; summing them counts the matches.
+    return sum(y_true == y_predict) / len(y_true)
diff --git a/...rics-MSE-vs-MAE/playML/model_selection.py → ...4-Vectorization/playML/model_selection.py b/...rics-MSE-vs-MAE/playML/model_selection.py → ...4-Vectorization/playML/model_selection.py
diff --git a/...etrics-MSE-vs-MAE/playML/preprocessing.py → .../04-Vectorization/playML/preprocessing.py b/...etrics-MSE-vs-MAE/playML/preprocessing.py → .../04-Vectorization/playML/preprocessing.py
diff --git a/...AE/04-Regression-Metrics-MSE-vs-MAE.ipynb → ...AE/05-Regression-Metrics-MSE-vs-MAE.ipynb b/...AE/04-Regression-Metrics-MSE-vs-MAE.ipynb → ...AE/05-Regression-Metrics-MSE-vs-MAE.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 评测回归算法的性能"
+    "## 05 评测回归算法的性能，MSE vs MAE"
    ]
   },
   {

diff --git a/...E-vs-MAE/playML/SimpleLinearRegression.py → ...E-vs-MAE/playML/SimpleLinearRegression.py b/...E-vs-MAE/playML/SimpleLinearRegression.py → ...E-vs-MAE/playML/SimpleLinearRegression.py
diff --git a/...egression/05-R-Squared/playML/__init__.py → ...ion-Metrics-MSE-vs-MAE/playML/__init__.py b/...egression/05-R-Squared/playML/__init__.py → ...ion-Metrics-MSE-vs-MAE/playML/__init__.py
diff --git a/...ear-Regression/05-R-Squared/playML/kNN.py → ...gression-Metrics-MSE-vs-MAE/playML/kNN.py b/...ear-Regression/05-R-Squared/playML/kNN.py → ...gression-Metrics-MSE-vs-MAE/playML/kNN.py
diff --git a/...sion-Metrics-MSE-vs-MAE/playML/metrics.py → ...sion-Metrics-MSE-vs-MAE/playML/metrics.py b/...sion-Metrics-MSE-vs-MAE/playML/metrics.py → ...sion-Metrics-MSE-vs-MAE/playML/metrics.py
diff --git a/...on/05-R-Squared/playML/model_selection.py → ...rics-MSE-vs-MAE/playML/model_selection.py b/...on/05-R-Squared/playML/model_selection.py → ...rics-MSE-vs-MAE/playML/model_selection.py
diff --git a/...sion/05-R-Squared/playML/preprocessing.py → ...etrics-MSE-vs-MAE/playML/preprocessing.py b/...sion/05-R-Squared/playML/preprocessing.py → ...etrics-MSE-vs-MAE/playML/preprocessing.py
diff --git a/...egression/05-R-Squared/05-R-Squared.ipynb → ...egression/06-R-Squared/06-R-Squared.ipynb b/...egression/05-R-Squared/05-R-Squared.ipynb → ...egression/06-R-Squared/06-R-Squared.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## R Squared (R^2)"
+    "## 06 R Squared (R^2)"
    ]
   },
   {

diff --git a/...-Squared/playML/SimpleLinearRegression.py → ...-Squared/playML/SimpleLinearRegression.py b/...-Squared/playML/SimpleLinearRegression.py → ...-Squared/playML/SimpleLinearRegression.py
diff --git a/...ession-in-scikit-learn/playML/__init__.py → ...egression/06-R-Squared/playML/__init__.py b/...ession-in-scikit-learn/playML/__init__.py → ...egression/06-R-Squared/playML/__init__.py
diff --git a/...-Regression-in-scikit-learn/playML/kNN.py → ...ear-Regression/06-R-Squared/playML/kNN.py b/...-Regression-in-scikit-learn/playML/kNN.py → ...ear-Regression/06-R-Squared/playML/kNN.py
diff --git a/...Regression/05-R-Squared/playML/metrics.py → ...Regression/06-R-Squared/playML/metrics.py b/...Regression/05-R-Squared/playML/metrics.py → ...Regression/06-R-Squared/playML/metrics.py
diff --git a/...in-scikit-learn/playML/model_selection.py → ...on/06-R-Squared/playML/model_selection.py b/...in-scikit-learn/playML/model_selection.py → ...on/06-R-Squared/playML/model_selection.py
diff --git a/...n-in-scikit-learn/playML/preprocessing.py → ...sion/06-R-Squared/playML/preprocessing.py b/...n-in-scikit-learn/playML/preprocessing.py → ...sion/06-R-Squared/playML/preprocessing.py
diff --git a/...7-Linear-Regression-in-scikit-learn.ipynb → ...8-Linear-Regression-in-scikit-learn.ipynb b/...7-Linear-Regression-in-scikit-learn.ipynb → ...8-Linear-Regression-in-scikit-learn.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## scikit-learn 中的 Linear Regression"
+    "## 08 scikit-learn 中的 Linear Regression"
    ]
   },
   {
@@ -60,7 +60,9 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "from playML.model_selection import train_test_split\n",

diff --git a/...n-scikit-learn/playML/LinearRegression.py → ...n-scikit-learn/playML/LinearRegression.py b/...n-scikit-learn/playML/LinearRegression.py → ...n-scikit-learn/playML/LinearRegression.py
diff --git a/...it-learn/playML/SimpleLinearRegression.py → ...it-learn/playML/SimpleLinearRegression.py b/...it-learn/playML/SimpleLinearRegression.py → ...it-learn/playML/SimpleLinearRegression.py
diff --git a/05-Linear-Regression/08-Linear-Regression-in-scikit-learn/playML/__init__.py b/05-Linear-Regression/08-Linear-Regression-in-scikit-learn/playML/__init__.py
diff --git a/05-Linear-Regression/08-Linear-Regression-in-scikit-learn/playML/kNN.py b/05-Linear-Regression/08-Linear-Regression-in-scikit-learn/playML/kNN.py
@@ -0,0 +1,59 @@
+import numpy as np
+from math import sqrt
+from collections import Counter
+from .metrics import accuracy_score
+
+class KNNClassifier:
+    """k-nearest-neighbours classifier that votes among the k closest training samples."""
+
+    def __init__(self, k):
+        """Initialize the kNN classifier with k neighbours (k must be at least 1)."""
+        assert k >= 1, "k must be valid"
+        self.k = k
+        self._X_train = None  # training features; set by fit()
+        self._y_train = None  # training labels; set by fit()
+
+    def fit(self, X_train, y_train):
+        """Train the kNN classifier on the training data X_train and y_train.
+
+        The arrays are stored by reference (no copy is made); returns self.
+        """
+        assert X_train.shape[0] == y_train.shape[0], \
+            "the size of X_train must be equal to the size of y_train"
+        # There must be at least k training samples to vote among.
+        assert self.k <= X_train.shape[0], \
+            "the size of X_train must be at least k."
+
+        self._X_train = X_train
+        self._y_train = y_train
+        return self
+
+    def predict(self, X_predict):
+        """Given the data set X_predict, return an array with the predicted label for each row."""
+        assert self._X_train is not None and self._y_train is not None, \
+                "must fit before predict!"
+        # X_predict is indexed on shape[1], so it must be two-dimensional.
+        assert X_predict.shape[1] == self._X_train.shape[1], \
+                "the feature number of X_predict must be equal to X_train"
+
+        y_predict = [self._predict(x) for x in X_predict]
+        return np.array(y_predict)
+
+    def _predict(self, x):
+        """Given a single sample x, return its predicted label."""
+        assert x.shape[0] == self._X_train.shape[1], \
+            "the feature number of x must be equal to X_train"
+
+        # Euclidean distance from x to every training row.
+        distances = [sqrt(np.sum((x_train - x) ** 2))
+                     for x_train in self._X_train]
+        # Training-row indices ordered from nearest to farthest.
+        nearest = np.argsort(distances)
+
+        # Labels of the k nearest rows, then the most frequent label among them.
+        topK_y = [self._y_train[i] for i in nearest[:self.k]]
+        votes = Counter(topK_y)
+
+        return votes.most_common(1)[0][0]
+
+    def score(self, X_test, y_test):
+        """Return the accuracy of the current model on the test data X_test and y_test."""
+
+        y_predict = self.predict(X_test)
+        return accuracy_score(y_test, y_predict)
+
+    def __repr__(self):
+        """Return a string of the form KNN(k=<k>)."""
+        return "KNN(k=%d)" % self.k
+
+
diff --git a/...ression-in-scikit-learn/playML/metrics.py → ...ression-in-scikit-learn/playML/metrics.py b/...ression-in-scikit-learn/playML/metrics.py → ...ression-in-scikit-learn/playML/metrics.py