Skip to content

Commit

Permalink
Chapter 05 prep completed.
Browse files Browse the repository at this point in the history
  • Loading branch information
liuyubobobo committed Dec 7, 2017
1 parent c2c3345 commit 69c6ab4
Show file tree
Hide file tree
Showing 12 changed files with 1,077 additions and 1 deletion.
2 changes: 1 addition & 1 deletion 05-Linear-Regression/05-R-Squared/05-R-Squared.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"outputs": [],
"source": [
"boston = datasets.load_boston()\n",
"x = boston.data[:,5] # 只使用房间数量这个维度\n",
"x = boston.data[:,5] # 只使用房间数量这个特征\n",
"y = boston.target\n",
"\n",
"x = x[y < 50.0]\n",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## scikit-learn 中的 Linear Regression"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import datasets"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"boston = datasets.load_boston()\n",
"\n",
"X = boston.data\n",
"y = boston.target\n",
"\n",
"X = X[y < 50.0]\n",
"y = y[y < 50.0]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(490, 13)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from playML.model_selection import train_test_split\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 使用我们自己制作的 Linear Regression\n",
"\n",
"代码参见 [这里](playML/LinearRegression.py)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from playML.LinearRegression import LinearRegression\n",
"\n",
"reg = LinearRegression()\n",
"reg.fit_normal(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ -1.18919477e-01, 3.63991462e-02, -3.56494193e-02,\n",
" 5.66737830e-02, -1.16195486e+01, 3.42022185e+00,\n",
" -2.31470282e-02, -1.19509560e+00, 2.59339091e-01,\n",
" -1.40112724e-02, -8.36521175e-01, 7.92283639e-03,\n",
" -3.81966137e-01])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg.coef_"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"34.161435496224712"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg.intercept_"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.81298026026584658"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg.score(X_test, y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### scikit-learn中的线性回归"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"\n",
"sk_reg = LinearRegression()\n",
"sk_reg.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ -1.18919477e-01, 3.63991462e-02, -3.56494193e-02,\n",
" 5.66737830e-02, -1.16195486e+01, 3.42022185e+00,\n",
" -2.31470282e-02, -1.19509560e+00, 2.59339091e-01,\n",
" -1.40112724e-02, -8.36521175e-01, 7.92283639e-03,\n",
" -3.81966137e-01])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sk_reg.coef_"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"34.161435496246924"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sk_reg.intercept_"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.81298026026584758"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sk_reg.score(X_test, y_test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import numpy as np
from .metrics import r2_score


class LinearRegression:
    """Multi-feature linear regression solved in closed form.

    After a successful ``fit_normal`` call:

    * ``intercept_`` is the first entry of the fitted parameter vector,
    * ``coef_`` holds the remaining entries (one per feature column),
    * ``_theta`` is the full parameter vector (intercept first).
    """

    def __init__(self):
        """Initialize an unfitted Linear Regression model."""
        self.coef_ = None
        self.intercept_ = None
        self._theta = None

    def fit_normal(self, X_train, y_train):
        """Train the model on X_train, y_train via the normal equation.

        Args:
            X_train: 2-D array of training samples, one row per sample.
            y_train: array of targets with one entry per row of X_train.

        Returns:
            self, so calls can be chained.

        Raises:
            AssertionError: if X_train and y_train have different sizes
                along the first axis.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        # Prepend a column of ones so the intercept is learned as theta[0].
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])

        # pinv(X_b).dot(y) is a solution of the normal equation
        # X_b^T X_b theta = X_b^T y.  For full-column-rank data it equals
        # inv(X_b^T X_b) X_b^T y; unlike the explicit inverse it does not
        # fail when features are collinear (singular X_b^T X_b) and it avoids
        # forming X_b^T X_b, which squares the condition number.
        self._theta = np.linalg.pinv(X_b).dot(y_train)

        self.intercept_ = self._theta[0]
        self.coef_ = self._theta[1:]

        return self

    def predict(self, X_predict):
        """Return the vector of predictions for the samples in X_predict.

        Args:
            X_predict: 2-D array with the same number of columns as the
                training data.

        Returns:
            The product of the ones-augmented X_predict with the fitted
            parameter vector (one prediction per row).

        Raises:
            AssertionError: if the model has not been fitted, or if the
                number of columns differs from ``len(self.coef_)``.
        """
        assert self.intercept_ is not None and self.coef_ is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        # Same ones-column augmentation as in fit_normal.
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Score the model on the test set X_test, y_test.

        Predicts on X_test and returns ``r2_score(y_test, y_predict)``,
        where ``r2_score`` is imported from the package's ``metrics`` module.
        """
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression()"
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import numpy as np
from .metrics import r2_score


class SimpleLinearRegression:
    """Single-feature linear regression ``y = a_ * x + b_``.

    ``a_`` (slope) and ``b_`` (intercept) are ``None`` until ``fit`` is
    called.
    """

    def __init__(self):
        """Initialize an unfitted Simple Linear Regression model."""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Train the model on the 1-D training data x_train, y_train.

        Args:
            x_train: 1-D array of feature values.
            y_train: array of targets with the same length as x_train.

        Returns:
            self, so calls can be chained.

        Raises:
            AssertionError: if x_train is not 1-D or the lengths differ.
        """
        assert x_train.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # Center x once and reuse it for both the numerator and denominator.
        x_centered = x_train - x_mean

        # NOTE: if every value in x_train is identical the denominator is
        # zero and a_ / b_ come out as NaN (NumPy emits a RuntimeWarning).
        self.a_ = x_centered.dot(y_train - y_mean) / x_centered.dot(x_centered)
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Return the vector of predictions for the 1-D array x_predict.

        Raises:
            AssertionError: if x_predict is not 1-D or the model has not
                been fitted.
        """
        assert x_predict.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"

        # _predict is plain arithmetic, so it broadcasts over the whole
        # array; no Python-level loop over the elements is needed.
        return self._predict(x_predict)

    def _predict(self, x_single):
        """Return ``a_ * x_single + b_`` for a single value (or an array)."""
        return self.a_ * x_single + self.b_

    def score(self, X_test, y_test):
        """Score the model on the test set X_test, y_test.

        Predicts on X_test and returns ``r2_score(y_test, y_predict)``,
        where ``r2_score`` is imported from the package's ``metrics`` module.
        """
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "SimpleLinearRegression()"
Empty file.
Loading

0 comments on commit 69c6ab4

Please sign in to comment.