-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1.LinearregressionML_PR.py
137 lines (83 loc) · 2.7 KB
/
1.LinearregressionML_PR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# -*- coding: utf-8 -*-
"""#DAY3 ML_LinearRegression interview.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1oxCgJ_K7Jwt3hhkbo7aAt-HiE1xi_xp_
# **DAY3: ML_Linear Regression Interview**
- By PARIMAL A
- PRACTICAL
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this script needs an older release. `!pip ...` is IPython/Colab shell
# syntax (a SyntaxError in a plain .py file) and must run *before* sklearn is
# imported, so install from a shell instead:
#     pip install scikit-learn==1.1.1
from sklearn.datasets import load_boston  # Boston house-price dataset

# Alternative loader suggested by the original author (not verified here):
# import seaborn as sns
# boston = sns.load_dataset('boston')

# Load the dataset a single time and keep the returned object.
df = load_boston()
df

# Wrap the raw feature matrix in a DataFrame and label its columns with the
# dataset's feature names.
dataset = pd.DataFrame(df.data)
dataset
dataset.columns = df.feature_names
dataset.head()

# Notebook-style inspection of the target and feature-name shapes; these bare
# expressions display output in a notebook and do nothing in a script.
df.target.shape
df.feature_names.shape
# Independent features (X) are the labelled DataFrame; the dependent
# feature (y) is the dataset's `target` array.
X, y = dataset, df.target
X
"""**Train test split**"""
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)
# Notebook-style inspection of the resulting splits.
X_train
X_train.shape
X_test.shape
"""**Standardizing the dataset**"""
from sklearn.preprocessing import StandardScaler

# Standardize every feature to mean 0 and standard deviation 1. The scaling
# statistics are learned from the training split only and then reused,
# unchanged, for the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
"""## **Model Training**"""
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# Fit a linear regression model on the training split (`fit` returns the
# estimator itself, so the fitted model is what gets bound to `regr`).
regr = LinearRegression().fit(X_train, y_train)

# 10-fold cross-validation on the training split. The scorer returns the
# *negated* mean squared error for each fold, so the values are <= 0 and a
# mean closer to zero indicates a smaller error.
mse = cross_val_score(
    regr,
    X_train,
    y_train,
    scoring='neg_mean_squared_error',
    cv=10,
)
np.mean(mse)

# Predict the target for the held-out test split.
reg_pred = regr.predict(X_test)
reg_pred
import seaborn as sns

# Kernel-density plot of the prediction errors (predicted minus actual).
sns.displot(reg_pred - y_test, kind='kde')
"""*Almost -10to -30 are more paameters fall so differenceisvery less*"""
from sklearn.metrics import r2_score

# r2_score's signature is (y_true, y_pred). R^2 is not symmetric in its
# arguments, so the ground-truth values must be passed first; swapping them
# reports a different (incorrect) score.
score = r2_score(y_test, reg_pred)
score
# Persist the trained model twice: once with joblib and once with the
# standard-library pickle module.
# NOTE: both formats are pickle-based; only load such files from trusted
# sources, because unpickling can execute arbitrary code.
import joblib

joblib_file = "linear_regression_model.pkl"
joblib.dump(regr, joblib_file)
print(f"Model saved to {joblib_file}")

import pickle

pickle_file = "linear_regression_modelpk.pkl"
# The dump has to be indented under `with` so that it runs while the file is
# open; the context manager closes the file afterwards.
with open(pickle_file, 'wb') as file:
    pickle.dump(regr, file)
print(f"Model saved to {pickle_file}")
# Show the model's predictions and the true targets, first as raw arrays...
print("Predicted values:", reg_pred)
print("Actual values:", y_test)

# ...then side by side in a DataFrame, one row per test sample.
results_df = pd.DataFrame(dict(Actual=y_test, Predicted=reg_pred))
print(results_df)

# Predicted value for the first row of the test split.
predicted_y = regr.predict(X_test)[0]
predicted_y
"""##**Day3 END**"""