-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathClassification.R
141 lines (118 loc) · 3.41 KB
/
Classification.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# --- Load data and initial exploration ----
# Path is machine-specific; kept as a named constant so it is changed in one place.
data_path <- "D:/Online courses/ML(Rstudio)/resources/Complete ML in R/2. Classification/House-Price.csv"
df <- read.csv(data_path, header = TRUE)
str(df)
View(df)
summary(df)
boxplot(df$n_hot_rooms)            # check spread / outliers of n_hot_rooms
pairs(~ df$Sold + df$rainfall)     # eyeball rainfall vs the Sold outcome
barplot(table(df$bus_ter))         # single-level factor -> carries no information
# Observations
# n_hot_rooms and rainfall have outliers
# n_hos_beds has missing values
# bus_ter is useless (constant column)
# --- Outlier capping and missing-value imputation ----
# Cap n_hot_rooms at 3x its 99th percentile; floor rainfall at 0.3x its 1st
# percentile (course-prescribed winsorization rules).
upper_cap <- 3 * quantile(df$n_hot_rooms, 0.99)
lower_cap <- 0.3 * quantile(df$rainfall, 0.01)
df$n_hot_rooms <- pmin(df$n_hot_rooms, upper_cap)
df$rainfall <- pmax(df$rainfall, lower_cap)
summary(df)
boxplot(df$n_hot_rooms)
pairs(~ df$Sold + df$rainfall)
# Impute missing hospital-bed counts with the column mean.
which(is.na(df$n_hos_beds))
beds_mean <- mean(df$n_hos_beds, na.rm = TRUE)
df$n_hos_beds[is.na(df$n_hos_beds)] <- beds_mean
which(is.na(df$n_hos_beds))
summary(df$n_hos_beds)
# --- Feature engineering ----
# Collapse the four distance columns into a single average-distance feature.
df$avg_dist <- (df$dist1 + df$dist2 + df$dist3 + df$dist4) / 4
# NOTE(review): dropping by positional index is fragile — assumes dist1:dist4
# occupy columns 6:9 and bus_ter is column 13; verify against the CSV schema.
df <- df[, -6:-9]
df <- df[, -13]
# install.packages only installs; the package must also be attached or
# dummy.data.frame() below fails with "could not find function".
# NOTE(review): "dummies" has been archived from CRAN; model.matrix() or
# fastDummies are modern alternatives.
install.packages("dummies")
library(dummies)
df <- dummy.data.frame(df)
# Drop one dummy per encoded factor to avoid perfect multicollinearity
# (positions assume the column layout produced above).
df <- df[, -8]
df <- df[, -13]
# --- Logistic regression (full data) ----
# Simple logistic model: Sold ~ price only.
glm_fit <- glm(Sold ~ price, data = df, family = binomial)
summary(glm_fit)
# Multiple logistic model: Sold ~ all remaining predictors.
glm_mul_fit <- glm(Sold ~ ., data = df, family = binomial)
summary(glm_mul_fit)
# Fitted probabilities from the simple model; classify at the 0.5 threshold.
glm_probs <- predict(glm_fit, type = "response")
glm_probs[1:10]
# Use nrow(df) rather than the hard-coded row count 506 so the script
# still works if the data set changes size.
glm_predict <- ifelse(glm_probs > 0.5, "YES", "NO")
View(glm_predict)
View(glm_probs)
# Confusion matrix: predicted label vs actual Sold.
table(glm_predict, df$Sold)
# --- Linear Discriminant Analysis (full data) ----
library("MASS")
lda_fit <- lda(Sold ~ ., data = df)
summary(lda_fit)
lda_fit
lda_predict <- predict(lda_fit, df)
lda_predict
lda_predict$posterior   # posterior class probabilities, one column per class
lda_predict$class       # class predicted at the default 0.5 threshold
lda_class <- lda_predict$class
lda_class
table(lda_class, df$Sold)
# Count observations where P(class "0") > 0.8, then rebuild predictions at
# that stricter threshold. nrow(df) replaces the hard-coded 506.
sum(lda_predict$posterior[, 1] > 0.8)
lda_class_b <- rep(0, nrow(df))
lda_class_b[lda_predict$posterior[, 1] > 0.8] <- 1
table(lda_class_b, df$Sold)
# --- Quadratic Discriminant Analysis (full data) ----
qda_fit <- qda(Sold ~ ., data = df)
summary(qda_fit)
qda_fit
qda_predict <- predict(qda_fit, df)
qda_predict
qda_predict$posterior
qda_predict$class
qda_class <- qda_predict$class
qda_class
table(qda_class, df$Sold)
# BUGFIX: original summed lda_predict$posterior here (copy-paste from the
# LDA section); this block is about the QDA posteriors.
sum(qda_predict$posterior[, 1] > 0.8)
# Custom 0.8 threshold on P(class "0"); nrow(df) replaces hard-coded 506.
qda_class_b <- rep(0, nrow(df))
qda_class_b[qda_predict$posterior[, 1] > 0.8] <- 1
table(qda_class_b, df$Sold)
# --- Test-train split ----
# sample.split lives in caTools; it was never attached, so the original
# script stopped here with "could not find function".
library(caTools)
set.seed(0)
# BUGFIX: sample.split expects the outcome VECTOR (it stratifies on it and
# returns one logical per row). Passing the whole data frame, as the
# original did, recycles a per-column mask across rows instead of producing
# a proper stratified row split.
split <- sample.split(df$Sold, SplitRatio = 0.8)
train_set <- subset(df, split == TRUE)
test_set <- subset(df, split == FALSE)
# --- Logistic Regression (test-train) ----
glm_train_fit <- glm(Sold ~ ., data = train_set, family = binomial)
summary(glm_train_fit)
View(test_set)
glm_test_probs <- predict(glm_train_fit, test_set, type = "response")
glm_test_probs[1:10]
# nrow(test_set) replaces the hard-coded 120 so the threshold step is
# robust to a different split size.
glm_test_predict <- rep("NO", nrow(test_set))
glm_test_predict[glm_test_probs > 0.5] <- "YES"
View(glm_test_predict)
table(glm_test_predict, test_set$Sold)
# --- LDA (test-train) ----
lda_train_fit <- lda(Sold ~ ., data = train_set)
summary(lda_train_fit)
lda_train_fit
lda_test_predict <- predict(lda_train_fit, test_set)
View(lda_test_predict)
lda_test_predict$class
lda_test_predict$posterior
lda_test_class <- lda_test_predict$class
lda_test_class
table(lda_test_class, test_set$Sold)
# Re-threshold manually; nrow(test_set) replaces the hard-coded 120.
# NOTE(review): this uses P(class "0") < 0.5, i.e. the default rule, whereas
# the full-data section used > 0.8 — confirm which threshold was intended.
lda_test_class_b <- rep(0, nrow(test_set))
lda_test_class_b[lda_test_predict$posterior[, 1] < 0.5] <- 1
table(lda_test_class_b, test_set$Sold)
# --- KNN classifier ----
# knn() lives in the "class" package; it was never attached in the original.
library(class)
# Column 16 is assumed to be Sold — NOTE(review): verify against the
# post-dummy column layout.
train_x <- train_set[, -16]
test_x <- test_set[, -16]
train_y <- train_set$Sold
test_y <- test_set$Sold
k <- 3
# Standardize features. BUGFIX: the test set must be scaled with the
# TRAINING set's center and spread; the original scaled each set with its
# own statistics, putting train and test on inconsistent scales (and
# leaking test-set information into its own preprocessing).
train_x_std <- scale(train_x)
test_x_std <- scale(test_x,
                    center = attr(train_x_std, "scaled:center"),
                    scale = attr(train_x_std, "scaled:scale"))
set.seed(0)   # knn breaks distance ties at random; fix the seed for reproducibility
knn_predict <- knn(train_x_std, test_x_std, train_y, k = k)
knn_predict
table(knn_predict, test_y)
# Repeat with k = 1 for comparison.
k <- 1
knn_predict <- knn(train_x_std, test_x_std, train_y, k = k)
table(knn_predict, test_y)