-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbot.py
99 lines (85 loc) · 3.17 KB
/
bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pandas as pd
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier,_tree
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn.tree import export_graphviz
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
training = pd.read_csv('C:/Users/DELL/Desktop/AI-Healthcare-chatbot-master/Training.csv')
testing = pd.read_csv('C:/Users/DELL/Desktop/AI-Healthcare-chatbot-master/Testing.csv')
cols = training.columns
cols = cols[:-1]
x = training[cols]
y = training['prognosis']
y1 = y
reduced_data = training.groupby(training['prognosis']).max()
#mapping strings to numbers
le = preprocessing.LabelEncoder()
le.fit(y)
y = le.transform(y)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
testx = testing[cols]
testy = testing['prognosis']
testy = le.transform(testy)
clf1 = DecisionTreeClassifier()
clf = clf1.fit(x_train,y_train)
#print(clf.score(x_train,y_train))
#print ("cross result========")
#scores = cross_validation.cross_val_score(clf, x_test, y_test, cv=3)
#print (scores)
#print (scores.mean())
#print(clf.score(testx,testy))
importances = clf.feature_importances_
indices = np.argsort(importances)[::-1]
features = cols
#feature_importances
#for f in range(10):
# print("%d. feature %d - %s (%f)" % (f + 1, indices[f], features[indices[f]] ,importances[indices[f]]))
print("Please reply Yes or No for the following symptoms")
def print_disease(node):
#print(node)
node = node[0]
#print(len(node))
val = node.nonzero()
#print(val)
disease = le.inverse_transform(val[0])
return disease
def tree_to_code(tree, feature_names):
tree_ = tree.tree_
#print(tree_)
feature_name = [
feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
for i in tree_.feature
]
#print("def tree({}):".format(", ".join(feature_names)))
symptoms_present = []
def recurse(node, depth):
indent = " " * depth
if tree_.feature[node] != _tree.TREE_UNDEFINED:
name = feature_name[node]
threshold = tree_.threshold[node]
print(name + " ?")
ans = input()
ans = ans.lower()
if ans == 'yes':
val = 1
else:
val = 0
if val <= threshold:
recurse(tree_.children_left[node], depth + 1)
else:
symptoms_present.append(name)
recurse(tree_.children_right[node], depth + 1)
else:
present_disease = print_disease(tree_.value[node])
print( "You may have " + present_disease )
red_cols = reduced_data.columns
symptoms_given = red_cols[reduced_data.loc[present_disease].values[0].nonzero()]
print("symptoms present " + str(list(symptoms_present)))
print("symptoms given " + str(list(symptoms_given)) )
confidence_level = (1.0*len(symptoms_present))/len(symptoms_given)
print("confidence level is " + str(confidence_level))
recurse(0, 1)
tree_to_code(clf,cols)