-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprob.py
109 lines (89 loc) · 4.99 KB
/
prob.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import sys
import random
from project import load_data, show_data, rmse
def training(training_data):
    """Build the outcome tables used later by ``predict``.

    ``training_data`` is a sequence of rows (sequences of strings) whose
    first row is a header and is skipped.  From every other row this reads:
    column 1 (student id), 2 (problem hierarchy), 3 (problem name),
    5 (step name), 13 (first-attempt outcome, stored verbatim as a string
    key), the second-to-last column ("~~"-separated knowledge-component
    names) and the last column ("~~"-separated integers, one per knowledge
    component, used as the opportunity number).  All names are upper-cased
    before being used as keys.

    Returns a tuple ``(student_result, overall_result, student_kc)``:

    * ``student_result[student][hierarchy][problem][step][outcome]`` is the
      number of rows seen for that student/step with that outcome.
    * ``overall_result[hierarchy][problem][step][outcome]`` is the same
      count across all students.
    * ``student_kc[student][kc_name]`` is a list of
      ``(opportunity, outcome)`` tuples in row order.

    Raises ``IndexError`` if a row is too short or lists fewer
    opportunities than knowledge components, and ``ValueError`` if an
    opportunity is not an integer.
    """
    student_result = {}
    student_kc = {}
    overall_result = {}

    # Row 0 is the header line, so start from the second row.
    for row in training_data[1:]:
        student_id = row[1].upper()
        problem_hierarchy = row[2].upper()
        problem_name = row[3].upper()
        step_name = row[5].upper()
        is_first_correct = row[13]
        kc_names = row[-2].upper().split("~~")
        opportunities = row[-1].split("~~")

        # Per-skill history for this student.  Indexing (rather than zip)
        # keeps the IndexError on rows with too few opportunity values.
        skills = student_kc.setdefault(student_id, {})
        for idx, kc_name in enumerate(kc_names):
            skills.setdefault(kc_name, []).append(
                (int(opportunities[idx]), is_first_correct))

        # The same outcome is tallied twice: once in the student's own
        # table and once in the table shared by all students.
        for table in (student_result.setdefault(student_id, {}),
                      overall_result):
            step_counts = (table.setdefault(problem_hierarchy, {})
                                .setdefault(problem_name, {})
                                .setdefault(step_name, {}))
            step_counts[is_first_correct] = (
                step_counts.get(is_first_correct, 0) + 1)

    return student_result, overall_result, student_kc
def get_result_by_stepname(student, problem_hierarchy, problem_name, step_name):
    """Return ``(correct, incorrect)`` counts for the closest matching level.

    ``student`` is a nested mapping
    ``hierarchy -> problem -> step -> {outcome: count}`` where the outcome
    keys read here are the strings ``'1'`` (correct) and ``'0'``
    (incorrect).  The lookup falls back to coarser levels:

    * hierarchy, problem and step all known: counts of that step;
    * step unknown: counts summed over every step of the problem;
    * problem unknown: counts summed over every step of every problem in
      the hierarchy.

    A missing outcome key counts as ``0.0``.  Raises ``KeyError`` when
    ``problem_hierarchy`` is not present in ``student``.

    The tables are only read: ``dict.get`` is used instead of
    ``dict.setdefault`` so that querying never inserts zero-count entries
    into the caller's data.
    """
    if problem_hierarchy not in student:
        raise KeyError(problem_hierarchy)
    problems = student[problem_hierarchy]

    # Collect the {outcome: count} dicts at the finest level that matches.
    if problem_name not in problems:
        tallies = [counts
                   for steps in problems.values()
                   for counts in steps.values()]
    else:
        steps = problems[problem_name]
        if step_name in steps:
            tallies = [steps[step_name]]
        else:
            tallies = list(steps.values())

    correct = sum(counts.get('1', 0.0) for counts in tallies)
    incorrect = sum(counts.get('0', 0.0) for counts in tallies)
    return correct, incorrect
def get_predict_result_by_kc(student, kc_name, opportunity):
    """Score one knowledge component for one student.

    ``student`` maps a knowledge-component name to a list of
    ``(count, outcome)`` tuples, where ``outcome`` is the string ``'1'``
    (correct) or ``'0'`` (incorrect).

    The score is the product of two rates:

    * the correct rate over *all* recorded attempts of ``kc_name``;
    * the correct rate over the attempts whose ``count`` is less than or
      equal to ``opportunity``.

    Returns ``0.0`` when ``kc_name`` is unknown for the student, when no
    attempt has ``count <= opportunity``, or when no attempt carries a
    ``'1'``/``'0'`` outcome.

    NOTE: the comprehension variables are deliberately not called
    ``result``.  Under Python 2 a list-comprehension variable leaks into
    the enclosing scope, so reusing that name silently replaced the 0.0
    default with the last recorded outcome whenever the history was empty.
    """
    if kc_name not in student:
        return 0.0
    attempts = student[kc_name]

    all_outcomes = [outcome for _count, outcome in attempts]
    total_correct = float(all_outcomes.count('1'))
    total_incorrect = float(all_outcomes.count('0'))
    total_seen = total_correct + total_incorrect
    if total_seen == 0:
        # Nothing usable recorded: avoid dividing by zero.
        return 0.0
    overall_rate = total_correct / total_seen

    history = [outcome for count, outcome in attempts if count <= opportunity]
    correct = float(history.count('1'))
    incorrect = float(history.count('0'))
    seen = correct + incorrect
    if seen == 0:
        return 0.0
    return overall_rate * (correct / seen)
def predict(student_result, overall_result, student_kc, testing_data):
    """Predict a first-attempt score for every data row of ``testing_data``.

    ``student_result``, ``overall_result`` and ``student_kc`` are the three
    tables returned by ``training``.  ``testing_data`` is a sequence of rows
    whose first row is a header and is skipped.  Columns 1, 2, 3 and 5 are
    read as student id, problem hierarchy, problem name and step name
    (upper-cased).  When a row has more than 6 columns, its last two
    columns are read as "~~"-separated knowledge-component names and
    opportunity numbers; otherwise the row has no knowledge components.

    For each row two estimates are combined as ``a + b - a * b``:

    * ``from_problem``: the correct rate returned by
      ``get_result_by_stepname`` for the student, or for all students when
      the student or the hierarchy is unknown for that student;
    * ``from_kc``: the mean of ``get_predict_result_by_kc`` over the row's
      knowledge components, or ``0.0`` when the student has no skill
      history or the row lists no knowledge components.

    Returns the list of predictions in row order.

    Raises ``KeyError`` when the problem hierarchy is absent from
    ``overall_result`` as well, and ``ZeroDivisionError`` when the matched
    counts contain neither correct nor incorrect outcomes.
    """
    predictions = []

    # Row 0 is the header line.
    for row in testing_data[1:]:
        student_id = row[1].upper()
        problem_hierarchy = row[2].upper()
        problem_name = row[3].upper()
        step_name = row[5].upper()
        if len(row) > 6:
            kc_names = row[-2].upper().split("~~")
            opportunities = row[-1].split("~~")
        else:
            kc_names = []
            opportunities = []

        # Prefer the student's own history; a KeyError means either the
        # student or the hierarchy is unknown, so use the global table.
        try:
            correct, incorrect = get_result_by_stepname(
                student_result[student_id],
                problem_hierarchy, problem_name, step_name)
        except KeyError:
            correct, incorrect = get_result_by_stepname(
                overall_result, problem_hierarchy, problem_name, step_name)
        from_problem = float(correct) / (correct + incorrect)

        if student_id in student_kc and len(kc_names) > 0:
            skills = student_kc[student_id]
            kc_scores = [
                get_predict_result_by_kc(
                    skills, kc_name, int(opportunities[idx]))
                for idx, kc_name in enumerate(kc_names)
            ]
            from_kc = sum(kc_scores) / len(kc_names)
        else:
            from_kc = 0.0

        # Probabilistic OR of the two estimates.
        predictions.append(from_problem + from_kc - from_problem * from_kc)

    return predictions
def main(arg):
    """Train on one dataset and print its training and test RMSE.

    ``arg`` is an argv-style list; ``arg[1]`` is the dataset name passed
    to ``load_data`` (for example 'algebra_2005_2006').  The model is built
    from the training rows, then evaluated on both the test rows and the
    training rows; the outcome in column 13 of each non-header row is the
    target value.

    Prints a single line of the form
    ``| <dataset> | <training_error> | <predict_error> |``.
    """
    dataset = arg[1]
    training_data, testing_data, testing_result_data = load_data(dataset)
    student_result, overall_result, student_kc = training(training_data)

    test_predictions = predict(
        student_result, overall_result, student_kc, testing_data)
    predict_error = rmse(
        test_predictions, [float(row[13]) for row in testing_result_data[1:]])

    train_predictions = predict(
        student_result, overall_result, student_kc, training_data)
    training_error = rmse(
        train_predictions, [float(row[13]) for row in training_data[1:]])

    # A single pre-formatted string prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('| %s | %s | %s |' % (dataset, training_error, predict_error))
# Script entry point: hand the raw command line (program name included)
# to main(), which reads the dataset name from position 1.
if __name__ == "__main__":
    main(sys.argv)