-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprediction.py
113 lines (81 loc) · 3.15 KB
/
prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
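#
# prediction.py
#
# Interactive MovieLens recommender: for a given user, predicts scores for
# movies the user has not rated, using a precomputed movie-similarity table.
#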
import neo4jrestclient
import pandas as pd
import numpy as np
from neo4jrestclient.client import GraphDatabase
import sys
from time import sleep
from prettytable import PrettyTable
# Module start-up: runs once at import time and prepares the two globals that
# main() relies on -- `graph` (Neo4j REST handle) and `a` (similarity lookups).
print("Pandas loaded")

# NOTE(review): endpoint and credentials are hard-coded here -- consider
# moving them to configuration or environment variables.
graph = GraphDatabase(
    "http://172.17.30.135:7474/db/data/",
    username="neo4j",
    password="admin",
)

print("Loading Similarity CSV now")
csv = pd.read_csv('data/similarity.csv', index_col=0)

print("CSV Lookups!")
# a[movie_id] caches the ("similar", "score") cells looked up for that movie.
# A slot keeps the int 0 when the movie has no row in the similarity table;
# slot 0 is never filled because movie ids start at 1.
a = [0] * 3884
for smov_id in xrange(1, 3884):
    # Progress marker every 500 movies.
    if smov_id % 500 == 0:
        print("%s %s" % (smov_id, "now"))
    try:
        a[smov_id] = csv.lookup([smov_id, smov_id], ["similar", "score"])
    except KeyError:
        # No similarity row for this movie id: keep the 0 placeholder.
        a[smov_id] = 0

print("Loaded Everything")
def _predict_scores(movie_exists):
    """Predict a score for every movie the user has not rated yet.

    ``movie_exists`` maps movie id -> the user's rating. For each unrated
    movie id in 1..3883 that has an entry in the module-level table ``a``,
    the score is the similarity-weighted average of the user's ratings over
    those rated movies that appear among its similar movies.

    Returns a dict mapping ``str(score)`` -> list of movie ids with that
    score. Movies for which no rated movie carries a non-zero weight are
    left out.
    """
    prediction = {}
    for movie_id in xrange(1, 3883 + 1):
        if movie_id in movie_exists:
            continue  # already rated by this user
        a1 = a[movie_id]
        if isinstance(a1, int):
            continue  # 0 placeholder: no similarity data for this movie
        entries = a1.tolist()
        similar_id = entries[0].tolist()
        similar_score = [float(str(x)) for x in entries[1].tolist()]
        similar_dict = dict(zip(similar_id, similar_score))

        num = 0.0
        deno = 0.0
        for rated_id, rating in movie_exists.items():
            weight = similar_dict.get(rated_id)
            # Missing and zero weights both contribute nothing.
            if weight:
                num += rating * weight
                deno += weight
        if deno == 0.0:
            continue
        prediction.setdefault(str(num / deno), []).append(movie_id)
    return prediction


def main():
    """Run the interactive recommendation prompt.

    Repeatedly asks for a user ID, fetches that user's ratings through the
    module-level ``graph`` handle, predicts scores for the movies the user
    has not rated, and prints the best-scoring ones as a table.

    Entering Q/q leaves the program via ``sys.exit(0)``. Input that is not a
    number, or is greater than 6000, is rejected and the prompt is shown
    again.
    """
    print("Welcome to MovieLens Recommender System")
    movies_csv = None  # read on first use, then reused for later prompts
    while True:
        user_id = raw_input(
            "Enter the User ID corresponding to which you'd like predictions or press Q/q to quit\n> "
        ).strip()
        if user_id.lower() == 'q':
            sys.exit(0)

        try:
            numeric_id = int(user_id)
        except ValueError:
            # Previously a non-numeric entry crashed the whole session.
            print("Enter a numeric User ID")
            continue
        if numeric_id > 6000:
            print("Enter User ID < 6000")
            continue

        print("Loading! \n")
        # Interpolate the validated integer, never the raw input text, so
        # nothing typed at the prompt can alter the Cypher statement.
        query = """MATCH (u:User)-[r:RATED]->(m:Movie) WHERE u.name='%s' RETURN m.name, r.score""" % numeric_id
        result = graph.query(query, data_contents=True)
        # Guard in case the client reports "no rows" as None instead of [].
        movie_exists = {int(x[0]): float(str(x[1])) for x in (result.rows or [])}

        prediction = _predict_scores(movie_exists)

        # Keys are score strings; order them by numeric value, not
        # lexicographically, so the highest score really comes first.
        key_s = sorted(prediction, key=float, reverse=True)

        if movies_csv is None:
            print("loading movies.csv now! ")
            movies_csv = pd.read_csv('data/movies.csv', index_col=0, names=["Id", "name", "genre"], encoding='iso-8859-1')

        table = PrettyTable(["Movie ID", "Movie Name", "Score"])
        count = 0
        for key in key_s:
            # A score group is listed together (at most 10 titles of it), so
            # ties can make the table slightly longer than 10 rows.
            count += len(prediction[key])
            for movie in prediction[key][:10]:
                ans = movies_csv.lookup([movie], ["name"]).tolist()
                table.add_row([str(movie), ans[0], str(key)])
            if count >= 10:
                break
        print(table)


if __name__ == '__main__':
    main()