-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathknn.py
99 lines (76 loc) · 3.01 KB
/
knn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#! /usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import division
from base import BaseEstimator
from utility import sim_pearson
from sys import stderr
from heapq import nlargest
from operator import itemgetter
#from collections import Counter
# TODO: Fix these 'distance' and 'uniform' comparisons. Use an abstract factory.
# Tiny positive stand-in for a zero distance, so that an inverse-distance
# weight (1 / distance) stays finite instead of dividing by zero.
TOL = 0.000000001
class KNeighborRegressor(BaseEstimator):
    """k-nearest-neighbour estimator that predicts a numeric rating.

    The prediction is the plain mean of the neighbours' targets when
    ``r_method`` is ``'uniform'`` and their inverse-distance weighted mean
    when it is ``'distance'``.

    NOTE(review): ``self.y``, ``self.k``, ``self.sim_metric`` and
    ``self.r_method`` are read below but never assigned in this class;
    presumably ``BaseEstimator.__init__`` stores the constructor arguments
    under those names -- confirm against ``base.py``.
    """

    # FIXME use kwargs!
    def __init__(self, X, y, k=3, sim_metric=sim_pearson, r_method='uniform'):
        """Forward all arguments to ``BaseEstimator`` and print a summary."""
        super(KNeighborRegressor, self).__init__(X, y, k, sim_metric, r_method)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self)

    def calculate_rating(self, n_list):
        """Combine the neighbours' targets into one predicted rating.

        Args:
            n_list: sequence of ``(item_index, distance)`` pairs; every
                ``item_index`` is used to index ``self.y``.

        Returns:
            For ``'uniform'``: the arithmetic mean of the neighbours'
            targets.  For ``'distance'``: the mean of the targets weighted
            by ``1 / distance``, where a zero distance is replaced by
            ``TOL``.

        Raises:
            ZeroDivisionError: if ``n_list`` is empty (or the weights sum
                to zero).
            SystemExit: with status 1, after a message on stderr, if
                ``self.r_method`` is not a recognised method.
        """
        if self.r_method == 'uniform':
            return sum(self.y[neighbor[0]] for neighbor in n_list) / len(n_list)

        if self.r_method == 'distance':
            total_rating = 0
            total_weight = 0
            for index, distance in n_list:
                # A zero distance would make the weight infinite (or raise);
                # fall back to TOL exactly as KNeighborClassifier does.
                weight = 1 / (distance if distance != 0 else TOL)
                total_rating += weight * self.y[index]
                total_weight += weight
            # Weighted mean: normalise by the summed weights only.  Dividing
            # by the neighbour count as well would shrink every prediction.
            return total_rating / total_weight

        stderr.write("Unrecognized method used to calculate rating\n")
        # Same effect as the interactive-only exit(1) builtin, without
        # depending on the site module having installed it.
        raise SystemExit(1)

    def __str__(self):
        """Return a multi-line summary of k, the metric name and r_method."""
        # __name__ exists on functions in both Python 2 and 3 (func_name is
        # Python 2 only).
        s_m = self.sim_metric.__name__
        r_m = self.r_method
        return 'KNeighborRegressors\nk: %s\nsimilarity metric: %s\nrating evaluation:%s' % (self.k, s_m, r_m)
class KNeighborClassifier(BaseEstimator):
    """k-nearest-neighbour estimator that predicts a class label by voting.

    Each neighbour votes for its own label; votes count ``1`` each when
    ``r_method`` is ``'uniform'`` and ``1 / distance`` when it is
    ``'distance'``.

    NOTE(review): ``self.y``, ``self.k``, ``self.sim_metric`` and
    ``self.r_method`` are read below but never assigned in this class;
    presumably ``BaseEstimator.__init__`` stores the constructor arguments
    under those names -- confirm against ``base.py``.
    """

    # FIXME use kwargs
    def __init__(self, X, y, k=3, sim_metric=sim_pearson, r_method='uniform'):
        """Forward all arguments to ``BaseEstimator`` and print a summary."""
        super(KNeighborClassifier, self).__init__(X, y, k, sim_metric, r_method)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self)

    def calculate_rating(self, n_list):
        """Return the label that collects the largest vote among neighbours.

        Args:
            n_list: sequence of ``(item_index, distance)`` pairs; every
                ``item_index`` is used to index ``self.y``, whose entries
                must be hashable because they are used as dict keys.

        Returns:
            The label with the highest accumulated vote.  Ties are resolved
            by whichever tied label the dict yields first.

        Raises:
            IndexError: if ``n_list`` is empty (there is no vote to pick).
            SystemExit: with status 1, after a message on stderr, if
                ``self.r_method`` is not a recognised method.
        """
        if self.r_method not in ('uniform', 'distance'):
            # Report the misconfiguration the same way KNeighborRegressor
            # does, instead of failing later on an empty vote table.
            stderr.write("Unrecognized method used to calculate rating\n")
            raise SystemExit(1)

        weighted = self.r_method == 'distance'
        class_labels = dict()
        for index, distance in n_list:
            if weighted:
                # A zero distance falls back to TOL so the vote stays finite.
                vote = 1 / (distance if distance != 0 else TOL)
            else:
                vote = 1
            label = self.y[index]
            # The first vote for a label carries its real weight too.
            class_labels[label] = class_labels.get(label, 0) + vote

        # items() works on both Python 2 and 3 (iteritems is Python 2 only).
        label, _ = nlargest(1, class_labels.items(), key=itemgetter(1))[0]
        return label

    def __str__(self):
        """Return a multi-line summary of k, the metric name and r_method."""
        # __name__ exists on functions in both Python 2 and 3 (func_name is
        # Python 2 only).
        s_m = self.sim_metric.__name__
        r_m = self.r_method
        return 'KNeighborClassifier\nk: %s\nsimilarity metric: %s\nrating evaluation:%s' % (self.k, s_m, r_m)