-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
129 lines (80 loc) · 2.83 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Author: Gitanjali Bhattacharjee
# Purpose: Compute the Sequential Rank Agreement (SRA) of items in lists per Ekstrom et al. 2019, "Sequential rank agreement
# for comparison of ranked lists." Variable names in the code match those in the paper.
import csv
import numpy as np
from itertools import chain
import matplotlib.pyplot as plt
def get_ranking(Xp, list):
return list.index(Xp)
def get_average_ranking(Xp, listOfLists): # R-bar in paper
L = len(listOfLists)
rBar = np.average([get_ranking(Xp, listOfLists[l]) for l in range(0,L)])
return rBar
def get_variance(Xp, listOfLists): # this is A-hat-sub-L in paper
L = len(listOfLists)
rBar = get_average_ranking(Xp, listOfLists)
sum = 0
for l in range(0,L):
sum += (get_ranking(Xp, listOfLists[l]) - rBar)**2
sum = sum/(L - 1)
return sum
def get_sHat(d, listOfLists, epsilon = 0, verbose = False):
# Get the set of unique items ranked less than or equal to d in an of the L lists.
sHat = []
L = len(listOfLists)
# Abbreviate lists to length d. #TODO check indexing of d -- starting at 0 or 1?
shortLists = []
for l in range(0, L):
shortLists.append(listOfLists[l][0:d])
# Get the unique items in shortLists.
listTuple = (shortLists[0])
for l in range(1,L):
listTuple = listTuple + (shortLists[1])
# Combine all the lists into one
super_list = list(chain(*shortLists))
super_set = set(super_list)
if verbose:
print(d, super_set)
sHat = super_set
return sHat
def compute_sra(d, listOfLists, verbose = False): # d is a particular rank
L = len(listOfLists)
sHat = get_sHat(d, listOfLists, verbose=verbose)
sra = 0
for p in sHat:
sra += (L - 1) * get_variance(p, listOfLists)
sra = sra/((L-1)*len(sHat))
return sra
def verification_problem(): # compare to Table 1(c) in Ekstrom et al. 2019
listOfLists = [['A', 'B', 'C', 'D', 'E'], ['A', 'C', 'D', 'B', 'E'], ['B', 'A', 'E', 'C', 'D']]
for i in range(1, 6):
sra = compute_sra(i, listOfLists, verbose=True)
print(sra)
def import_listOfLists(csv_filepath, nCols):
# fp = 'Bridges ranked - test.csv'
listOfLists = []
for col in range(0,nCols):
ranking = np.loadtxt(csv_filepath,dtype=str,delimiter=',',skiprows=1,usecols=(col,))
listOfLists.append(list(ranking))
return listOfLists
def plot_sra(csv_filepath, nCols):
listOfLists = import_listOfLists(csv_filepath, nCols)
depth = []
sra = []
nElements = len(listOfLists[0])
for i in range(1, nElements + 1):
temp = compute_sra(i, listOfLists)
sra.append(temp)
depth.append(i)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(depth, sra)
ax.set_xlabel('Depth of list')
ax.set_ylabel('Sequential rank agreement')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
plt.savefig('sra.png', bbox_inches='tight')
verification_problem()