-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspending_experiment.py
100 lines (86 loc) · 3.24 KB
/
spending_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#
# spending_experiment.py
# - See how well correlated spending is with Glicko-2 score.
#
import pandas as pd
import numpy as np
from utils import append_glicko
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from matplotlib.lines import Line2D
def main():
"""
Main entry point.
"""
spending2017 = { # NCAAB spending for 2016-17 season by top-ten Glicko-2 team ID.
1242: 11126047, # Kansas
1437: 11120378, # Villanova
1246: 19180059, # Kentucky
1112: 9852596, # Arizona
1314: 21408475, # North Carolina
1211: 8874752, # Gonzaga
1181: 19507686, # Duke
1332: 9229004, # Oregon
1257: 17065364, # Louisville
1438: 8555125, # Virgina
1452: 9453611, # West Virginia
1124: 9097264, # Baylor
1374: 7269481, # Southern Methodist
1345: 8099562, # Purdue
1323: 6793853, # Notre Dame
1235: 6858775, # Iowa State
1458: 9564602, # Wisconsin
1455: 6380482, # Wichita State
1417: 9856861, # UCLA
1274: 7547589, # Miami FL
1268: 7442558, # Maryland
1153: 6970262, # Cincinnati
1139: 5489704, # Butler
1276: 16913194, # Michigan
1277: 10975215, # Michigan State
}
N = 25
remove_outliers = True
spendings = list(spending2017.values())
if remove_outliers:
# Calculate the Tukey fences.
k = 1.5
q1, q3 = np.percentile(spendings, [25, 75])
iqr = q3 - q1
outlier_lb = q1 - k * iqr
outlier_up = q3 + k * iqr
assert N == len(spending2017)
df, glicko_dict = append_glicko(pd.read_csv('./data/RegularSeasonCompactResults.csv'))
top_N = sorted(glicko_dict.items(), key=lambda x: x[1].getRating(), reverse=True)[:N]
top_N_ids, top_N_glicko = [i[0] for i in top_N], [i[1].getRating() for i in top_N]
ordered_spending = [spending2017[tid] for tid in top_N_ids]
if remove_outliers:
inlier_spending, inlier_glicko, inlier_ids = [], [], []
for tid, glicko, spending in zip(top_N_ids, top_N_glicko, ordered_spending):
if outlier_lb < spending < outlier_up:
inlier_spending.append(spending)
inlier_glicko.append(glicko)
inlier_ids.append(tid)
top_N_ids = inlier_ids
top_N_glicko = inlier_glicko
ordered_spending = inlier_spending
r_val = pearsonr(ordered_spending, top_N_glicko)
print("Corrcoef for spending and glicko: {}".format(r_val))
fig = plt.figure()
ax = fig.add_subplot(111)
for tid, glicko, spending in zip(top_N_ids, top_N_glicko, ordered_spending):
ax.scatter(spending, glicko, c='b')
z = np.polyfit(ordered_spending, top_N_glicko, 1)
p = np.poly1d(z)
plt.plot(ordered_spending, p(ordered_spending))
plt.ylabel("Glicko-2 Rating")
plt.xlabel("Men's 2016-17 Basketball Expenses")
plt.legend(
[Line2D([0], [0], color='b', lw=3)], ["r = {}".format(round(r_val[0], 4))]
)
f_name = "glickos_by_spending.png" if not remove_outliers else "glickos_by_spending_no_outliers.png"
f_name = "./plots/" + f_name
plt.savefig(f_name, dpi=300)
plt.show()
if __name__ == "__main__":
main()