-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_bidding.py
130 lines (116 loc) · 5.92 KB
/
check_bidding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/python3
'''
The goal of this script is to validate reviewer bidding
'''
import csv
import sys
# Tunable thresholds used by Reviewer.report().

# A reviewer is reported when the number of positive bids is at most this
# fraction of all papers (0.05 == 5%).
POSITIVE_BID_MIN = 0.05
# An author is reported when they received at least this fraction of the
# reviewer's positive bids.
POSITIVE_AUTHOR_BIDS_RATIO_THRESHOLD = 0.20
# An author is also reported when they received at least this many positive
# bids from the same reviewer (the comparison in report() is >=).
POSITIVE_AUTHOR_BIDS_THRESHOLD = 4
# Index into the reviewer's positive bids (sorted highest first) that sets the
# cut-off for the bidding affinity (BAI) check, i.e. roughly the top bids to
# evaluate; use -1 to check all.
BAI_MAX_POSITIVE_BIDS = 20
# Minimum preference value a positive bid needs to count for the BAI check.
BAI_MIN_POSITIVE_SCORE = 5
class Reviewer:
    '''
    Bidding record of a single reviewer.

    Positive preferences are collected per paper author via add_bid();
    report() then prints every pattern that looks suspicious.
    '''

    def __init__(self, name, email):
        '''
        Create an empty record.

        name  -- display name of the reviewer
        email -- e-mail address of the reviewer
        '''
        self.name = name
        self.email = email
        self.positive_bids = 0  # number of bids with preference > 0
        self.negpos_bids = 0  # positive bids on papers with a negative topic score
        self.bids = {}  # map with author -> list of positive preferences
        self.preferences = []  # all positive preferences, one entry per bid

    def add_bid(self, paper_authors, preference, topic_score):
        '''
        Record one bid of this reviewer.

        paper_authors -- iterable with the authors of the paper that was bid on
        preference    -- numeric preference; only values > 0 are recorded
        topic_score   -- numeric topic score of the paper for this reviewer

        Bids with preference <= 0 leave the record unchanged.
        '''
        if preference <= 0:
            return
        # positive bids
        self.positive_bids += 1
        for author in paper_authors:
            self.bids.setdefault(author, []).append(preference)
        self.preferences.append(preference)
        if topic_score < 0:
            # topic score says nay but preference says yay
            self.negpos_bids += 1

    def report(self, nr_papers, papers_per_author):
        '''
        Print the suspicious findings for this reviewer; print nothing if
        there are none.

        nr_papers         -- total number of papers
        papers_per_author -- map with author -> number of papers of that
                             author; only shown as context in the bidding
                             affinity message

        Raises KeyError if an author flagged by the bidding affinity check
        is missing from papers_per_author.
        '''
        findings = []

        # check ratio of positive bids
        if self.positive_bids <= nr_papers * POSITIVE_BID_MIN:
            # without papers there is nothing to divide by; report 0%
            share = self.positive_bids / float(nr_papers) * 100 if nr_papers else 0.0
            findings.append('- only {:.2f}% of bids were positive out of {} papers!'.format(share, nr_papers))

        # check general statistics: an author is listed (once) when they got
        # a large share of the positive bids or a large absolute number
        ratio_limit = self.positive_bids * POSITIVE_AUTHOR_BIDS_RATIO_THRESHOLD
        for author, author_bids in self.bids.items():
            count = len(author_bids)
            if count >= ratio_limit or count >= POSITIVE_AUTHOR_BIDS_THRESHOLD:
                findings.append('- {} of the positive bids went to {} ({})'.format(count, author, author_bids))

        # bidding affinity (BAI): compare the mean bid for one author with the
        # mean over the reviewer's top bids
        pos_bids = sorted(self.preferences, reverse=True)
        # The cut-off is the bid at index BAI_MAX_POSITIVE_BIDS, but never
        # below BAI_MIN_POSITIVE_SCORE. A negative index (-1 == check all) or
        # too few bids falls back to the minimum score, which also avoids
        # indexing into an empty list.
        if 0 <= BAI_MAX_POSITIVE_BIDS < len(pos_bids):
            min_bid = max(pos_bids[BAI_MAX_POSITIVE_BIDS], BAI_MIN_POSITIVE_SCORE)
        else:
            min_bid = BAI_MIN_POSITIVE_SCORE
        sampled_pos_bids = [bid for bid in pos_bids if bid >= min_bid]
        if not sampled_pos_bids:
            findings.append('- no positive bids remaining for BAI')
        else:
            BX = sum(sampled_pos_bids)
            mean_sampled = float(BX) / len(sampled_pos_bids)
            lowest_sampled = sampled_pos_bids[-1]  # list is sorted descending
            for author, author_bids in self.bids.items():
                pos_author_bids = [bid for bid in author_bids if bid >= lowest_sampled]
                if not pos_author_bids:
                    continue
                BXY = sum(pos_author_bids)
                BAI = (float(BXY) / len(pos_author_bids)) / mean_sampled
                if BAI > 2.0:
                    findings.append('- bidding affinity is high for {}: {:.2f} {} BXY {} BX {} author papers {}'.format(author, BAI, pos_author_bids, BXY, BX, papers_per_author[author]))

        if findings:
            print('Reviewer {} <{}> has {} positive bids and {} positive bids with negative topic score'.format(self.name, self.email, self.positive_bids, self.negpos_bids))
            print('\n'.join(findings))
def read_papers(papers_csv):
    '''
    Read the authors CSV and group the authors by paper.

    papers_csv -- path of a CSV file with one row per (paper, author) pair;
                  the header row must start with paper,title,first,last,email

    Returns a tuple (paper_authors, papers_per_author):
    paper_authors     -- map with paper id -> list of 'first last <email>'
    papers_per_author -- map with 'first last <email>' -> number of rows
                         (papers) listing that author

    Raises ValueError if the header row does not have the expected columns.
    '''
    paper_authors = {}
    papers_per_author = {}
    # newline='' lets the csv module handle line endings itself
    with open(papers_csv, 'r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                continue  # blank line
            if row[0] == 'paper':
                # validate data format
                if row[1:5] != ['title', 'first', 'last', 'email']:
                    raise ValueError('unexpected header in {}: {}'.format(papers_csv, row))
                continue
            author = '{} {} <{}>'.format(row[2], row[3], row[4])
            # group by paper id, so rows of one paper need not be adjacent
            # and a file without data rows yields no papers at all
            paper_authors.setdefault(row[0], []).append(author)
            papers_per_author[author] = papers_per_author.get(author, 0) + 1
    return paper_authors, papers_per_author
def read_prefs(allprefs_csv, paper_authors):
    '''
    Read the preferences CSV and collect the bids of every reviewer.

    allprefs_csv  -- path of a CSV file with one row per (paper, reviewer)
                     pair; the header row must have given_name, family_name
                     and email in columns 2-4 and preference, topic_score in
                     columns 6-7 (counting from 0)
    paper_authors -- map with paper id -> list of authors, as returned by
                     read_papers()

    Returns a map with reviewer email -> Reviewer. Empty preference or topic
    score cells count as 0.

    Raises ValueError if the header row does not have the expected columns
    and KeyError if a row refers to a paper missing from paper_authors.
    '''
    expected_header = {2: 'given_name', 3: 'family_name', 4: 'email', 6: 'preference', 7: 'topic_score'}
    reviewers = {}
    # newline='' lets the csv module handle line endings itself
    with open(allprefs_csv, 'r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                continue  # blank line
            if row[0] == 'paper':
                # validate data format
                if len(row) < 8 or any(row[col] != title for col, title in expected_header.items()):
                    raise ValueError('unexpected header in {}: {}'.format(allprefs_csv, row))
                continue
            email = row[4]
            if email not in reviewers:
                name = '{} {}'.format(row[2], row[3])
                reviewers[email] = Reviewer(name, email)
            preference = int(row[6]) if row[6] != '' else 0
            topic_score = int(row[7]) if row[7] != '' else 0
            try:
                authors = paper_authors[row[0]]
            except KeyError:
                # same exception type as a plain lookup, but say what is wrong
                raise KeyError('paper {} in {} has no known authors'.format(row[0], allprefs_csv)) from None
            reviewers[email].add_bid(authors, preference, topic_score)
    return reviewers
if __name__ == '__main__':
    # Command line entry point: expects the authors CSV and the preferences
    # CSV, then prints a report for every reviewer with suspicious bids.
    if len(sys.argv) != 3:
        print('Check allpref data for bidding outliers. Run the script with: python3 {} hotcrp-authors.csv hotcrp-allprefs.csv'.format(sys.argv[0]))
        # sys.exit() instead of exit(): the latter is added by the site
        # module for interactive use and is not guaranteed to exist
        sys.exit(1)
    paper_authors, papers_per_author = read_papers(sys.argv[1])
    reviewers = read_prefs(sys.argv[2], paper_authors)
    # reviewers are keyed by email; report them in sorted email order
    for reviewer in sorted(reviewers):
        reviewers[reviewer].report(len(paper_authors), papers_per_author)