-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetchComments.py
92 lines (77 loc) · 2.89 KB
/
fetchComments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# This script is used to fetch comments on several projects from Github.
# It gets all 3 types of comments (commits, issues and pull requests) and writes
# the raw JSON data to a file
import ipdb
import ConfigParser
import ghhelper
import util
import json
import pandas
import sys
import os
import datetime
import time
import signal
from pprint import pprint
# INI-style credentials file read by get_from_config; must contain a
# "gh_client" section (see the main block below).
configFile = "./keysconfig.txt"
def get_from_config(config_file, section, config_tags):
    # Read an INI-style config file and return the requested option values.
    #
    # config_file -- path to a file parsable by ConfigParser
    # section     -- name of the section to read from
    # config_tags -- a single option name, or a list/tuple of option names
    #
    # Returns a dict mapping each requested option name to its string value.
    # Raises ConfigParser.NoSectionError / NoOptionError when the section or
    # an option is missing -- note ConfigParser.read() silently ignores an
    # unreadable file, so a bad path surfaces here as NoSectionError.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    # Accept tuples as well as lists; a bare string is one option name.
    if isinstance(config_tags, (list, tuple)):
        config_data = {k: config.get(section, k) for k in config_tags}
    else:
        config_data = {config_tags: config.get(section, config_tags)}
    return config_data
if __name__ == "__main__":
# Init github client
creds = get_from_config(configFile, "gh_client",["username","oauth_token"])
ghc = ghhelper.GithubClient(credentials=creds, ignoring_errors=True)
util = util.Util(ghc)
outputDir = util.commentsDir
print "Loading Travis data..."
td = util.load_travis_data(util.filteredTravisData)
projectNames = td["gh_project_name"]
projectNames = projectNames.drop_duplicates()
projectNames = projectNames.sort_values()
startTime = time.time()
print "Fetching comments..."
index=0
for label, prj in projectNames.iteritems():
dir = "%s%s/"%(outputDir, prj)
if not os.path.exists(dir):
os.makedirs(dir)
print "Progress : %s/%s : fetching comments for %s"%(index,len(projectNames),prj)
# We first get all repo comments
commentFile = "%s%s.json"%(dir, util.repoStr)
if os.path.isfile(commentFile):
print "\tSkipping repo comments. Already fetched."
else:
repoComments = util.fetch_comments(util.repoStr, prj)
print "\t%s repo comments found "%(len(repoComments))
if repoComments:
with open(commentFile, 'w') as outfile:
json.dump(repoComments, outfile)
# We then get all pull request comments
commentFile = "%s%s.json"%(dir, util.prStr)
if os.path.isfile(commentFile):
print "\tSkipping pull request comments. Already fetched."
else:
pullRequestComments = util.fetch_comments(util.prStr, prj)
print "\t%s pull request comments found"%(len(pullRequestComments))
if pullRequestComments:
with open(commentFile, 'w') as outfile:
json.dump(pullRequestComments, outfile)
# Finally, we retrieve issue comments
commentFile = "%s%s.json"%(dir, util.issueStr)
if os.path.isfile(commentFile):
print "\tSkipping issue comments. Already fetched."
else:
issueComments = util.fetch_comments(util.issueStr, prj)
print "\t%s issue comments found"%(len(issueComments))
if issueComments:
with open(commentFile, 'w') as outfile:
json.dump(issueComments, outfile)
index = index+1
print "Fetching completed!"
print "Duration : %s"%(str(datetime.timedelta(seconds=time.time()-startTime)))