-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgmain_archive.py
51 lines (41 loc) · 1.22 KB
/
gmain_archive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python
'''
Inefficiently fetch a full copy of the gtalug mailing list in chunks, without
hitting the 30-second PHP execution limit on the server.
'''
import requests
from time import time
import codecs
import StringIO
from get_latest import get_latest_remote_id, gmane_rss
# --- Download settings -------------------------------------------------
# Root of the Gmane download URL for the list; the start and end message
# ids are appended to it as two extra path components.
tlug_url = 'http://download.gmane.org/gmane.org.user-groups.linux.tolug'
# Size of a full-sized request, in message ids.
orig_step = 2000

# --- Mutable state used by the download loop ---------------------------
# Size of the next request; starts at the full size and is adjusted while
# the script runs.
step = orig_step
# First message id of the next range to request.
current = 0
# Most recent HTTP response; None until the first request is made.
out = None
# A request that takes at least this many seconds is assumed to have hit the
# server's PHP execution limit, so its (possibly truncated) body is discarded.
SLOW_REQUEST_SECS = 30
# Pause between retries after a failed request, so a struggling server is
# not hammered in a tight loop.
RETRY_PAUSE_SECS = 5


def chunk_end(start, size, last_id):
    """Return the end message id to request for a chunk beginning at `start`.

    The result is ``start + size`` clamped to ``last_id + 1`` so a request
    never runs further past the newest message than it has to.

    NOTE(review): assumes the end id in the download URL is exclusive --
    confirm against the Gmane download endpoint.
    """
    # min() rather than a modulo: a modulo can shrink the requested range
    # while the caller still advances by the full `size`, skipping messages.
    return min(start + size, last_id + 1)


def next_step(size, full_size, fast_enough):
    """Return the chunk size to use for the next request.

    size        -- chunk size that was just requested
    full_size   -- upper bound for the chunk size
    fast_enough -- True if the last request finished under the time limit

    After a slow request the size is halved but never drops below 1, so the
    download always makes progress.  After a fast request the size grows by
    1.5x (and by at least one id) until it is back at `full_size`.
    """
    if not fast_enough:
        return max(1, size // 2)
    return min(full_size, max(size + 1, int(1.5 * size)))


if __name__ == '__main__':
    # Local import: only the retry path of this script needs it.
    from time import sleep

    maxget = get_latest_remote_id(gmane_rss)
    # The context manager closes the archive file even if a request raises.
    with codecs.open('tlug_data.txt', 'a', 'utf-8') as target:
        while current <= maxget:
            req_start = time()
            dl_url = "%s/%d/%d" % (
                tlug_url, current, chunk_end(current, step, maxget))
            # Single-argument print(...) calls behave the same whether
            # print is a statement or a function.
            print(dl_url)
            # Drop the previous response before fetching the next one so two
            # large bodies are never held in memory at the same time.
            out = None
            out = requests.get(dl_url)
            if not out.ok:
                print("Network error, retrying %s" % (out,))
                sleep(RETRY_PAUSE_SECS)
                continue
            interval = time() - req_start
            print("Interval:  %s" % (interval,))
            fast_enough = interval < SLOW_REQUEST_SECS
            if fast_enough:
                # Only keep data from requests that finished in time; a slow
                # one is re-requested with a smaller chunk instead.
                target.write(out.text)
                current += step
            step = next_step(step, orig_step, fast_enough)