-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathlakshadweep.py
91 lines (69 loc) · 2.21 KB
/
lakshadweep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import requests
import os
import sys
import csv
from urllib.parse import urlencode
from tqdm import tqdm
# Endpoint queried for the list of electoral-roll parts.
BASE_URL = 'http://ceolakshadweep.gov.in/electoralroll/partsListAjax.html'

# Default query-string parameters sent to BASE_URL. main() copies this dict
# and overrides the paging keys before building the request URL.
# NOTE(review): the key names look like the legacy DataTables server-side
# processing protocol -- confirm against the site before changing any of them.
PARAMS = {
    # Which list to fetch (presumably district / constituency numbers;
    # verify against the website).
    'distNo': '1',
    'lacNo': '1',

    # Table bookkeeping.
    'sEcho': '1',
    'iColumns': '3',
    'sColumns': '',

    # Paging window; overridden in main().
    'iDisplayStart': '0',
    'iDisplayLength': '10',

    # Sorting.
    'iSortingCols': '1',
    'iSortCol_0': '0',
    'sSortDir_0': 'asc',
    'bSortable_0': 'false',
    'bSortable_1': 'false',
    'bSortable_2': 'false',

    # Sent verbatim; kept so the request matches the original query string.
    'undefined': 'undefined',
}

# Directory the downloaded PDFs are saved into.
FOLDER = 'lakshadweep_pdfs'
def download_file(url, target, timeout=60):
    """Download ``url`` and save the response body to ``target``.

    The body is streamed in 64 KiB chunks behind a tqdm progress bar, so the
    whole file is never held in memory at once.

    Args:
        url: Address of the file to fetch.
        target: Local path the body is written to (opened in binary mode).
        timeout: Seconds handed to ``requests.get`` as its timeout, so a
            stalled server cannot hang the download forever.

    Returns:
        True when the server answered 200 and the body was written; False
        for any other status code (the status is printed).
    """
    chunk_size = 64 * 1024

    # Using the response as a context manager releases the streamed
    # connection on every path, including the non-200 branch below where
    # the body is never read.
    with requests.get(url, stream=True, timeout=timeout) as r:
        if r.status_code != 200:
            print("ERROR: status_code={0:d}".format(r.status_code))
            return False

        # Total size in bytes, 0 when the server does not report it.
        content_length = int(r.headers.get('content-length', 0))
        # Number of chunks, rounded up (integer ceiling division). An
        # unknown size becomes None so tqdm shows a plain counter instead
        # of a made-up total.
        total_chunks = -(-content_length // chunk_size) or None

        with open(target, 'wb') as f:
            for data in tqdm(r.iter_content(chunk_size), total=total_chunks,
                             unit_scale=chunk_size / 1024, unit='KB'):
                f.write(data)
        return True
def main():
    """Fetch the parts list, download every PDF and write an index CSV.

    Requests up to 1000 rows from BASE_URL in a single call, then for each
    row of the ``aaData`` array downloads the linked file into FOLDER and
    appends ``(booth_no, polling_station_name, file_name)`` to
    ``lakshadweep.csv`` in the current directory.

    Returns early, after printing the status code, when the listing request
    does not answer 200. A row is written to the CSV even if its download
    fails; download_file() reports such failures itself.
    """
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(FOLDER, exist_ok=True)

    params = PARAMS.copy()
    params['iDisplayStart'] = 0
    params['iDisplayLength'] = 1000
    listing_url = '?'.join((BASE_URL, urlencode(params)))

    # A timeout keeps an unresponsive server from hanging the script.
    r = requests.get(listing_url, timeout=60)
    if r.status_code != 200:
        print('Website returned {}'.format(r.status_code))
        return
    j = r.json()

    # newline='' is what the csv module expects of files it writes to; the
    # with-block guarantees the file is flushed and closed even when a
    # download raises part-way through.
    with open('lakshadweep.csv', 'w', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(('booth_no', 'polling_station_name', 'file_name'))

        # Each row unpacks into three fields; the third is a string that
        # carries the PDF address inside single quotes (presumably an HTML
        # anchor tag -- verify against the live response).
        for i, (no, name, link_html) in enumerate(j['aaData'], start=1):
            # 1-based progress so the final line reads N/N.
            print('{}/{}\r'.format(i, j['iTotalRecords']), end='')
            sys.stdout.flush()

            # Take the text between the last "='" and the following "'>".
            pdf_url = link_html.split('=\'')[-1].split('\'>')[0]
            file_name = pdf_url.split('/')[-1]
            path = os.path.join(FOLDER, file_name)

            download_file(pdf_url, path)
            csv_writer.writerow((no, name, file_name))


if __name__ == '__main__':
    main()