-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathexport.py
executable file
·53 lines (40 loc) · 1.97 KB
/
export.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import urllib, os, json, datetime, requests, urlparse
import utils
# Export script: for every FOIA request id listed in `request_pks`, fetch the
# request from the API, download each file attached to its communications, and
# save the text of each communication. Everything for a request is written to
# a directory named after the request id, relative to the working directory.
#
# Targets Python 2 (see the shebang). print is called in its single-argument
# parenthesised form, which produces the same output on Python 2.
url = utils.API_URL
token = utils.get_api_key()
headers = utils.get_headers(token)

# Stuff borrowed from http://stackoverflow.com/questions/6373094/how-to-download-a-file-to-a-specific-path-in-the-server-python
request_pks = [10565]

for pk in request_pks:
    pk_dir = str(pk)
    print("Working on request " + pk_dir)

    r = requests.get(url + 'foia/%d/' % pk, headers=headers)
    # Surface HTTP failures as an explicit error instead of a confusing
    # KeyError/ValueError when the payload is read below.
    r.raise_for_status()
    json_data = r.json()

    communications = json_data['communications']
    if communications is None:
        print("It looks like there were no communications here.")
        # Use an empty list so the loop below is skipped instead of failing
        # with a TypeError when iterating None.
        communications = []

    # Checks to see if the folder exists.
    if not os.path.exists(pk_dir):
        print("Creating directory /" + pk_dir)
        os.makedirs(pk_dir)
    else:
        print("The directory already exists. Phew.")

    for comm_num, communication in enumerate(communications):
        # The communication's date prefixes every file name saved for it.
        date_prefix = str(communication["date"])

        for attachment in communication['files']:
            print("Trying to grab a file from communication " + str(comm_num))
            # Kept in its own name: reusing `url` here would overwrite the API
            # base URL needed for the next request id.
            file_url = attachment['ffile']
            # The download's file name is the last path segment of its URL;
            # grabbed from http://stackoverflow.com/questions/2795331/python-download-without-supplying-a-filename
            url_parts = urlparse.urlsplit(file_url)
            filename = url_parts.path.split("/")[-1]
            filename = date_prefix + " " + filename
            print(filename)
            urllib.urlretrieve(file_url, pk_dir + '/' + filename)

        print("Trying to grab the text from the communication")
        # eventually this should save to pdf, maybe using this: https://github.com/mstamy2/PyPDF2/tree/master/Sample_Code
        # NOTE: encoding to ASCII with 'ignore' silently drops any non-ASCII
        # characters from the saved text.
        communication_text = communication["communication"].encode('ascii', 'ignore')
        text_path = pk_dir + '/' + date_prefix + " Communication.txt"
        # The context manager closes the file even if the write fails.
        with open(text_path, "w") as text_file:
            text_file.write(communication_text)