forked from olahol/iso-3166-2.js
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.py
54 lines (46 loc) · 1.64 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import csv
import json
import re
def unicode_csv_reader(utf8_data, **kwargs):
    """Yield each CSV row from *utf8_data* as a list of text strings.

    Rows are parsed with ``csv.reader`` using the Excel dialect; any extra
    keyword arguments are forwarded to ``csv.reader`` unchanged.

    On Python 2 the csv module yields byte strings, which are decoded as
    UTF-8 here.  On Python 3 the cells are already text and pass through
    untouched, so this helper does not rely on the Python 2-only
    ``unicode`` builtin.
    """
    csv_reader = csv.reader(utf8_data, dialect=csv.excel, **kwargs)
    for row in csv_reader:
        # `bytes` is an alias of `str` on Python 2, so this decodes exactly
        # the cells the csv module produced there.
        yield [
            cell.decode("utf-8") if isinstance(cell, bytes) else cell
            for cell in row
        ]
# Countries and their subdivisions.
#
# Each data.csv row is read positionally: column 0 is the country name,
# 1 the subdivision code, 2 the subdivision name, 3 the subdivision type
# and 4 the country code.
with open("data.csv", "r") as csv_file:
    countries = {}
    for row in unicode_csv_reader(csv_file):
        country_name = row[0]
        subdivision_code = row[1]
        # Remove bracketed text (from the first "[" through the last "]").
        subdivision_name = re.sub(r"\[.*\]", "", row[2])
        # Named subdivision_type so the builtin `type` is not shadowed.
        subdivision_type = row[3]
        country_code = row[4]

        if country_code not in countries:
            countries[country_code] = {"name": country_name.strip(), "sub": {}}

        countries[country_code]["sub"][subdivision_code] = {
            "name": subdivision_name.strip(),
            "type": subdivision_type.strip(),
        }

# Total the entries under each country's "sub" mapping.  Counting the keys of
# the country record itself would always add 2 ("name" and "sub") per country
# and report a wrong subdivision total.
subdivisions = sum(len(country["sub"]) for country in countries.values())

print("Countries: %d, Subdivisions: %d" % (len(countries), subdivisions))

# data.js is created (or truncated) here; the output is a single JavaScript
# assignment statement wrapping the JSON dump.
with open("data.js", "w") as json_file:
    print("Dumping subdivisions to data.js")
    json_file.write("var data = ")
    json.dump(countries, json_file)
    json_file.write(";")
# Lookup table from alpha-3 country codes to alpha-2 country codes.
#
# Each codes.csv row is read positionally: column 0 is the alpha-2 code and
# column 1 is the alpha-3 code.
codes = {}
with open("codes.csv", "r") as code_source:
    for record in unicode_csv_reader(code_source):
        codes[record[1]] = record[0]

print("Country codes: %d" % len(codes))

# Opened in append mode: the "var codes" statement is added to the end of
# whatever data.js already contains.
with open("data.js", "a") as js_out:
    print("Dumping codes to data.js")
    js_out.write("var codes = ")
    json.dump(codes, js_out)
    js_out.write(";")