#!/usr/bin/python3
import csv
import json
import os
import sys

# from tools import data_util, generate_full_data

CSV_FILE = "HealthMap_H1N1_Global_All_Languages_2009-2012.csv"
SELF_DIR = os.path.dirname(os.path.realpath(__file__))
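
# Column indices into the HealthMap CSV. Rows are accessed positionally,
# so these must match the column order of the input file.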
FIELDS = {
    "location": 0,
    "country": 1,
    "disease": 2,
    "species": 3,
    "language": 4,
    "alert-id": 5,
    "article-title": 6,
    "source-url": 7,
    "datetime": 8,
    "alert-tag": 9,
    "suspected-cases": 10,  # Can be ignored for now
    "suspected-deaths": 11,  # Can be ignored for now
    "confirmed-cases": 12,
    "confirmed-deaths": 13,
    "ruled-out": 14,  # Rarely used for this disease
    "longitude": 15,
    "latitude": 16,
}
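

# The shared tools live in the 'common' repo, which must be checked out
# as a sibling directory of this one.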
def check_for_common_repo():
    if not os.path.exists("../common"):
        print("Please clone the 'common' repo as a sibling of this one:")
        print("cd .. && git clone git@github.com:globaldothealth/common.git")
        return False
    return True
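

# HealthMap datetimes look like "YYYY-MM-DD HH:MM:SS"; keep only the date part.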
def iso_date_from_datetime(dt):
    isodate = dt.split(" ")[0]
    assert isodate.count("-") == 2
    assert isodate.startswith("20")
    return isodate
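

# Start a new day in master_data, seeding each known location with its running
# total so that days without new cases still carry the cumulative count.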
def copy_over(master_data, new_day, current_totals):
    master_data[new_day] = {}
    for geoid in current_totals:
        master_data[new_day][geoid] = {"total": current_totals[geoid]["cases"]}
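

# Fold one CSV row into master_data (per-day, per-geoid case counts) and
# current_totals (running cumulative cases and deaths per geoid). Rows missing
# coordinates, a date, or a recognizable country are skipped with a warning.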
def process_single_row(r, master_data, current_totals):
    geoid = geo_util.make_geoid(r[FIELDS["latitude"]], r[FIELDS["longitude"]])
    country_code = country_converter.code_from_name(r[FIELDS["country"]])
    date = iso_date_from_datetime(r[FIELDS["datetime"]])
    if not geoid:
        print("WARNING No lat/lng for this row: " + str(r))
        return
    if not date:
        print("WARNING No date for this row: " + str(r))
        return
    if not country_code:
        print("WARNING Couldn't infer country in row " + str(r))
        return
    if geoid not in current_totals:
        current_totals[geoid] = {"cases": 0, "deaths": 0}
    if date not in master_data:
        copy_over(master_data, date, current_totals)
    if geoid not in master_data[date]:
        master_data[date][geoid] = {}
    cases = r[FIELDS["confirmed-cases"]].strip()
    deaths = r[FIELDS["confirmed-deaths"]].strip()
    master_data[date][geoid]["total"] = current_totals[geoid]["cases"]
    if cases == "":
        master_data[date][geoid]["new"] = 0
    else:
        master_data[date][geoid]["new"] = int(cases)
        master_data[date][geoid]["total"] += int(cases)
        current_totals[geoid]["cases"] += int(cases)
    if deaths != "":
        current_totals[geoid]["deaths"] += int(deaths)
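

# Sorting rows by ISO date (lexicographic order is chronological for
# YYYY-MM-DD) guarantees running totals are accumulated in order.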
def row_chronological_sort_function(row):
    return iso_date_from_datetime(row[FIELDS["datetime"]])


def sort_rows_chronologically(rows):
    rows.sort(key=row_chronological_sort_function)
    return rows
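

# Build master_data: {date: {geoid: {"total": N, "new": M}}}.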
def process_csv_data(rows):
    master_data = {}
    current_totals = {}
    sorted_rows = sort_rows_chronologically(rows)
    for row in sorted_rows:
        process_single_row(row, master_data, current_totals)
    output_globals(master_data, current_totals)
    return master_data
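

# Write one GeoJSON-like file per day under d/, plus d/index.txt listing them.
# Locations with a zero total and no new cases are omitted from that day's slice.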
def output_daily_slices(master_data):
    dates = sorted(master_data.keys())
    for d in dates:
        day_slice = {"date": d, "features": []}
        for g in master_data[d]:
            props = master_data[d][g]
            if props["total"] == 0 and ("new" not in props or props["new"] == 0):
                continue
            feature = {"properties": {"geoid": g,
                                      "total": props["total"]}}
            if "new" in props and props["new"] > 0:
                feature["properties"]["new"] = props["new"]
            day_slice["features"].append(feature)
        with open("d/" + d + ".json", "w") as f:
            f.write(json.dumps(day_slice, sort_keys=True))
    with open("d/index.txt", "w") as f:
        f.write("\n".join([d + ".json" for d in dates]))
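

# Write globals.json with the grand totals and the latest date seen.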
def output_globals(master_data, totals):
    grand_total = 0
    grand_total_deaths = 0
    latest_date = sorted(master_data.keys())[-1]
    for geoid in totals:
        obj = totals[geoid]
        if "cases" in obj:
            grand_total += obj["cases"]
        if "deaths" in obj:
            grand_total_deaths += obj["deaths"]
    print("Processed a total of " + str(grand_total) + " cases, "
          "latest one on " + latest_date)
    globals_obj = {"caseCount": grand_total, "deaths": grand_total_deaths,
                   "date": latest_date}
    with open("globals.json", "w") as f:
        f.write(json.dumps([globals_obj], sort_keys=True))
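

# Aggregate per-country cumulative confirmed cases for each day. The country
# code is taken from the last element of each location_info entry.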
def output_aggregates(master_data, location_info, out_file):
    aggregates = {}
    # Total cases per country
    country_total_acc = {}
    dates = sorted(master_data.keys())
    for d in dates:
        aggregates[d] = []
        # Total cases per country, only for today
        country_acc_for_today = {}
        for geoid in master_data[d]:
            country_code = location_info[geoid][-1]
            if country_code not in country_total_acc:
                country_total_acc[country_code] = 0
            if country_code not in country_acc_for_today:
                country_acc_for_today[country_code] = country_total_acc[country_code]
            if "new" in master_data[d][geoid]:
                country_acc_for_today[country_code] += int(master_data[d][geoid]["new"])
        for c in country_acc_for_today:
            aggregates[d].append(
                {"cum_conf": country_acc_for_today[c], "deaths": 0, "code": c})
            country_total_acc[c] = int(country_acc_for_today[c])
    with open(out_file, "w") as f:
        f.write(json.dumps(aggregates, sort_keys=True))
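

# Main entry point: read the CSV, build per-day data, then emit the daily
# slices, location info, per-country aggregates, and globals.json.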
def update():
    # os.system("./sanitize_location_info")
    all_rows = []
    with open(CSV_FILE) as f:
        reader = csv.reader(f)
        for row in reader:
            all_rows.append(row)
    master_data = process_csv_data(all_rows)
    # location_data = extra
    output_daily_slices(master_data)
    location_info = location_info_extractor.extract_location_info_from_csv(
        all_rows, FIELDS["country"], FIELDS["location"],
        FIELDS["latitude"], FIELDS["longitude"])
    location_info_extractor.output_location_info(location_info, "location_info.data")
    os.system("../common/tools/sanitize_location_info")
    output_aggregates(master_data, location_info, "aggregate.json")
    # Add any new daily file.
    os.system("git add d/*.json")
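

# The shared tools can only be imported after ../common is confirmed to exist
# and has been added to sys.path, hence the late imports.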
if __name__ == "__main__":
    if check_for_common_repo():
        sys.path.insert(0, "../common/tools")
        import country_converter
        import geo_util
        import location_info_extractor

        geo_util.clean()
        update()