# heatmap.py
# Forked from mshumko/running_heatmap.
import pathlib
import argparse
import numpy as np
import matplotlib.pyplot as plt
import scipy.sparse # To make a sparse lat/lon matrix
import pandas as pd
import progressbar
import folium
import folium.plugins
import gpxpy
import gpxpy.gpx
class Heatmap:
    """
    Build a 2d longitude/latitude histogram ("heatmap") from gpx tracks
    and render it as a folium HeatMap layer saved to ./data/heatmap.html.
    """

    def __init__(self, lat_bins=None, lon_bins=None, center=None,
                 box_width=10, grid_res=0.001, global_grid=False):
        """
        Initialize the heatmap class and the latitude and longitude bins.

        Parameters
        ----------
        lat_bins : float array, optional
            Latitude histogram bins. Becomes class attribute.
        lon_bins : float array, optional
            Longitude histogram bins. Becomes class attribute.
        center : float list of len(2), optional
            The center of the map in the [lon, lat] format with
            negative West longitudes. If omitted, [-77, 39] is used when
            the bins are generated here, and the midpoint of the supplied
            bins is used when lat_bins and lon_bins are given.
        box_width : float
            The size (in degrees) of the lat/lon box centered on center
            if lat_bins or lon_bins are not specified.
        grid_res : float
            The resolution of the grid in degrees. Only used when
            lat_bins or lon_bins are not specified.
        global_grid : bool
            If lat/lon bins are not specified, setting this kwarg will
            make a global lat/lon grid for your activities. This
            greatly slows down the data processing due to the size of
            the arrays, even with sparse matrices.

        Example
        -------
        h = Heatmap(center=[-111.0329, 45.660])
        # Instead of running make_heatmap_hist() you can run
        # h.load_heatmap() to load an existing ./data/heatmap.csv
        # file.
        h.make_heatmap_hist()
        h.make_map()

        Returns
        -------
        None
        """
        self.grid_res = grid_res

        if (lat_bins is None) or (lon_bins is None):
            # Honor a user-supplied center; only fall back to the default
            # location when none was given.
            self.center = center if center is not None else [-77, 39]

            if global_grid:
                self.lon_bins = np.arange(-180, 180, grid_res)
                self.lat_bins = np.arange(-90, 90, grid_res)
            else:
                half_width = box_width/2
                self.lon_bins = np.arange(self.center[0]-half_width,
                                          self.center[0]+half_width,
                                          grid_res)
                self.lat_bins = np.arange(self.center[1]-half_width,
                                          self.center[1]+half_width,
                                          grid_res)
        else:
            # Store the bins as arrays so they can be fancy-indexed later.
            self.lon_bins = np.asarray(lon_bins)
            self.lat_bins = np.asarray(lat_bins)
            if center is None and self.lon_bins.size and self.lat_bins.size:
                # make_map() needs a center; default to the grid midpoint.
                center = [
                    float(np.min(self.lon_bins) + np.max(self.lon_bins))/2,
                    float(np.min(self.lat_bins) + np.max(self.lat_bins))/2,
                ]
            self.center = center

        data_dir = pathlib.Path('./data/')
        if not data_dir.exists():
            data_dir.mkdir(exist_ok=True)
            print('Made empty data directory.')

    def make_heatmap_hist(self, gpx_path='./data/', save_heatmap=True,
                          verbose=False, gpx_pattern='*gpx'):
        """
        Makes a 2d lon-lat histogram using the gpx tracks in gpx_path.
        The gpx_pattern kwarg allows you to change the glob pattern e.g.
        wildcard (*) to match specific gpx files.

        Parameters
        ----------
        gpx_path : str, optional
            Path to gpx tracks, defaults to ./data/.
        save_heatmap : bool, optional
            Save the non-zero histogram bins to ./data/heatmap.csv.
        verbose : bool, optional
            If true, will print gpx files that could not be processed,
            typically are treadmill runs. This is useful for debugging
            if the heatmap is not generated.
        gpx_pattern : str, optional
            A glob pattern used to search gpx_path. By default it will
            match all gpx files.

        Returns
        -------
        self.heatmap : scipy.sparse.lil_matrix
            The 2d histogram with shape (len(lon_bins), len(lat_bins));
            rows are longitude bins and columns are latitude bins.
        """
        # Get the names of gpx files in the gpx_path folder.
        self._get_gpx_files(gpx_path, gpx_pattern)

        # 2d heatmap histogram.
        self.heatmap = scipy.sparse.lil_matrix(
            (len(self.lon_bins), len(self.lat_bins)), dtype='uint'
        )

        for gpx_file in progressbar.progressbar(self.gpx_files):
            with open(gpx_file) as f:
                # Empty gpx files are typically due to treadmill runs.
                # Any file that fails to parse is skipped so that a stale
                # (or undefined) gpx object is never processed.
                try:
                    gpx = gpxpy.parse(f)
                except gpxpy.gpx.GPXXMLSyntaxException as err:
                    if 'Error parsing XML: no element found:' in str(err):
                        if verbose:
                            print(f'No element file in {gpx_file}. Empty file?')
                    elif verbose:
                        print(f'Could not parse {gpx_file}: {err}')
                    continue

            # Loop through each track. Each run file should only have one.
            for track in gpx.tracks:
                # Loop over all of the track segments.
                for segment in track.segments:
                    lons = np.array([p.longitude for p in segment.points])
                    lats = np.array([p.latitude for p in segment.points])
                    # For each gpx point find the closest grid point in
                    # self.lon_bins and self.lat_bins
                    idx = self._get_closest_index(lons, lats)
                    for lon_i, lat_i in idx:
                        # Note: the += notation is not supported yet by
                        # scipy.sparse
                        self.heatmap[lon_i, lat_i] = self.heatmap[lon_i, lat_i] + 1

        if save_heatmap:
            self._save_heatmap()
        return self.heatmap

    def make_map(self, map_zoom_start=11, heatmap_max_zoom=13, heatmap_radius=10,
                 heatmap_blur=15, heatmap_min_opacity=0.7, saturation_percentile=100):
        """
        Make a heatmap html file using folium and save it to
        ./data/heatmap.html.

        Parameters
        ----------
        map_zoom_start : int, optional
            Passed into folium.Map. Sets the start zoom level, the larger
            values will make a more zoomed-in map.
        heatmap_max_zoom : int, optional
            Passed into folium.plugins.HeatMap and folium.Map to set the
            max zoom level.
        heatmap_radius : int, optional
            Passed into folium.plugins.HeatMap to determine how large the
            heatmap blobs are.
        heatmap_blur : float, optional
            Passed into folium.plugins.HeatMap and sets the amount of
            blurring.
        heatmap_min_opacity : float, optional
            Passed into folium.plugins.HeatMap and sets the minimum
            opacity of the heatmap.
        saturation_percentile : float, optional
            Apply a mask that sets all values above the
            saturation_percentile percentile (values 0 to 100) to the
            saturation_percentile's heat value. This kwarg is useful to
            make it hard to identify where you work, live, or your most
            popular running routes.

        Returns
        -------
        self.map : a folium map object with the heatmap layer.

        Raises
        ------
        AttributeError
            If neither make_heatmap_hist() nor load_heatmap() was run.
        TypeError
            If self.heatmap is neither a LIL sparse matrix nor a
            DataFrame.
        """
        if not hasattr(self, 'heatmap'):
            raise AttributeError('self.heatmap not found. Either run'
                                 ' the make_heatmap_hist() or '
                                 'load_heatmap() methods.')

        # If the heatmap is in a DataFrame or sparse matrix format, convert
        # to an array of (N-Non-Zero-Bins)*3. Columns are lon, lat, heat.
        if isinstance(self.heatmap, scipy.sparse.lil_matrix):
            heat_list = self._convert_sparse_to_lists(self.heatmap)
        elif isinstance(self.heatmap, pd.DataFrame):
            heat_list = self.heatmap.values
        else:
            raise TypeError('self.heatmap must be a scipy.sparse.lil_matrix '
                            'or a pandas DataFrame, got '
                            f'{type(self.heatmap).__name__}.')

        # Swap the columns to be in the lat, lon, heat order. Fancy
        # indexing copies, so self.heatmap is not modified by the mask.
        data = heat_list[:, [1, 0, 2]]

        if saturation_percentile < 100:
            # Apply the saturation percentile mask
            data[:, 2] = self._apply_percentile_mask(
                data[:, 2],
                saturation_percentile
            )

        # Make a terrain map. folium wants [lat, lon], hence the reversal.
        self.map = folium.Map(
            location=self.center[::-1],
            zoom_start=map_zoom_start,
            tiles='https://{s}.tile.opentopomap.org/{z}/{x}/{y}.png',
            attr=(
                'Map data: © <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a>'
                ' contributors, <a href="http://viewfinderpanoramas.org">SRTM</a> | Map style: ©'
                '<a href="https://opentopomap.org">OpenTopoMap</a> (<a href="https://creativecommons.org'
                '/licenses/by-sa/3.0/">CC-BY-SA</a>)'
            ),
            max_zoom=heatmap_max_zoom
        )

        # Make the heatmap.
        heatmap = folium.plugins.HeatMap(data,
                                         min_opacity=heatmap_min_opacity,
                                         radius=heatmap_radius,
                                         blur=heatmap_blur,
                                         max_zoom=heatmap_max_zoom)
        self.map.add_child(heatmap)  # Add the heatmap to the map object.
        self.map.save('./data/heatmap.html')
        return self.map

    def load_heatmap(self, heatmap_path='./data/heatmap.csv'):
        """
        Load the heatmap file into a Pandas DataFrame.

        Parameters
        ----------
        heatmap_path : str, optional
            The relative path to the heatmap.csv file.

        Returns
        -------
        None, creates self.heatmap attribute.
        """
        self.heatmap = pd.read_csv(heatmap_path)

    def _get_gpx_files(self, gpx_path, gpx_pattern):
        """
        Get a list of paths to all gpx files.

        Parameters
        ----------
        gpx_path: str
            The path to the gpx data.
        gpx_pattern : str
            The glob pattern applied inside gpx_path. Can be useful for
            filtering activity types.

        Returns
        -------
        None, creates self.gpx_files attribute.
        """
        self.gpx_files = list(pathlib.Path(gpx_path).glob(gpx_pattern))
        print(f'{__file__}: Found {len(self.gpx_files)} gpx files')

    @staticmethod
    def _nearest_bin_indices(bins, values):
        """
        Return, for each entry of values, the index of the closest bin.

        Ties go to the lower index, matching np.argmin. Strictly
        increasing bins are searched with np.searchsorted; any other bin
        layout (or NaN values) falls back to a per-point argmin scan.

        Parameters
        ----------
        bins : ndarray
            A 1D array of bin values.
        values : ndarray
            A 1D array of points to locate.

        Returns
        -------
        ndarray
            A 1D int array with the same length as values.
        """
        bins = np.asarray(bins)
        values = np.asarray(values, dtype=float)
        if values.size == 0:
            return np.zeros(0, dtype=int)

        sorted_bins = bins.size > 1 and bool(np.all(np.diff(bins) > 0))
        if sorted_bins and not np.any(np.isnan(values)):
            # The nearest bin is one of the two bins bracketing the value.
            upper = np.clip(np.searchsorted(bins, values), 1, bins.size - 1)
            lower = upper - 1
            pick_lower = (np.abs(values - bins[lower])
                          <= np.abs(bins[upper] - values))
            return np.where(pick_lower, lower, upper).astype(int)

        return np.array(
            [np.argmin(np.abs(bins - value)) for value in values], dtype=int
        )

    def _get_closest_index(self, lons, lats):
        """
        Given a longitude and latitude lists, calculate the closest index
        in self.lon_bins and self.lat_bins for each point. Points outside
        the grid map to the nearest edge bin.

        Parameters
        ----------
        lons : ndarray
            A 1D array of longitude points
        lats : ndarray
            A 1D array of latitude points

        Returns
        -------
        idx : a len(lons)x2 int ndarray that contains the indices of the
            self.lon_bins (column 0) and self.lat_bins (column 1) points
            that are closest to the lons and lats arrays.
        """
        assert len(lons) == len(lats), 'Longitude and latitude arrays must be the same shape.'
        return np.column_stack((
            self._nearest_bin_indices(self.lon_bins, lons),
            self._nearest_bin_indices(self.lat_bins, lats),
        ))

    def _save_heatmap(self, save_path='./data/heatmap.csv'):
        """
        Saves the heatmap to a csv file with the following three columns:
        lon, lat, heat.

        Parameters
        ----------
        save_path: str, optional
            The path where to save the csv file, by default the csv file
            is saved in './data/heatmap.csv'.

        Returns
        -------
        None
        """
        non_zero_entries = self._convert_sparse_to_lists(self.heatmap)
        df = pd.DataFrame(data=non_zero_entries, columns=['lon', 'lat', 'heat'])
        df.to_csv(save_path, index=False)

    def _convert_sparse_to_lists(self, x):
        """
        Converts the sparse matrix x into an array that contains only the
        non-zero values: a longitude column, a latitude column, and a
        heat column.

        Parameters
        ----------
        x: scipy.sparse.lil_matrix
            The sparse matrix object to convert.

        Returns
        -------
        non_zero_entries: ndarray
            A float array with (N-Non-Zero-Bins)*3 dimensions.
            The columns are lon, lat, heat.

        Raises
        ------
        ValueError
            If x is not a LIL sparse matrix.
        """
        if not isinstance(x, scipy.sparse.lil_matrix):
            raise ValueError('Heatmap is not in the LIL sparse matrix format.')

        coo_fmt = x.tocoo()
        # Drop explicitly stored zeros so only non-zero bins are reported.
        coo_fmt.eliminate_zeros()
        return np.column_stack((
            np.asarray(self.lon_bins)[coo_fmt.row],
            np.asarray(self.lat_bins)[coo_fmt.col],
            coo_fmt.data,
        )).astype(float)

    def _apply_percentile_mask(self, heat, percentile):
        """
        Applies a percentile saturation mask to the 1D heat array. Heat
        values > percentile(heat) are set to percentile(heat). The heat
        array is modified in place and also returned.

        Parameters
        ----------
        heat : ndarray
            A 1D array of heat values.
        percentile : float
            The saturation percentile between 0 and 100.

        Returns
        -------
        heat : ndarray
            An array of the same shape as heat, except with values greater
            than percentile(heat) are set to percentile(heat).
        """
        heat = np.asarray(heat)
        if heat.size == 0:
            # Nothing to saturate; a percentile of no data is undefined.
            return heat
        saturation_heat = np.percentile(heat, percentile)
        heat[heat > saturation_heat] = saturation_heat
        return heat
if __name__ == '__main__':
    # Script entry point: histogram every gpx track found in ./data/ on a
    # global 0.0005-degree grid, reload the saved csv, and render the map
    # with heat values saturated at the 90th percentile.
    running_heatmap = Heatmap(grid_res=0.0005, global_grid=True)
    running_heatmap.make_heatmap_hist(gpx_path='./data/')
    running_heatmap.load_heatmap()
    running_heatmap.make_map(saturation_percentile=90)