Skip to content

Commit

Permalink
#32 fixed WADT calculator in growth factor.
Browse files (browse the repository at this point in the history)
  • Loading branch information
cczhu committed Jan 15, 2020
1 parent df815af commit 83652ce
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 35 deletions.
27 changes: 14 additions & 13 deletions traffic_prophet/countmatch/growthfactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,21 @@ def get_aadt(tc):
def get_wadt_py(tc):
"""Get WADT for all full weeks within permanent years."""
cdata = tc.data['Daily Count'].loc[tc.perm_years, :].copy()
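# Overcomplicated groupby keyed on the start of each week, because dt.week
# returns the ISO week number (the "week ordinal"), which can assign days
# around New Year to a week of the adjacent year.
# See https://stackoverflow.com/a/55890652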
cdata['Start of Week'] = (
cdata['Date'] -
cdata['Date'].dt.dayofweek * np.timedelta64(1, 'D'))
wadt = (cdata.groupby(['Year', 'Start of Week'])['Daily Count']
.agg(['mean', 'count']))
# Only keep full weeks.
wadt = wadt.loc[wadt['count'] == 7, ('mean',)].reset_index()
wadt.columns = ('Year', 'Start of Week', 'WADT')

start_year = wadt['Year'].min()
wadt['Week'] = (wadt['Start of Week'].dt.week.astype(float) +
52. * (wadt['Year'] - start_year))
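# Corrective measure: dt.week returns the ISO week number (the "week
# ordinal"), so late-December dates can be labelled week 1 and dates near
# New Year can be labelled week 53. Drop those dates before grouping by
# year and week. See https://stackoverflow.com/a/55890652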
cdata['Week'] = cdata['Date'].dt.week
cdata['Month'] = cdata['Date'].dt.month
invalid_dates = (((cdata['Week'] == 1) & (cdata['Month'] == 12)) |
(cdata['Week'] == 53))
cdata = cdata.loc[~invalid_dates, :]

wadt = (cdata.groupby(['Year', 'Week'])['Daily Count']
.agg(['mean', 'count'])).rename(columns={'mean': 'WADT'})

# Only keep full weeks.
wadt = wadt.loc[wadt['count'] == 7, ('WADT',)].reset_index()
wadt['Time'] = wadt['Week'] + 52. * (wadt['Year'] - wadt['Year'].min())
return wadt

def fit_growth(self, tc):
Expand Down
39 changes: 17 additions & 22 deletions traffic_prophet/countmatch/tests/test_growthfactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,30 +56,25 @@ def test_get_wadt_py(self, ptc_oneyear, ptc_multiyear):
wadt_nov29 = (ptc_oneyear.data['Daily Count']
.loc[(2010, 333):(2010, 339), 'Daily Count'].mean())
assert np.isclose(
(wadt_oy.loc[wadt_oy['Start of Week'] == '2010-06-14', 'WADT']
.values[0]), wadt_jun14)
wadt_oy.loc[wadt_oy['Week'] == 24, 'WADT'].values[0], wadt_jun14)
assert np.isclose(
(wadt_oy.loc[wadt_oy['Start of Week'] == '2010-11-29', 'WADT']
.values[0]), wadt_nov29)
wadt_oy.loc[wadt_oy['Week'] == 48, 'WADT'].values[0], wadt_nov29)

# For multiyear PTC, confirm we can reproduce data frame.
wadt_my = self.gfb.get_wadt_py(ptc_multiyear)

ptc_multiyear_dc = ptc_multiyear.data['Daily Count'].loc[
ptc_multiyear.perm_years, :].copy()
ptc_multiyear_dc['Week'] = (
ptc_multiyear_dc['Date'] -
(ptc_multiyear_dc['Date'].dt.dayofweek *
np.timedelta64(1, 'D'))).dt.week

wadt_ref = (ptc_multiyear_dc.loc[ptc_multiyear_dc['Week'] < 53, :]
.groupby(['Year', 'Week']).agg(['mean', 'count']))
wadt_ref.columns = ('WADT', 'N_days')
wadt_ref = wadt_ref.loc[wadt_ref['N_days'] == 7, :].reset_index()

wadt_ref['Week'] = (wadt_ref['Week'].astype(float) +
52. * (wadt_ref['Year'] - wadt_ref['Year'].min()))

# Check that all weeks with 7 days are in wadt.
assert np.array_equal(wadt_ref['Week'], wadt_my['Week'])
assert np.array_equal(wadt_ref['WADT'], wadt_my['WADT'])
wadt_apr26_2010 = (ptc_multiyear.data['Daily Count']
.loc[(2010, 116):(2010, 122), :])
wadt_my_apr26_2010 = wadt_my.loc[
(wadt_my['Year'] == 2010) & (wadt_my['Week'] == 17), :]
assert np.allclose(
wadt_my_apr26_2010[['WADT', 'Time']].values.ravel(),
np.array([wadt_apr26_2010['Daily Count'].mean(), 17.]))

wadt_oct15_2012 = (ptc_multiyear.data['Daily Count']
.loc[(2012, 289):(2012, 295), :])
wadt_my_oct15_2012 = wadt_my.loc[
(wadt_my['Year'] == 2012) & (wadt_my['Week'] == 42), :]
assert np.allclose(
wadt_my_oct15_2012[['WADT', 'Time']].values.ravel(),
np.array([wadt_oct15_2012['Daily Count'].mean(), 146.]))

0 comments on commit 83652ce

Please sign in to comment.