diff --git a/traffic_prophet/countmatch/growthfactor.py b/traffic_prophet/countmatch/growthfactor.py index 5ba28ee..e69254f 100644 --- a/traffic_prophet/countmatch/growthfactor.py +++ b/traffic_prophet/countmatch/growthfactor.py @@ -54,20 +54,21 @@ def get_aadt(tc): def get_wadt_py(tc): """Get WADT for all full weeks within permanent years.""" cdata = tc.data['Daily Count'].loc[tc.perm_years, :].copy() - # Overcomplicated groupby using the start of the week, as dt.week - # returns the "week ordinal". See https://stackoverflow.com/a/55890652 - cdata['Start of Week'] = ( - cdata['Date'] - - cdata['Date'].dt.dayofweek * np.timedelta64(1, 'D')) - wadt = (cdata.groupby(['Year', 'Start of Week'])['Daily Count'] - .agg(['mean', 'count'])) - # Only keep full weeks. - wadt = wadt.loc[wadt['count'] == 7, ('mean',)].reset_index() - wadt.columns = ('Year', 'Start of Week', 'WADT') - start_year = wadt['Year'].min() - wadt['Week'] = (wadt['Start of Week'].dt.week.astype(float) + - 52. * (wadt['Year'] - start_year)) + # Corrective measure, as dt.week returns the "week ordinal". See + # https://stackoverflow.com/a/55890652 + cdata['Week'] = cdata['Date'].dt.week + cdata['Month'] = cdata['Date'].dt.month + invalid_dates = (((cdata['Week'] == 1) & (cdata['Month'] == 12)) | + (cdata['Week'] == 53)) + cdata = cdata.loc[~invalid_dates, :] + + wadt = (cdata.groupby(['Year', 'Week'])['Daily Count'] + .agg(['mean', 'count'])).rename(columns={'mean': 'WADT'}) + + # Only keep full weeks. + wadt = wadt.loc[wadt['count'] == 7, ('WADT',)].reset_index() + wadt['Time'] = wadt['Week'] + 52. * (wadt['Year'] - wadt['Year'].min()) return wadt def fit_growth(self, tc): diff --git a/traffic_prophet/countmatch/tests/test_growthfactor.py b/traffic_prophet/countmatch/tests/test_growthfactor.py index 176e457..2018f3f 100644 --- a/traffic_prophet/countmatch/tests/test_growthfactor.py +++ b/traffic_prophet/countmatch/tests/test_growthfactor.py @@ -56,30 +56,25 @@ def test_get_wadt_py(self, ptc_oneyear, ptc_multiyear): wadt_nov29 = (ptc_oneyear.data['Daily Count'] .loc[(2010, 333):(2010, 339), 'Daily Count'].mean()) assert np.isclose( - (wadt_oy.loc[wadt_oy['Start of Week'] == '2010-06-14', 'WADT'] - .values[0]), wadt_jun14) + wadt_oy.loc[wadt_oy['Week'] == 24, 'WADT'].values[0], wadt_jun14) assert np.isclose( - (wadt_oy.loc[wadt_oy['Start of Week'] == '2010-11-29', 'WADT'] - .values[0]), wadt_nov29) + wadt_oy.loc[wadt_oy['Week'] == 48, 'WADT'].values[0], wadt_nov29) # For multiyear PTC, confirm we can reproduce data frame. wadt_my = self.gfb.get_wadt_py(ptc_multiyear) - ptc_multiyear_dc = ptc_multiyear.data['Daily Count'].loc[ - ptc_multiyear.perm_years, :].copy() - ptc_multiyear_dc['Week'] = ( - ptc_multiyear_dc['Date'] - - (ptc_multiyear_dc['Date'].dt.dayofweek * - np.timedelta64(1, 'D'))).dt.week - - wadt_ref = (ptc_multiyear_dc.loc[ptc_multiyear_dc['Week'] < 53, :] - .groupby(['Year', 'Week']).agg(['mean', 'count'])) - wadt_ref.columns = ('WADT', 'N_days') - wadt_ref = wadt_ref.loc[wadt_ref['N_days'] == 7, :].reset_index() - - wadt_ref['Week'] = (wadt_ref['Week'].astype(float) + - 52. * (wadt_ref['Year'] - wadt_ref['Year'].min())) - - # Check that all weeks with 7 days are in wadt. - assert np.array_equal(wadt_ref['Week'], wadt_my['Week']) - assert np.array_equal(wadt_ref['WADT'], wadt_my['WADT']) + wadt_apr26_2010 = (ptc_multiyear.data['Daily Count'] + .loc[(2010, 116):(2010, 122), :]) + wadt_my_apr26_2010 = wadt_my.loc[ + (wadt_my['Year'] == 2010) & (wadt_my['Week'] == 17), :] + assert np.allclose( + wadt_my_apr26_2010[['WADT', 'Time']].values.ravel(), + np.array([wadt_apr26_2010['Daily Count'].mean(), 17.])) + + wadt_oct15_2012 = (ptc_multiyear.data['Daily Count'] + .loc[(2012, 289):(2012, 295), :]) + wadt_my_oct15_2012 = wadt_my.loc[ + (wadt_my['Year'] == 2012) & (wadt_my['Week'] == 42), :] + assert np.allclose( + wadt_my_oct15_2012[['WADT', 'Time']].values.ravel(), + np.array([wadt_oct15_2012['Daily Count'].mean(), 146.]))