Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added singleton pattern to forecast.py #94

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/covidify/data_visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@
import matplotlib.pyplot as plt
from covidify.utils.utils import replace_arg_score


# plt settings
# Default font properties handed to matplotlib's rc configuration below.
font = {'weight' : 'bold',
'size' : 22}
plt.rc('font', **font)
# Select matplotlib's 'ggplot' style sheet.
plt.style.use('ggplot')


# Parse the command line using the module docstring as the docopt usage text.
args = docopt.docopt(__doc__)
# Value of the --output_folder option.
out = args['--output_folder']
Expand Down
118 changes: 63 additions & 55 deletions src/covidify/forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import matplotlib.pyplot as plt
from dateutil.parser import parse
from pmdarima.arima import auto_arima
from datetime import datetime, date, time
from datetime import datetime, date, time
from sklearn.metrics import mean_squared_error
from covidify.config import PERC_SPLIT, FIG_SIZE

Expand All @@ -37,7 +37,7 @@
# Parse the command line using the module docstring as the docopt usage text.
args = docopt.docopt(__doc__)
# Value of the --output_folder option.
out = args['--output_folder']
# Value of the --num_days option converted to int; used below as the number
# of future points to forecast.
days_in_future = int(args['--num_days'])

# file paths
# Saved figures go under <out>/reports/images.
image_dir = os.path.join(out,'reports', 'images')
# File name carries today's date; datetime.date(datetime.now()) is the same
# as datetime.now().date().
trend_file = 'trend_{}.csv'.format(datetime.date(datetime.now()))
Expand All @@ -55,62 +55,70 @@
# Dates produced by pd.date_range(train_start, train_end), formatted as
# 'YYYY-MM-DD' strings (train_start / train_end are defined outside this view).
train_period = [d.strftime('%Y-%m-%d') for d in pd.date_range(train_start, train_end)]
# Same formatting for the range forecast_start .. forecast_end.
forecast_period = [d.strftime('%Y-%m-%d') for d in pd.date_range(forecast_start, forecast_end)]

class SingletonMeta(type):
    '''
    Metaclass that gives every class using it at most one instance.

    The first call to such a class builds the instance; every later call
    returns that same object and the arguments of the later call are ignored.
    '''

    # Maps each class created with this metaclass to its single instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        '''
        Return the cached instance of cls, creating it on the first call.

        args / kwargs are forwarded to the normal construction path only when
        the instance does not exist yet.
        '''
        if cls not in cls._instances:
            # type.__call__ runs the regular __new__ / __init__ sequence.
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]
class Singleton(metaclass=SingletonMeta):
    '''
    Holder for the ARIMA forecasting and plotting helpers.

    Both helpers are static methods: they are called on the class itself
    (Singleton.forecast(...)) and do not use any instance state.
    '''

    # Executed once, while the class body runs: make sure the folder that
    # receives the saved figure exists.
    if not os.path.exists(image_dir):
        print('Creating reports folder...')
        # os.makedirs creates missing parent folders like `mkdir -p`, without
        # handing the path to a shell and without depending on a Unix command.
        os.makedirs(image_dir, exist_ok=True)

    @staticmethod
    def plot_forecast(tmp_df, train, index_forecast, forecast, confint):
        '''
        Plot the values of train and test, the predictions from ARIMA and the shadowing
        for the confidence interval, then save the figure as
        cumulative_forecasts.png inside image_dir.

        tmp_df         -- frame providing the cumulative_cases, pred, date
                          and '' columns read below
        train          -- not used by this function
        index_forecast -- index of the forecast points; also the x values of
                          the shaded band
        forecast       -- not used by this function
        confint        -- 2-D array; column 0 is used as the lower bound and
                          column 1 as the upper bound

        Reads the module-level names days_in_future, FIG_SIZE and image_dir.
        Returns None.
        '''

        # For shadowing
        lower_series = pd.Series(confint[:, 0], index=index_forecast)
        upper_series = pd.Series(confint[:, 1], index=index_forecast)

        print('... saving graph')
        fig, ax = plt.subplots(figsize=FIG_SIZE)
        plt.title('ARIMA - Prediction for cumalitive case counts {} days in the future'.format(days_in_future))
        plt.plot(tmp_df.cumulative_cases, label='Train',marker='o')
        plt.plot(tmp_df.pred, label='Forecast', marker='o')
        # The '' column only holds None (see forecast); plotting it through
        # pandas is how the date grouping is put on the axis.
        tmp_df.groupby('date')[['']].sum().plot(ax=ax)
        plt.fill_between(index_forecast,
                         upper_series,
                         lower_series,
                         color='k', alpha=.1)
        plt.ylabel('Infections')
        plt.xlabel('Date')
        fig.legend().set_visible(True)
        fig = ax.get_figure()
        fig.savefig(os.path.join(image_dir, 'cumulative_forecasts.png'))

    @staticmethod
    def forecast(tmp_df, train, index_forecast, days_in_future):
        '''
        Fit an ARIMA model on train, predict one value per entry of
        index_forecast, write the combined frame to CSV and plot it.

        tmp_df         -- frame the predictions are concatenated to
        train          -- series the model is fitted on
        index_forecast -- index given to the predicted values
        days_in_future -- not used inside this function

        Reads the module-level names train_start, forecast_end, data_dir and
        forecast_file. Returns None.
        '''

        # Fit model with training data
        model = auto_arima(train, trace=False, error_action='ignore', suppress_warnings=True)
        # NOTE(review): auto_arima presumably already returns a fitted model,
        # which would make this second fit redundant -- confirm before removing.
        model_fit = model.fit(train)

        forecast, confint = model_fit.predict(n_periods=len(index_forecast), return_conf_int=True)

        forecast_df = pd.concat([tmp_df, pd.DataFrame(forecast, index = index_forecast, columns=['pred'])], axis=1, sort=False)
        date_range = [d.strftime('%Y-%m-%d') for d in pd.date_range(train_start, forecast_end)]
        forecast_df['date'] = pd.Series(date_range).astype(str)
        forecast_df[''] = None # Dates get messed up, so need to use pandas plotting

        # Save Model and file
        print('... saving file:', forecast_file)
        forecast_df.to_csv(os.path.join(data_dir, forecast_file))

        Singleton.plot_forecast(forecast_df, train, index_forecast, forecast, confint)

# Make sure the folder that receives the saved figure exists.
if not os.path.exists(image_dir):
    print('Creating reports folder...')
    # os.makedirs creates missing parent folders like `mkdir -p`, without
    # handing the path to a shell and without depending on a Unix command.
    os.makedirs(image_dir, exist_ok=True)


def plot_forecast(tmp_df, train, index_forecast, forecast, confint):
    '''
    Draw the training series, the ARIMA predictions and a shaded confidence
    band, then save the figure as cumulative_forecasts.png inside image_dir.

    tmp_df         -- frame providing the cumulative_cases, pred, date and ''
                      columns read below
    train          -- not used by this function
    index_forecast -- index of the forecast points / x values of the band
    forecast       -- not used by this function
    confint        -- 2-D array; column 0 is taken as the lower bound and
                      column 1 as the upper bound
    '''
    # Band limits, aligned on the forecast index
    band_low = pd.Series(confint[:, 0], index=index_forecast)
    band_high = pd.Series(confint[:, 1], index=index_forecast)

    print('... saving graph')
    fig, ax = plt.subplots(figsize=FIG_SIZE)

    title = 'ARIMA - Prediction for cumalitive case counts {} days in the future'.format(days_in_future)
    plt.title(title)

    # Observed values first, then the predicted ones
    plt.plot(tmp_df.cumulative_cases, label='Train',marker='o')
    plt.plot(tmp_df.pred, label='Forecast', marker='o')

    # Plot the '' column grouped by date through pandas onto the same axes
    by_date = tmp_df.groupby('date')[['']].sum()
    by_date.plot(ax=ax)

    plt.fill_between(index_forecast, band_high, band_low, color='k', alpha=.1)
    plt.ylabel('Infections')
    plt.xlabel('Date')

    legend = fig.legend()
    legend.set_visible(True)

    target = os.path.join(image_dir, 'cumulative_forecasts.png')
    ax.get_figure().savefig(target)


def forecast(tmp_df, train, index_forecast, days_in_future):
    '''
    Fit an ARIMA model on train, predict one value per entry of
    index_forecast, write the combined frame to CSV and hand it to
    plot_forecast.

    tmp_df         -- frame the predictions are concatenated to
    train          -- series the model is fitted on
    index_forecast -- index given to the predicted values
    days_in_future -- not used inside this function
    '''
    # Model selection followed by a fit on the training data
    arima = auto_arima(train, trace=False, error_action='ignore', suppress_warnings=True)
    fitted = arima.fit(train)

    n_steps = len(index_forecast)
    predictions, confint = fitted.predict(n_periods=n_steps, return_conf_int=True)

    # Predictions become a 'pred' column placed next to the input frame
    pred_frame = pd.DataFrame(predictions, index = index_forecast, columns=['pred'])
    combined = pd.concat([tmp_df, pred_frame], axis=1, sort=False)

    all_days = [day.strftime('%Y-%m-%d') for day in pd.date_range(train_start, forecast_end)]
    combined['date'] = pd.Series(all_days).astype(str)
    combined[''] = None # Dates get messed up, so need to use pandas plotting

    # Write the CSV, then draw the figure
    print('... saving file:', forecast_file)
    csv_path = os.path.join(data_dir, forecast_file)
    combined.to_csv(csv_path)

    plot_forecast(combined, train, index_forecast, predictions, confint)

if __name__ == '__main__':
    print('Training forecasting model...')

    # Cumulative case counts of the rows whose date is in the training window
    in_training = trend_df.date.isin(train_period)
    train = trend_df[in_training].cumulative_cases

    # Forecast positions continue right after the last training index label
    last_label = train.index[-1]
    index_forecast = list(range(last_label + 1, last_label + days_in_future + 1))

    # Call through the module-level function, then through the Singleton class
    forecast(trend_df, train, index_forecast, days_in_future)
    Singleton.forecast(trend_df, train, index_forecast, days_in_future)