-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
265 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
"""Provides easy access to paths and credentials used in the project. | ||
Meant to be used as an imported module. | ||
Example | ||
------- | ||
import config | ||
path = config.output_dir | ||
path | ||
## The config YAML should look something like this: | ||
# config.yml | ||
default: | ||
data_dir: "C:/My Documents/data/misc_project" | ||
private_data_dir: "D:/My Documents/private_data/misc_project" | ||
output_dir: "C:/Users/jdoe/GitRepositories/misc_project/output" | ||
wrds_username: "jdoe" | ||
AWS: | ||
data_dir: "/data/awshomes/jdoe/data/misc_project" | ||
private_data_dir: "/data/awshomes/jdoe/private_data/misc_project" | ||
output_dir: "/data/awshomes/jdoe/GitRepositories/INT_misc_project/output" | ||
""" | ||
import yaml | ||
from pathlib import Path | ||
|
||
# Load the project configuration once, at import time.
# NOTE: the path is relative to the current working directory, not to this
# file. The explicit encoding keeps the YAML readable on platforms whose
# locale encoding is not UTF-8 (e.g. Windows).
with open("../config.yml", encoding="utf-8") as f:
    config = yaml.safe_load(f)
|
||
def _read_config_entry(upper_key, lower_key):
    """Return ``config[upper_key][lower_key]`` converted to a ``Path``.

    An entry whose value is ``None`` is returned as ``None`` instead of
    being converted.
    """
    raw_value = config[upper_key][lower_key]
    return None if raw_value is None else Path(raw_value)
|
||
def switch_to(pathset_name='default'):
    """Make ``pathset_name`` the active path set of this module.

    Rebinds the module-level globals ``data_dir``, ``private_data_dir`` and
    ``output_dir`` to the entries stored under ``pathset_name`` in the loaded
    config, and records the name in the global ``pathset``.

    Every entry is read before any global is rebound, so a lookup that raises
    (for example ``KeyError`` for an unknown path set or a missing entry)
    leaves the previously active path set untouched instead of half-switched.
    """
    global data_dir
    global private_data_dir
    global output_dir
    global pathset

    # Read everything first; any failure happens before state is modified.
    new_data_dir = _read_config_entry(pathset_name, "data_dir")
    new_private_data_dir = _read_config_entry(pathset_name, "private_data_dir")
    new_output_dir = _read_config_entry(pathset_name, "output_dir")

    data_dir = new_data_dir
    private_data_dir = new_private_data_dir
    output_dir = new_output_dir
    pathset = pathset_name
|
||
def read(key):
    """Return the config value stored under ``key`` for the active path set.

    The active path set is the module-level ``pathset``; the value is
    returned exactly as it appears in the loaded config.
    """
    return config[pathset][key]
|
||
# Activate the "default" path set as soon as the module is imported.
switch_to("default")


if __name__ == "__main__":
    # Nothing to run: this module is meant to be imported.
    pass
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import pandas as pd | ||
import numpy as np | ||
from matplotlib import pyplot as plt | ||
from pathlib import Path | ||
|
||
import config | ||
|
||
|
||
|
||
|
||
import my_plotting_module | ||
|
||
# Look up the configured output directory; the returned value is not used.
config.read('output_dir')

# Draw a coarse and a slightly finer sine curve on the current figure.
for n_points in (5, 15):
    my_plotting_module.plot_sine_function(N=n_points)

# Clear the figure, draw the densely sampled curve and save it.
plt.clf()
my_plotting_module.plot_sine_function(N=1000)
sine_png = config.output_dir / 'mysine.png'
plt.savefig(sine_png)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import pandas as pd | ||
import numpy as np | ||
from matplotlib import pyplot as plt | ||
|
||
|
||
def plot_sine_function(N=1000):
    """Plot sin(x) with ``plt.plot``.

    ``N`` is the number of evenly spaced sample points taken on [-10, 10].
    """
    grid = np.linspace(-10, 10, N)
    plt.plot(grid, np.sin(grid))
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
# %% [markdown] | ||
# # Factor Analysis and Principal Component Analysis on Financial and Economic Time Series | ||
|
||
# %% | ||
# If you're running this on Colab, make sure to install the following packages using pip. | ||
# On your own computer, I recommend using conda or mamba. | ||
|
||
# !pip install pandas-datareader | ||
# !pip install yfinance | ||
|
||
# !conda install pandas-datareader | ||
# !conda install yfinance | ||
|
||
# %% | ||
import numpy as np | ||
import pandas as pd | ||
from matplotlib import pyplot as plt | ||
|
||
import yfinance as yf | ||
import pandas_datareader as pdr | ||
import sklearn.decomposition | ||
import statsmodels.multivariate.pca | ||
|
||
# Sample window for the download: 1980-01-01 through today.
start_date = pd.to_datetime('1980-01-01')
end_date = pd.to_datetime('today')

# %%
# FRED series IDs mapped to the short column names used in this notebook.
fred_series_short_names = {
    'BAMLH0A0HYM2': 'High Yield Index OAS',
    'NASDAQCOM': 'NASDAQ',
    'RIFSPPFAAD90NB': '90-Day AA Fin CP',
    'DTB3': '3-Month T-Bill',
    'DGS10': '10-Year Treasury',
    'VIXCLS': 'VIX',
}
df = pdr.get_data_fred(
    fred_series_short_names.keys(), start=start_date, end=end_date
).rename(columns=fred_series_short_names)

# %%
df

# %%
df.dropna()
|
||
# %% [markdown] | ||
# ## Transforming and Normalizing the data | ||
# | ||
# What is transformation and normalization? Are these different things? | ||
# | ||
# - Why would one transform data? What is feature engineering? | ||
# - What is normalization? | ||
# | ||
# What does stationarity mean? See the following plots. Some of these variables are stationary. Others are not. Why is this a problem? | ||
|
||
|
||
# %% [markdown] | ||
# Let's try some transformations like those used in the OFR Financial Stress Index: https://www.financialresearch.gov/financial-stress-index/files/indicators/index.html | ||
|
||
# %% | ||
# Frame with the same index and columns as df, to receive the transformed series.
dfn = pd.DataFrame().reindex_like(df)
dfn

# %%
# 250-row rolling mean of NASDAQ, before rows with missing values are removed.
df['NASDAQ'].rolling(250).mean()

# %%
# Keep only the rows in which every series has a value.
df = df.dropna(axis=0, how='any')

# %%
# Same rolling mean, now computed on the complete rows only.
df['NASDAQ'].rolling(250).mean()
|
||
# %% | ||
# 'High Yield Index OAS': Leave as is
dfn['High Yield Index OAS'] = df['High Yield Index OAS']
# 'CP - Treasury Spread, 3m': 90-day commercial paper rate minus the
# 3-Month T-Bill rate, so both legs of the spread have the same maturity.
dfn['CP - Treasury Spread, 3m'] = df['90-Day AA Fin CP'] - df['3-Month T-Bill']
# 'NASDAQ': We're using something different, but still apply the rolling mean transformation
dfn['NASDAQ'] = df['NASDAQ'] - df['NASDAQ'].rolling(250).mean()
# '10-Year Treasury': deviation from its own 250-row rolling mean
dfn['10-Year Treasury'] = df['10-Year Treasury'] - df['10-Year Treasury'].rolling(250).mean()
# 'VIX': Leave as is
dfn['VIX'] = df['VIX']
|
||
# %% | ||
# Remove the raw rate columns that only fed the spread, then discard every
# row that still has a missing value (e.g. the rolling-mean warm-up rows).
dfn = dfn.drop(columns=['90-Day AA Fin CP', '3-Month T-Bill']).dropna()
|
||
# %% [markdown] | ||
# We finished with our transformations. Now, let's normalize. First, why is it important? | ||
|
||
# %% [markdown] | ||
# Now, normalize each column, | ||
# $$ | ||
# z = \frac{x - \bar x}{\text{std}(x)} | ||
# $$ | ||
|
||
# %% | ||
# z-score every column: subtract its mean, then divide by its standard deviation.
column_means = dfn.mean()
column_stds = dfn.std()
dfn = (dfn - column_means) / column_stds
|
||
# %% | ||
def pca(dfn, module='scikitlearn'):
    """Extract the first principal component of ``dfn`` and its loadings.

    Parameters
    ----------
    dfn : pandas.DataFrame
        Input data, one column per series.
    module : {'scikitlearn', 'statsmodels'}
        Library used to perform the decomposition.

    Returns
    -------
    pc1 : pandas.Series
        First principal component divided by its standard deviation
        (``np.std``), named ``'PC1'``.
    loadings : pandas.Series
        Loadings multiplied by that same standard deviation, named
        ``'loadings'``.

    Raises
    ------
    ValueError
        If ``module`` is not one of the supported names.
    """
    if module == 'statsmodels':
        factors, raw_loadings, _projection, _rsquare, _, _, _ = (
            statsmodels.multivariate.pca.pca(
                dfn, ncomp=1, standardize=True, demean=True, normalize=True,
                gls=False, weights=None, method='svd',
            )
        )
        # Pull the single component out as Series so the rescaling below uses
        # a scalar standard deviation. Scaling with a one-column DataFrame can
        # misalign against the loadings index and produce NaNs.
        # NOTE(review): assumes statsmodels returns DataFrames with a
        # 'comp_0' column for DataFrame input, as the original code did.
        _pc1 = factors['comp_0']
        _loadings = raw_loadings['comp_0']
    elif module == 'scikitlearn':
        # Named `model` so the local does not shadow this function's name.
        model = sklearn.decomposition.PCA(n_components=1)
        _pc1 = pd.Series(model.fit_transform(dfn)[:, 0], index=dfn.index, name='PC1')
        _loadings = model.components_.T * np.sqrt(model.explained_variance_)
        _loadings = pd.Series(_loadings[:, 0], index=dfn.columns)
    else:
        raise ValueError(
            f"Unknown module {module!r}; expected 'statsmodels' or 'scikitlearn'"
        )

    # Rescale so the component has unit standard deviation and move that
    # scale into the loadings.
    scale = np.std(_pc1)
    loadings = scale * _loadings
    pc1 = _pc1 / scale
    pc1.name = 'PC1'
    loadings.name = "loadings"

    return pc1, loadings
|
||
def stacked_plot(df, filename=None):
    """Draw a stacked area chart of ``df`` with the row sum overlaid.

    Non-negative and negative values are plotted as two separate area plots,
    and the row-wise sum of ``df`` is drawn on top as a black line labelled
    ``'pc1'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot, one column per area (e.g. category contributions).
    filename : str or path-like, optional
        If given, the current figure is resized to 8 x 6 inches and saved to
        this path at 300 dpi.
    """
    # Imported locally: this file does not import Path at module level, so
    # the save branch would otherwise raise NameError.
    from pathlib import Path

    # Boolean masking leaves NaN wherever the condition fails, giving one
    # frame with only the non-negative values and one with only the negatives.
    df_pos = df[df >= 0]
    df_neg = df[df < 0]

    alpha = .3
    linewidth = .5

    ax = df_pos.plot.area(alpha=alpha, linewidth=linewidth, legend=False)
    pc1 = df.sum(axis=1)
    pc1.name = 'pc1'
    pc1.plot(color="Black", label='pc1', linewidth=1)

    plt.legend()
    # Restart the color cycle before drawing the negative areas.
    ax.set_prop_cycle(None)
    df_neg.plot.area(alpha=alpha, ax=ax, linewidth=linewidth, legend=False, ylim=(-3,3))
    # recompute the ax.dataLim
    ax.relim()
    # update ax.viewLim using the new dataLim
    ax.autoscale()

    if filename is not None:
        filename = Path(filename)
        figure = plt.gcf()  # get current figure
        figure.set_size_inches(8, 6)
        plt.savefig(filename, dpi=300)
|
||
|
||
# %% | ||
# First principal component and loadings of the normalized data.
pc1, loadings = pca(dfn, module="scikitlearn")

# Clear the current figure before plotting the component.
plt.clf()
# %%
# Plot the component over time and write the figure to disk.
pc1.plot()
plt.savefig("pca.png")
|
||
|
||
# # %% | ||
# stacked_plot(dfn) | ||
|
||
|