Commit

Temp changes

jmbejara committed Jul 25, 2024
1 parent f26d331 commit f148532
Showing 8 changed files with 265 additions and 0 deletions.
Binary file added week_4/__pycache__/config.cpython-39.pyc
Binary file added week_4/__pycache__/my_plotting_module.cpython-39.pyc
61 changes: 61 additions & 0 deletions week_4/config.py
@@ -0,0 +1,61 @@
"""Provides easy access to paths and credentials used in the project.
Meant to be used as an imported module.
Example
-------
import config
path = config.output_dir
path
## The config YAML should look something like this:
# config.yml
default:
  data_dir: "C:/My Documents/data/misc_project"
  private_data_dir: "D:/My Documents/private_data/misc_project"
  output_dir: "C:/Users/jdoe/GitRepositories/misc_project/output"
  wrds_username: "jdoe"
AWS:
  data_dir: "/data/awshomes/jdoe/data/misc_project"
  private_data_dir: "/data/awshomes/jdoe/private_data/misc_project"
  output_dir: "/data/awshomes/jdoe/GitRepositories/INT_misc_project/output"
"""
import yaml
from pathlib import Path

# config.yml is expected to sit one directory above this module.
with open("../config.yml") as f:
    config = yaml.safe_load(f)

def _read_config_entry(upper_key, lower_key):
    """Return the config entry as a Path, or None if the entry is missing."""
    entry = config[upper_key][lower_key]
    if entry is None:
        p = None
    else:
        p = Path(entry)
    return p

def switch_to(pathset_name='default'):
    """Point the module-level path variables at the named pathset."""
    global data_dir
    global private_data_dir
    global output_dir
    global pathset

    data_dir = _read_config_entry(pathset_name, "data_dir")
    private_data_dir = _read_config_entry(pathset_name, "private_data_dir")
    output_dir = _read_config_entry(pathset_name, "output_dir")
    pathset = pathset_name

def read(key):
    """Read a raw (unconverted) value from the active pathset."""
    upper_key = pathset
    value = config[upper_key][key]
    return value

# Initialize the module-level paths on import.
switch_to(pathset_name='default')

if __name__ == "__main__":
    pass
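    # Example usage (a minimal sketch; assumes an 'AWS' pathset exists in config.yml):
    #   import config
    #   config.switch_to('AWS')
    #   config.output_dir            # now the AWS output directory
    #   config.switch_to('default')  # switch back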


20 changes: 20 additions & 0 deletions week_4/example_plot.py
@@ -0,0 +1,20 @@
from matplotlib import pyplot as plt

import config
import my_plotting_module

# config.read() returns the raw value from config.yml; config.output_dir is
# the same entry already converted to a Path.
config.read('output_dir')

# With only a few sample points, the sine curve looks jagged.
my_plotting_module.plot_sine_function(N=5)
my_plotting_module.plot_sine_function(N=15)

# Start from a clean figure, then save a smooth version.
plt.clf()
my_plotting_module.plot_sine_function(N=1000)
plt.savefig(config.output_dir / 'mysine.png')
9 changes: 9 additions & 0 deletions week_4/my_plotting_module.py
@@ -0,0 +1,9 @@
import numpy as np
from matplotlib import pyplot as plt


def plot_sine_function(N=1000):
    """Plot sin(x) on [-10, 10] using N evenly spaced sample points."""
    x = np.linspace(-10, 10, N)
    y = np.sin(x)
    plt.plot(x, y)
Binary file added week_4/mysine.png
Binary file added week_4/pca.png
175 changes: 175 additions & 0 deletions week_4/pca_example.py
@@ -0,0 +1,175 @@
# %% [markdown]
# # Factor Analysis and Principal Component Analysis on Financial and Economic Time Series

# %%
# If you're running this on Colab, make sure to install the following packages using pip.
# On your own computer, I recommend using conda or mamba.

# !pip install pandas-datareader
# !pip install yfinance

# !conda install pandas-datareader
# !conda install yfinance

# %%
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from pathlib import Path  # used by stacked_plot below

import yfinance as yf
import pandas_datareader as pdr
import sklearn.decomposition
import statsmodels.multivariate.pca

start_date = pd.to_datetime('1980-01-01')
end_date = pd.to_datetime('today')

# %%
fred_series_short_names = {
    'BAMLH0A0HYM2': 'High Yield Index OAS',
    'NASDAQCOM': 'NASDAQ',
    'RIFSPPFAAD90NB': '90-Day AA Fin CP',
    'DTB3': '3-Month T-Bill',
    'DGS10': '10-Year Treasury',
    'VIXCLS': 'VIX',
}
df = pdr.get_data_fred(list(fred_series_short_names.keys()), start=start_date, end=end_date)
df = df.rename(columns=fred_series_short_names)

# %%
df

# %%
# Note: this displays the result; df itself is not modified here.
df.dropna()

# %% [markdown]
# ## Transforming and Normalizing the data
#
# What do transformation and normalization mean? Are they different things?
#
# - Why would one transform data? What is feature engineering?
# - What is normalization?
#
# What does stationarity mean? See the following plots. Some of these variables are stationary; others are not. Why is this a problem?
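
# %%
# A quick look at the raw series (a minimal sketch): a level series like the
# NASDAQ index trends upward and is clearly non-stationary, while spreads such
# as the High Yield OAS fluctuate around a long-run level.
df.plot(subplots=True, layout=(3, 2), figsize=(10, 8), sharex=True)
plt.tight_layout()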


# %% [markdown]
# Let's try some transformations like those used in the OFR Financial Stress Index: https://www.financialresearch.gov/financial-stress-index/files/indicators/index.html

# %%
# Start with an empty frame shaped like df to hold the transformed series.
dfn = pd.DataFrame().reindex_like(df)
dfn

# %%
# The rolling mean computed on the raw (gappy) data...
df['NASDAQ'].rolling(250).mean()

# %%
df = df.dropna()

# %%
# ...and again after dropping missing rows, so each 250-observation window
# spans roughly one year of trading days.
df['NASDAQ'].rolling(250).mean()

# %%
# 'High Yield Index OAS': leave as is
dfn['High Yield Index OAS'] = df['High Yield Index OAS']
# 3-month commercial paper rate minus the 3-month T-Bill rate
dfn['CP - Treasury Spread, 3m'] = df['90-Day AA Fin CP'] - df['3-Month T-Bill']
# 'NASDAQ': we're using something different from OFR, but still apply a rolling-mean transformation
dfn['NASDAQ'] = df['NASDAQ'] - df['NASDAQ'].rolling(250).mean()
dfn['10-Year Treasury'] = df['10-Year Treasury'] - df['10-Year Treasury'].rolling(250).mean()
# 'VIX': leave as is
dfn['VIX'] = df['VIX']

# %%
dfn = dfn.drop(columns=['90-Day AA Fin CP', '3-Month T-Bill'])
dfn = dfn.dropna()

# %% [markdown]
# We're finished with our transformations. Now let's normalize. First, why is normalization important?
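
# %%
# The transformed series still live on very different scales, so an
# unnormalized PCA would be dominated by whichever column has the largest
# variance. Compare the standard deviations:
dfn.std()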

# %% [markdown]
# Now, normalize each column,
# $$
# z = \frac{x - \bar x}{\text{std}(x)}
# $$

# %%
dfn = (dfn - dfn.mean()) / dfn.std()
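
# %%
# Sanity check: every column should now have mean ~0 and standard deviation ~1.
dfn.describe().loc[['mean', 'std']]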

# %%
def pca(dfn, module='scikitlearn'):
    """Return the standardized first principal component and its loadings."""
    if module == 'statsmodels':
        _pc1, _loadings, projection, rsquare, _, _, _ = statsmodels.multivariate.pca.pca(
            dfn, ncomp=1, standardize=True, demean=True, normalize=True,
            gls=False, weights=None, method='svd')
        _loadings = _loadings['comp_0']
        loadings = np.std(_pc1) * _loadings
        pc1 = _pc1 / np.std(_pc1)
        pc1 = pc1.rename(columns={'comp_0': 'PC1'})['PC1']

    elif module == 'scikitlearn':
        sk_pca = sklearn.decomposition.PCA(n_components=1)
        _pc1 = pd.Series(sk_pca.fit_transform(dfn)[:, 0], index=dfn.index, name='PC1')
        _loadings = sk_pca.components_.T * np.sqrt(sk_pca.explained_variance_)
        _loadings = pd.Series(_loadings[:, 0], index=dfn.columns)

        loadings = np.std(_pc1) * _loadings
        pc1 = _pc1 / np.std(_pc1)
        pc1.name = 'PC1'
    else:
        raise ValueError("module must be 'statsmodels' or 'scikitlearn'")

    loadings.name = "loadings"

    return pc1, loadings

def stacked_plot(df, filename=None):
    """Stacked area plot of per-series contributions, with their sum overlaid.

    Positive and negative contributions are stacked separately; the black
    line is the row-wise sum (e.g., the first principal component).
    """
    df_pos = df[df >= 0]
    df_neg = df[df < 0]

    alpha = .3
    linewidth = .5

    ax = df_pos.plot.area(alpha=alpha, linewidth=linewidth, legend=False)
    pc1 = df.sum(axis=1)
    pc1.name = 'pc1'
    pc1.plot(color="Black", label='pc1', linewidth=1)

    plt.legend()
    # Restart the color cycle so negative areas match their positive counterparts.
    ax.set_prop_cycle(None)
    df_neg.plot.area(alpha=alpha, ax=ax, linewidth=linewidth, legend=False, ylim=(-3, 3))
    # recompute the ax.dataLim
    ax.relim()
    # update ax.viewLim using the new dataLim
    ax.autoscale()
    # ax.set_ylabel('Standard Deviations')
    # ax.set_ylim(-3, 4)
    # ax.set_ylim(-30, 30)

    if filename is not None:
        filename = Path(filename)
        figure = plt.gcf()  # get current figure
        figure.set_size_inches(8, 6)
        plt.savefig(filename, dpi=300)


# %%
pc1, loadings = pca(dfn, module='scikitlearn')
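
# %%
# The two backends should agree up to an overall sign flip, since the sign of
# a principal component is arbitrary. A quick check (assumes the statsmodels
# branch above runs in your environment):
pc1_sm, loadings_sm = pca(dfn, module='statsmodels')
print(pc1.corr(pc1_sm))  # expect a value near +1 or -1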

# %%
plt.clf()
pc1.plot()
plt.savefig('pca.png')
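
# %%
# Inspect the loadings: the sign and relative magnitude show how each series
# contributes to the first principal component.
plt.clf()
loadings.plot.barh()
plt.tight_layout()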


# # %%
# stacked_plot(dfn)
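
# %%
# A hedged sketch of how stacked_plot() might be used: weight each normalized
# series by its loading so the stacked areas show which series drive the index
# at each date; the row sums are proportional to PC1. (An illustration, not
# the OFR methodology; the output filename is hypothetical.)
contributions = dfn.mul(loadings, axis=1)
contributions = contributions * (pc1.std() / contributions.sum(axis=1).std())
plt.clf()
stacked_plot(contributions, filename='stress_index_contributions.png')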

