From f39aba7d13015a3f9c40c22eee68cedcba661ab5 Mon Sep 17 00:00:00 2001 From: "Katherine L. Bottenhorn" Date: Fri, 14 Jun 2024 15:47:29 -0700 Subject: [PATCH] update nbsp workflows for hormones x fc paper --- idconn/io.py | 20 +- idconn/nbs.py | 144 +++--- idconn/workflows/nbs_predict-bc.py | 387 ++++++++++++++++ .../workflows/nbs_predict-bc_sensitivity.py | 412 ++++++++++++++++++ idconn/workflows/nbs_predict-e2.py | 210 +++++---- .../workflows/nbs_predict-e2_sensitivity.py | 412 ++++++++++++++++++ .../workflows/nbs_predict-e2bc_sensitivity.py | 412 ++++++++++++++++++ idconn/workflows/nbs_predict-e2xp4-bc.py | 214 +++++---- idconn/workflows/nbs_predict-e2xp4.py | 214 +++++---- idconn/workflows/nbs_predict-p4.py | 206 +++++---- .../workflows/nbs_predict-p4_sensitivity.py | 412 ++++++++++++++++++ .../workflows/nbs_predict-p4bc_sensitivity.py | 412 ++++++++++++++++++ idconn/workflows/nbs_predict.py | 167 ++++--- 13 files changed, 3027 insertions(+), 595 deletions(-) create mode 100644 idconn/workflows/nbs_predict-bc.py create mode 100644 idconn/workflows/nbs_predict-bc_sensitivity.py create mode 100644 idconn/workflows/nbs_predict-e2_sensitivity.py create mode 100644 idconn/workflows/nbs_predict-e2bc_sensitivity.py create mode 100644 idconn/workflows/nbs_predict-p4_sensitivity.py create mode 100644 idconn/workflows/nbs_predict-p4bc_sensitivity.py diff --git a/idconn/io.py b/idconn/io.py index 3e1bca9..55ddc81 100644 --- a/idconn/io.py +++ b/idconn/io.py @@ -35,6 +35,7 @@ def calc_fd(confounds): fd = np.sum([delta_x, delta_y, delta_z, delta_alpha, delta_beta, delta_gamma], axis=0) return fd + def build_statsmodel_json( name, task, @@ -131,6 +132,7 @@ def build_statsmodel_json( json.dump(statsmodel, outfile) return statsmodel_json + def atlas_picker(atlas, path, key=None): """Takes in atlas name and path to file, if local, returns nifti-like object (usually file path to downloaded atlas), @@ -190,6 +192,7 @@ def atlas_picker(atlas, path, key=None): return atlas, path + def vectorize_corrmats(matrices, diagonal=False): """Returns the vectorized upper triangles of a 3-dimensional array (i.e., node x node x matrix) of matrices. Output will be a 2-dimensional @@ -249,12 +252,13 @@ def vectorize_corrmats(matrices, diagonal=False): edge_vector = np.asarray(edge_vector) return edge_vector + def read_corrmats(layout, task, deriv_name, atlas, z_score=True, vectorized=True, verbose=False): """Returns a node x node x (subject x session) matrix of correlation matrices from a BIDS derivative folder. Optionally returns a node^2 x (subject x session) array of vectorized upper triangles of those correlation matrices. - ME @ ME: NEEDS AN OPTION TO KEEP RUNS SEPARATE. CURRENTLY IT AVERAGES CONFOUNDS AND + ME @ ME: NEEDS AN OPTION TO KEEP RUNS SEPARATE. CURRENTLY IT AVERAGES CONFOUNDS AND Parameters ---------- layout : BIDSLayout or str @@ -404,7 +408,7 @@ def read_corrmats(layout, task, deriv_name, atlas, z_score=True, vectorized=True ############### DOES THIS ONLY GRAB ONE RUN?!?!?! ############## ################################################################ path = path[0] - + else: pass assert exists(path), f"Corrmat file not found at {path}" @@ -426,6 +430,7 @@ def read_corrmats(layout, task, deriv_name, atlas, z_score=True, vectorized=True ppt_df.replace({"": np.nan}, inplace=True) return ppt_df + def undo_vectorize(edges, num_node=None, diagonal=False): """ Puts an edge vector back into an adjacency matrix. 
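# A minimal sketch (not part of this patch) of the round trip that
# vectorize_corrmats and undo_vectorize are meant to satisfy, assuming a
# small symmetric matrix:
#
#   import numpy as np
#   rng = np.random.default_rng(0)
#   a = rng.random((4, 4))
#   sym = (a + a.T) / 2                        # toy 4-node connectivity matrix
#   edges = sym[np.triu_indices(4, k=1)]       # what vectorize_corrmats keeps
#   rebuilt = undo_vectorize(edges, num_node=4, diagonal=False)
#   assert np.allclose(rebuilt[np.triu_indices(4, k=1)], edges)
#   assert np.allclose(rebuilt, rebuilt.T)     # symmetrized on the way back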
@@ -453,17 +458,18 @@ def undo_vectorize(edges, num_node=None, diagonal=False): num_node = int(num_node) X = np.zeros((num_node, num_node)) if diagonal == False: - k=1 + k = 1 if diagonal == True: - k=0 + k = 0 X[np.triu_indices(num_node, k=k)] = edges - diag_X = X[np.diag_indices(num_node,2)] + diag_X = X[np.diag_indices(num_node, 2)] X = X + X.T if diagonal == True: - X[np.diag_indices(num_node,2)] = diag_X - #print('did undo_vectorize work?', np.allclose(X, X.T)) + X[np.diag_indices(num_node, 2)] = diag_X + # print('did undo_vectorize work?', np.allclose(X, X.T)) return X + def plot_edges( adj, atlas_nii, diff --git a/idconn/nbs.py b/idconn/nbs.py index 26ed551..52e9b37 100644 --- a/idconn/nbs.py +++ b/idconn/nbs.py @@ -8,11 +8,7 @@ # import bct from sklearn.experimental import enable_halving_search_cv -from sklearn.model_selection import ( - RepeatedStratifiedKFold, - RepeatedKFold, - HalvingGridSearchCV -) +from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, HalvingGridSearchCV from sklearn.feature_selection import f_regression, f_classif from sklearn.linear_model import LogisticRegression, ElasticNet, LogisticRegressionCV, RidgeCV @@ -39,9 +35,9 @@ def calc_number_of_nodes(matrices): def residualize(X, y=None, confounds=None): - ''' + """ all inputs need to be arrays, not dataframes - ''' + """ # residualize the outcome if confounds is not None: if y is not None: @@ -74,7 +70,9 @@ def residualize(X, y=None, confounds=None): print("Confound matrix wasn't provided, so no confounding was done") -def pynbs(matrices, outcome, num_node=None, diagonal=False, alpha=0.05, predict=False, permutations=10000): +def pynbs( + matrices, outcome, num_node=None, diagonal=False, alpha=0.05, predict=False, permutations=10000 +): """ Calculates the Network Based Statistic (Zalesky et al., 2011) on connectivity matrices provided of shape ((subject x session)x node x node) @@ -129,7 +127,6 @@ def pynbs(matrices, outcome, num_node=None, diagonal=False, alpha=0.05, predict= edges = matrices.copy() # print(edges.shape) - # edges = edges.T # run an ols per edge @@ -145,13 +142,15 @@ def pynbs(matrices, outcome, num_node=None, diagonal=False, alpha=0.05, predict= # find largest connected component of sig_edges # turn sig_edges into an nxn matrix first - sig_matrix = undo_vectorize(sig_edges, num_node=num_node, diagonal=diagonal) # need to write this function + sig_matrix = undo_vectorize( + sig_edges, num_node=num_node, diagonal=diagonal + ) # need to write this function matrix = nx.from_numpy_array(sig_matrix) # use networkX to find connected components S = [matrix.subgraph(c).copy() for c in nx.connected_components(matrix)] S.sort(key=len, reverse=True) - #largest_cc = max(nx.connected_components(matrix), key=len) + # largest_cc = max(nx.connected_components(matrix), key=len) G0 = S[0] # print(G0) @@ -202,7 +201,9 @@ def pynbs(matrices, outcome, num_node=None, diagonal=False, alpha=0.05, predict= # print(np.sum(perm_edges)) # find largest connected component of sig_edges # turn sig_edges into an nxn matrix first - perm_matrix = undo_vectorize(perm_edges, num_node=num_node, diagonal=diagonal) # need to write this function + perm_matrix = undo_vectorize( + perm_edges, num_node=num_node, diagonal=diagonal + ) # need to write this function perm_nx = nx.from_numpy_array(perm_matrix) largest_cc = max(nx.connected_components(perm_nx), key=len) @@ -233,7 +234,17 @@ def pynbs(matrices, outcome, num_node=None, diagonal=False, alpha=0.05, predict= def kfold_nbs( - matrices, outcome, 
confounds=None, alpha=0.05, groups=None, num_node=None, diagonal=False, scale_x=False, scale_y=False, n_splits=10, n_iterations=10 + matrices, + outcome, + confounds=None, + alpha=0.05, + groups=None, + num_node=None, + diagonal=False, + scale_x=False, + scale_y=False, + n_splits=10, + n_iterations=10, ): """Calculates the Network Based Statistic (Zalesky et al., 20##) on connectivity matrices provided of shape ((subject x session)x node x node) @@ -326,38 +337,38 @@ def kfold_nbs( if diagonal == True: k = 0 if diagonal == False: - k=1 + k = 1 upper_tri = np.triu_indices(num_node, k=k) i = 0 manager = enlighten.get_manager() ticks = manager.counter(total=n_splits * n_iterations, desc="Progress", unit="folds") for train_idx, test_idx in cv.split(edges, split_y): - + cv_results.at[i, "split"] = (train_idx, test_idx) # assert len(train_a_idx) == len(train_b_idx) Cs = np.logspace(-4, 4, 10) - #print(len(np.unique(outcome))) + # print(len(np.unique(outcome))) if np.unique(outcome).shape[0] == 2: - #print('binary') + # print('binary') regressor = LogisticRegressionCV( - Cs=Cs, + Cs=Cs, cv=4, - #verbose=2, - max_iter=100000, - penalty="l2", - solver="saga", - n_jobs=4 + # verbose=2, + max_iter=100000, + penalty="l2", + solver="saga", + n_jobs=4, ) - + else: - #print('continuous') + # print('continuous') regressor = RidgeCV( - alphas=Cs, - cv=4, - #n_jobs=4 - ) + alphas=Cs, + cv=4, + # n_jobs=4 + ) train_y = outcome[train_idx] test_y = outcome[test_idx] @@ -392,16 +403,14 @@ def kfold_nbs( y_scaler = Normalizer() train_y = y_scaler.fit_transform(train_y.reshape(-1, 1)) test_y = y_scaler.transform(test_y.reshape(-1, 1)) - - - - # perform NBS wooooooooo # note: output is a dataframe :) # PYNBS SHOULD NOT DO CONFOUND REGRESSION? - adj = pynbs(train_edges, train_y, num_node=num_node, diagonal=diagonal, alpha=alpha, predict=True) - #print(adj.shape, adj.ndim, adj[0].shape, upper_tri) + adj = pynbs( + train_edges, train_y, num_node=num_node, diagonal=diagonal, alpha=alpha, predict=True + ) + # print(adj.shape, adj.ndim, adj[0].shape, upper_tri) # cv_results.at[i, 'pval'] = pval cv_results.at[i, "component"] = adj.values @@ -413,7 +422,7 @@ def kfold_nbs( # so you don't have repeated edges # returns (n_edges, ) nbs_vector = adj.values[upper_tri] - #print(nbs_vector.shape) + # print(nbs_vector.shape) # print(nbs_vector.shape) # use those to make a "significant edges" mask mask = nbs_vector == 1.0 @@ -425,31 +434,31 @@ def kfold_nbs( # returns (n_edges, samples) train_features = train_edges.T[mask] test_features = test_edges.T[mask] - #print(mask.shape, np.sum(mask), train_edges.shape, train_features.shape) + # print(mask.shape, np.sum(mask), train_edges.shape, train_features.shape) train_features = train_features.T test_features = test_features.T - - #train_features = scaler.fit_transform(train_features.T) - #test_features = scaler.fit_transform(test_features.T) - #print(train_features.shape, train_y.shape) - #print(f"train_edges:\t{train_edges[:10, 0]}\ntrain_features:\t{train_features[:10, 0]}") + # train_features = scaler.fit_transform(train_features.T) + # test_features = scaler.fit_transform(test_features.T) + # print(train_features.shape, train_y.shape) + + # print(f"train_edges:\t{train_edges[:10, 0]}\ntrain_features:\t{train_features[:10, 0]}") # print(np.ravel(train_y)) # train model predicting outcome from brain (note: no mas covariates) # use grid search bc I want to know how to tune alpha and l1_ratio - - #grid = HalvingGridSearchCV(estimator=regressor, - # param_grid=param_grid, - # n_jobs=8, 
- # cv=4, + + # grid = HalvingGridSearchCV(estimator=regressor, + # param_grid=param_grid, + # n_jobs=8, + # cv=4, # factor=2, # verbose=0, - # min_resources=20, - # refit=True, + # min_resources=20, + # refit=True, # aggressive_elimination=False) model = regressor.fit(X=train_features, y=np.ravel(train_y)) - + cv_results.at[i, "model"] = model # score that model on the testing data @@ -462,18 +471,20 @@ def kfold_nbs( # I go die now if np.unique(outcome).shape[0] == 2: score = model.score(X=test_features, y=np.ravel(test_y)) - + else: predicted_y = model.predict(X=test_features) - score,p = spearmanr(predicted_y, np.ravel(test_y)) - #spearman = spearmanr(predicted_y, np.ravel(test_y)) - + score, p = spearmanr(predicted_y, np.ravel(test_y)) + # spearman = spearmanr(predicted_y, np.ravel(test_y)) + cv_results.at[i, "score"] = score if i % (n_splits * n_iterations / 10) == 0: - mean = cv_results['score'].mean() - sdev = cv_results['score'].std() - print(f'Iteration {i} out of {n_splits * n_iterations}, average score:\t{mean:.2f} +/- {sdev:.2f}') - #print(score) + mean = cv_results["score"].mean() + sdev = cv_results["score"].std() + print( + f"Iteration {i} out of {n_splits * n_iterations}, average score:\t{mean:.2f} +/- {sdev:.2f}" + ) + # print(score) m = 0 param_vector = np.zeros_like(nbs_vector) @@ -489,21 +500,21 @@ def kfold_nbs( else: pass X = undo_vectorize(param_vector, num_node=num_node, diagonal=diagonal) - #cv_results.at[i, "coefficient_matrix"] = X - #cv_results.at[i, "coefficient_vector"] = param_vector + # cv_results.at[i, "coefficient_matrix"] = X + # cv_results.at[i, "coefficient_vector"] = param_vector i += 1 else: pass ticks.update() # calculate weighted average # print(cv_results['score']) - weighted_stack = np.zeros((num_node,num_node)) - fake = np.zeros((num_node,num_node)) + weighted_stack = np.zeros((num_node, num_node)) + fake = np.zeros((num_node, num_node)) # print(weighted_stack.shape) for j in index: # print(cv_results.at[j, 'score']) weighted = cv_results.at[j, "component"] * cv_results.at[j, "score"] - + if np.sum(weighted) == 0 or np.isnan(np.sum(weighted)) == True: weighted_stack = np.dstack([weighted_stack, fake]) else: @@ -511,5 +522,8 @@ def kfold_nbs( # print(weighted_stack.shape, weighted.shape) weighted_average = np.mean(weighted_stack, axis=-1) - #model = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"] - return weighted_average, cv_results, #model + # model = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"] + return ( + weighted_average, + cv_results, + ) # model diff --git a/idconn/workflows/nbs_predict-bc.py b/idconn/workflows/nbs_predict-bc.py new file mode 100644 index 0000000..ec3c559 --- /dev/null +++ b/idconn/workflows/nbs_predict-bc.py @@ -0,0 +1,387 @@ +#!/usr/bin/env python3 +import pandas as pd +import numpy as np +import nibabel as nib +import seaborn as sns +import bids +import matplotlib.pyplot as plt +from os.path import join +from datetime import datetime +from time import strftime +from scipy.stats import spearmanr +from idconn import nbs, io +from bct import threshold_proportional + + +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_validate +from sklearn.preprocessing import Normalizer, StandardScaler +from sklearn.metrics import mean_squared_error +from matplotlib.colors import ListedColormap +import matplotlib as mpl + + +import warnings +import json + +warnings.simplefilter("ignore") + +today = 
datetime.today() +today_str = strftime("%m_%d_%Y") + +TRAIN_DSET = "/Users/katherine.b/Dropbox/Data/ds002674" +TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset" +DERIV_NAME = "IDConn" +OUTCOME = "bc" +CONFOUNDS = ["framewise_displacement"] +TASK = "rest" +ATLAS = "craddock2012" +THRESH = 0.5 +alpha = 0.01 +atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz" + + +train_layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True) + +dat = io.read_corrmats(train_layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) + +keep = dat["adj"].dropna().index +dat = dat.loc[keep] + +# print(dat['adj'].values.shape) +num_node = dat.iloc[0]["adj"].shape[0] + +matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node)) +upper_tri = np.triu_indices(num_node, k=1) + +outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) +groups = dat["bc"] + +if CONFOUNDS is not None: + confounds = dat[CONFOUNDS] + base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}" +else: + confounds = None + base_name = f"nbs-predict_outcome-{OUTCOME}" +# print(dat['bc']) + +# load in test data +test_layout = bids.BIDSLayout(TEST_DSET, derivatives=True) + +test_df = io.read_corrmats(test_layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) + +keep = test_df[[OUTCOME, "adj"]].dropna().index +# print(keep) + +test_df = test_df.loc[keep] + +outcome_test = test_df[OUTCOME].values +# print(test_df) + +# print(outcome_test) +matrices_test = np.vstack(test_df["adj"].dropna().values).reshape( + (len(test_df["adj"].dropna().index), num_node, num_node) +) +edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter] + + +# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE +if CONFOUNDS is not None: + confounds_test = test_df[CONFOUNDS].values + + # regress out the confounds from each edge and the outcome variable, + # use the residuals for the rest of the algorithm + # print(confounds.shape, outcome.shape) + if len(np.unique(outcome_test)) <= 2: + resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test) + test_outcome = outcome_test + elif len(np.unique(outcome_test)) > 3: + test_outcome, resid_edges = nbs.residualize( + X=edges_test, y=outcome_test, confounds=confounds_test + ) + test_features = resid_edges +else: + test_features = edges_test + test_outcome = outcome_test + + +weighted_average, cv_results = nbs.kfold_nbs( + matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=1000 +) + +fig, fig2, nimg = io.plot_edges( + weighted_average, + atlas_fname, + threshold="computed", + title=f"{OUTCOME} Precision-Weighted Average", + strength=True, + cmap="seismic", + node_size="strength", +) + +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400 +) +fig2.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"), + dpi=400, +) +nib.save( + nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}") +) + + +avg_df = pd.DataFrame( + weighted_average, + index=range(0, weighted_average.shape[0]), + columns=range(0, weighted_average.shape[1]), +) + +cv_results.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t" +) +avg_df.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" +) + +best = cv_results.sort_values(by="score", 
ascending=False).iloc[0]["model"] + +# this uses the most predictive subnetwork as features in the model +# might replace with thresholded weighted_average +# or use _all_ the edges in weighted_average with KRR or ElasticNet... +# ORRR use thresholded weighted average edges with ElasticNet... +# - stays true to NBS-Predict +# - increases parsimony while handling multicollinearity... +# either way, I don't think cv_results is necessary + +# here is where we'd threshold the weighted average to use for elastic-net +weighted_average = np.where(weighted_average > 0, weighted_average, 0) +# nbs_vector = weighted_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +# filter = np.where(nbs_vector >= p75, True, False) +# print(nbs_vector.shape, filter.shape) +thresh_average = threshold_proportional(weighted_average, THRESH) +nbs_vector2 = thresh_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +filter = np.where(nbs_vector2 > 0, True, False) + +# mask = io.vectorize_corrmats(filter) +edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter] + +# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE +if CONFOUNDS is not None: + confounds_train = dat[CONFOUNDS].values + outcome_train = np.reshape(outcome, (outcome.shape[0],)) + # regress out the confounds from each edge and the outcome variable, + # use the residuals for the rest of the algorithm + # print(confounds.shape, outcome.shape) + if len(np.unique(outcome_train)) <= 2: + resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train) + train_outcome = outcome + elif len(np.unique(outcome_train)) > 3: + train_outcome, resid_edges = nbs.residualize( + X=edges_train, y=outcome_train, confounds=confounds_train + ) + train_features = resid_edges +else: + train_features = edges_train + train_outcome = outcome + +x_scaler = StandardScaler() +y_scaler = StandardScaler() +train_features = x_scaler.fit_transform(train_features) +if len(np.unique(train_outcome)) <= 2: + pass +else: + train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1)) + +# run the model on the whole test dataset to get params + +# classification if the outcome is binary (for now) +# could be extended to the multiclass case? 
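# A possible multiclass extension of the branch below (a sketch, not in this
# patch; with solver="saga", scikit-learn fits a multinomial model by default):
#
#   model = LogisticRegression(penalty="l2", solver="saga",
#                              multi_class="multinomial", C=best.C_[0])
#
# scoring would then be multiclass accuracy rather than binary accuracy.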
+ +cv = RepeatedKFold(n_splits=5, n_repeats=10) + +train_metrics = {} +if len(np.unique(outcome)) == 2: + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) + train_metrics["alpha"] = best.C_[0] + # train_metrics["l1_ratio"] = best.l1_ratio_ +else: + model = Ridge(solver="saga", alpha=best.alpha_) + train_metrics["alpha"] = best.alpha_ + # train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) +# train ElasticNet on full train dataset, using feature extraction from NBS-Predict + +scores = cross_validate( + model, + train_features, + train_outcome, + groups=groups, + cv=cv, + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) + +fitted = scores["estimator"][0] +y_pred = fitted.predict(X=train_features) +train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled"]] +Ys.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x=f"{OUTCOME}_scaled", + y=f"{OUTCOME}_pred", + # style='bc', + data=Ys, + ax=ax, + palette=dark_cmap, +) +# ax.legend(bbox_to_anchor=(1.0, 0.5)) +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + +mse = mean_squared_error(train_outcome, y_pred) +train_metrics["mean squared error"] = mse +print("In-sample prediction score: ", train_metrics["in_sample_test"]) +print("In-sample mean squared error: ", mse) +train_metrics["in_sample_mse"] = mse +# print(np.mean(train_features)) +with open( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(train_metrics, fp) + +# yoink the coefficients? for a more parsimonious figure? +coeff_vec = np.zeros_like(filter) +j = 0 +for i in range(0, filter.shape[0]): + if filter[i] == True: + # print(j) + coeff_vec[i] = fitted.coef_[0, j] + j += 1 + else: + pass + + +coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node) +coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index) +coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv")) + +fig, fig2, nimg = io.plot_edges( + coef_mat, + atlas_fname, + threshold="computed", + title=f"{OUTCOME} Coefficients", + strength=True, + cmap="seismic", + node_size="strength", +) + +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400 +) +fig2.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"), + dpi=400, +) +nib.save( + nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}") +) + + +# scale after residualizing omg +test_features = x_scaler.transform(test_features) +if len(np.unique(test_outcome)) <= 2: + pass +else: + test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1)) +# print(test_features.shape) +# if the model is a logistic regression, i.e. 
with a binary outcome +# then score is prediction accuracy +# if the model is a linear regression, i.e., with a continuous outcome +# then the score is R^2 (coefficient of determination) + +# fit trained ElasticNet, initialized via warm_start +# prob in CV? +# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome)) +# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome)) +test_metrics = {} +y_pred = fitted.predict(X=test_features) +score = fitted.score(X=test_features, y=np.ravel(test_outcome)) +if len(np.unique(test_outcome)) == 2: + test_metrics["accuracy"] = score +else: + test_metrics["coefficient of determination"] = score +corr = spearmanr(test_outcome, y_pred) +test_metrics["pred_v_actual_corr"] = corr +mse = mean_squared_error(test_outcome, y_pred) +test_metrics["mean squared error"] = mse +print("Out-of-sample prediction score:\t", score) +print("Out-of-sample mean squared error:\t", mse) +# print(np.mean(test_features)) +# pred_outcome = fitted.predict(test_features) +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light = ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") +mpl.colormaps.register(cmap=light) +mpl.colormaps.register(cmap=dark) + +fig, ax = plt.subplots() +g = sns.scatterplot( + x=f"{OUTCOME}_scaled", + y=f"{OUTCOME}_pred", + # style='bc', + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + + +# print(test_outcome, "\n", y_pred) +# print(pred_outcome) +if len(np.unique(test_outcome)) > 2: + corr = spearmanr(test_outcome, y_pred) + print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr) + test_metrics["spearman correlation"] = corr +with open( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(test_metrics, fp) +np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred) diff --git a/idconn/workflows/nbs_predict-bc_sensitivity.py b/idconn/workflows/nbs_predict-bc_sensitivity.py new file mode 100644 index 0000000..813cf66 --- /dev/null +++ b/idconn/workflows/nbs_predict-bc_sensitivity.py @@ -0,0 +1,412 @@ +#!/usr/bin/env python3 +import pandas as pd +import numpy as np +import nibabel as nib +import seaborn as sns +import bids +import matplotlib.pyplot as plt +from os.path import join +from datetime import datetime +from time import strftime +from scipy.stats import spearmanr +from idconn import nbs, io + +from bct import threshold_proportional + + +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_validate +from sklearn.preprocessing import Normalizer, StandardScaler +from sklearn.metrics import mean_squared_error +from matplotlib.colors import ListedColormap +import matplotlib as mpl + + +import warnings +import json + 
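# This sensitivity variant is meant to exclude mid-cycle sessions (cycle day
# strictly between 11 and 17) before modeling. A minimal sketch of that
# exclusion, assuming `cycle_day` is a column of the dataframe returned by
# io.read_corrmats:
#
#   drop = dat[dat["cycle_day"].between(11, 17, inclusive="neither")].index
#   dat = dat.drop(index=drop)
#
# note that the script computes `drop` below but never applies it; the second
# line is the piece that would actually remove those sessions.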
+warnings.simplefilter("ignore") + +today = datetime.today() +today_str = strftime("%m_%d_%Y") + +TRAIN_DSET = "/Users/katherine.b/Dropbox/Data/ds002674" +TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset" +DERIV_NAME = "IDConn" +OUTCOME = "bc" +CONFOUNDS = ["framewise_displacement"] +TASK = "rest" +ATLAS = "craddock2012" +THRESH = 0.5 +alpha = 0.01 +atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz" + + +layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True) + +dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) + +drop = dat[dat["cycle_day"].between(11, 17, inclusive="neither")].index +keep = dat["adj"].dropna().index +dat = dat.loc[keep] + +groups = dat["bc"] +# print(dat['adj'].values.shape) +num_node = dat.iloc[0]["adj"].shape[0] + +matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node)) +upper_tri = np.triu_indices(num_node, k=1) + +outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) + +# print(len(np.unique(outcome))) + +if CONFOUNDS is not None: + confounds = dat[CONFOUNDS] + base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}" +else: + confounds = None + base_name = f"nbs-predict_outcome-{OUTCOME}" +# print(dat['bc']) + +weighted_average, cv_results = nbs.kfold_nbs( + matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=500 +) + +fig, fig2, nimg = io.plot_edges( + weighted_average, + atlas_fname, + threshold="computed", + title=f"{OUTCOME} Precision-Weighted Average", + strength=True, + cmap="seismic", + node_size="strength", +) + +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400 +) +fig2.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"), + dpi=400, +) +nib.save( + nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}") +) + + +avg_df = pd.DataFrame( + weighted_average, + index=range(0, weighted_average.shape[0]), + columns=range(0, weighted_average.shape[1]), +) + +cv_results.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t" +) +avg_df.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" +) + +best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"] + +# this uses the most predictive subnetwork as features in the model +# might replace with thresholded weighted_average +# or use _all_ the edges in weighted_average with KRR or ElasticNet... +# ORRR use thresholded weighted average edges with ElasticNet... +# - stays true to NBS-Predict +# - increases parsimony while handling multicollinearity... 
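# A sketch of the ElasticNet option weighed in this comment block
# (hypothetical, not what the script ends up doing):
#
#   from sklearn.linear_model import ElasticNetCV
#   enet = ElasticNetCV(l1_ratio=[0.1, 0.5, 0.9], cv=5)
#   enet.fit(train_features, np.ravel(train_outcome))
#
# which would shrink correlated edges jointly instead of hard-filtering them.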
+# either way, I don't think cv_results is necessary + +# here is where we'd threshold the weighted average to use for elastic-net +weighted_average = np.where(weighted_average > 0, weighted_average, 0) +# print(np.sum(weighted_average)) +# nbs_vector = weighted_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +# filter = np.where(nbs_vector >= p75, True, False) +# print(np.sum(filter)) +# print(nbs_vector.shape, filter.shape) + +thresh_average = threshold_proportional(weighted_average, THRESH) +nbs_vector2 = thresh_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +filter = np.where(nbs_vector2 > 0, True, False) + +# mask = io.vectorize_corrmats(filter) +edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter] + +# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE +if CONFOUNDS is not None: + confounds_train = dat[CONFOUNDS].values + outcome_train = np.reshape(outcome, (outcome.shape[0],)) + # regress out the confounds from each edge and the outcome variable, + # use the residuals for the rest of the algorithm + # print(confounds.shape, outcome.shape) + if len(np.unique(outcome_train)) <= 2: + resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train) + train_outcome = outcome + elif len(np.unique(outcome_train)) > 3: + train_outcome, resid_edges = nbs.residualize( + X=edges_train, y=outcome_train, confounds=confounds_train + ) + train_features = resid_edges +else: + train_features = edges_train + train_outcome = outcome + +x_scaler = StandardScaler() +y_scaler = StandardScaler() +train_features = x_scaler.fit_transform(train_features) +if len(np.unique(train_outcome)) <= 2: + pass +else: + train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1)) + + +# run the model on the whole test dataset to get params + +# classification if the outcome is binary (for now) +# could be extended to the multiclass case? 
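# Aside (a sketch, not part of this patch): RepeatedKFold ignores the `groups`
# argument that cross_validate passes through below. If sessions from one
# group must never span train and test folds, a group-aware splitter would be
# needed, e.g.
#
#   from sklearn.model_selection import GroupKFold
#   cv = GroupKFold(n_splits=2)   # n_splits must not exceed the group count
#   scores = cross_validate(model, train_features, train_outcome,
#                           groups=groups, cv=cv, return_estimator=True)
#
# whether grouping on the binary `bc` indicator is the right design is left
# open here.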
+train_metrics = {} +if len(np.unique(outcome)) == 2: + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) + train_metrics["alpha"] = best.C_[0] + # train_metrics["l1_ratio"] = best.l1_ratio_ +else: + model = Ridge( + solver="auto", + alpha=best.alpha_, + fit_intercept=False, + ) + train_metrics["alpha"] = best.alpha_ + +cv = RepeatedKFold(n_splits=5, n_repeats=10) + +# train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) +# train ElasticNet on full train dataset, using feature extraction from NBS-Predict +# fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) +scores = cross_validate( + model, + train_features, + train_outcome, + groups=groups, + cv=cv, + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) + +fitted = scores["estimator"][0] +y_pred = fitted.predict(X=train_features) +train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) + +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled", "bc", "cycle_day"]] +Ys.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_pred", style="bc", data=Ys, ax=ax, palette=dark_cmap +) +h = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_scaled", style="bc", data=Ys, ax=ax, palette=light_cmap +) +ax.legend(bbox_to_anchor=(1.0, 0.5)) +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + +mse = mean_squared_error(train_outcome, y_pred) +train_metrics["mean squared error"] = mse +print("In-sample train score: ", train_metrics["in_sample_train"]) +print("In-sample test score: ", train_metrics["in_sample_test"]) +print("In-sample mean squared error: ", mse) +# print(np.mean(train_features)) +with open( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(train_metrics, fp) + +# yoink the coefficients? for a more parsimonious figure? 
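# Caveat for the loop below: `filter` is boolean, so np.zeros_like(filter)
# allocates a boolean vector, and the float coefficients assigned into it are
# coerced to True/False. A dtype-safe sketch of the same back-projection:
#
#   coeff_vec = np.zeros(filter.shape, dtype=float)
#   coeff_vec[filter] = fitted.coef_.ravel()   # same mapping, vectorized
#   coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node)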
+# print(fitted.coef_.shape) +# print(fitted.coef_) +coeff_vec = np.zeros_like(filter) +j = 0 +for i in range(0, filter.shape[0]): + if filter[i] == True: + # print(j) + # print(fitted.coef_[0, j]) + coeff_vec[i] = fitted.coef_[0, j] + j += 1 + else: + pass + +# print(coeff_vec) +print(coeff_vec) +coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node) + +coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index) +coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv")) + +fig, fig2, nimg = io.plot_edges( + coef_mat, + atlas_fname, + threshold="computed", + title=f"{OUTCOME} Coefficients", + strength=True, + cmap="seismic", + node_size="strength", +) + +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400 +) +fig2.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"), + dpi=400, +) +nib.save( + nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}") +) + + +layout = bids.BIDSLayout(TEST_DSET, derivatives=True) + +test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) + +keep = test_df[[OUTCOME, "adj"]].dropna().index +# print(keep) + +test_df = test_df.loc[keep] + +outcome_test = test_df[OUTCOME].values +# print(test_df) + +# print(outcome_test) +matrices_test = np.vstack(test_df["adj"].dropna().values).reshape( + (len(test_df["adj"].dropna().index), num_node, num_node) +) +edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter] + +# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE +if CONFOUNDS is not None: + confounds_test = test_df[CONFOUNDS].values + + # regress out the confounds from each edge and the outcome variable, + # use the residuals for the rest of the algorithm + # print(confounds.shape, outcome.shape) + if len(np.unique(outcome_test)) <= 2: + resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test) + test_outcome = outcome_test + elif len(np.unique(outcome_test)) > 3: + test_outcome, resid_edges = nbs.residualize( + X=edges_test, y=outcome_test, confounds=confounds_test + ) + test_features = resid_edges +else: + test_features = edges_test + test_outcome = outcome_test + +# scale after residualizing omg +test_features = x_scaler.transform(test_features) +if len(np.unique(test_outcome)) <= 2: + pass +else: + test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1)) +# print(test_features.shape) +# if the model is a logistic regression, i.e. with a binary outcome +# then score is prediction accuracy +# if the model is a linear regression, i.e., with a continuous outcome +# then the score is R^2 (coefficient of determination) + +# fit trained ElasticNet, initialized via warm_start +# prob in CV? 
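# Spelling out the two score meanings noted above (a sketch; this is what
# fitted.score computes internally for each estimator type):
#
#   from sklearn.metrics import accuracy_score, r2_score
#   y_hat = fitted.predict(test_features)
#   if len(np.unique(test_outcome)) == 2:
#       score = accuracy_score(np.ravel(test_outcome), y_hat)   # classifier
#   else:
#       score = r2_score(np.ravel(test_outcome), y_hat)         # regressor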
+# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome)) +# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome)) +test_metrics = {} + +# cross_validate(model, ) +y_pred = fitted.predict(X=test_features) +score = fitted.score(X=test_features, y=np.ravel(test_outcome)) +if len(np.unique(test_outcome)) == 2: + test_metrics["accuracy"] = score +else: + test_metrics["coefficient of determination"] = score +corr = spearmanr(test_outcome, y_pred) +test_metrics["pred_v_actual_corr"] = corr +mse = mean_squared_error(test_outcome, y_pred) +test_metrics["mean squared error"] = mse +print("Out-of-sample prediction score:\t", score) +print("Out-of-sample mean squared error:\t", mse) +# print(np.mean(test_features)) +# pred_outcome = fitted.predict(test_features) +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred", "cycle_day", "bc"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light = ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") +mpl.colormaps.register(cmap=light) +mpl.colormaps.register(cmap=dark) + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_pred", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +h = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_scaled", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="dark_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + + +# print(test_outcome, "\n", y_pred) +# print(pred_outcome) +if len(np.unique(test_outcome)) > 2: + + print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr) + test_metrics["spearman correlation"] = corr +with open( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(test_metrics, fp) +np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred) diff --git a/idconn/workflows/nbs_predict-e2.py b/idconn/workflows/nbs_predict-e2.py index c92d274..a846b5a 100644 --- a/idconn/workflows/nbs_predict-e2.py +++ b/idconn/workflows/nbs_predict-e2.py @@ -58,7 +58,7 @@ outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) -#print(len(np.unique(outcome))) +# print(len(np.unique(outcome))) if CONFOUNDS is not None: confounds = dat[CONFOUNDS] @@ -107,7 +107,7 @@ join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" ) -best = cv_results.sort_values(by='score', ascending=False).iloc[0]['model'] +best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"] # this uses the most predictive subnetwork as features in the model # might replace with thresholded weighted_average @@ -119,20 +119,20 @@ # here is where we'd threshold the weighted average to use for elastic-net weighted_average = np.where(weighted_average > 0, weighted_average, 0) -#print(np.sum(weighted_average)) -#nbs_vector = weighted_average[upper_tri] 
-#p75 = np.percentile(nbs_vector, 75) -#filter = np.where(nbs_vector >= p75, True, False) -#print(np.sum(filter)) +# print(np.sum(weighted_average)) +# nbs_vector = weighted_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +# filter = np.where(nbs_vector >= p75, True, False) +# print(np.sum(filter)) # print(nbs_vector.shape, filter.shape) thresh_average = threshold_proportional(weighted_average, THRESH) nbs_vector2 = thresh_average[upper_tri] -#p75 = np.percentile(nbs_vector, 75) +# p75 = np.percentile(nbs_vector, 75) filter = np.where(nbs_vector2 > 0, True, False) # mask = io.vectorize_corrmats(filter) -edges_train = np.vstack(dat["edge_vector"].dropna().values)[:,filter] +edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter] # NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE if CONFOUNDS is not None: @@ -162,79 +162,71 @@ train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1)) - # run the model on the whole test dataset to get params # classification if the outcome is binary (for now) # could be extended to the multiclass case? train_metrics = {} if len(np.unique(outcome)) == 2: - model = LogisticRegression( - penalty="l2", - solver="saga", - C=best.C_[0] - ) + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) train_metrics["alpha"] = best.C_[0] - #train_metrics["l1_ratio"] = best.l1_ratio_ + # train_metrics["l1_ratio"] = best.l1_ratio_ else: model = Ridge( - solver="auto", + solver="auto", alpha=best.alpha_, fit_intercept=False, - ) + ) train_metrics["alpha"] = best.alpha_ cv = RepeatedKFold(n_splits=5, n_repeats=10) - #train_metrics["l1_ratio"] = best.l1_ratio_ -#print(params) -#model.set_params(**params) +# train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) # train ElasticNet on full train dataset, using feature extraction from NBS-Predict -#fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) +# fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) scores = cross_validate( - model, - train_features, - train_outcome, - groups=groups, + model, + train_features, + train_outcome, + groups=groups, cv=cv, - return_estimator=True, - return_train_score=True - ) -train_metrics["in_sample_test"] = np.mean(scores['test_score']) -train_metrics["in_sample_train"] = np.mean(scores['train_score']) + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) -fitted = scores['estimator'][0] +fitted = scores["estimator"][0] y_pred = fitted.predict(X=train_features) train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) -dat[f'{OUTCOME}_pred'] = y_pred -dat[f'{OUTCOME}_scaled'] = train_outcome - -Ys = dat[[f'{OUTCOME}_pred', f'{OUTCOME}_scaled', 'bc', 'cycle_day']] -Ys.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -train_colors = ['#a08ad1', #light - '#685690', #medium - '#3f2d69' #dark - ] -light_cmap = sns.color_palette('dark:#a08ad1') -dark_cmap = sns.color_palette('dark:#685690') - -fig,ax = plt.subplots() -g = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_pred', - style='bc', - data=Ys, - ax=ax, - palette=dark_cmap) -h = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_scaled', - style='bc', - data=Ys, - ax=ax, - palette=light_cmap) +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled", "bc", "cycle_day"]] +Ys.to_csv( + 
join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_pred", style="bc", data=Ys, ax=ax, palette=dark_cmap +) +h = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_scaled", style="bc", data=Ys, ax=ax, palette=light_cmap +) ax.legend(bbox_to_anchor=(1.0, 0.5)) -fig.savefig(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) mse = mean_squared_error(train_outcome, y_pred) train_metrics["mean squared error"] = mse @@ -248,14 +240,14 @@ json.dump(train_metrics, fp) # yoink the coefficients? for a more parsimonious figure? -#print(fitted.coef_.shape) -#print(fitted.coef_) +# print(fitted.coef_.shape) +# print(fitted.coef_) coeff_vec = np.zeros_like(filter) j = 0 for i in range(0, filter.shape[0]): if filter[i] == True: - #print(j) - #print(fitted.coef_[0, j]) + # print(j) + # print(fitted.coef_[0, j]) coeff_vec[i] = fitted.coef_[0, j] j += 1 else: @@ -345,7 +337,7 @@ # score = fitted_test.score(X=test_features, y=np.ravel(test_outcome)) test_metrics = {} -#cross_validate(model, ) +# cross_validate(model, ) y_pred = fitted.predict(X=test_features) score = fitted.score(X=test_features, y=np.ravel(test_outcome)) if len(np.unique(test_outcome)) == 2: @@ -360,56 +352,56 @@ print("Out-of-sample mean squared error:\t", mse) # print(np.mean(test_features)) # pred_outcome = fitted.predict(test_features) -test_df[f'{OUTCOME}_scaled'] = test_outcome -test_df[f'{OUTCOME}_pred'] = y_pred -Ys = test_df[[f'{OUTCOME}_scaled', - f'{OUTCOME}_pred', - 'cycle_day', - 'bc']] -Ys.to_csv(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -Ys['ppts'] = Ys.index.get_level_values(0) - - -light_colors = ['#33ACE3', #Bubbles - '#EA6964', #Blossom - '#4AB62C' #Buttercup - ] -dark_colors = ['#1278a6', - '#a11510', - '#228208'] -light = ListedColormap(light_colors, name='light_powderpuff') -dark = ListedColormap(dark_colors, name='dark_powderpuff') +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred", "cycle_day", "bc"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light = ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") mpl.colormaps.register(cmap=light) mpl.colormaps.register(cmap=dark) -fig,ax = plt.subplots() -g = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_pred', - style='bc', - data=Ys, - hue='ppts', - hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'], - ax=ax, - palette='light_powderpuff' - ) -h = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_scaled', - style='bc', - data=Ys, - hue='ppts', - hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'], - ax=ax, - palette='dark_powderpuff') -ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left') -fig.savefig(join(TEST_DSET, "derivatives", DERIV_NAME, 
f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') - - - -#print(test_outcome, "\n", y_pred) +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_pred", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +h = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_scaled", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="dark_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + + +# print(test_outcome, "\n", y_pred) # print(pred_outcome) if len(np.unique(test_outcome)) > 2: - + print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr) test_metrics["spearman correlation"] = corr with open( diff --git a/idconn/workflows/nbs_predict-e2_sensitivity.py b/idconn/workflows/nbs_predict-e2_sensitivity.py new file mode 100644 index 0000000..13177c7 --- /dev/null +++ b/idconn/workflows/nbs_predict-e2_sensitivity.py @@ -0,0 +1,412 @@ +#!/usr/bin/env python3 +import pandas as pd +import numpy as np +import nibabel as nib +import seaborn as sns +import bids +import matplotlib.pyplot as plt +from os.path import join +from datetime import datetime +from time import strftime +from scipy.stats import spearmanr +from idconn import nbs, io + +from bct import threshold_proportional + + +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_validate +from sklearn.preprocessing import Normalizer, StandardScaler +from sklearn.metrics import mean_squared_error +from matplotlib.colors import ListedColormap +import matplotlib as mpl + + +import warnings +import json + +warnings.simplefilter("ignore") + +today = datetime.today() +today_str = strftime("%m_%d_%Y") + +TRAIN_DSET = "/Users/katherine.b/Dropbox/Data/ds002674" +TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset" +DERIV_NAME = "IDConn" +OUTCOME = "estradiol" +CONFOUNDS = ["framewise_displacement"] +TASK = "rest" +ATLAS = "craddock2012" +THRESH = 0.5 +alpha = 0.01 +atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz" + + +layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True) + +dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) + +drop = dat[dat["cycle_day"].between(11, 17, inclusive="neither")].index +keep = dat["adj"].dropna().index +dat = dat.loc[keep] + +groups = dat["bc"] +# print(dat['adj'].values.shape) +num_node = dat.iloc[0]["adj"].shape[0] + +matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node)) +upper_tri = np.triu_indices(num_node, k=1) + +outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) + +# print(len(np.unique(outcome))) + +if CONFOUNDS is not None: + confounds = dat[CONFOUNDS] + base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}" +else: + confounds = None + base_name = f"nbs-predict_outcome-{OUTCOME}" +# print(dat['bc']) + +weighted_average, cv_results = nbs.kfold_nbs( + matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=500 +) + +fig, fig2, nimg = io.plot_edges( + weighted_average, + atlas_fname, + threshold="computed", + title=f"{OUTCOME} Precision-Weighted Average", + strength=True, + 
cmap="seismic", + node_size="strength", +) + +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400 +) +fig2.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"), + dpi=400, +) +nib.save( + nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}") +) + + +avg_df = pd.DataFrame( + weighted_average, + index=range(0, weighted_average.shape[0]), + columns=range(0, weighted_average.shape[1]), +) + +cv_results.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t" +) +avg_df.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" +) + +best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"] + +# this uses the most predictive subnetwork as features in the model +# might replace with thresholded weighted_average +# or use _all_ the edges in weighted_average with KRR or ElasticNet... +# ORRR use thresholded weighted average edges with ElasticNet... +# - stays true to NBS-Predict +# - increases parsimony while handling multicollinearity... +# either way, I don't think cv_results is necessary + +# here is where we'd threshold the weighted average to use for elastic-net +weighted_average = np.where(weighted_average > 0, weighted_average, 0) +# print(np.sum(weighted_average)) +# nbs_vector = weighted_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +# filter = np.where(nbs_vector >= p75, True, False) +# print(np.sum(filter)) +# print(nbs_vector.shape, filter.shape) + +thresh_average = threshold_proportional(weighted_average, THRESH) +nbs_vector2 = thresh_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +filter = np.where(nbs_vector2 > 0, True, False) + +# mask = io.vectorize_corrmats(filter) +edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter] + +# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE +if CONFOUNDS is not None: + confounds_train = dat[CONFOUNDS].values + outcome_train = np.reshape(outcome, (outcome.shape[0],)) + # regress out the confounds from each edge and the outcome variable, + # use the residuals for the rest of the algorithm + # print(confounds.shape, outcome.shape) + if len(np.unique(outcome_train)) <= 2: + resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train) + train_outcome = outcome + elif len(np.unique(outcome_train)) > 3: + train_outcome, resid_edges = nbs.residualize( + X=edges_train, y=outcome_train, confounds=confounds_train + ) + train_features = resid_edges +else: + train_features = edges_train + train_outcome = outcome + +x_scaler = StandardScaler() +y_scaler = StandardScaler() +train_features = x_scaler.fit_transform(train_features) +if len(np.unique(train_outcome)) <= 2: + pass +else: + train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1)) + + +# run the model on the whole test dataset to get params + +# classification if the outcome is binary (for now) +# could be extended to the multiclass case? 
+train_metrics = {} +if len(np.unique(outcome)) == 2: + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) + train_metrics["alpha"] = best.C_[0] + # train_metrics["l1_ratio"] = best.l1_ratio_ +else: + model = Ridge( + solver="auto", + alpha=best.alpha_, + fit_intercept=False, + ) + train_metrics["alpha"] = best.alpha_ + +cv = RepeatedKFold(n_splits=5, n_repeats=10) + +# train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) +# train ElasticNet on full train dataset, using feature extraction from NBS-Predict +# fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) +scores = cross_validate( + model, + train_features, + train_outcome, + groups=groups, + cv=cv, + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) + +fitted = scores["estimator"][0] +y_pred = fitted.predict(X=train_features) +train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) + +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled", "bc", "cycle_day"]] +Ys.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_pred", style="bc", data=Ys, ax=ax, palette=dark_cmap +) +h = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_scaled", style="bc", data=Ys, ax=ax, palette=light_cmap +) +ax.legend(bbox_to_anchor=(1.0, 0.5)) +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + +mse = mean_squared_error(train_outcome, y_pred) +train_metrics["mean squared error"] = mse +print("In-sample train score: ", train_metrics["in_sample_train"]) +print("In-sample test score: ", train_metrics["in_sample_test"]) +print("In-sample mean squared error: ", mse) +# print(np.mean(train_features)) +with open( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(train_metrics, fp) + +# yoink the coefficients? for a more parsimonious figure? 
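# One way to get the "more parsimonious figure" mused about here (a sketch;
# `top_nodes` is hypothetical, not used below): collapse the signed
# coefficient matrix to per-node strength and keep only the strongest nodes,
#
#   strength = np.abs(coef_mat).sum(axis=0)       # per-node contribution
#   top_nodes = np.argsort(strength)[::-1][:10]   # ten strongest nodes
#
# assuming coef_mat is the num_node x num_node matrix rebuilt below.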
+# print(fitted.coef_.shape) +# print(fitted.coef_) +coeff_vec = np.zeros_like(filter) +j = 0 +for i in range(0, filter.shape[0]): + if filter[i] == True: + # print(j) + # print(fitted.coef_[0, j]) + coeff_vec[i] = fitted.coef_[0, j] + j += 1 + else: + pass + +# print(coeff_vec) +print(coeff_vec) +coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node) + +coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index) +coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv")) + +fig, fig2, nimg = io.plot_edges( + coef_mat, + atlas_fname, + threshold="computed", + title=f"{OUTCOME} Coefficients", + strength=True, + cmap="seismic", + node_size="strength", +) + +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400 +) +fig2.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"), + dpi=400, +) +nib.save( + nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}") +) + + +layout = bids.BIDSLayout(TEST_DSET, derivatives=True) + +test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) + +keep = test_df[[OUTCOME, "adj"]].dropna().index +# print(keep) + +test_df = test_df.loc[keep] + +outcome_test = test_df[OUTCOME].values +# print(test_df) + +# print(outcome_test) +matrices_test = np.vstack(test_df["adj"].dropna().values).reshape( + (len(test_df["adj"].dropna().index), num_node, num_node) +) +edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter] + +# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE +if CONFOUNDS is not None: + confounds_test = test_df[CONFOUNDS].values + + # regress out the confounds from each edge and the outcome variable, + # use the residuals for the rest of the algorithm + # print(confounds.shape, outcome.shape) + if len(np.unique(outcome_test)) <= 2: + resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test) + test_outcome = outcome_test + elif len(np.unique(outcome_test)) > 3: + test_outcome, resid_edges = nbs.residualize( + X=edges_test, y=outcome_test, confounds=confounds_test + ) + test_features = resid_edges +else: + test_features = edges_test + test_outcome = outcome_test + +# scale after residualizing omg +test_features = x_scaler.transform(test_features) +if len(np.unique(test_outcome)) <= 2: + pass +else: + test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1)) +# print(test_features.shape) +# if the model is a logistic regression, i.e. with a binary outcome +# then score is prediction accuracy +# if the model is a linear regression, i.e., with a continuous outcome +# then the score is R^2 (coefficient of determination) + +# fit trained ElasticNet, initialized via warm_start +# prob in CV? 
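# Note on the commented-out refit just below: calling fitted.fit on the test
# set would re-estimate coefficients on held-out data and stop being an
# out-of-sample test. The leak-free version, which the script does use, keeps
# the estimator frozen (a sketch):
#
#   y_pred = fitted.predict(X=test_features)                    # no refitting
#   score = fitted.score(X=test_features, y=np.ravel(test_outcome))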
+# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome)) +# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome)) +test_metrics = {} + +# cross_validate(model, ) +y_pred = fitted.predict(X=test_features) +score = fitted.score(X=test_features, y=np.ravel(test_outcome)) +if len(np.unique(test_outcome)) == 2: + test_metrics["accuracy"] = score +else: + test_metrics["coefficient of determination"] = score +corr = spearmanr(test_outcome, y_pred) +test_metrics["pred_v_actual_corr"] = corr +mse = mean_squared_error(test_outcome, y_pred) +test_metrics["mean squared error"] = mse +print("Out-of-sample prediction score:\t", score) +print("Out-of-sample mean squared error:\t", mse) +# print(np.mean(test_features)) +# pred_outcome = fitted.predict(test_features) +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred", "cycle_day", "bc"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light = ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") +mpl.colormaps.register(cmap=light) +mpl.colormaps.register(cmap=dark) + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_pred", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +h = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_scaled", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="dark_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + + +# print(test_outcome, "\n", y_pred) +# print(pred_outcome) +if len(np.unique(test_outcome)) > 2: + + print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr) + test_metrics["spearman correlation"] = corr +with open( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(test_metrics, fp) +np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred) diff --git a/idconn/workflows/nbs_predict-e2bc_sensitivity.py b/idconn/workflows/nbs_predict-e2bc_sensitivity.py new file mode 100644 index 0000000..8052164 --- /dev/null +++ b/idconn/workflows/nbs_predict-e2bc_sensitivity.py @@ -0,0 +1,412 @@ +#!/usr/bin/env python3 +import pandas as pd +import numpy as np +import nibabel as nib +import seaborn as sns +import bids +import matplotlib.pyplot as plt +from os.path import join +from datetime import datetime +from time import strftime +from scipy.stats import spearmanr +from idconn import nbs, io + +from bct import threshold_proportional + + +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_validate +from sklearn.preprocessing import Normalizer, StandardScaler +from sklearn.metrics import mean_squared_error +from matplotlib.colors import ListedColormap +import matplotlib as mpl + + +import warnings +import json + +warnings.simplefilter("ignore") + +today = datetime.today() +today_str 
= strftime("%m_%d_%Y")
+
+TRAIN_DSET = "/Users/katherine.b/Dropbox/Data/ds002674"
+TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset"
+DERIV_NAME = "IDConn"
+OUTCOME = "estradiol"
+CONFOUNDS = ["framewise_displacement", "bc"]
+TASK = "rest"
+ATLAS = "craddock2012"
+THRESH = 0.5
+alpha = 0.01
+atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz"
+
+
+layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True)
+
+dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+# sensitivity analysis: exclude sessions with cycle_day strictly between 11 and
+# 17 (the peri-ovulatory window), then keep only sessions with usable matrices
+drop = dat[dat["cycle_day"].between(11, 17, inclusive="neither")].index
+dat = dat.drop(index=drop)
+keep = dat["adj"].dropna().index
+dat = dat.loc[keep]
+
+groups = dat["bc"]
+# print(dat['adj'].values.shape)
+num_node = dat.iloc[0]["adj"].shape[0]
+
+matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node))
+upper_tri = np.triu_indices(num_node, k=1)
+
+outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1))
+
+# print(len(np.unique(outcome)))
+
+if CONFOUNDS is not None:
+    confounds = dat[CONFOUNDS]
+    base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}"
+else:
+    confounds = None
+    base_name = f"nbs-predict_outcome-{OUTCOME}"
+# print(dat['bc'])
+
+weighted_average, cv_results = nbs.kfold_nbs(
+    matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=500
+)
+
+fig, fig2, nimg = io.plot_edges(
+    weighted_average,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Precision-Weighted Average",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"),
+    dpi=400,
+)
+# nibabel infers the image format from the filename, so include an extension
+nib.save(
+    nimg,
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.nii.gz"),
+)
+
+
+avg_df = pd.DataFrame(
+    weighted_average,
+    index=range(0, weighted_average.shape[0]),
+    columns=range(0, weighted_average.shape[1]),
+)
+
+cv_results.to_csv(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t"
+)
+avg_df.to_csv(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t"
+)
+
+best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"]
+
+# this uses the most predictive subnetwork as features in the model
+# might replace with thresholded weighted_average
+# or use _all_ the edges in weighted_average with KRR or ElasticNet...
+# ORRR use thresholded weighted average edges with ElasticNet...
+# - stays true to NBS-Predict
+# - increases parsimony while handling multicollinearity...
+# either way, I don't think cv_results is necessary
+
+# here is where we'd threshold the weighted average to use for elastic-net
+weighted_average = np.where(weighted_average > 0, weighted_average, 0)
+# print(np.sum(weighted_average))
+# nbs_vector = weighted_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+# filter = np.where(nbs_vector >= p75, True, False)
+# print(np.sum(filter))
+# print(nbs_vector.shape, filter.shape)
+
+# keep only the strongest THRESH proportion of edges, then turn the surviving
+# upper triangle into a boolean feature mask
+thresh_average = threshold_proportional(weighted_average, THRESH)
+nbs_vector2 = thresh_average[upper_tri]
+filter = np.where(nbs_vector2 > 0, True, False)
+
+# mask = io.vectorize_corrmats(filter)
+edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter]
+
+# residualize the training set if confounds were specified
+if CONFOUNDS is not None:
+    confounds_train = dat[CONFOUNDS].values
+    outcome_train = np.reshape(outcome, (outcome.shape[0],))
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_train)) <= 2:
+        resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train)
+        train_outcome = outcome
+    else:
+        # residualize any non-binary outcome, including ordinal outcomes with
+        # only a few unique values
+        train_outcome, resid_edges = nbs.residualize(
+            X=edges_train, y=outcome_train, confounds=confounds_train
+        )
+    train_features = resid_edges
+else:
+    train_features = edges_train
+    train_outcome = outcome
+
+x_scaler = StandardScaler()
+y_scaler = StandardScaler()
+train_features = x_scaler.fit_transform(train_features)
+if len(np.unique(train_outcome)) <= 2:
+    pass
+else:
+    train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1))
+
+
+# rerun the selected model on the full training dataset to get final parameters
+
+# classification if the outcome is binary (for now)
+# could be extended to the multiclass case?
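+# On the multiclass question above, a sketch (not part of this workflow):
+# scikit-learn's LogisticRegression handles more than two classes natively via
+# a multinomial loss, so the binary branch below would mostly carry over, e.g.
+#     model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0])
+# with model.score() still returning accuracy; only the binary check on
+# np.unique(outcome) would need to become a general categorical test.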
+train_metrics = {} +if len(np.unique(outcome)) == 2: + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) + train_metrics["alpha"] = best.C_[0] + # train_metrics["l1_ratio"] = best.l1_ratio_ +else: + model = Ridge( + solver="auto", + alpha=best.alpha_, + fit_intercept=False, + ) + train_metrics["alpha"] = best.alpha_ + +cv = RepeatedKFold(n_splits=5, n_repeats=10) + +# train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) +# train ElasticNet on full train dataset, using feature extraction from NBS-Predict +# fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) +scores = cross_validate( + model, + train_features, + train_outcome, + groups=groups, + cv=cv, + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) + +fitted = scores["estimator"][0] +y_pred = fitted.predict(X=train_features) +train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) + +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled", "bc", "cycle_day"]] +Ys.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_pred", style="bc", data=Ys, ax=ax, palette=dark_cmap +) +h = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_scaled", style="bc", data=Ys, ax=ax, palette=light_cmap +) +ax.legend(bbox_to_anchor=(1.0, 0.5)) +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + +mse = mean_squared_error(train_outcome, y_pred) +train_metrics["mean squared error"] = mse +print("In-sample train score: ", train_metrics["in_sample_train"]) +print("In-sample test score: ", train_metrics["in_sample_test"]) +print("In-sample mean squared error: ", mse) +# print(np.mean(train_features)) +with open( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(train_metrics, fp) + +# yoink the coefficients? for a more parsimonious figure? 
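+# A sketch (assumption, not the original logic): scores["estimator"] above
+# holds one fitted model per CV fold, so indexing [0] is an arbitrary choice;
+# selecting the fold with the best held-out score is one alternative:
+#     fitted = scores["estimator"][int(np.argmax(scores["test_score"]))]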
+# print(fitted.coef_.shape)
+# print(fitted.coef_)
+# allocate a float vector explicitly; np.zeros_like(filter) would inherit
+# filter's boolean dtype and truncate every coefficient to 0 or 1
+coeff_vec = np.zeros(filter.shape, dtype=float)
+j = 0
+for i in range(0, filter.shape[0]):
+    if filter[i]:
+        coeff_vec[i] = fitted.coef_[0, j]
+        j += 1
+
+print(coeff_vec)
+coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node)
+
+coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index)
+coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv"))
+
+fig, fig2, nimg = io.plot_edges(
+    coef_mat,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Coefficients",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"),
+    dpi=400,
+)
+# nibabel infers the image format from the filename, so include an extension
+nib.save(
+    nimg,
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.nii.gz"),
+)
+
+
+layout = bids.BIDSLayout(TEST_DSET, derivatives=True)
+
+test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+keep = test_df[[OUTCOME, "adj"]].dropna().index
+# print(keep)
+
+test_df = test_df.loc[keep]
+
+outcome_test = test_df[OUTCOME].values
+# print(test_df)
+
+# print(outcome_test)
+matrices_test = np.vstack(test_df["adj"].dropna().values).reshape(
+    (len(test_df["adj"].dropna().index), num_node, num_node)
+)
+edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
+
+# residualize the test set if confounds were specified
+if CONFOUNDS is not None:
+    confounds_test = test_df[CONFOUNDS].values
+
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_test)) <= 2:
+        resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test)
+        test_outcome = outcome_test
+    else:
+        # residualize any non-binary outcome, including ordinal outcomes with
+        # only a few unique values
+        test_outcome, resid_edges = nbs.residualize(
+            X=edges_test, y=outcome_test, confounds=confounds_test
+        )
+    test_features = resid_edges
+else:
+    test_features = edges_test
+    test_outcome = outcome_test
+
+# scale only after residualizing
+test_features = x_scaler.transform(test_features)
+if len(np.unique(test_outcome)) <= 2:
+    pass
+else:
+    test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1))
+# print(test_features.shape)
+# if the model is a logistic regression, i.e., with a binary outcome,
+# then score is prediction accuracy;
+# if the model is a linear regression, i.e., with a continuous outcome,
+# then the score is R^2 (coefficient of determination)
+
+# evaluate the model fitted on the training set against the held-out dataset
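+# Note: the transform-only calls above deliberately reuse the scalers fit on
+# the training set, so no test-set statistics leak into the features or the
+# outcome scaling.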
+# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome)) +# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome)) +test_metrics = {} + +# cross_validate(model, ) +y_pred = fitted.predict(X=test_features) +score = fitted.score(X=test_features, y=np.ravel(test_outcome)) +if len(np.unique(test_outcome)) == 2: + test_metrics["accuracy"] = score +else: + test_metrics["coefficient of determination"] = score +corr = spearmanr(test_outcome, y_pred) +test_metrics["pred_v_actual_corr"] = corr +mse = mean_squared_error(test_outcome, y_pred) +test_metrics["mean squared error"] = mse +print("Out-of-sample prediction score:\t", score) +print("Out-of-sample mean squared error:\t", mse) +# print(np.mean(test_features)) +# pred_outcome = fitted.predict(test_features) +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred", "cycle_day", "bc"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light = ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") +mpl.colormaps.register(cmap=light) +mpl.colormaps.register(cmap=dark) + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_pred", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +h = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_scaled", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="dark_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + + +# print(test_outcome, "\n", y_pred) +# print(pred_outcome) +if len(np.unique(test_outcome)) > 2: + + print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr) + test_metrics["spearman correlation"] = corr +with open( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(test_metrics, fp) +np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred) diff --git a/idconn/workflows/nbs_predict-e2xp4-bc.py b/idconn/workflows/nbs_predict-e2xp4-bc.py index ad6a6d8..4b32a85 100644 --- a/idconn/workflows/nbs_predict-e2xp4-bc.py +++ b/idconn/workflows/nbs_predict-e2xp4-bc.py @@ -46,7 +46,7 @@ dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) -dat['estradiol÷progesterone'] = dat['estradiol'] / dat['progesterone'] +dat["estradiol÷progesterone"] = dat["estradiol"] / dat["progesterone"] keep = dat["adj"].dropna().index dat = dat.loc[keep] @@ -60,7 +60,7 @@ outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) -#print(len(np.unique(outcome))) +# print(len(np.unique(outcome))) if CONFOUNDS is not None: confounds = dat[CONFOUNDS] @@ -109,7 +109,7 @@ join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" ) -best = cv_results.sort_values(by='score', ascending=False).iloc[0]['model'] +best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"] # this uses the most predictive 
subnetwork as features in the model # might replace with thresholded weighted_average @@ -121,20 +121,20 @@ # here is where we'd threshold the weighted average to use for elastic-net weighted_average = np.where(weighted_average > 0, weighted_average, 0) -#print(np.sum(weighted_average)) -#nbs_vector = weighted_average[upper_tri] -#p75 = np.percentile(nbs_vector, 75) -#filter = np.where(nbs_vector >= p75, True, False) -#print(np.sum(filter)) +# print(np.sum(weighted_average)) +# nbs_vector = weighted_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +# filter = np.where(nbs_vector >= p75, True, False) +# print(np.sum(filter)) # print(nbs_vector.shape, filter.shape) thresh_average = threshold_proportional(weighted_average, THRESH) nbs_vector2 = thresh_average[upper_tri] -#p75 = np.percentile(nbs_vector, 75) +# p75 = np.percentile(nbs_vector, 75) filter = np.where(nbs_vector2 > 0, True, False) # mask = io.vectorize_corrmats(filter) -edges_train = np.vstack(dat["edge_vector"].dropna().values)[:,filter] +edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter] # NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE if CONFOUNDS is not None: @@ -164,79 +164,71 @@ train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1)) - # run the model on the whole test dataset to get params # classification if the outcome is binary (for now) # could be extended to the multiclass case? train_metrics = {} if len(np.unique(outcome)) == 2: - model = LogisticRegression( - penalty="l2", - solver="saga", - C=best.C_[0] - ) + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) train_metrics["alpha"] = best.C_[0] - #train_metrics["l1_ratio"] = best.l1_ratio_ + # train_metrics["l1_ratio"] = best.l1_ratio_ else: model = Ridge( - solver="auto", + solver="auto", alpha=best.alpha_, fit_intercept=False, - ) + ) train_metrics["alpha"] = best.alpha_ cv = RepeatedKFold(n_splits=5, n_repeats=10) - #train_metrics["l1_ratio"] = best.l1_ratio_ -#print(params) -#model.set_params(**params) +# train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) # train ElasticNet on full train dataset, using feature extraction from NBS-Predict -#fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) +# fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) scores = cross_validate( - model, - train_features, - train_outcome, - groups=groups, + model, + train_features, + train_outcome, + groups=groups, cv=cv, - return_estimator=True, - return_train_score=True - ) -train_metrics["in_sample_test"] = np.mean(scores['test_score']) -train_metrics["in_sample_train"] = np.mean(scores['train_score']) + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) -fitted = scores['estimator'][0] +fitted = scores["estimator"][0] y_pred = fitted.predict(X=train_features) train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) -dat[f'{OUTCOME}_pred'] = y_pred -dat[f'{OUTCOME}_scaled'] = train_outcome - -Ys = dat[[f'{OUTCOME}_pred', f'{OUTCOME}_scaled', 'bc', 'cycle_day']] -Ys.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -train_colors = ['#a08ad1', #light - '#685690', #medium - '#3f2d69' #dark - ] -light_cmap = sns.color_palette('dark:#a08ad1') -dark_cmap = sns.color_palette('dark:#685690') - -fig,ax = plt.subplots() -g = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_pred', - 
style='bc', - data=Ys, - ax=ax, - palette=dark_cmap) -h = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_scaled', - style='bc', - data=Ys, - ax=ax, - palette=light_cmap) +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled", "bc", "cycle_day"]] +Ys.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_pred", style="bc", data=Ys, ax=ax, palette=dark_cmap +) +h = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_scaled", style="bc", data=Ys, ax=ax, palette=light_cmap +) ax.legend(bbox_to_anchor=(1.0, 0.5)) -fig.savefig(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) mse = mean_squared_error(train_outcome, y_pred) train_metrics["mean squared error"] = mse @@ -250,14 +242,14 @@ json.dump(train_metrics, fp) # yoink the coefficients? for a more parsimonious figure? -#print(fitted.coef_.shape) -#print(fitted.coef_) +# print(fitted.coef_.shape) +# print(fitted.coef_) coeff_vec = np.zeros_like(filter) j = 0 for i in range(0, filter.shape[0]): if filter[i] == True: - #print(j) - #print(fitted.coef_[0, j]) + # print(j) + # print(fitted.coef_[0, j]) coeff_vec[i] = fitted.coef_[0, j] j += 1 else: @@ -295,7 +287,7 @@ layout = bids.BIDSLayout(TEST_DSET, derivatives=True) test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) -test_df['estradiol÷progesterone'] = test_df['estradiol'] / test_df['progesterone'] +test_df["estradiol÷progesterone"] = test_df["estradiol"] / test_df["progesterone"] keep = test_df[[OUTCOME, "adj"]].dropna().index # print(keep) @@ -348,7 +340,7 @@ # score = fitted_test.score(X=test_features, y=np.ravel(test_outcome)) test_metrics = {} -#cross_validate(model, ) +# cross_validate(model, ) y_pred = fitted.predict(X=test_features) score = fitted.score(X=test_features, y=np.ravel(test_outcome)) if len(np.unique(test_outcome)) == 2: @@ -363,56 +355,56 @@ print("Out-of-sample mean squared error:\t", mse) # print(np.mean(test_features)) # pred_outcome = fitted.predict(test_features) -test_df[f'{OUTCOME}_scaled'] = test_outcome -test_df[f'{OUTCOME}_pred'] = y_pred -Ys = test_df[[f'{OUTCOME}_scaled', - f'{OUTCOME}_pred', - 'cycle_day', - 'bc']] -Ys.to_csv(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -Ys['ppts'] = Ys.index.get_level_values(0) - - -light_colors = ['#33ACE3', #Bubbles - '#EA6964', #Blossom - '#4AB62C' #Buttercup - ] -dark_colors = ['#1278a6', - '#a11510', - '#228208'] -light = ListedColormap(light_colors, name='light_powderpuff') -dark = ListedColormap(dark_colors, name='dark_powderpuff') +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred", "cycle_day", "bc"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light 
= ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") mpl.colormaps.register(cmap=light) mpl.colormaps.register(cmap=dark) -fig,ax = plt.subplots() -g = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_pred', - style='bc', - data=Ys, - hue='ppts', - hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'], - ax=ax, - palette='light_powderpuff' - ) -h = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_scaled', - style='bc', - data=Ys, - hue='ppts', - hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'], - ax=ax, - palette='dark_powderpuff') -ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left') -fig.savefig(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') - - - -#print(test_outcome, "\n", y_pred) +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_pred", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +h = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_scaled", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="dark_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + + +# print(test_outcome, "\n", y_pred) # print(pred_outcome) if len(np.unique(test_outcome)) > 2: - + print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr) test_metrics["spearman correlation"] = corr with open( diff --git a/idconn/workflows/nbs_predict-e2xp4.py b/idconn/workflows/nbs_predict-e2xp4.py index 022d8b9..fcd6f40 100644 --- a/idconn/workflows/nbs_predict-e2xp4.py +++ b/idconn/workflows/nbs_predict-e2xp4.py @@ -46,7 +46,7 @@ dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) -dat['estradiol÷progesterone'] = dat['estradiol'] / dat['progesterone'] +dat["estradiol÷progesterone"] = dat["estradiol"] / dat["progesterone"] keep = dat["adj"].dropna().index dat = dat.loc[keep] @@ -60,7 +60,7 @@ outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) -#print(len(np.unique(outcome))) +# print(len(np.unique(outcome))) if CONFOUNDS is not None: confounds = dat[CONFOUNDS] @@ -109,7 +109,7 @@ join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" ) -best = cv_results.sort_values(by='score', ascending=False).iloc[0]['model'] +best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"] # this uses the most predictive subnetwork as features in the model # might replace with thresholded weighted_average @@ -121,20 +121,20 @@ # here is where we'd threshold the weighted average to use for elastic-net weighted_average = np.where(weighted_average > 0, weighted_average, 0) -#print(np.sum(weighted_average)) -#nbs_vector = weighted_average[upper_tri] -#p75 = np.percentile(nbs_vector, 75) -#filter = np.where(nbs_vector >= p75, True, False) -#print(np.sum(filter)) +# print(np.sum(weighted_average)) +# nbs_vector = weighted_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +# filter = np.where(nbs_vector >= p75, True, False) +# print(np.sum(filter)) # print(nbs_vector.shape, filter.shape) thresh_average = threshold_proportional(weighted_average, THRESH) nbs_vector2 = thresh_average[upper_tri] -#p75 = np.percentile(nbs_vector, 75) +# p75 = 
np.percentile(nbs_vector, 75) filter = np.where(nbs_vector2 > 0, True, False) # mask = io.vectorize_corrmats(filter) -edges_train = np.vstack(dat["edge_vector"].dropna().values)[:,filter] +edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter] # NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE if CONFOUNDS is not None: @@ -164,79 +164,71 @@ train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1)) - # run the model on the whole test dataset to get params # classification if the outcome is binary (for now) # could be extended to the multiclass case? train_metrics = {} if len(np.unique(outcome)) == 2: - model = LogisticRegression( - penalty="l2", - solver="saga", - C=best.C_[0] - ) + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) train_metrics["alpha"] = best.C_[0] - #train_metrics["l1_ratio"] = best.l1_ratio_ + # train_metrics["l1_ratio"] = best.l1_ratio_ else: model = Ridge( - solver="auto", + solver="auto", alpha=best.alpha_, fit_intercept=False, - ) + ) train_metrics["alpha"] = best.alpha_ cv = RepeatedKFold(n_splits=5, n_repeats=10) - #train_metrics["l1_ratio"] = best.l1_ratio_ -#print(params) -#model.set_params(**params) +# train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) # train ElasticNet on full train dataset, using feature extraction from NBS-Predict -#fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) +# fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) scores = cross_validate( - model, - train_features, - train_outcome, - groups=groups, + model, + train_features, + train_outcome, + groups=groups, cv=cv, - return_estimator=True, - return_train_score=True - ) -train_metrics["in_sample_test"] = np.mean(scores['test_score']) -train_metrics["in_sample_train"] = np.mean(scores['train_score']) + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) -fitted = scores['estimator'][0] +fitted = scores["estimator"][0] y_pred = fitted.predict(X=train_features) train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) -dat[f'{OUTCOME}_pred'] = y_pred -dat[f'{OUTCOME}_scaled'] = train_outcome - -Ys = dat[[f'{OUTCOME}_pred', f'{OUTCOME}_scaled', 'bc', 'cycle_day']] -Ys.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -train_colors = ['#a08ad1', #light - '#685690', #medium - '#3f2d69' #dark - ] -light_cmap = sns.color_palette('dark:#a08ad1') -dark_cmap = sns.color_palette('dark:#685690') - -fig,ax = plt.subplots() -g = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_pred', - style='bc', - data=Ys, - ax=ax, - palette=dark_cmap) -h = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_scaled', - style='bc', - data=Ys, - ax=ax, - palette=light_cmap) +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled", "bc", "cycle_day"]] +Ys.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_pred", style="bc", data=Ys, ax=ax, palette=dark_cmap +) +h = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_scaled", style="bc", data=Ys, ax=ax, 
palette=light_cmap +) ax.legend(bbox_to_anchor=(1.0, 0.5)) -fig.savefig(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) mse = mean_squared_error(train_outcome, y_pred) train_metrics["mean squared error"] = mse @@ -250,14 +242,14 @@ json.dump(train_metrics, fp) # yoink the coefficients? for a more parsimonious figure? -#print(fitted.coef_.shape) -#print(fitted.coef_) +# print(fitted.coef_.shape) +# print(fitted.coef_) coeff_vec = np.zeros_like(filter) j = 0 for i in range(0, filter.shape[0]): if filter[i] == True: - #print(j) - #print(fitted.coef_[0, j]) + # print(j) + # print(fitted.coef_[0, j]) coeff_vec[i] = fitted.coef_[0, j] j += 1 else: @@ -295,7 +287,7 @@ layout = bids.BIDSLayout(TEST_DSET, derivatives=True) test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) -test_df['estradiol÷progesterone'] = test_df['estradiol'] / test_df['progesterone'] +test_df["estradiol÷progesterone"] = test_df["estradiol"] / test_df["progesterone"] keep = test_df[[OUTCOME, "adj"]].dropna().index # print(keep) @@ -348,7 +340,7 @@ # score = fitted_test.score(X=test_features, y=np.ravel(test_outcome)) test_metrics = {} -#cross_validate(model, ) +# cross_validate(model, ) y_pred = fitted.predict(X=test_features) score = fitted.score(X=test_features, y=np.ravel(test_outcome)) if len(np.unique(test_outcome)) == 2: @@ -363,56 +355,56 @@ print("Out-of-sample mean squared error:\t", mse) # print(np.mean(test_features)) # pred_outcome = fitted.predict(test_features) -test_df[f'{OUTCOME}_scaled'] = test_outcome -test_df[f'{OUTCOME}_pred'] = y_pred -Ys = test_df[[f'{OUTCOME}_scaled', - f'{OUTCOME}_pred', - 'cycle_day', - 'bc']] -Ys.to_csv(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -Ys['ppts'] = Ys.index.get_level_values(0) - - -light_colors = ['#33ACE3', #Bubbles - '#EA6964', #Blossom - '#4AB62C' #Buttercup - ] -dark_colors = ['#1278a6', - '#a11510', - '#228208'] -light = ListedColormap(light_colors, name='light_powderpuff') -dark = ListedColormap(dark_colors, name='dark_powderpuff') +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred", "cycle_day", "bc"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light = ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") mpl.colormaps.register(cmap=light) mpl.colormaps.register(cmap=dark) -fig,ax = plt.subplots() -g = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_pred', - style='bc', - data=Ys, - hue='ppts', - hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'], - ax=ax, - palette='light_powderpuff' - ) -h = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_scaled', - style='bc', - data=Ys, - hue='ppts', - hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'], - ax=ax, - palette='dark_powderpuff') -ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left') -fig.savefig(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') - - - -#print(test_outcome, 
"\n", y_pred) +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_pred", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +h = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_scaled", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="dark_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + + +# print(test_outcome, "\n", y_pred) # print(pred_outcome) if len(np.unique(test_outcome)) > 2: - + print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr) test_metrics["spearman correlation"] = corr with open( diff --git a/idconn/workflows/nbs_predict-p4.py b/idconn/workflows/nbs_predict-p4.py index 559b4ff..2251179 100644 --- a/idconn/workflows/nbs_predict-p4.py +++ b/idconn/workflows/nbs_predict-p4.py @@ -58,7 +58,7 @@ outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) -#print(len(np.unique(outcome))) +# print(len(np.unique(outcome))) if CONFOUNDS is not None: confounds = dat[CONFOUNDS] @@ -107,7 +107,7 @@ join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" ) -best = cv_results.sort_values(by='score', ascending=False).iloc[0]['model'] +best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"] # this uses the most predictive subnetwork as features in the model # might replace with thresholded weighted_average @@ -119,20 +119,20 @@ # here is where we'd threshold the weighted average to use for elastic-net weighted_average = np.where(weighted_average > 0, weighted_average, 0) -#print(np.sum(weighted_average)) -#nbs_vector = weighted_average[upper_tri] -#p75 = np.percentile(nbs_vector, 75) -#filter = np.where(nbs_vector >= p75, True, False) -#print(np.sum(filter)) +# print(np.sum(weighted_average)) +# nbs_vector = weighted_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +# filter = np.where(nbs_vector >= p75, True, False) +# print(np.sum(filter)) # print(nbs_vector.shape, filter.shape) thresh_average = threshold_proportional(weighted_average, THRESH) nbs_vector2 = thresh_average[upper_tri] -#p75 = np.percentile(nbs_vector, 75) +# p75 = np.percentile(nbs_vector, 75) filter = np.where(nbs_vector2 > 0, True, False) # mask = io.vectorize_corrmats(filter) -edges_train = np.vstack(dat["edge_vector"].dropna().values)[:,filter] +edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter] # NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE if CONFOUNDS is not None: @@ -162,79 +162,71 @@ train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1)) - # run the model on the whole test dataset to get params # classification if the outcome is binary (for now) # could be extended to the multiclass case? 
train_metrics = {} if len(np.unique(outcome)) == 2: - model = LogisticRegression( - penalty="l2", - solver="saga", - C=best.C_[0] - ) + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) train_metrics["alpha"] = best.C_[0] - #train_metrics["l1_ratio"] = best.l1_ratio_ + # train_metrics["l1_ratio"] = best.l1_ratio_ else: model = Ridge( - solver="auto", + solver="auto", alpha=best.alpha_, fit_intercept=False, - ) + ) train_metrics["alpha"] = best.alpha_ cv = RepeatedKFold(n_splits=5, n_repeats=10) - #train_metrics["l1_ratio"] = best.l1_ratio_ -#print(params) -#model.set_params(**params) +# train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) # train ElasticNet on full train dataset, using feature extraction from NBS-Predict -#fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) +# fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) scores = cross_validate( - model, - train_features, - train_outcome, - groups=groups, + model, + train_features, + train_outcome, + groups=groups, cv=cv, - return_estimator=True, - return_train_score=True - ) -train_metrics["in_sample_test"] = np.mean(scores['test_score']) -train_metrics["in_sample_train"] = np.mean(scores['train_score']) + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) -fitted = scores['estimator'][0] +fitted = scores["estimator"][0] y_pred = fitted.predict(X=train_features) train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) -dat[f'{OUTCOME}_pred'] = y_pred -dat[f'{OUTCOME}_scaled'] = train_outcome - -Ys = dat[[f'{OUTCOME}_pred', f'{OUTCOME}_scaled', 'bc', 'cycle_day']] -Ys.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -train_colors = ['#a08ad1', #light - '#685690', #medium - '#3f2d69' #dark - ] -light_cmap = sns.color_palette('dark:#a08ad1') -dark_cmap = sns.color_palette('dark:#685690') - -fig,ax = plt.subplots() -g = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_pred', - style='bc', - data=Ys, - ax=ax, - palette=dark_cmap) -h = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_scaled', - style='bc', - data=Ys, - ax=ax, - palette=light_cmap) +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled", "bc", "cycle_day"]] +Ys.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_pred", style="bc", data=Ys, ax=ax, palette=dark_cmap +) +h = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_scaled", style="bc", data=Ys, ax=ax, palette=light_cmap +) ax.legend(bbox_to_anchor=(1.0, 0.5)) -fig.savefig(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) mse = mean_squared_error(train_outcome, y_pred) train_metrics["mean squared error"] = mse @@ -248,12 +240,12 @@ json.dump(train_metrics, fp) # yoink the coefficients? for a more parsimonious figure? 
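+# NOTE: indexing fitted.coef_[0, j] below assumes coef_ has shape
+# (1, n_selected_features); this holds for LogisticRegression and for Ridge
+# fit on a column-vector y, which is how train_outcome is shaped above.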
-#print(fitted.coef_.shape) +# print(fitted.coef_.shape) coeff_vec = np.zeros_like(filter) j = 0 for i in range(0, filter.shape[0]): if filter[i] == True: - #print(j) + # print(j) coeff_vec[i] = fitted.coef_[0, j] j += 1 else: @@ -342,7 +334,7 @@ # score = fitted_test.score(X=test_features, y=np.ravel(test_outcome)) test_metrics = {} -#cross_validate(model, ) +# cross_validate(model, ) y_pred = fitted.predict(X=test_features) score = fitted.score(X=test_features, y=np.ravel(test_outcome)) if len(np.unique(test_outcome)) == 2: @@ -357,56 +349,56 @@ print("Out-of-sample mean squared error:\t", mse) # print(np.mean(test_features)) # pred_outcome = fitted.predict(test_features) -test_df[f'{OUTCOME}_scaled'] = test_outcome -test_df[f'{OUTCOME}_pred'] = y_pred -Ys = test_df[[f'{OUTCOME}_scaled', - f'{OUTCOME}_pred', - 'cycle_day', - 'bc']] -Ys.to_csv(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -Ys['ppts'] = Ys.index.get_level_values(0) - - -light_colors = ['#33ACE3', #Bubbles - '#EA6964', #Blossom - '#4AB62C' #Buttercup - ] -dark_colors = ['#1278a6', - '#a11510', - '#228208'] -light = ListedColormap(light_colors, name='light_powderpuff') -dark = ListedColormap(dark_colors, name='dark_powderpuff') +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred", "cycle_day", "bc"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light = ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") mpl.colormaps.register(cmap=light) mpl.colormaps.register(cmap=dark) -fig,ax = plt.subplots() -g = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_pred', - style='bc', - data=Ys, - hue='ppts', - hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'], - ax=ax, - palette='light_powderpuff' - ) -h = sns.scatterplot(x='cycle_day', - y=f'{OUTCOME}_scaled', - style='bc', - data=Ys, - hue='ppts', - hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'], - ax=ax, - palette='dark_powderpuff') -ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left') -fig.savefig(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') - - - -#print(test_outcome, "\n", y_pred) +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_pred", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +h = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_scaled", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="dark_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + + +# print(test_outcome, "\n", y_pred) # print(pred_outcome) if len(np.unique(test_outcome)) > 2: - + print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr) test_metrics["spearman correlation"] = corr with open( diff --git a/idconn/workflows/nbs_predict-p4_sensitivity.py b/idconn/workflows/nbs_predict-p4_sensitivity.py new file mode 100644 index 
0000000..449db27
--- /dev/null
+++ b/idconn/workflows/nbs_predict-p4_sensitivity.py
@@ -0,0 +1,412 @@
+#!/usr/bin/env python3
+import pandas as pd
+import numpy as np
+import nibabel as nib
+import seaborn as sns
+import bids
+import matplotlib.pyplot as plt
+from os.path import join
+from datetime import datetime
+from time import strftime
+from scipy.stats import spearmanr
+from idconn import nbs, io
+
+from bct import threshold_proportional
+
+
+from sklearn.linear_model import LogisticRegression, Ridge
+from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_validate
+from sklearn.preprocessing import Normalizer, StandardScaler
+from sklearn.metrics import mean_squared_error
+from matplotlib.colors import ListedColormap
+import matplotlib as mpl
+
+
+import warnings
+import json
+
+warnings.simplefilter("ignore")
+
+today = datetime.today()
+today_str = strftime("%m_%d_%Y")
+
+TRAIN_DSET = "/Users/katherine.b/Dropbox/Data/ds002674"
+TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset"
+DERIV_NAME = "IDConn"
+OUTCOME = "progesterone"
+CONFOUNDS = ["framewise_displacement"]
+TASK = "rest"
+ATLAS = "craddock2012"
+THRESH = 0.5
+alpha = 0.01
+atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz"
+
+
+layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True)
+
+dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+# sensitivity analysis: exclude sessions with cycle_day strictly between 11 and
+# 17 (the peri-ovulatory window), then keep only sessions with usable matrices
+drop = dat[dat["cycle_day"].between(11, 17, inclusive="neither")].index
+dat = dat.drop(index=drop)
+keep = dat["adj"].dropna().index
+dat = dat.loc[keep]
+
+groups = dat["bc"]
+# print(dat['adj'].values.shape)
+num_node = dat.iloc[0]["adj"].shape[0]
+
+matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node))
+upper_tri = np.triu_indices(num_node, k=1)
+
+outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1))
+
+# print(len(np.unique(outcome)))
+
+if CONFOUNDS is not None:
+    confounds = dat[CONFOUNDS]
+    base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}"
+else:
+    confounds = None
+    base_name = f"nbs-predict_outcome-{OUTCOME}"
+# print(dat['bc'])
+
+weighted_average, cv_results = nbs.kfold_nbs(
+    matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=500
+)
+
+fig, fig2, nimg = io.plot_edges(
+    weighted_average,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Precision-Weighted Average",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"),
+    dpi=400,
+)
+# nibabel infers the image format from the filename, so include an extension
+nib.save(
+    nimg,
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.nii.gz"),
+)
+
+
+avg_df = pd.DataFrame(
+    weighted_average,
+    index=range(0, weighted_average.shape[0]),
+    columns=range(0, weighted_average.shape[1]),
+)
+
+cv_results.to_csv(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t"
+)
+avg_df.to_csv(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t"
+)
+
+best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"]
+
+# this uses the most predictive subnetwork as features in the model
+# might replace with thresholded weighted_average
+# or use _all_ the edges in weighted_average with KRR or ElasticNet...
+# ORRR use thresholded weighted average edges with ElasticNet...
+# - stays true to NBS-Predict
+# - increases parsimony while handling multicollinearity...
+# either way, I don't think cv_results is necessary
+
+# here is where we'd threshold the weighted average to use for elastic-net
+weighted_average = np.where(weighted_average > 0, weighted_average, 0)
+# print(np.sum(weighted_average))
+# nbs_vector = weighted_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+# filter = np.where(nbs_vector >= p75, True, False)
+# print(np.sum(filter))
+# print(nbs_vector.shape, filter.shape)
+
+# keep only the strongest THRESH proportion of edges, then turn the surviving
+# upper triangle into a boolean feature mask
+thresh_average = threshold_proportional(weighted_average, THRESH)
+nbs_vector2 = thresh_average[upper_tri]
+filter = np.where(nbs_vector2 > 0, True, False)
+
+# mask = io.vectorize_corrmats(filter)
+edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter]
+
+# residualize the training set if confounds were specified
+if CONFOUNDS is not None:
+    confounds_train = dat[CONFOUNDS].values
+    outcome_train = np.reshape(outcome, (outcome.shape[0],))
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_train)) <= 2:
+        resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train)
+        train_outcome = outcome
+    else:
+        # residualize any non-binary outcome, including ordinal outcomes with
+        # only a few unique values
+        train_outcome, resid_edges = nbs.residualize(
+            X=edges_train, y=outcome_train, confounds=confounds_train
+        )
+    train_features = resid_edges
+else:
+    train_features = edges_train
+    train_outcome = outcome
+
+x_scaler = StandardScaler()
+y_scaler = StandardScaler()
+train_features = x_scaler.fit_transform(train_features)
+if len(np.unique(train_outcome)) <= 2:
+    pass
+else:
+    train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1))
+
+
+# rerun the selected model on the full training dataset to get final parameters
+
+# classification if the outcome is binary (for now)
+# could be extended to the multiclass case?
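+# Note, with a sketch (assumption, not the original design): RepeatedKFold
+# below ignores the `groups` argument passed to cross_validate, so folds need
+# not respect the bc grouping; if that matters,
+#     from sklearn.model_selection import GroupKFold
+#     cv = GroupKFold(n_splits=5)
+# would enforce group-disjoint folds (at the cost of losing the repeats).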
+train_metrics = {} +if len(np.unique(outcome)) == 2: + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) + train_metrics["alpha"] = best.C_[0] + # train_metrics["l1_ratio"] = best.l1_ratio_ +else: + model = Ridge( + solver="auto", + alpha=best.alpha_, + fit_intercept=False, + ) + train_metrics["alpha"] = best.alpha_ + +cv = RepeatedKFold(n_splits=5, n_repeats=10) + +# train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) +# train ElasticNet on full train dataset, using feature extraction from NBS-Predict +# fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) +scores = cross_validate( + model, + train_features, + train_outcome, + groups=groups, + cv=cv, + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) + +fitted = scores["estimator"][0] +y_pred = fitted.predict(X=train_features) +train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) + +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled", "bc", "cycle_day"]] +Ys.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_pred", style="bc", data=Ys, ax=ax, palette=dark_cmap +) +h = sns.scatterplot( + x="cycle_day", y=f"{OUTCOME}_scaled", style="bc", data=Ys, ax=ax, palette=light_cmap +) +ax.legend(bbox_to_anchor=(1.0, 0.5)) +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + +mse = mean_squared_error(train_outcome, y_pred) +train_metrics["mean squared error"] = mse +print("In-sample train score: ", train_metrics["in_sample_train"]) +print("In-sample test score: ", train_metrics["in_sample_test"]) +print("In-sample mean squared error: ", mse) +# print(np.mean(train_features)) +with open( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(train_metrics, fp) + +# yoink the coefficients? for a more parsimonious figure? 
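+# A sketch (hypothetical addition, not in the original workflow): the refit
+# estimator could be persisted alongside the metrics for later reuse, e.g.
+#     from joblib import dump
+#     dump(fitted, join(TRAIN_DSET, "derivatives", DERIV_NAME,
+#                       f"{base_name}_model-{today_str}.joblib"))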
+# print(fitted.coef_.shape)
+# print(fitted.coef_)
+# allocate a float vector explicitly; np.zeros_like(filter) would inherit
+# filter's boolean dtype and truncate every coefficient to 0 or 1
+coeff_vec = np.zeros(filter.shape, dtype=float)
+j = 0
+for i in range(0, filter.shape[0]):
+    if filter[i]:
+        coeff_vec[i] = fitted.coef_[0, j]
+        j += 1
+
+print(coeff_vec)
+coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node)
+
+coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index)
+coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv"))
+
+fig, fig2, nimg = io.plot_edges(
+    coef_mat,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Coefficients",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"),
+    dpi=400,
+)
+# nibabel infers the image format from the filename, so include an extension
+nib.save(
+    nimg,
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.nii.gz"),
+)
+
+
+layout = bids.BIDSLayout(TEST_DSET, derivatives=True)
+
+test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+keep = test_df[[OUTCOME, "adj"]].dropna().index
+# print(keep)
+
+test_df = test_df.loc[keep]
+
+outcome_test = test_df[OUTCOME].values
+# print(test_df)
+
+# print(outcome_test)
+matrices_test = np.vstack(test_df["adj"].dropna().values).reshape(
+    (len(test_df["adj"].dropna().index), num_node, num_node)
+)
+edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
+
+# residualize the test set if confounds were specified
+if CONFOUNDS is not None:
+    confounds_test = test_df[CONFOUNDS].values
+
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_test)) <= 2:
+        resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test)
+        test_outcome = outcome_test
+    else:
+        # residualize any non-binary outcome, including ordinal outcomes with
+        # only a few unique values
+        test_outcome, resid_edges = nbs.residualize(
+            X=edges_test, y=outcome_test, confounds=confounds_test
+        )
+    test_features = resid_edges
+else:
+    test_features = edges_test
+    test_outcome = outcome_test
+
+# scale only after residualizing
+test_features = x_scaler.transform(test_features)
+if len(np.unique(test_outcome)) <= 2:
+    pass
+else:
+    test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1))
+# print(test_features.shape)
+# if the model is a logistic regression, i.e., with a binary outcome,
+# then score is prediction accuracy;
+# if the model is a linear regression, i.e., with a continuous outcome,
+# then the score is R^2 (coefficient of determination)
+
+# evaluate the model fitted on the training set against the held-out dataset
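+# A sketch (hypothetical addition): once y_pred exists below, a simple pairs
+# bootstrap could attach a confidence interval to the out-of-sample Spearman
+# rho (assumes scipy >= 1.9 for the .statistic attribute):
+#     rng = np.random.default_rng(0)
+#     idx = rng.integers(0, len(y_pred), size=(1000, len(y_pred)))
+#     rhos = [spearmanr(np.ravel(test_outcome)[i], y_pred[i]).statistic for i in idx]
+#     ci = np.percentile(rhos, [2.5, 97.5])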
+# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome)) +# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome)) +test_metrics = {} + +# cross_validate(model, ) +y_pred = fitted.predict(X=test_features) +score = fitted.score(X=test_features, y=np.ravel(test_outcome)) +if len(np.unique(test_outcome)) == 2: + test_metrics["accuracy"] = score +else: + test_metrics["coefficient of determination"] = score +corr = spearmanr(test_outcome, y_pred) +test_metrics["pred_v_actual_corr"] = corr +mse = mean_squared_error(test_outcome, y_pred) +test_metrics["mean squared error"] = mse +print("Out-of-sample prediction score:\t", score) +print("Out-of-sample mean squared error:\t", mse) +# print(np.mean(test_features)) +# pred_outcome = fitted.predict(test_features) +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred", "cycle_day", "bc"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light = ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") +mpl.colormaps.register(cmap=light) +mpl.colormaps.register(cmap=dark) + +fig, ax = plt.subplots() +g = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_pred", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +h = sns.scatterplot( + x="cycle_day", + y=f"{OUTCOME}_scaled", + style="bc", + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="dark_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) + + +# print(test_outcome, "\n", y_pred) +# print(pred_outcome) +if len(np.unique(test_outcome)) > 2: + + print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr) + test_metrics["spearman correlation"] = corr +with open( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(test_metrics, fp) +np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred) diff --git a/idconn/workflows/nbs_predict-p4bc_sensitivity.py b/idconn/workflows/nbs_predict-p4bc_sensitivity.py new file mode 100644 index 0000000..8cf6026 --- /dev/null +++ b/idconn/workflows/nbs_predict-p4bc_sensitivity.py @@ -0,0 +1,412 @@ +#!/usr/bin/env python3 +import pandas as pd +import numpy as np +import nibabel as nib +import seaborn as sns +import bids +import matplotlib.pyplot as plt +from os.path import join +from datetime import datetime +from time import strftime +from scipy.stats import spearmanr +from idconn import nbs, io + +from bct import threshold_proportional + + +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_validate +from sklearn.preprocessing import Normalizer, StandardScaler +from sklearn.metrics import mean_squared_error +from matplotlib.colors import ListedColormap +import matplotlib as mpl + + +import warnings +import json + +warnings.simplefilter("ignore") + +today = datetime.today() +today_str 
= strftime("%m_%d_%Y")
+
+TRAIN_DSET = "/Users/katherine.b/Dropbox/Data/ds002674"
+TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset"
+DERIV_NAME = "IDConn"
+OUTCOME = "progesterone"
+CONFOUNDS = ["framewise_displacement", "bc"]
+TASK = "rest"
+ATLAS = "craddock2012"
+THRESH = 0.5
+alpha = 0.01
+atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz"
+
+
+layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True)
+
+dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+# sensitivity analysis: exclude scans acquired on cycle days 12-16
+drop = dat[dat["cycle_day"].between(11, 17, inclusive="neither")].index
+dat = dat.drop(drop)
+keep = dat["adj"].dropna().index
+dat = dat.loc[keep]
+
+groups = dat["bc"]
+# print(dat['adj'].values.shape)
+num_node = dat.iloc[0]["adj"].shape[0]
+
+matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node))
+upper_tri = np.triu_indices(num_node, k=1)
+
+outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1))
+
+# print(len(np.unique(outcome)))
+
+if CONFOUNDS is not None:
+    confounds = dat[CONFOUNDS]
+    base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}"
+else:
+    confounds = None
+    base_name = f"nbs-predict_outcome-{OUTCOME}"
+# print(dat['bc'])
+
+weighted_average, cv_results = nbs.kfold_nbs(
+    matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=500
+)
+
+fig, fig2, nimg = io.plot_edges(
+    weighted_average,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Precision-Weighted Average",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"),
+    dpi=400,
+)
+nib.save(
+    nimg,
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.nii.gz"),  # nibabel needs a valid extension
+)
+
+
+avg_df = pd.DataFrame(
+    weighted_average,
+    index=range(0, weighted_average.shape[0]),
+    columns=range(0, weighted_average.shape[1]),
+)
+
+cv_results.to_csv(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t"
+)
+avg_df.to_csv(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t"
+)
+
+best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"]
+
+# this uses the most predictive subnetwork as features in the model
+# might replace with thresholded weighted_average
+# or use _all_ the edges in weighted_average with KRR or ElasticNet...
+# ORRR use thresholded weighted average edges with ElasticNet...
+# - stays true to NBS-Predict
+# - increases parsimony while handling multicollinearity... 
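+# A hedged sketch of that last option (not executed here): keep the edges that
+# survive proportional thresholding and let ElasticNet shrink the rest.
+# ElasticNetCV is sklearn's cross-validated ElasticNet; every other name is
+# already defined above in this script.
+# from sklearn.linear_model import ElasticNetCV
+# enet_filter = threshold_proportional(weighted_average, THRESH)[upper_tri] > 0
+# enet_features = np.vstack(dat["edge_vector"].dropna().values)[:, enet_filter]
+# enet = ElasticNetCV(l1_ratio=[0.1, 0.5, 0.9], cv=5)
+# enet.fit(enet_features, np.ravel(outcome))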
+# either way, I don't think cv_results is necessary
+
+# here is where we'd threshold the weighted average to use for elastic-net
+weighted_average = np.where(weighted_average > 0, weighted_average, 0)
+# print(np.sum(weighted_average))
+# nbs_vector = weighted_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+# filter = np.where(nbs_vector >= p75, True, False)
+# print(np.sum(filter))
+# print(nbs_vector.shape, filter.shape)
+
+thresh_average = threshold_proportional(weighted_average, THRESH)
+nbs_vector2 = thresh_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+filter = np.where(nbs_vector2 > 0, True, False)
+
+# mask = io.vectorize_corrmats(filter)
+edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_train = dat[CONFOUNDS].values
+    outcome_train = np.reshape(outcome, (outcome.shape[0],))
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_train)) <= 2:
+        resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train)
+        train_outcome = outcome
+    else:  # any non-binary outcome is treated as continuous
+        train_outcome, resid_edges = nbs.residualize(
+            X=edges_train, y=outcome_train, confounds=confounds_train
+        )
+    train_features = resid_edges
+else:
+    train_features = edges_train
+    train_outcome = outcome
+
+x_scaler = StandardScaler()
+y_scaler = StandardScaler()
+train_features = x_scaler.fit_transform(train_features)
+if len(np.unique(train_outcome)) <= 2:
+    pass
+else:
+    train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1))
+
+
+# fit the model on the whole training dataset to get its parameters
+
+# classification if the outcome is binary (for now)
+# could be extended to the multiclass case? 
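+# Hedged sketch of that multiclass extension (not used here): a multinomial
+# logistic regression would be the natural drop-in, but deciding that an
+# outcome with >2 unique values is categorical rather than continuous would
+# need an explicit flag (`outcome_is_categorical` below is hypothetical):
+# if outcome_is_categorical and len(np.unique(train_outcome)) > 2:
+#     model = LogisticRegression(penalty="l2", solver="saga", multi_class="multinomial")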
+train_metrics = {}
+if len(np.unique(outcome)) == 2:
+    model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0])
+    train_metrics["alpha"] = best.C_[0]  # note: C is the inverse of the regularization strength
+    # train_metrics["l1_ratio"] = best.l1_ratio_
+else:
+    model = Ridge(
+        solver="auto",
+        alpha=best.alpha_,
+        fit_intercept=False,
+    )
+    train_metrics["alpha"] = best.alpha_
+
+# NOTE: RepeatedKFold ignores `groups`; a group-aware splitter would be needed
+# to keep the bc groups intact across folds
+cv = RepeatedKFold(n_splits=5, n_repeats=10)
+
+# train_metrics["l1_ratio"] = best.l1_ratio_
+# print(params)
+# model.set_params(**params)
+# train the final model on the full training set, using NBS-Predict feature selection
+# fitted = model.fit(X=train_features, y=np.ravel(train_outcome))
+scores = cross_validate(
+    model,
+    train_features,
+    train_outcome,
+    groups=groups,
+    cv=cv,
+    return_estimator=True,
+    return_train_score=True,
+)
+train_metrics["in_sample_test"] = np.mean(scores["test_score"])
+train_metrics["in_sample_train"] = np.mean(scores["train_score"])
+
+fitted = scores["estimator"][0]
+y_pred = fitted.predict(X=train_features)
+train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome)[0]  # plain float for JSON
+
+dat[f"{OUTCOME}_pred"] = y_pred
+dat[f"{OUTCOME}_scaled"] = train_outcome
+
+Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled", "bc", "cycle_day"]]
+Ys.to_csv(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t"
+)
+
+train_colors = ["#a08ad1", "#685690", "#3f2d69"]  # light # medium # dark
+light_cmap = sns.color_palette("dark:#a08ad1")
+dark_cmap = sns.color_palette("dark:#685690")
+
+fig, ax = plt.subplots()
+g = sns.scatterplot(
+    x="cycle_day", y=f"{OUTCOME}_pred", style="bc", data=Ys, ax=ax, palette=dark_cmap
+)
+h = sns.scatterplot(
+    x="cycle_day", y=f"{OUTCOME}_scaled", style="bc", data=Ys, ax=ax, palette=light_cmap
+)
+ax.legend(bbox_to_anchor=(1.0, 0.5))
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"),
+    dpi=400,
+    bbox_inches="tight",
+)
+
+mse = mean_squared_error(train_outcome, y_pred)
+train_metrics["mean squared error"] = mse
+print("In-sample train score: ", train_metrics["in_sample_train"])
+print("In-sample test score: ", train_metrics["in_sample_test"])
+print("In-sample mean squared error: ", mse)
+# print(np.mean(train_features))
+with open(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w"
+) as fp:
+    json.dump(train_metrics, fp)
+
+# yoink the coefficients? for a more parsimonious figure? 
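+# The loop below reads coefficients from `fitted`, which is only the estimator
+# from the first CV split; a hedged alternative for a more stable coefficient
+# figure would average across all of the fitted estimators (not done here):
+# mean_coef = np.mean([est.coef_ for est in scores["estimator"]], axis=0)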
+# print(fitted.coef_.shape)
+# print(fitted.coef_)
+coeff_vec = np.zeros_like(filter, dtype=float)  # filter is boolean; without dtype the betas would be cast to True/False
+j = 0
+for i in range(0, filter.shape[0]):
+    if filter[i] == True:
+        # print(j)
+        # print(fitted.coef_[0, j])
+        coeff_vec[i] = fitted.coef_[0, j]
+        j += 1
+    else:
+        pass
+
+# print(coeff_vec)
+coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node)
+
+coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index)
+coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv"))
+
+fig, fig2, nimg = io.plot_edges(
+    coef_mat,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Coefficients",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"),
+    dpi=400,
+)
+nib.save(
+    nimg,
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.nii.gz"),  # nibabel needs a valid extension
+)
+
+
+layout = bids.BIDSLayout(TEST_DSET, derivatives=True)
+
+test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+keep = test_df[[OUTCOME, "adj"]].dropna().index
+# print(keep)
+
+test_df = test_df.loc[keep]
+
+outcome_test = test_df[OUTCOME].values
+# print(test_df)
+
+# print(outcome_test)
+matrices_test = np.vstack(test_df["adj"].dropna().values).reshape(
+    (len(test_df["adj"].dropna().index), num_node, num_node)
+)
+edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_test = test_df[CONFOUNDS].values
+
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_test)) <= 2:
+        resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test)
+        test_outcome = outcome_test
+    else:  # any non-binary outcome is treated as continuous
+        test_outcome, resid_edges = nbs.residualize(
+            X=edges_test, y=outcome_test, confounds=confounds_test
+        )
+    test_features = resid_edges
+else:
+    test_features = edges_test
+    test_outcome = outcome_test
+
+# scale after residualizing omg
+test_features = x_scaler.transform(test_features)
+if len(np.unique(test_outcome)) <= 2:
+    pass
+else:
+    test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1))
+# print(test_features.shape)
+# if the model is a logistic regression, i.e. with a binary outcome
+# then score is prediction accuracy
+# if the model is a linear regression, i.e., with a continuous outcome
+# then the score is R^2 (coefficient of determination)
+
+# fit trained ElasticNet, initialized via warm_start
+# prob in CV? 
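+# The block below scores the already-trained estimator on the held-out dataset
+# as-is. If a warm-start refit were wanted instead, a hedged sketch might be
+# (LogisticRegression supports `warm_start`; Ridge does not):
+# from sklearn.base import clone
+# refit = clone(fitted).set_params(warm_start=True)
+# refit.fit(test_features, np.ravel(test_outcome))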
+# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome))
+# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome))
+test_metrics = {}
+
+# cross_validate(model, )
+y_pred = fitted.predict(X=test_features)
+score = fitted.score(X=test_features, y=np.ravel(test_outcome))
+if len(np.unique(test_outcome)) == 2:
+    test_metrics["accuracy"] = score
+else:
+    test_metrics["coefficient of determination"] = score
+corr = spearmanr(test_outcome, y_pred)
+test_metrics["pred_v_actual_corr"] = corr[0]  # store a plain float so json.dump below can serialize it
+mse = mean_squared_error(test_outcome, y_pred)
+test_metrics["mean squared error"] = mse
+print("Out-of-sample prediction score:\t", score)
+print("Out-of-sample mean squared error:\t", mse)
+# print(np.mean(test_features))
+# pred_outcome = fitted.predict(test_features)
+test_df[f"{OUTCOME}_scaled"] = test_outcome
+test_df[f"{OUTCOME}_pred"] = y_pred
+Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred", "cycle_day", "bc"]]
+Ys.to_csv(
+    join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t"
+)
+
+Ys["ppts"] = Ys.index.get_level_values(0)
+
+
+light_colors = ["#33ACE3", "#EA6964", "#4AB62C"]  # Bubbles # Blossom # Buttercup
+dark_colors = ["#1278a6", "#a11510", "#228208"]
+light = ListedColormap(light_colors, name="light_powderpuff")
+dark = ListedColormap(dark_colors, name="dark_powderpuff")
+mpl.colormaps.register(cmap=light)
+mpl.colormaps.register(cmap=dark)
+
+fig, ax = plt.subplots()
+g = sns.scatterplot(
+    x="cycle_day",
+    y=f"{OUTCOME}_pred",
+    style="bc",
+    data=Ys,
+    hue="ppts",
+    hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"],
+    ax=ax,
+    palette="light_powderpuff",
+)
+h = sns.scatterplot(
+    x="cycle_day",
+    y=f"{OUTCOME}_scaled",
+    style="bc",
+    data=Ys,
+    hue="ppts",
+    hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"],
+    ax=ax,
+    palette="dark_powderpuff",
+)
+ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left")
+fig.savefig(
+    join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"),
+    dpi=400,
+    bbox_inches="tight",
+)
+
+
+# print(test_outcome, "\n", y_pred)
+# print(pred_outcome)
+if len(np.unique(test_outcome)) > 2:
+
+    print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr)
+    test_metrics["spearman correlation"] = corr[0]
+with open(
+    join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w"
+) as fp:
+    json.dump(test_metrics, fp)
+np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred)
diff --git a/idconn/workflows/nbs_predict.py b/idconn/workflows/nbs_predict.py
index 46e804c..169a5aa 100644
--- a/idconn/workflows/nbs_predict.py
+++ b/idconn/workflows/nbs_predict.py
@@ -55,7 +55,7 @@
 upper_tri = np.triu_indices(num_node, k=1)
 
 outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1))
 
-groups = dat['bc']
+groups = dat["bc"]
 
 if CONFOUNDS is not None:
     confounds = dat[CONFOUNDS]
@@ -85,7 +85,6 @@
 edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
 
-
 # NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
 if CONFOUNDS is not None:
     confounds_test = test_df[CONFOUNDS].values
@@ -145,7 +144,7 @@
     join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t"
 )
 
-best = cv_results.sort_values(by='score', ascending=False).iloc[0]['model']
+best = cv_results.sort_values(by="score", ascending=False).iloc[0]["model"]
 
 # this uses the most predictive subnetwork as features in the model
 # might replace with thresholded weighted_average
@@ -157,13 +156,13 @@
 # here is 
where we'd threshold the weighted average to use for elastic-net weighted_average = np.where(weighted_average > 0, weighted_average, 0) -#nbs_vector = weighted_average[upper_tri] -#p75 = np.percentile(nbs_vector, 75) -#filter = np.where(nbs_vector >= p75, True, False) +# nbs_vector = weighted_average[upper_tri] +# p75 = np.percentile(nbs_vector, 75) +# filter = np.where(nbs_vector >= p75, True, False) # print(nbs_vector.shape, filter.shape) thresh_average = threshold_proportional(weighted_average, THRESH) nbs_vector2 = thresh_average[upper_tri] -#p75 = np.percentile(nbs_vector, 75) +# p75 = np.percentile(nbs_vector, 75) filter = np.where(nbs_vector2 > 0, True, False) # mask = io.vectorize_corrmats(filter) @@ -205,61 +204,59 @@ train_metrics = {} if len(np.unique(outcome)) == 2: - model = LogisticRegression( - penalty="l2", - solver="saga", - C=best.C_[0] - ) + model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0]) train_metrics["alpha"] = best.C_[0] - #train_metrics["l1_ratio"] = best.l1_ratio_ + # train_metrics["l1_ratio"] = best.l1_ratio_ else: - model = Ridge( - solver="saga", - alpha=best.alpha_ - ) + model = Ridge(solver="saga", alpha=best.alpha_) train_metrics["alpha"] = best.alpha_ - #train_metrics["l1_ratio"] = best.l1_ratio_ -#print(params) -#model.set_params(**params) + # train_metrics["l1_ratio"] = best.l1_ratio_ +# print(params) +# model.set_params(**params) # train ElasticNet on full train dataset, using feature extraction from NBS-Predict scores = cross_validate( - model, - train_features, - train_outcome, - groups=groups, + model, + train_features, + train_outcome, + groups=groups, cv=cv, - return_estimator=True, - return_train_score=True - ) -train_metrics["in_sample_test"] = np.mean(scores['test_score']) -train_metrics["in_sample_train"] = np.mean(scores['train_score']) + return_estimator=True, + return_train_score=True, +) +train_metrics["in_sample_test"] = np.mean(scores["test_score"]) +train_metrics["in_sample_train"] = np.mean(scores["train_score"]) -fitted = scores['estimator'][0] +fitted = scores["estimator"][0] y_pred = fitted.predict(X=train_features) train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) -dat[f'{OUTCOME}_pred'] = y_pred -dat[f'{OUTCOME}_scaled'] = train_outcome - -Ys = dat[[f'{OUTCOME}_pred', f'{OUTCOME}_scaled']] -Ys.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -train_colors = ['#a08ad1', #light - '#685690', #medium - '#3f2d69' #dark - ] -light_cmap = sns.color_palette('dark:#a08ad1') -dark_cmap = sns.color_palette('dark:#685690') - -fig,ax = plt.subplots() -g = sns.scatterplot(x=f'{OUTCOME}_scaled', - y=f'{OUTCOME}_pred', - #style='bc', - data=Ys, - ax=ax, - palette=dark_cmap) -#ax.legend(bbox_to_anchor=(1.0, 0.5)) -fig.savefig(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') +dat[f"{OUTCOME}_pred"] = y_pred +dat[f"{OUTCOME}_scaled"] = train_outcome + +Ys = dat[[f"{OUTCOME}_pred", f"{OUTCOME}_scaled"]] +Ys.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +train_colors = ["#a08ad1", "#685690", "#3f2d69"] # light # medium # dark +light_cmap = sns.color_palette("dark:#a08ad1") +dark_cmap = sns.color_palette("dark:#685690") + +fig, ax = plt.subplots() +g = sns.scatterplot( + x=f"{OUTCOME}_scaled", + y=f"{OUTCOME}_pred", + # style='bc', + data=Ys, + ax=ax, + palette=dark_cmap, +) +# ax.legend(bbox_to_anchor=(1.0, 0.5)) +fig.savefig( + 
join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) mse = mean_squared_error(train_outcome, y_pred) train_metrics["mean squared error"] = mse @@ -277,7 +274,7 @@ j = 0 for i in range(0, filter.shape[0]): if filter[i] == True: - #print(j) + # print(j) coeff_vec[i] = fitted.coef_[0, j] j += 1 else: @@ -341,43 +338,43 @@ print("Out-of-sample mean squared error:\t", mse) # print(np.mean(test_features)) # pred_outcome = fitted.predict(test_features) -test_df[f'{OUTCOME}_scaled'] = test_outcome -test_df[f'{OUTCOME}_pred'] = y_pred -Ys = test_df[[f'{OUTCOME}_scaled', - f'{OUTCOME}_pred']] -Ys.to_csv(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') - -Ys['ppts'] = Ys.index.get_level_values(0) - - -light_colors = ['#33ACE3', #Bubbles - '#EA6964', #Blossom - '#4AB62C' #Buttercup - ] -dark_colors = ['#1278a6', - '#a11510', - '#228208'] -light = ListedColormap(light_colors, name='light_powderpuff') -dark = ListedColormap(dark_colors, name='dark_powderpuff') +test_df[f"{OUTCOME}_scaled"] = test_outcome +test_df[f"{OUTCOME}_pred"] = y_pred +Ys = test_df[[f"{OUTCOME}_scaled", f"{OUTCOME}_pred"]] +Ys.to_csv( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep="\t" +) + +Ys["ppts"] = Ys.index.get_level_values(0) + + +light_colors = ["#33ACE3", "#EA6964", "#4AB62C"] # Bubbles # Blossom # Buttercup +dark_colors = ["#1278a6", "#a11510", "#228208"] +light = ListedColormap(light_colors, name="light_powderpuff") +dark = ListedColormap(dark_colors, name="dark_powderpuff") mpl.colormaps.register(cmap=light) mpl.colormaps.register(cmap=dark) -fig,ax = plt.subplots() -g = sns.scatterplot(x=f'{OUTCOME}_scaled', - y=f'{OUTCOME}_pred', - #style='bc', - data=Ys, - hue='ppts', - hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'], - ax=ax, - palette='light_powderpuff' - ) -ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left') -fig.savefig(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') - +fig, ax = plt.subplots() +g = sns.scatterplot( + x=f"{OUTCOME}_scaled", + y=f"{OUTCOME}_pred", + # style='bc', + data=Ys, + hue="ppts", + hue_order=["sub-Bubbles", "sub-Blossom", "sub-Buttercup"], + ax=ax, + palette="light_powderpuff", +) +ax.legend(bbox_to_anchor=(1.0, 0.5), loc="center left") +fig.savefig( + join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), + dpi=400, + bbox_inches="tight", +) -#print(test_outcome, "\n", y_pred) +# print(test_outcome, "\n", y_pred) # print(pred_outcome) if len(np.unique(test_outcome)) > 2: corr = spearmanr(test_outcome, y_pred)