From c2445c9f1a38ccd415e21d31c6f65d33d51ecb1a Mon Sep 17 00:00:00 2001 From: estefanysuarez Date: Mon, 27 Mar 2023 12:56:06 -0400 Subject: [PATCH 1/6] changes for tutorial --- conn2res/coding.py | 17 ++++--- conn2res/iodata.py | 65 ++++----------------------- conn2res/performance.py | 2 +- conn2res/plotting.py | 99 +++++++---------------------------------- conn2res/reservoir.py | 37 +++++++-------- conn2res/task.py | 99 ++++++++++++++++++++--------------------- 6 files changed, 101 insertions(+), 218 deletions(-) diff --git a/conn2res/coding.py b/conn2res/coding.py index 8a4912a..2f69506 100755 --- a/conn2res/coding.py +++ b/conn2res/coding.py @@ -16,7 +16,17 @@ def get_modules(module_assignment): """ - # TODO + _summary_ + + Parameters + ---------- + module_assignment : _type_ + _description_ + + Returns + ------- + _type_ + _description_ """ # get module ids module_ids = np.unique(module_assignment) @@ -65,7 +75,6 @@ def encoder(reservoir_states, target, readout_modules=None, """ - # use multiple subsets of readout nodes designated by readout_modules if readout_modules is not None: if isinstance(readout_modules, np.ndarray): @@ -100,14 +109,12 @@ def encoder(reservoir_states, target, readout_modules=None, df_encoding = pd.concat(df_encoding) elif readout_nodes is not None: - # use a subset of output nodes as readout nodes df_encoding, model = run_task(reservoir_states=( reservoir_states[0][:, readout_nodes], reservoir_states[1][:, readout_nodes]), y=target, metric=metric, **kwargs) df_encoding['n_nodes'] = len(readout_nodes) else: - # use all output nodes as readout nodes df_encoding, model = run_task(reservoir_states=reservoir_states, y=target, metric=metric, **kwargs) @@ -142,7 +149,7 @@ def time_average_samples(seq_len, data, sample_weight, operation=None): elif isinstance(data, np.ndarray): data = [data] - if len(data) != len(sample_weight): + if len(data) != len(data): raise ValueError( 'data and sample_weight should have the same number of assigned variables') diff --git a/conn2res/iodata.py b/conn2res/iodata.py index 63f9985..8901319 100755 --- a/conn2res/iodata.py +++ b/conn2res/iodata.py @@ -18,17 +18,17 @@ NEUROGYM_TASKS = [ 'AntiReach', - 'Bandit', # * + # 'Bandit', 'ContextDecisionMaking', - 'DawTwoStep', # * + # 'DawTwoStep', 'DelayComparison', 'DelayMatchCategory', 'DelayMatchSample', 'DelayMatchSampleDistractor1D', 'DelayPairedAssociation', - 'Detection', # * + # 'Detection', # TODO: Temporary removing until bug fixed 'DualDelayMatchSample', - 'EconomicDecisionMaking', # * + # 'EconomicDecisionMaking', 'GoNogo', 'HierarchicalReasoning', 'IntervalDiscrimination', @@ -47,7 +47,7 @@ 'ReadySetGo', 'SingleContextDecisionMaking', 'SpatialSuppressMotion', - 'ToneDetection' # * + # 'ToneDetection' # TODO: Temporary removing until bug fixed ] NATIVE_TASKS = [ @@ -100,26 +100,7 @@ def get_available_tasks(): return NEUROGYM_TASKS + NATIVE_TASKS + RESERVOIRPY_TASKS -def unbatch(x): - """ - Removes batch_size dimension from array - - Parameters - ---------- - x : numpy.ndarray - array with dimensions (seq_len, batch_size, features) - - Returns - ------- - new_x : numpy.ndarray - new array with dimensions (batch_size*seq_len, features) - - """ - # TODO right now it only works when x is (batch_first = False) - return np.concatenate(x, axis=0) - - -def fetch_dataset(task, **kwargs): +def fetch_dataset(task, report=True, **kwargs): """ Fetches inputs and labels for 'task' from the NeuroGym repository @@ -162,29 +143,11 @@ def fetch_dataset(task, **kwargs): def create_neurogymn_dataset(task, n_trials=100, add_constant=False, **kwargs): - """ - _summary_ - - Parameters - ---------- - task : _type_ - _description_ - n_trials : int, optional - _description_, by default 100 - add_constant : bool, optional - _description_, by default False - - Returns - ------- - _type_ - _description_ - """ # create a Dataset object from NeuroGym dataset = ngym.Dataset(task+'-v0', env_kwargs=kwargs) # get environment object env = dataset.env - # print(env.timing) # generate per trial dataset _ = env.reset() @@ -287,7 +250,7 @@ def create_dataset(task, n_timesteps=1000, horizon=1, **kwargs): y = np.hstack([x[horizon_max-h:-h] for h in horizon]) x = x[horizon_max:] - get_info_data(task, x, y) + # get_info_data(task, x, y) if horizon_sign == -1: return x, y @@ -299,19 +262,7 @@ def create_dataset(task, n_timesteps=1000, horizon=1, **kwargs): def get_n_features(task): - """ - _summary_ - - Parameters - ---------- - task : _type_ - _description_ - Returns - ------- - _type_ - _description_ - """ x, _ = fetch_dataset(task, n_trials=1) return x[0].shape[1] @@ -411,7 +362,7 @@ def get_info_data(task, x, y): print(f'\tmodel = {model.__name__}') -def get_sample_weight(inputs, sample_block=None): +def get_sample_weight(inputs, labels, sample_block=None): """ Time averages dataset based on sample class and sample weight diff --git a/conn2res/performance.py b/conn2res/performance.py index 3ac71c4..a1a7764 100644 --- a/conn2res/performance.py +++ b/conn2res/performance.py @@ -166,7 +166,7 @@ def mean_absolute_error( def corrcoef( y_true, y_pred, multioutput='uniform_average', nonnegative=None, **kwargs -): +): """ Pearson's correlation coefficient. diff --git a/conn2res/plotting.py b/conn2res/plotting.py index f71c88f..8ca7eef 100755 --- a/conn2res/plotting.py +++ b/conn2res/plotting.py @@ -115,8 +115,7 @@ def transform_data( def plot_iodata( - x, y, n_trials=7, title=None, show=True, savefig=False, fname=None, - **kwargs + x, y, n_instances=7, title=None, show=True, savefig=False, fname=None, **kwargs ): """ #TODO @@ -136,23 +135,14 @@ def plot_iodata( _description_, by default False fname : _type_, optional _description_, by default None - + """ - x = x[:n_trials] - y = y[:n_trials] - - # get end points for trials to plot trial separators - end_points = [] - tf = 0 - for i in range(n_trials): - tf += len(x[i]) - end_points.append(tf) # convert x and y to arrays for visualization if isinstance(x, list): - x = np.vstack(x) + x = np.vstack(x[:n_instances]) if isinstance(y, list): - y = np.vstack(y).squeeze() + y = np.vstack(y[:n_instances]).squeeze() # set plotting theme sns.set(style="ticks", font_scale=1.0) @@ -161,11 +151,13 @@ def plot_iodata( # set color palette palette = kwargs.pop('palette', None) - # plot inputs (x) and outputs (y) + # plot sns.lineplot( - data=x, palette=palette, dashes=False, legend=False, ax=ax, **kwargs) + data=x, palette=palette, dashes=False, legend=False, + ax=ax, **kwargs) sns.lineplot( - data=y, palette=palette, dashes=False, legend=False, ax=ax, **kwargs) + data=y, palette=palette, dashes=False, legend=False, + ax=ax, **kwargs) # set axis labels ax.set_xlabel('time steps', fontsize=11) @@ -186,11 +178,6 @@ def plot_iodata( ax.legend(handles=ax.lines, labels=new_labels, loc='best', fontsize=8) - # plot trial line separators - for tf in end_points: - plt.plot( - tf * np.ones((2)), np.arange(2), c='black', linestyle='--') - # set title if title is not None: plt.title(title, fontsize=12) @@ -275,7 +262,6 @@ def plot_diagnostics( axs = axs.ravel() plt.subplots_adjust(wspace=0.1) - # set color palette palette = kwargs.pop('palette', None) @@ -291,14 +277,13 @@ def plot_diagnostics( dashes=False, legend=False, ax=axs[2]) sns.lineplot( data=y_pred[:160], palette=palette, - dashes=False, legend=False, ax=axs[2], linewidth=2.5) + dashes=False, legend=False, ax=axs[2]) # set axis labels axs[0].set_ylabel('x signal \namplitude', fontsize=11) axs[1].set_ylabel('decision \nfunction', fontsize=11) axs[2].set_xlabel('time steps', fontsize=11) axs[2].set_ylabel('y signal \namplitude', fontsize=11) - # axs[1].set_ylim(0, 5e7) # set axis limits for ax in axs: @@ -309,16 +294,9 @@ def plot_diagnostics( x_labels = ['x'] else: x_labels = [f'x{n+1}' for n in range(x.shape[1])] - - if dec_func.ndim == 1: - dec_func_labels = ['decision function'] - else: - dec_func_labels = [f'decision function {n+1}' for n in range(dec_func.shape[1])] - - # set legend axs[0].legend(handles=axs[0].lines, labels=x_labels, loc='upper right', fontsize=8) - axs[1].legend(handles=axs[1].lines, labels=dec_func_labels, + axs[1].legend(handles=axs[1].lines, labels=['decision function'], loc='upper right', fontsize=8) axs[2].legend(handles=axs[2].lines, labels=['target', 'predicted target'], loc='upper right', fontsize=8) @@ -340,16 +318,16 @@ def plot_diagnostics( fig.savefig(fname=os.path.join(FIG_DIR, f'{fname}.png'), transparent=True, bbox_inches='tight', dpi=300) - + plt.close() def plot_performance( - df, x='alpha', y='score', normalize=False, + df, x='alpha', y='score', norm=False, title=None, show=True, savefig=False, fname=None, **kwargs ): - if normalize: + if norm: df[y] = df[y] / max(df[y]) # set plotting theme @@ -373,7 +351,7 @@ def plot_performance( ax.set_xlabel('alpha', fontsize=11) y_label = ' '.join(y.split('_')) ax.set_ylabel(y_label, fontsize=11) - + # set title if title is not None: plt.title(title, fontsize=12) @@ -392,49 +370,4 @@ def plot_performance( fig.savefig(fname=os.path.join(FIG_DIR, f'{fname}.png'), transparent=True, bbox_inches='tight', dpi=300) - plt.close() - - -def plot_phase_space(x, y, sample=None, xlim=None, ylim=None, subplot=None, cmap=None, - num=1, figsize=(13, 5), title=None, fname='phase_space', savefig=False, block=False -): - #TODO - # open figure and create subplot - plt.figure(num=num, figsize=figsize) - if subplot is None: - subplot = (1, 1, 1) - plt.subplot(*subplot) - - # plot data - if sample is None: - plt.plot(x) - else: - t = np.arange(*sample) - if cmap is None: - plt.plot(t, x[t]) - else: - for i, _ in enumerate(t[:-1]): - plt.plot(x[t[i:i+2]], y[t[i:i+2]], - color=getattr(plt.cm, cmap)(255*i//np.diff(sample))) - - # add x and y limits - if xlim is not None: - plt.xlim(xlim) - if ylim is not None: - plt.xlim(ylim) - - # set xtick/ythick fontsize - plt.xticks(fontsize=22) - plt.yticks(fontsize=22) - - # add title - if title is not None: - plt.title(f'{title} phase space', fontsize=22) - - # set tight layout in case there are different subplots - plt.tight_layout() - - if savefig: - plt.savefig(fname=os.path.join(FIG_DIR, f'{fname}.png'), - transparent=True, bbox_inches='tight', dpi=300) - plt.show(block=block) + plt.close() \ No newline at end of file diff --git a/conn2res/reservoir.py b/conn2res/reservoir.py index 1961d60..1a96c0d 100755 --- a/conn2res/reservoir.py +++ b/conn2res/reservoir.py @@ -370,8 +370,7 @@ class EchoStateNetwork(Reservoir): """ - def __init__(self, *args, input_nodes, output_nodes=None, - activation_function='tanh', input_gain=1.0, **kwargs): + def __init__(self, *args, activation_function='tanh', input_gain=1.0, **kwargs): """ Constructor class for Echo State Networks @@ -385,17 +384,15 @@ def __init__(self, *args, input_nodes, output_nodes=None, Reservoir connectivity matrix (source, target) N: number of nodes in the network. If w_hh is directed, then rows (columns) should correspond to source (target) nodes. - input_nodes: numpy.ndarray - set of indexes of input nodes - output_nodes: numpy.ndarray - set of indexes of output nodes. If None, output_nodes is the - complementery set of input_nodes (i.e., set difference between all - the nodes in the network and input_nodes) activation_function: str {'linear', 'elu', 'relu', 'leaky_relu', 'sigmoid', 'tanh', 'step'}, default 'tanh' Activation function (nonlinearity of the system's units) input_gain: float gain to scale input weights + input_nodes: numpy.ndarray + set of indexes of input nodes + output_nodes: numpy.ndarray + set of indexes of output nodes """ super().__init__(*args, **kwargs) @@ -404,22 +401,18 @@ def __init__(self, *args, input_nodes, output_nodes=None, self.activation_function = self.set_activation_function( activation_function) - # input nodes - self.input_nodes = input_nodes + # if not provided we feed into and read out from all nodes + self.input_nodes = kwargs.get( + 'input_nodes', np.arange(self.hidden_size)) + self.output_nodes = kwargs.get( + 'output_nodes', np.arange(self.hidden_size)) - # output nodes. If not provided, all nodes are used as output nodes - # except input nodes - if output_nodes is not None: - self.output_nodes = output_nodes - else: - self.output_nodes = np.setdiff1d(np.arange(self.hidden_size), self.input_nodes) - # scale the input weights self.input_gain = input_gain self.w_ih[:, self.input_nodes] = self.input_gain * \ self.w_ih[:, self.input_nodes] - def simulate(self, ext_input, ic=None, **kwargs): + def simulate(self, ext_input, ic=None, threshold=0.5): """ Simulates reservoir dynamics given an external input signal 'ext_input' @@ -433,8 +426,8 @@ def simulate(self, ext_input, ic=None, **kwargs): Initial conditions N: number of nodes in the network. If w_hh is directed, then rows (columns) should correspond to source (target) nodes. - kwargs: - Other keyword arguments are passed to self.activation_function + threshold : float + Threshold for piecewise nonlinearity. Ignored for the others. Returns ------- @@ -463,10 +456,10 @@ def simulate(self, ext_input, ic=None, **kwargs): # if (t>0) and (t%100 == 0): print(f'\t ----- timestep = {t}') synap_input = np.dot( self._state[t-1, :], self.w_hh) + np.dot(ext_input[t-1, :], self.w_ih) - self._state[t, :] = self.activation_function(synap_input, **kwargs) + self._state[t, :] = self.activation_function(synap_input) # select output nodes and remove initial condition (to match the time index of - # _state and ext_input) + # _state and ext_input) self._state = self._state[1:, self.output_nodes] return self._state diff --git a/conn2res/task.py b/conn2res/task.py index 7e02291..a0979d5 100644 --- a/conn2res/task.py +++ b/conn2res/task.py @@ -10,8 +10,9 @@ import pandas as pd from sklearn.linear_model import Ridge, RidgeClassifier -# from sklearn.multiclass import OneVsRestClassifier -# from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier +from sklearn.multiclass import OneVsRestClassifier +from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier + from . import performance @@ -36,22 +37,21 @@ def check_xy_dims(x, y): return x_train, x_test, y_train, y_test -def regression( - x, y, model=None, metric='r2_score', model_kws=None, metric_kws=None, - **kwargs -): +def regression(x, y, model=None, metric='r2_score', + model_kws=None, metric_kws=None, **kwargs): """ Regression tasks # TODO """ + # get train and test samples x_train, x_test = x y_train, y_test = y # get sample_weights sample_weight_train, sample_weight_test = kwargs.pop( - 'sample_weight', (None, None)) - + 'sample_weight', (None, None)) + # specify default model if model is None: model = Ridge(**model_kws) @@ -62,82 +62,80 @@ def regression( # calculate model metric on test data if metric == 'score': # by default, use score method of model - metric_value = model.score( - x_test, y_test, sample_weight=sample_weight_test) + metric_value = model.score(x_test, y_test, + sample_weight=sample_weight_test) else: func = getattr(performance, metric) y_pred = model.predict(x_test) - metric_value = func( - y_test, y_pred, sample_weight=sample_weight_test, **metric_kws) + metric_value = func(y_test, y_pred, + sample_weight=sample_weight_test, **metric_kws) return metric_value, model -def multioutput_regression(*args, **kwargs): +def multiOutputRegression(*args, **kwargs): """ #TODO """ + return regression(*args, **kwargs) -def classification( - x, y, model=None, metric='accuracy_score', model_kws=None, - metric_kws=None, **kwargs -): +def classification(x, y, model=None, metric='accuracy_score', + model_kws=None, metric_kws=None, **kwargs): """ Classification tasks # TODO """ + # get train and test samples x_train, x_test = x y_train, y_test = y # get sample_weights sample_weight_train, sample_weight_test = kwargs.pop( - 'sample_weight', (None, None)) + 'sample_weight', (None, None)) # specify default model if model is None: model = RidgeClassifier(**model_kws) # fit model on training data - try: + try: model.fit(x_train, y_train, sample_weight_train) except TypeError: - # Note: multi-class classification uses OneVsRest strategy. OneVsRest - # does not admit sample_weight arguments. Only non-zero sample - # points are used instead. model.fit(x_train[np.nonzero(y_train)], y_train[np.nonzero(y_train)]) if metric == 'score': - metric_value = model.score(x_test[np.nonzero(y_test)], + metric_value = model.score(x_test[np.nonzero(y_test)], y_test[np.nonzero(y_test)]) return metric_value, model else: sample_weight_test = None - + # calculate model metric on test data if metric == 'score': # by default, use score method of model - metric_value = model.score( - x_test, y_test, sample_weight=sample_weight_test) + metric_value = model.score(x_test, y_test, + sample_weight=sample_weight_test) else: - func = getattr(performance, metric) + func = getattr(performance, metric) y_pred = model.predict(x_test) - metric_value = func( - y_test, y_pred, sample_weight=sample_weight_test, **metric_kws) - + metric_value = func(y_test, y_pred, + sample_weight=sample_weight_test, **metric_kws) + return metric_value, model -def binary_classification(*args, **kwargs): +def binaryClassification(*args, **kwargs): """ #TODO """ + return classification(*args, **kwargs) -def multiclass_classification(*args, **kwargs): +def multiClassClassification(*args, **kwargs): """ #TODO """ @@ -151,12 +149,16 @@ def multiclass_classification(*args, **kwargs): return classification(*args, **kwargs) -def multilabel_classification(*args, **kwargs): +def multiLabelClassification(*args, **kwargs): """ #TODO """ - print("Multi-label classification problems are not supported.") - exit() + + try: + return classification(*args, **kwargs) + except: + print('multiLabelClassification problems are not supported.') + exit() def select_model(y): @@ -165,22 +167,23 @@ def select_model(y): variable # TODO """ - if isinstance(y, list): - y = np.vstack(y) + if isinstance(y, list): + y = np.asarray(y) if y.dtype in [np.float32, np.float64]: if y.squeeze().ndim == 1: return regression # regression else: - return multioutput_regression # multilabel regression + return multiOutputRegression # multilabel regression + elif y.dtype in [np.int32, np.int64]: if y.squeeze().ndim == 1: - if len(np.unique(y)) == 2: - return binary_classification + if len(np.unique(y)) == 2: + return binaryClassification else: - return multiclass_classification + return multiClassClassification else: - return multilabel_classification + return multiLabelClassification def run_task(reservoir_states, y, metric, **kwargs): @@ -196,7 +199,7 @@ def run_task(reservoir_states, y, metric, **kwargs): y : tuple of numpy.ndarrays training and test targets or output labels; the shape of each numpy.ndarray is n_samples, n_labels - metric : str + metric : str kwargs : other keyword arguments are passed to one of the following functions: memory_capacity_task(); delays=None, t_on=0 @@ -224,14 +227,10 @@ def run_task(reservoir_states, y, metric, **kwargs): # fit model metrics = dict() for m in metric: - metrics[m], model = func( - x=(x_train, x_test), - y=(y_train, y_test), - metric=m, **kwargs - ) - + metrics[m], model = func(x=(x_train, x_test), y=( + y_train, y_test), metric=m, **kwargs) # print(f'\t\t {m} = {metrics[m]}') df_res = pd.DataFrame(data=metrics, index=[0]) - return df_res, model + return df_res, model \ No newline at end of file From 84db9ecf042fde63b26c9cfe0c557f0661718bdd Mon Sep 17 00:00:00 2001 From: estefanysuarez Date: Mon, 27 Mar 2023 12:58:13 -0400 Subject: [PATCH 2/6] added inline comments to coding module --- conn2res/coding.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conn2res/coding.py b/conn2res/coding.py index 2f69506..70339ea 100755 --- a/conn2res/coding.py +++ b/conn2res/coding.py @@ -75,6 +75,7 @@ def encoder(reservoir_states, target, readout_modules=None, """ + # use multiple subsets of readout nodes designated by readout_modules if readout_modules is not None: if isinstance(readout_modules, np.ndarray): @@ -109,12 +110,14 @@ def encoder(reservoir_states, target, readout_modules=None, df_encoding = pd.concat(df_encoding) elif readout_nodes is not None: + # use a subset of output nodes as readout nodes df_encoding, model = run_task(reservoir_states=( reservoir_states[0][:, readout_nodes], reservoir_states[1][:, readout_nodes]), y=target, metric=metric, **kwargs) df_encoding['n_nodes'] = len(readout_nodes) else: + # use all output nodes as readout nodes df_encoding, model = run_task(reservoir_states=reservoir_states, y=target, metric=metric, **kwargs) From 926b420ac6fa61f72b519fcb9b03fe8c3a5d207a Mon Sep 17 00:00:00 2001 From: estefanysuarez Date: Mon, 27 Mar 2023 13:14:17 -0400 Subject: [PATCH 3/6] small fix to args in class and reg models --- conn2res/task.py | 8 ++++---- examples/tutorial.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conn2res/task.py b/conn2res/task.py index a0979d5..16f88a5 100644 --- a/conn2res/task.py +++ b/conn2res/task.py @@ -38,7 +38,7 @@ def check_xy_dims(x, y): def regression(x, y, model=None, metric='r2_score', - model_kws=None, metric_kws=None, **kwargs): + model_kws={}, metric_kws={}, **kwargs): """ Regression tasks # TODO @@ -81,8 +81,8 @@ def multiOutputRegression(*args, **kwargs): return regression(*args, **kwargs) -def classification(x, y, model=None, metric='accuracy_score', - model_kws=None, metric_kws=None, **kwargs): +def classification(x, y, model=None, metric='accuracy_score', + model_kws={}, metric_kws={}, **kwargs): """ Classification tasks # TODO @@ -229,7 +229,7 @@ def run_task(reservoir_states, y, metric, **kwargs): for m in metric: metrics[m], model = func(x=(x_train, x_test), y=( y_train, y_test), metric=m, **kwargs) - # print(f'\t\t {m} = {metrics[m]}') + print(f'\t\t {m} = {metrics[m]}') df_res = pd.DataFrame(data=metrics, index=[0]) diff --git a/examples/tutorial.py b/examples/tutorial.py index c4c0547..ecd64f5 100755 --- a/examples/tutorial.py +++ b/examples/tutorial.py @@ -23,7 +23,7 @@ 'AntiReach', 'ReachingDelayResponse' ] -TASK = TASKS[2] +TASK = TASKS[0] METRICS = { 'ContextDecisionMaking': ['score', 'balanced_accuracy_score', 'f1_score'], 'SingleContextDecisionMaking': ['score', 'balanced_accuracy_score', 'f1_score'], From 3dfa8180a4110bbce0a1f4739c0cb8524f7624ef Mon Sep 17 00:00:00 2001 From: estefanysuarez Date: Mon, 27 Mar 2023 13:21:29 -0400 Subject: [PATCH 4/6] renaming task functions following Python conventions --- conn2res/task.py | 99 ++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/conn2res/task.py b/conn2res/task.py index 16f88a5..ea4636d 100644 --- a/conn2res/task.py +++ b/conn2res/task.py @@ -10,9 +10,8 @@ import pandas as pd from sklearn.linear_model import Ridge, RidgeClassifier -from sklearn.multiclass import OneVsRestClassifier -from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier - +# from sklearn.multiclass import OneVsRestClassifier +# from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier from . import performance @@ -37,21 +36,22 @@ def check_xy_dims(x, y): return x_train, x_test, y_train, y_test -def regression(x, y, model=None, metric='r2_score', - model_kws={}, metric_kws={}, **kwargs): +def regression( + x, y, model=None, metric='r2_score', model_kws={}, metric_kws={}, + **kwargs +): """ Regression tasks # TODO """ - # get train and test samples x_train, x_test = x y_train, y_test = y # get sample_weights sample_weight_train, sample_weight_test = kwargs.pop( - 'sample_weight', (None, None)) - + 'sample_weight', (None, None)) + # specify default model if model is None: model = Ridge(**model_kws) @@ -62,80 +62,82 @@ def regression(x, y, model=None, metric='r2_score', # calculate model metric on test data if metric == 'score': # by default, use score method of model - metric_value = model.score(x_test, y_test, - sample_weight=sample_weight_test) + metric_value = model.score( + x_test, y_test, sample_weight=sample_weight_test) else: func = getattr(performance, metric) y_pred = model.predict(x_test) - metric_value = func(y_test, y_pred, - sample_weight=sample_weight_test, **metric_kws) + metric_value = func( + y_test, y_pred, sample_weight=sample_weight_test, **metric_kws) return metric_value, model -def multiOutputRegression(*args, **kwargs): +def multioutput_regression(*args, **kwargs): """ #TODO """ - return regression(*args, **kwargs) -def classification(x, y, model=None, metric='accuracy_score', - model_kws={}, metric_kws={}, **kwargs): +def classification( + x, y, model=None, metric='accuracy_score', model_kws={}, + metric_kws={}, **kwargs +): """ Classification tasks # TODO """ - # get train and test samples x_train, x_test = x y_train, y_test = y # get sample_weights sample_weight_train, sample_weight_test = kwargs.pop( - 'sample_weight', (None, None)) + 'sample_weight', (None, None)) # specify default model if model is None: model = RidgeClassifier(**model_kws) # fit model on training data - try: + try: model.fit(x_train, y_train, sample_weight_train) except TypeError: + # Note: multi-class classification uses OneVsRest strategy. OneVsRest + # does not admit sample_weight arguments. Only non-zero sample + # points are used instead. model.fit(x_train[np.nonzero(y_train)], y_train[np.nonzero(y_train)]) if metric == 'score': - metric_value = model.score(x_test[np.nonzero(y_test)], + metric_value = model.score(x_test[np.nonzero(y_test)], y_test[np.nonzero(y_test)]) return metric_value, model else: sample_weight_test = None - + # calculate model metric on test data if metric == 'score': # by default, use score method of model - metric_value = model.score(x_test, y_test, - sample_weight=sample_weight_test) + metric_value = model.score( + x_test, y_test, sample_weight=sample_weight_test) else: - func = getattr(performance, metric) + func = getattr(performance, metric) y_pred = model.predict(x_test) - metric_value = func(y_test, y_pred, - sample_weight=sample_weight_test, **metric_kws) - + metric_value = func( + y_test, y_pred, sample_weight=sample_weight_test, **metric_kws) + return metric_value, model -def binaryClassification(*args, **kwargs): +def binary_classification(*args, **kwargs): """ #TODO """ - return classification(*args, **kwargs) -def multiClassClassification(*args, **kwargs): +def multiclass_classification(*args, **kwargs): """ #TODO """ @@ -149,16 +151,12 @@ def multiClassClassification(*args, **kwargs): return classification(*args, **kwargs) -def multiLabelClassification(*args, **kwargs): +def multilabel_classification(*args, **kwargs): """ #TODO """ - - try: - return classification(*args, **kwargs) - except: - print('multiLabelClassification problems are not supported.') - exit() + print("Multi-label classification problems are not supported.") + exit() def select_model(y): @@ -167,23 +165,22 @@ def select_model(y): variable # TODO """ - if isinstance(y, list): - y = np.asarray(y) + if isinstance(y, list): + y = np.vstack(y) if y.dtype in [np.float32, np.float64]: if y.squeeze().ndim == 1: return regression # regression else: - return multiOutputRegression # multilabel regression - + return multioutput_regression # multilabel regression elif y.dtype in [np.int32, np.int64]: if y.squeeze().ndim == 1: - if len(np.unique(y)) == 2: - return binaryClassification + if len(np.unique(y)) == 2: + return binary_classification else: - return multiClassClassification + return multiclass_classification else: - return multiLabelClassification + return multilabel_classification def run_task(reservoir_states, y, metric, **kwargs): @@ -199,7 +196,7 @@ def run_task(reservoir_states, y, metric, **kwargs): y : tuple of numpy.ndarrays training and test targets or output labels; the shape of each numpy.ndarray is n_samples, n_labels - metric : str + metric : str kwargs : other keyword arguments are passed to one of the following functions: memory_capacity_task(); delays=None, t_on=0 @@ -227,10 +224,14 @@ def run_task(reservoir_states, y, metric, **kwargs): # fit model metrics = dict() for m in metric: - metrics[m], model = func(x=(x_train, x_test), y=( - y_train, y_test), metric=m, **kwargs) + metrics[m], model = func( + x=(x_train, x_test), + y=(y_train, y_test), + metric=m, **kwargs + ) + print(f'\t\t {m} = {metrics[m]}') df_res = pd.DataFrame(data=metrics, index=[0]) - return df_res, model \ No newline at end of file + return df_res, model From 1698bf397dc2ae13199b60493be11349e24c3dbe Mon Sep 17 00:00:00 2001 From: estefanysuarez Date: Mon, 27 Mar 2023 13:24:04 -0400 Subject: [PATCH 5/6] edit to task lists --- conn2res/iodata.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/conn2res/iodata.py b/conn2res/iodata.py index 8901319..1fab111 100755 --- a/conn2res/iodata.py +++ b/conn2res/iodata.py @@ -18,17 +18,17 @@ NEUROGYM_TASKS = [ 'AntiReach', - # 'Bandit', + 'Bandit', # * 'ContextDecisionMaking', - # 'DawTwoStep', + 'DawTwoStep', # * 'DelayComparison', 'DelayMatchCategory', 'DelayMatchSample', 'DelayMatchSampleDistractor1D', 'DelayPairedAssociation', - # 'Detection', # TODO: Temporary removing until bug fixed + 'Detection', # * 'DualDelayMatchSample', - # 'EconomicDecisionMaking', + 'EconomicDecisionMaking', # * 'GoNogo', 'HierarchicalReasoning', 'IntervalDiscrimination', @@ -47,7 +47,7 @@ 'ReadySetGo', 'SingleContextDecisionMaking', 'SpatialSuppressMotion', - # 'ToneDetection' # TODO: Temporary removing until bug fixed + 'ToneDetection' # * ] NATIVE_TASKS = [ From 7d0a32afc49c6836c7498742d0837002e4416f29 Mon Sep 17 00:00:00 2001 From: estefanysuarez Date: Mon, 27 Mar 2023 13:25:51 -0400 Subject: [PATCH 6/6] small fix to time_average_samples in coding --- conn2res/coding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conn2res/coding.py b/conn2res/coding.py index 70339ea..2f8c856 100755 --- a/conn2res/coding.py +++ b/conn2res/coding.py @@ -152,7 +152,7 @@ def time_average_samples(seq_len, data, sample_weight, operation=None): elif isinstance(data, np.ndarray): data = [data] - if len(data) != len(data): + if len(data) != len(sample_weight): raise ValueError( 'data and sample_weight should have the same number of assigned variables')