function [ses_coef, T, optimal] = kfold_one_class_par(EEG, BOLD, path_pars, path_hc, varargin)
% [ses_coef, T, optimal] = kfold_one_class_par(EEG, BOLD, path_pars, path_hc, ...)
% performs hierarchical clustering and nested k-fold cross-validation to
% obtain a one-class model for the entire dataset
%
% Input data:
%
%   EEG         The model's X (time-points x features x sessions)
%   BOLD        The model's Y (time-points x sessions)
%   path_pars   Path to the directory containing 'Lambdas.mat'
%   path_hc     Path to the directory holding the per-session model
%               coefficients ('ses_coef.mat'), if previously computed,
%               also used to build the output path of the clustering report
%
% Optional input parameters:
%
%   'hc_distance'   The linkage method and distance metric used for the
%                   hierarchical clustering (two-element array)
%   'v'             The validation-to-learn fraction of the inner CV loop
%   'n'             The number of cycles in the inner CV loop
%   'rho'           The complexity parameter rho
%   'lambda'        The complexity parameter lambda
%
% Outputs:
%
%   ses_coef    The coefficients of the model for each session
%   T           The cluster assignment of each session
%   optimal     Struct with information corresponding to the best model
%               across the leave-one-out CV folds:
%       'ses'       The sessions included in the one-class model
%       'rho'       The regularization parameter rho of the L21+1 fits
%       'lambda'    The complexity parameter lambda of the L21+1 fits
%       'efp'       The activation pattern of each final fit
%       'df'        The number of degrees of freedom of each final fit
%       'mse_*', 'nmse_*', 'bic_*', 'corr_*'
%                   The (normalized) mean squared error, Bayesian
%                   information criterion and correlation of the final
%                   fits in the test ('*_test') and train ('*_train') sets
%
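% Example (illustrative sketch only; the array sizes, directory names and
% the contents of 'Lambdas.mat' below are assumptions):
%
%   EEG  = randn(300, 50, 12);   % time-points x features x sessions
%   BOLD = randn(300, 12);       % time-points x sessions
%   % 'PARS' is assumed to contain 'Lambdas.mat' (required when 'rho' and
%   % 'lambda' are not supplied); 'RESULTS' is a hypothetical output folder
%   [ses_coef, T, optimal] = kfold_one_class_par(EEG, BOLD, ...
%       'PARS', 'RESULTS', 'v', 0.3, 'n', 10);
%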
tic
% ------------------------------------------------------------
% Sanity check and process input parameters
% ------------------------------------------------------------
% X a real 3D matrix
if length(size(EEG)) ~= 3 || ~isreal(EEG)
error('EEG is not a real 3D matrix');
end
% If Y is a #sessions-row vector,
% convert to a #sessions-column vector
if size(BOLD, 1) < size(BOLD, 2)
BOLD = BOLD';
end
% Y a real 2D matrix (time-points x sessions)
if ~ismatrix(BOLD) || ~isreal(BOLD)
error('BOLD is not a real 2D matrix');
end
% Number of samples (time-points)
n_pnts = size(BOLD, 1);
% Number of regressors in the model
n_features = size(EEG, 2);
% Number of sessions
n_sessions = size(EEG, 3);
% ------------------------------------------------------------
% Sanity check and process optional parameters
% ------------------------------------------------------------
% Assign default values for each optional parameter
pnames = {'hc_distance' 'v' 'n' 'rho' 'lambda'};
dflts = {["average", "correlation"] 0.3 10 [] []};
% Assign variables corresponding to optional parameters
[hc_distance, V, N, rho, lambda] = internal.stats.parseArgs(pnames, dflts, varargin{:});
% Check if user supplied lambda and rho
user_supplied_rho = true;
user_supplied_lambda = true;
if isempty(rho)
user_supplied_rho = false;
end
if isempty(lambda)
user_supplied_lambda = false;
end
if ~isequal(user_supplied_rho, user_supplied_lambda)
error('Must supply both rho and lambda, or neither');
end
% ------------------------------------------------------------
% Hierarchical clustering
% ------------------------------------------------------------
% First compute a model for each of the input sessions
% In case the user hasn't supplied rho and lambda (must
% supply both or neither), fix rho and load the pre-established
% set of 20 candidate lambda values (computed on a random dataset)
if ~user_supplied_rho
rho = 0.6;
% Retrieve pre-established lambda range of interest
load(fullfile(path_pars, 'Lambdas.mat'), 'lambda');
end
% Number of candidate lambda values
n_pars = length(lambda);
% EEG = reshape(EEG, [size(EEG, 1) size(EEG, 2) 3 23]);
% EEG = permute(EEG, [1 3 2 4]);
% EEG = reshape(EEG, [size(EEG, 1)*size(EEG, 2) size(EEG, 3) size(EEG, 4)]);
%
% BOLD = reshape(BOLD, [size(BOLD, 1) 3 23]);
% BOLD = reshape(BOLD, [size(BOLD, 1)*size(BOLD, 2) size(BOLD, 3)]);
%
% n_pnts = size(BOLD, 1);
% n_features = size(EEG, 2);
% n_sessions = size(EEG, 3);
if exist(fullfile(path_hc, 'ses_coef.mat'), 'file')
load(fullfile(path_hc, 'ses_coef.mat'), 'ses_coef');
else
% Pre-allocate variables
ses_coef = zeros(n_features + 1, n_sessions);
% Go through all sessions and estimate a model for each
parfor se = 1 : n_sessions
X = EEG(:, :, se);
y = BOLD(:, se);
% L2+1 elastic-net regression fit
[betas, stats] = lasso(X, y, 'Alpha', rho, 'Lambda', lambda);
intercept = stats.Intercept;
df = stats.DF';
% Compute BIC for all lambda values and find minimum BIC
y_hat = intercept + X*betas;
mses = sum((y_hat - y).^2)';
%nmse = mses/sum((y - mean(y)).^2);
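% BIC per lambda value (a standard form, assuming Gaussian residuals):
% BIC = df*log(n) + n*log(RSS/n), where n = length(y) and RSS is the
% residual sum of squares; smaller BIC means a better fit/complexity trade-off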
bic = log(length(y)).*df + length(y).*log(mses ./ length(y));
[~, ind] = min(bic);
ses_coef(:, se) = [intercept(ind); betas(:, ind)];
end
end
% Report clustering results
report_clustering_results(ses_coef', hc_distance, fullfile('IMAGES', path_hc));
% Create a hierarchical cluster tree, Z, from the session
% coefficients, using the linkage method and distance metric
% given in hc_distance (default: average linkage with
% correlation distance)
Z = linkage(ses_coef', hc_distance(1), hc_distance(2));
% Find the optimal number of clusters with the elbow (knee)
% method: locate the knee of the curve of merge distances
% stored in the third column of Z
[n_clusters, ~] = knee_pt(Z(:, 3));
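% Note: knee_pt is an external helper (not defined in this file); it is
% assumed to return the index of maximum curvature along the vector of
% merge distances, which is then used as the number of clusters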
% Define clusters from the hierarchical cluster tree Z
T = cluster(Z, 'MaxClust', n_clusters);
% Find the largest cluster
[occurrences, clusters] = hist(T, unique(T));
[~, ind] = max(occurrences);
largest_cluster = clusters(ind);
% Select sessions belonging to the largest_cluster
sessions_in = (T == largest_cluster);
sessions_out = (T ~= largest_cluster);
% ------------------------------------------------------------
% Estimate one-class model and its performance
% ------------------------------------------------------------
X = EEG(:, :, sessions_in);
y = BOLD(:, sessions_in);
K = size(X, 3);
% Allocate arrays of model
% parameters and performance
% for each test/train pair
opt_lambda = zeros(K, 1);
opt_rho = opt_lambda;
opt_coef = zeros(n_features+1, K);
opt_df = opt_lambda;
opt_bic_train = opt_lambda;
opt_mse_train = opt_lambda;
opt_nmse_train = opt_lambda;
opt_corr_train = opt_lambda;
opt_bic_test = opt_lambda;
opt_mse_test = opt_lambda;
opt_nmse_test = opt_lambda;
opt_corr_test = opt_lambda;
%-------------- Begin outer loop ---------------%
% Leave-one-out CV procedure
for k = 1 : K
% Train set indices
ind_train = find(1 : K ~= k);
% Test set indices
ind_test = k;
% Train set
X_train = X(:, :, ind_train);
X_train = reshape(permute(X_train, [1 3 2]), [n_pnts*(K-1) n_features]);
y_train = y(:, ind_train); y_train = y_train(:);
siz_train = length(y_train);
% Test set
X_test = squeeze(X(:, :, ind_test));
y_test = squeeze(y(:, ind_test));
siz_test = length(y_test);
% Allocate bic and mse matrices for the learn and
% for the val set, each inner iteration through cols
bic_val = zeros(n_pars, N); nmse_val = bic_val;
df_inner = bic_val;
%-------------- Begin inner loop --------------%
% The inner loop
% has N iterations
parfor n = 1 : N
% Circular holdout assignment
ind_start = randi([1 siz_train]);
indices_in = sort(crossvalind('Holdout', siz_train, V));
% Assign learning set indices
ind_learn = zeros(size(indices_in));
ind_learn(ind_start : end) = indices_in(1 : siz_train - ind_start + 1);
ind_learn(1 : ind_start - 1) = indices_in(siz_train - ind_start + 2 : siz_train);
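% Note on the holdout scheme above: crossvalind('Holdout', siz_train, V)
% returns a random logical mask with roughly a fraction V of entries held
% out for validation; sorting it makes the learn/validation split
% contiguous, and the random start index rotates the mask so the held-out
% segment wraps around circularly across inner iterations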
% Assign broadcast variables to loop variables for efficiency.
% Large broadcast variables can cause significant communication
% between client and workers and increase parallel overhead, so
% replace them with temporary variables created inside the loop
X_par = X_train; y_par = y_train; lambda_par = lambda;
% Assign validation set indices
ind_val = (~ind_learn);
ind_val = find(ind_val);
siz_val = length(ind_val);
ind_learn = find(ind_learn);
% Assign learning and validation variables
X_learn = squeeze(X_par(ind_learn, :));
y_learn = squeeze(y_par(ind_learn));
X_val = squeeze(X_par(ind_val, :));
y_val = squeeze(y_par(ind_val));
% L2+1 elastic-net regression fit
[betas, stats] = lasso(X_learn, y_learn, ...
'Alpha', rho, 'Lambda', lambda_par);
intercept = stats.Intercept;
df = stats.DF;
df = flip(df)';
betas = flip(betas, 2);
intercept= flip(intercept);
% Compute BIC for all lambda values and find minimum BIC
y_hat_val = intercept + X_val*betas;
mse_val = sum((y_hat_val - y_val).^2)';
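% Normalized MSE: residual sum of squares divided by the total sum of
% squares of the validation targets (a value of 1 is no better than
% predicting the mean)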
nmse_val(:, n) = mse_val/sum((y_val - mean(y_val)).^2);
bic_val(:, n) = log(siz_val).*df + siz_val.*log(mse_val ./ ...
siz_val);
df_inner(:, n) = df;
end
%--------------- End inner loop ---------------%
% Flag as ineligible, in 'trash_lam': (1) lambda values whose nmse is,
% at any of the inner iterations, above an unacceptable threshold
% 'thresh_nmse', and (2) lambda values whose number of degrees of
% freedom is zero at any of the inner iterations. Inner iterations
% whose average validation nmse already exceeds a slightly lower
% threshold (thresh_nmse - 0.05) are excluded from this flagging
thresh_nmse = 0.98; aux = nmse_val > thresh_nmse;
aux(:, mean(nmse_val, 1) > thresh_nmse - 0.05) = 0;
trash_lam = find(sum(aux, 2) >= 1);
[rows, ~] = find(df_inner == 0);
trash_lam = unique([trash_lam; unique(rows)]);
% Average bic values without considering
% iterations flagged as "trash"
aux = bic_val; aux(:, mean(nmse_val, 1) > thresh_nmse) = 0;
[rows,~,val] = find(aux);
aux_val = accumarray(rows, val, [], @mean);
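% (find/accumarray collapses each row of 'aux' to the mean of its
% nonzero entries, i.e., the mean bic over the retained iterations)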
% Find the optimal rho-lambda for the current test/train pair,
% i.e., the rho-lambda that minimizes the bic averaged over the
% retained inner iterations in the validation set
aux_val(trash_lam) = Inf; [~, ind_opt] = min(aux_val);
if isempty(ind_opt); [~, ind_opt] = min(sum(bic_val, 2)); end
% Save rho-lambda pair that minimizes
% bic in learn and val sets combined
opt_lambda(k, 1) = lambda(ind_opt);
opt_rho(k, 1) = rho;
% L2+1 fit for the current iteration of the outer CV loop
[betas, stats] = lasso(X_train, y_train, 'Alpha', opt_rho(k), ...
'Lambda', opt_lambda(k)); opt_df(k) = stats.DF;
% Compute the y hat for the test and training
% sets of the current outer iteration k
y_hat_test = stats.Intercept + X_test*betas;
y_hat_train = stats.Intercept + X_train*betas;
% Model coefficients of the current outer
% iteration k
opt_coef(:, k) = [stats.Intercept; betas];
% Model performance of the current outer
% iteration k, in the test and train sets
opt_mse_test(k) = sum((y_hat_test - y_test).^2);
opt_mse_train(k) = sum((y_hat_train - y_train).^2);
opt_bic_test(k) = log(siz_test).* opt_df(k) + ...
siz_test.* log(opt_mse_test(k)./ siz_test);
opt_bic_train(k) = log(siz_train).* opt_df(k) + ...
siz_train.* log(opt_mse_train(k)./ siz_train);
opt_nmse_test(k) = opt_mse_test(k)/ ...
sum((y_test - mean(y_test)).^2);
opt_nmse_train(k) = opt_mse_train(k)/ ...
sum((y_train - mean(y_train)).^2);
opt_corr_test(k) = corr(y_hat_test,y_test);
opt_corr_train(k) = corr(y_hat_train,y_train);
end
%--------------- End outer loop ---------------%
% ------------------------------------------------------------
% Prepare output data
% ------------------------------------------------------------
% Sessions included in the one-class
% model (the largest cluster)
optimal.ses = sessions_in;
% Prediction performance
% across the test sets
optimal.bic_test = opt_bic_test;
optimal.mse_test = opt_mse_test;
optimal.nmse_test = opt_nmse_test;
optimal.corr_test = opt_corr_test;
% Prediction performance
% across the train sets
optimal.bic_train = opt_bic_train;
optimal.mse_train = opt_mse_train;
optimal.nmse_train = opt_nmse_train;
optimal.corr_train = opt_corr_train;
% Estimated model
% across folds
optimal.efp = opt_coef;
optimal.lambda = opt_lambda;
optimal.rho = opt_rho;
optimal.df = opt_df;
end