-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcompute_clique_topology.m
319 lines (278 loc) · 12.4 KB
/
compute_clique_topology.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
function [bettiCurves, edgeDensities, persistenceIntervals,...
unboundedIntervals] = compute_clique_topology ( inputMatrix, varargin )
% ----------------------------------------------------------------
% COMPUTE CLIQUE TOPOLOGY
% written by Chad Giusti, 9/2014
%
% Given a symmetric real matrix, construct its order complex, a
% family of graphs filtered by graph density with edges added in
% order of decreasing corrsponding entry in the matrix. Enumerate
% cliques in this family of graphs and run Perseus to compute the
% persistent homology of the resulting clique complexes. Return
% both the aggregate Betti curves and the distribution of
% persistence lifetimes for the order complex of the matrix.
%
% SYNTAX:
% compute_clique_topology( inputMatrix )
% compute_clique_topology( inputMatrix, 'ParameterName', param, ... )
%
% INPUTS:
% inputMatrix: an NxN symmetric matrix with real coefficients
% OPTIONAL PARAMETERS:
% 'ReportProgress': displays status and time elapsed in each stage
% as computation progresses (default: false)
% 'MaxBettiNumber': positive integer specifying maximum Betti
% number to compute (default: 3)
% 'MaxEdgeDensity': maximum graph density to include in the
% order complex in range (0, 1] (default: .6)
% 'FilePrefix': prefix for intermediate computation files,
% useful for multiple simultaneous jobs
% (default: 'matrix')
% 'ComputeBetti0': boolean flag for keeping Betti 0
% computations; this shifts the indexing of the
% outputs so that column n represents Betti (n-1).
% (default: false)
% 'KeepFiles': boolean flag indicating whether to keep
% intermediate files when the computation
% is complete (default: false)
% 'WorkDirectory': directory in which to keep intermediate
% files during computation (default: current
% directory, '.')
% 'BaseDirectory': location of the CliqueTop matlab files
% (default: detected by which('compute_clique_topology'))
% 'WriteMaximalCliques': boolean flag indicating whether
% to create a separate file containing the maximal cliques
% in each graph. May slow process. (default: false)
% 'Algorithm': which version of the clique enumeration algorithm
% to use. Options are: 'split', 'combine' and 'naive'. 'split'
% is the version used for the data processing in "Clique topology
% reveals intrinsic structure in neural correlations". It is
% usually the most memory-efficient algorithm, but requires the
% user to "guess" a maximum density and compute a list of cliques
% using Cliquer, which must also be compiled using MEX. 'combine'
% is much less memory efficient, but constructs a list of maximal
% cliques starting from the first filtration and building upward.
% 'naive' does not construct maximal cliques, but instead enumerates
% all cliques of sizes necessary to compute Bettis in the specified
% range. It is very memory efficient but slow, and works well for
% large matrices. It is incompatible with 'WriteMaximalCliques'.
%
% OUTPUTS:
% bettiCurves: rectangular array of size
% maxHomDim x floor(maxGraphDensity * (N choose 2))
% whose rows are the Betti curves B_1 ... B_maxHomDim
% across the order complex
% edgeDensities: the edge densities of the graphs in the
% order complex, useful for x-axis labels when graphing
% persistenceIntervals: rectangular array of size
% maxHomDim x floor(maxGraphDensity * (N choose 2))
% whose rows are counts of the persistence lifetimes
% in each homological dimension.
% unboundedIntervals: vector of length maxHomDim whose
% entries are the number of unbounded persistence intervals
% for each dimension. Here, unbounded should be interpreted
% as meaning that the cycle disappears after maxGraphDensity
% as all cycles disappear by density 1.
%
% ----------------------------------------------------------------
% ----------------------------------------------------------------
% Validate and set parameters
% ----------------------------------------------------------------
p = inputParser;
defaultReportProgress = false;
defaultBettiNumber = 3;
defaultEdgeDensity = .6;
defaultBetti0 = false;
defaultFilePrefix = 'matrix';
defaultKeepFiles = false;
defaultWorkDirectory = '.';
defaultWriteMaxCliques = false;
defaultAlgorithm = 'naive';
defaultThreads = 1;
functionLocation = which('compute_clique_topology');
defaultBaseDirectory = fileparts(functionLocation);
addRequired(p, 'inputMatrix', @(x) isreal(x) && isequal(x, x'));
addOptional(p, 'ReportProgress', defaultReportProgress, ...
@(x) islogical(x) && isscalar(x) );
addOptional(p, 'MaxBettiNumber', defaultBettiNumber, ...
@(x) isreal(x) && isscalar(x) && (floor(x) == x) && x > 0);
addOptional(p, 'ComputeBetti0', defaultBetti0, ...
@(x) islogical(x) && isscalar(x) );
addOptional(p, 'MaxEdgeDensity', defaultEdgeDensity, ...
@(x) isreal(x) && (x > 0) && (x <= 1) );
addOptional(p, 'FilePrefix', defaultFilePrefix, @ischar );
addOptional(p, 'KeepFiles', defaultKeepFiles, ...
@(x) islogical(x) && isscalar(x) );
addOptional(p, 'WriteMaximalCliques', defaultWriteMaxCliques, ...
@(x) islogical(x) && isscalar(x) );
addOptional(p, 'WorkDirectory', defaultWorkDirectory, ...
@(x) exist(x, 'dir') );
addOptional(p, 'BaseDirectory', defaultBaseDirectory, ...
@(x) exist(x, 'dir'));
addOptional(p, 'Algorithm', defaultAlgorithm, ...
@(x) any(strcmp(x, {'naive', 'split', 'combine', 'parnaive'})));
addOptional(p, 'Threads', defaultThreads, ...
@(x) isscalar(x) && (x > 0));
parse(p,inputMatrix,varargin{:});
inputMatrix = p.Results.inputMatrix;
reportProgress = p.Results.ReportProgress;
maxBettiNumber = p.Results.MaxBettiNumber;
computeBetti0 = p.Results.ComputeBetti0;
maxEdgeDensity = p.Results.MaxEdgeDensity;
filePrefix = p.Results.FilePrefix;
keepFiles = p.Results.KeepFiles;
baseDirectory = p.Results.BaseDirectory;
workDirectory = p.Results.WorkDirectory;
writeMaximalCliques = p.Results.WriteMaximalCliques;
algorithm = p.Results.Algorithm;
numThreads = floor(p.Results.Threads);
if ispc
perseusDirectory = [baseDirectory '/perseus'];
neuralCodewareDirectory = [baseDirectory '/Neural_Codeware'];
else
perseusDirectory = [baseDirectory '/perseus'];
neuralCodewareDirectory = [baseDirectory '/Neural_Codeware'];
end
if ((strcmp(algorithm, 'naive') || strcmp(algorithm, 'parnaive')) && writeMaximalCliques)
error('Naive clique enumeration and WriteMaximalCliques are incompatible');
end
if (strcmp(algorithm, 'parnaive'))
end
% ----------------------------------------------------------------
% If we need Cliquer, make sure the files are compiled
% ----------------------------------------------------------------
if (strcmp(algorithm, 'split'))
if ~exist(sprintf('./Neural_Codeware/+Cliquer/FindAll.%s', mexext), 'file')
disp('MEX Cliquer not compiled. Compiling before beginning process.')
startFolder = cd(fileparts(which('compute_clique_topology.m')));
cd('Neural_Codeware');
Cliquer.Compile();
cd(startFolder);
end
end
% ----------------------------------------------------------------
% Ensure that the diagonal is zero
% ----------------------------------------------------------------
inputMatrix(logical(eye(size(inputMatrix,1)))) = 0;
% ----------------------------------------------------------------
% Move to working directoy and stop if files might be overwritten
% ----------------------------------------------------------------
path(neuralCodewareDirectory, path);
try
cd(workDirectory);
if exist(sprintf('%s/%s_max_simplices.txt', workDirectory, filePrefix), 'file')
error('File %s_max_simplices.txt already exists in directory %s.',...
filePrefix, workDirectory);
end
if exist(sprintf('%s/%s_simplices.txt', workDirectory, filePrefix), 'file')
error('File %s_simplices.txt already exists in directory %s.',...
filePrefix, workDirectory);
end
if exist(sprintf('%s/%s_homology_betti.txt', workDirectory, filePrefix), 'file')
error('File %s_homology_betti.txt already exists in directory %s.',...
filePrefix, workDirectory);
end
for d=0:maxBettiNumber+1
if exist(sprintf('%s/%s_homology_%i.txt', workDirectory, filePrefix, d), 'file')
error('File %s_homology_%i.txt already exists in directory %s.',...
filePrefix, d, workDirectory);
end
end
catch exception
disp(exception.message);
rethrow(exception);
end
% ----------------------------------------------------------------
% Enumerate maximal cliques and print to Perseus input file
% ----------------------------------------------------------------
if reportProgress
toc;
sprintf('Enumerating cliques using %s algorithm.', algorithm);
tic;
end
switch algorithm
case 'combine'
numFiltrations = combine_cliques_and_write_to_file(...
inputMatrix, maxBettiNumber + 2, maxEdgeDensity, filePrefix,...
writeMaximalCliques);
case 'naive'
numFiltrations = naive_enumerate_cliques_and_write_to_file(...
inputMatrix, maxBettiNumber + 2, maxEdgeDensity, filePrefix);
case 'parnaive'
numFiltrations = ...
parallel_naive_enumerate_cliques_and_write_to_file(...
inputMatrix, maxBettiNumber + 2, maxEdgeDensity, filePrefix,...
numThreads );
case 'split'
numFiltrations = split_cliques_and_write_to_file(...
inputMatrix, maxBettiNumber + 2, maxEdgeDensity, filePrefix,...
writeMaximalCliques);
end
% ----------------------------------------------------------------
% Use Perseus to compute persistent homology
% ----------------------------------------------------------------
if reportProgress
toc;
disp('Using Perseus to compute persistent homology.');
tic;
end
run_perseus(filePrefix, perseusDirectory);
if reportProgress
toc;
end
% ----------------------------------------------------------------
% Assemble the results of the computation for output
% ----------------------------------------------------------------
matrixSize = size(inputMatrix, 1);
edgeDensities = (1:numFiltrations) / nchoosek(matrixSize,2);
try
bettiCurves = read_perseus_bettis(sprintf('%s_homology_betti.txt',...
filePrefix), numFiltrations, maxBettiNumber, computeBetti0);
if computeBetti0
persistenceIntervals = zeros(numFiltrations, maxBettiNumber+1);
unboundedIntervals = zeros(1,maxBettiNumber+1);
for d=0:maxBettiNumber
[persistenceIntervals(:,d+1), unboundedIntervals(d+1) ] =...
read_persistence_interval_distribution(...
sprintf('%s_homology_%i.txt', filePrefix, d), ...
numFiltrations);
end
else
persistenceIntervals = zeros(numFiltrations, maxBettiNumber);
unboundedIntervals = zeros(1,maxBettiNumber);
for d=1:maxBettiNumber
[persistenceIntervals(:,d), unboundedIntervals(d) ] =...
read_persistence_interval_distribution(...
sprintf('%s_homology_%i.txt', filePrefix, d), ...
numFiltrations);
end
end
catch exception
disp(exception.message);
disp('Failure to read Perseus output files. This error has likely occurred due to the Perseus process aborting due to memory limitations. It may be possible to circumvent this difficulty by reducing either the maximum Betti number or the maximum edge density computed. Please see the CliqueTop documentation for details.');
rethrow(exception);
end
% ----------------------------------------------------------------
% Remove remaining intermediate files if desired
% ----------------------------------------------------------------
if keepFiles == false
try
if exist(sprintf('%s_max_simplices.txt', filePrefix), 'file')
delete(sprintf('%s_max_simplices.txt', filePrefix));
end
if exist(sprintf('%s_simplices.txt', filePrefix), 'file')
delete(sprintf('%s_simplices.txt', filePrefix));
end
if exist(sprintf('%s_homology_betti.txt', filePrefix), 'file')
delete(sprintf('%s_homology_betti.txt', filePrefix));
end
for d=0:maxBettiNumber+1
if exist(sprintf('%s_homology_%i.txt', filePrefix, d), 'file')
delete(sprintf('%s_homology_%i.txt', filePrefix, d));
end
end
catch exception
disp(exception.message);
rethrow(exception);
end
end