This repository has been archived by the owner on Aug 15, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathpsom_pipeline_init.m
919 lines (822 loc) · 32.9 KB
/
psom_pipeline_init.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
function [file_pipeline,flag_start] = psom_pipeline_init(pipeline,opt)
% Prepare the log folders of a pipeline before execution by PSOM.
%
% When the pipeline is executed for the first time, that means
% initialize the dependency graph, store individual job description
% in a matlab file, and initialize status and logs.
%
% If the pipeline is restarted after some failures or update of some of the
% jobs' parameters, the job status and logs are "refreshed" to make
% everything ready before restart. See the notes in the COMMENTS section
% below for details.
%
% SYNTAX:
% [FILE_PIPELINE,FLAG_START] = PSOM_PIPELINE_INIT(PIPELINE,OPT)
%
% _________________________________________________________________________
% INPUTS:
%
% PIPELINE
%    (structure) a matlab structure which defines a pipeline.
%    Each field name <JOB_NAME> will be used to name the corresponding
%    job. The fields <JOB_NAME> are themselves structure, with
%    the following fields :
%
%    COMMAND
%       (string) the name of the command applied for this job.
%       This command can use the variables FILES_IN, FILES_OUT and OPT
%       associated with the job (see below).
%       Examples :
%          'niak_brick_something(files_in,files_out,opt);'
%          'my_function(opt)'
%
%    FILES_IN
%       (string, cell of strings, structure whose terminal nodes are
%       string or cell of strings)
%       The files used as input by the command. Note that for properly
%       handling dependencies, this field needs to contain the exact
%       name of the file (full path, no wildcards, no '' for default
%       values).
%
%    FILES_OUT
%       (string, cell of strings, structure whose terminal nodes are
%       string or cell of strings) The list of files generated by the
%       command. Note that for properly handling dependencies, this
%       field needs to contain the exact name of the file
%       (full path, no wildcards, no '' for default values).
%
%    FILES_CLEAN
%       (string, cell of strings, structure whose terminal nodes are
%       string or cell of strings) The list of files deleted by the
%       command. Note that for properly handling dependencies, this
%       field needs to contain the exact name of the file
%       (full path, no wildcards, no '' for default values).
%
%    OPT
%       (any matlab variable) options of the job. This field has no
%       impact on dependencies. OPT can for example be a structure,
%       where each field will be used as an argument of the command.
%
% OPT
%    (structure) with the following fields :
%
%    PATH_LOGS
%       (string) The folder where the .mat files will be stored. That
%       folder needs to be empty, and left untouched during the whole
%       pipeline processing. Renaming or deleting files from the
%       PATH_LOGS may result in unrecoverable crash of the pipeline.
%
%    PATH_SEARCH
%       (string, default GB_PSOM_PATH_SEARCH in the file PSOM_GB_VARS).
%       If PATH_SEARCH is empty, the current path is used. If
%       PATH_SEARCH equals 'gb_psom_omitted', then PSOM will not attempt
%       to set the search path, i.e. the search path for every job will
%       be the current search path in 'session' mode, and the default
%       Octave/Matlab search path in the other modes.
%
%    COMMAND_MATLAB
%       (string, default GB_PSOM_COMMAND_MATLAB or
%       GB_PSOM_COMMAND_OCTAVE depending on the current environment)
%       how to invoke Matlab (or Octave).
%       You may want to update that to add the full path of the command.
%       The default for this field can be set using the variable
%       GB_PSOM_COMMAND_MATLAB/OCTAVE in the file PSOM_GB_VARS.
%
%    RESTART
%       (cell of strings, default {}) any job whose name contains one
%       of the strings in RESTART will be restarted, along with all of
%       its children, and some of its parents whenever needed. See the
%       notes on STAGE 3 below for more details.
%
%    TYPE_RESTART
%       (string, default 'substring') defines how OPT.RESTART is to be
%       interpreted. Available options:
%          'substring' : restart jobs whose name contains one of the
%             strings in OPT.RESTART
%          'exact' : restart jobs whose name is listed in OPT.RESTART.
%
%    FLAG_UPDATE
%       (boolean, default true) If FLAG_UPDATE is true, a comparison
%       between previous pipelines and the current pipeline will be
%       performed to restart updated jobs.
%
%    FLAG_PAUSE
%       (boolean, default false) If FLAG_PAUSE is true, the pipeline
%       initialization will pause before writing the logs.
%
%    FLAG_VERBOSE
%       (integer 0, 1 or 2, default 1) No verbose (0), standard
%       verbose (1), a lot of verbose, useful for debugging (2).
%
% _________________________________________________________________________
% OUTPUTS:
%
% FILE_PIPELINE
%    (string) the file name of the .MAT file recapitulating all the
%    infos on the pipeline
%
% FLAG_START
%    (boolean) true if some jobs need to be processed, false otherwise.
%
% _________________________________________________________________________
% SEE ALSO:
% PSOM_PIPELINE_PROCESS, PSOM_PIPELINE_VISU, PSOM_DEMO_PIPELINE,
% PSOM_RUN_PIPELINE
%
% _________________________________________________________________________
% COMMENTS:
%
% The following notes describe the stages performed by PSOM_PIPELINE_INIT
% in a chronological order.
%
% * STAGE 1:
%
%    The dependency graph of the pipeline is defined as follows: job A
%    depends on job B if at least one of the two following conditions is
%    satisfied :
%       1. the input files of job A belongs to the list of output files of
%          job B.
%       2. the job B will clean (i.e. delete) some files that job A uses as
%          inputs or outputs.
%    See PSOM_BUILD_DEPENDENCIES and PSOM_VISU_DEPENDENCIES for details.
%
%    Some viability checks are performed on the pipeline :
%
%       1. Check that the dependency graph of the pipeline is a directed
%          acyclic graph, i.e. if job A depends on job B, job B cannot depend
%          (even indirectly) on job A.
%
%       2. Check that an output file is not created twice. Overwriting
%          files is regarded as a bug in a pipeline (forgetting to edit a
%          copy-paste is a common mistake that leads to overwriting).
%
% * STAGE 2:
%    Load old pipeline descriptions & status. If a job is marked as
%    'submitted' or 'running' but a 'finished' tag file can be found, then
%    the job is marked as 'finished'.
%    That behavior is useful when the pipeline manager has crashed but some
%    jobs completed after the crash, thus generating left-over tags.
%
% * STAGE 3:
%    Some 'start' flags are generated for each job, which are then used to
%    update the status of all jobs:
%
%       1. If a job was already processed during a previous execution of the
%          pipeline, but anything changed in the job description (the
%          command line, the options or the names of inputs/outputs), then the
%          job will be marked as 'start'. This operation is done by
%          comparing the content of the variable <JOB_NAME> in PIPE_jobs.mat
%          with the field PIPELINE.<JOB_NAME>. This feature can be turned
%          on/off using OPT.FLAG_UPDATE.
%
%       2. Jobs that had failed during previous executions are marked as
%          'start'.
%
%       3. Jobs that are new are marked as 'start'.
%
%       4. All jobs whose name contains at least one of the strings listed
%          in OPT.RESTART will be marked as 'start'.
%
%       5. All jobs that depend even indirectly on a job marked as
%          'start' (in the sense of the dependency graph) are themselves
%          marked as 'start'.
%
%    If a job is going to be started, the system checks if all the inputs exist.
%
%       1. If some files are missing and are not potential outputs of the
%          pipeline, this is reported and the pipeline initialization will
%          pause to let the user the time to cancel the execution of the
%          pipeline.
%
%       2. All jobs that can produce missing files are started (along with
%          all children). Note that this behaviour is iterative.
%
%    The following strategy is implemented to update the job status:
%
%       1. Unless the job already has a 'finished' status and is not marked
%          as 'start', its status is set to 'none' and the log file is
%          re-initialized as blank.
%
%       2. If a job was marked as 'finished' and is not marked as
%          'restart', its status is left as 'finished' and the log file is
%          also left "as is". Note that even if the outputs do not exist
%          (because they have been deleted since the pipeline was last
%          executed) the job will not be restarted.
%
% * STAGE 4:
%
%    The current description of the pipeline is saved in the logs folder
%    and a lock file is generated.
%
%    The directory PATH_LOGS is created if necessary. A description of the
%    pipeline, its dependencies and the matlab environment are saved in a
%    set of files (see the COMMENTS section in PSOM_RUN_PIPELINE).
%    Existing tag/log/exit/qsub files in the logs folder are deleted,
%    as well as the 'tmp' subfolder, if it exists.
%
% Copyright (c) Pierre Bellec, Montreal Neurological Institute, 2008-2010.
% Departement d'informatique et de recherche operationnelle
% Centre de recherche de l'institut de Geriatrie de Montreal
% Universite de Montreal, 2010-2012.
% Maintainer : [email protected]
% See licensing information in the code.
% Keywords : pipeline

% Permission is hereby granted, free of charge, to any person obtaining a copy
% of this software and associated documentation files (the "Software"), to deal
% in the Software without restriction, including without limitation the rights
% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
% copies of the Software, and to permit persons to whom the Software is
% furnished to do so, subject to the following conditions:
%
% The above copyright notice and this permission notice shall be included in
% all copies or substantial portions of the Software.
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
% THE SOFTWARE.

%%%%%%%%%%%%%%%%%%%%%
%% Checking inputs %%
%%%%%%%%%%%%%%%%%%%%%
psom_gb_vars

%% Syntax
if ~exist('pipeline','var')||~exist('opt','var')
    error('syntax: [FILE_PIPELINE,FLAG_START] = PSOM_PIPELINE_INIT(PIPELINE,OPT).\n Type ''help psom_pipeline_init'' for more info.')
end

%% Options
gb_name_structure = 'opt';
gb_list_fields = { 'path_search' , 'flag_pause' , 'flag_update' , 'restart' , 'path_logs' , 'command_matlab' , 'flag_verbose' , 'type_restart' };
gb_list_defaults = { gb_psom_path_search , false , true , {} , NaN , '' , 1 , 'substring' };
psom_set_defaults
name_pipeline = 'PIPE';

% Make sure the logs folder ends with a file separator, as file names are
% built below by plain concatenation.
if ~strcmp(path_logs(end),filesep)
    path_logs = [path_logs filesep];
end

if isempty(path_search)
    path_search = path;
    opt.path_search = path_search;
end

if isempty(opt.command_matlab)
    if strcmp(gb_psom_language,'matlab')
        opt.command_matlab = gb_psom_command_matlab;
    else
        opt.command_matlab = gb_psom_command_octave;
    end
end

%% Misc variables
hat_qsub_o = sprintf('\n\n*****************\nOUTPUT QSUB\n*****************\n');
hat_qsub_e = sprintf('\n\n*****************\nERROR QSUB\n*****************\n');

%% Print a small banner for the initialization
if flag_verbose
    fprintf('\nLogs will be stored in %s\n',path_logs);
end

%% Generate file names
file_pipeline = [path_logs name_pipeline '.mat' ];
file_jobs = [path_logs name_pipeline '_jobs.mat' ];
file_logs = [path_logs name_pipeline '_logs.mat' ];
file_logs_backup = [path_logs name_pipeline '_logs_backup.mat' ];
file_news_feed = [path_logs name_pipeline '_news_feed.csv' ];
file_status = [path_logs name_pipeline '_status.mat' ];
file_status_backup = [path_logs name_pipeline '_status_backup.mat' ];
file_status_init = [path_logs name_pipeline '_status_init.mat' ];
file_profile = [path_logs name_pipeline '_profile.mat' ];
file_profile_backup = [path_logs name_pipeline '_profile_backup.mat' ];
file_pipe_running = [path_logs name_pipeline '.lock' ];
list_jobs = fieldnames(pipeline);
nb_jobs = length(list_jobs);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Stage 1: Build the dependency graph and check the viability of the pipeline %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if flag_verbose>1
    fprintf('Examining the dependencies of the pipeline ...\n');
end

%% Remove empty jobs
if flag_verbose>1
    fprintf(' Removing empty jobs ...\n');
end
for num_j = 1:nb_jobs
    if length(fieldnames(pipeline.(list_jobs{num_j})))==0
        pipeline = rmfield(pipeline,list_jobs{num_j});
    end
end
% Re-sync the list of jobs with the pipeline: the loops below index
% PIPELINE through LIST_JOBS, and would otherwise hit the removed fields.
list_jobs = fieldnames(pipeline);
nb_jobs = length(list_jobs);

%% Check that all jobs have a "command" field
if flag_verbose>1
    fprintf(' Checking that all jobs are associated with a command ...\n');
end
for num_j = 1:nb_jobs
    if ~isfield(pipeline.(list_jobs{num_j}),'command')
        error('The job %s has no ''command'' field. Sorry dude, I cannot process that pipeline.\n',list_jobs{num_j});
    end
end

%% Generate dependencies
if flag_verbose>1
    fprintf(' Generating dependencies ...\n');
elseif flag_verbose ==1
    fprintf('Generating dependencies ...\n');
end
[graph_deps,list_jobs,files_in,files_out,files_clean] = psom_build_dependencies(pipeline,opt.flag_verbose);
% LIST_JOBS was just re-generated, keep the job counter consistent with it
nb_jobs = length(list_jobs);

%% Check if some outputs were not generated twice
if flag_verbose>1
    fprintf(' Checking if some outputs were not generated twice ...\n');
end
[flag_ok,list_files_failed,list_jobs_failed] = psom_is_files_out_ok(files_out);
if ~flag_ok
    for num_f = 1:length(list_files_failed)
        if num_f == 1
            str_files = list_files_failed{num_f};
        else
            str_files = [str_files ' ; ' list_files_failed{num_f}];
        end
    end
    for num_j = 1:length(list_jobs_failed)
        if num_j == 1
            str_jobs = list_jobs_failed{num_j};
        else
            str_jobs = [str_jobs ' ; ' list_jobs_failed{num_j}];
        end
    end
    error('The following output files are generated multiple times : %s.\n\nThe following jobs are responsible for that : %s',str_files,str_jobs);
end

%% Check for cycles
if flag_verbose>1
    fprintf(' Checking if the graph of dependencies is acyclic ...\n');
end
[flag_dag,list_vert_cycle] = psom_is_dag(graph_deps);
if ~flag_dag
    for num_f = 1:length(list_vert_cycle)
        if num_f == 1
            str_files = list_jobs{list_vert_cycle(num_f)};
        else
            str_files = [str_files ' ; ' list_jobs{list_vert_cycle(num_f)}];
        end
    end
    error('There are cycles in the dependency graph of the pipeline. The following jobs are involved in at least one cycle : %s',str_files);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Stage 2: Load previous pipeline description, logs and status %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% update the logs
% NOTE(review): OPT_G is not assigned anywhere in this function. Unless one
% of the scripts PSOM_GB_VARS / PSOM_SET_DEFAULTS defines it, this call
% always throws and is silently skipped by the TRY block -- TODO confirm the
% intended options and pass them explicitly.
try
    psom_garbage(path_logs,opt_g,true);
end

%% Test for the existence of an old pipeline
flag_old_pipeline = psom_exist(file_jobs);
if flag_old_pipeline
    if flag_verbose>1
        fprintf('\nLoading previous pipeline ...\n');
    end
end

%% Old jobs
pipeline_old = sub_load_old(file_jobs);

%% Old status
all_status_old = sub_load_old(file_status);
if isempty(all_status_old)
    for num_j = 1:nb_jobs
        name_job = list_jobs{num_j};
        all_status_old.(name_job) = 'none';
    end
end

%% Old logs
all_logs_old = sub_load_old(file_logs);

%% Old profile
profile_old = sub_load_old(file_profile);

%% Update the status of the jobs using the tag files that can be found
if flag_verbose>1
    fprintf(' Cleaning up job status ...\n');
end
job_status = cell(size(list_jobs));
for num_j = 1:length(list_jobs)
    name_job = list_jobs{num_j};
    if isfield(all_status_old,name_job)
        job_status{num_j} = all_status_old.(name_job);
    else
        job_status{num_j} = 'none';
    end
end

% Jobs that were left "in the queue" by the previous execution: get their
% current status from the logs folder.
mask_inq = ismember(job_status,{'submitted','running'});
list_num_inq = find(mask_inq);
list_num_inq = list_num_inq(:)';
list_jobs_inq = list_jobs(mask_inq);
curr_status = psom_job_status(path_logs,list_jobs_inq,'session');

%% Remove the dependencies on finished jobs
mask_finished = ismember(curr_status,'finished');
list_num_finished = list_num_inq(mask_finished);
list_num_finished = list_num_finished(:)';
for num_j = list_num_finished
    job_status{num_j} = 'finished';
end
job_status_old = job_status;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Stage 3 : Set up the 'restart' flags %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Note that FLAG_START is used as a per-job mask in this stage, and is
% re-assigned as the scalar output of the function further down.
flag_restart = false([1 length(job_status)]);
flag_start = false([1 length(job_status)]);
flag_start(ismember(job_status_old,{'none','failed','exit'})) = true;
if flag_verbose
    fprintf('Setting up the to-do list ...\n');
end

% Length of the longest job name (plus one), used to align the verbose
lmax = 0;
for num_j = 1:nb_jobs
    lmax = max(length(list_jobs{num_j}),lmax);
end
lmax = lmax + 1;

for num_j = 1:nb_jobs
    name_job = list_jobs{num_j};
    flag_restart_job = flag_restart(num_j);
    if flag_restart_job
        % Already flagged as the child of a restarted job
        continue
    end
    if strcmp(job_status_old{num_j},'failed')||strcmp(job_status_old{num_j},'exit')
        flag_restart_job = true;
        if flag_verbose>1
            fprintf(' %s%s(failed)\n',name_job,repmat(' ',[1 lmax-length(name_job)]))
        end
    else
        %% If an old pipeline exists, check if the job has been modified
        if isfield(pipeline_old,name_job)
            if opt.flag_update
                flag_same = psom_cmp_var(pipeline_old.(name_job),pipeline.(name_job));
                if ~flag_same&&(flag_verbose>1)
                    fprintf(' %s%s(changed)\n',name_job,repmat(' ',[1 lmax-length(name_job)]))
                end
            else
                flag_same = true;
                if (num_j == 1)&&flag_verbose
                    fprintf(' The OPT.FLAG_UPDATE is off, jobs are not going to be checked for updates.\n');
                end
            end
            if flag_same && strcmp(job_status_old{num_j},'none')
                if flag_verbose>1
                    fprintf(' %s%s(new)\n',name_job,repmat(' ',[1 lmax-length(name_job)]))
                end
                flag_restart_job = true;
            elseif flag_same && (strcmp(job_status_old{num_j},'submitted')||strcmp(job_status_old{num_j},'running'))
                if flag_verbose>1
                    fprintf(' %s%s(submission failed)\n',name_job,repmat(' ',[1 lmax-length(name_job)]))
                end
                flag_restart_job = true;
            end
            flag_restart_job = flag_restart_job||~flag_same;
        else
            flag_restart_job = true;
            if flag_verbose>1
                fprintf(' %s%s(new)\n',name_job,repmat(' ',[1 lmax-length(name_job)]))
            end
        end

        %% Check if the user did not force a restart on that job
        switch opt.type_restart
            case 'substring'
                flag_force = psom_find_str_cell(name_job,opt.restart);
            case 'exact'
                flag_force = ismember(name_job,opt.restart);
            otherwise
                error('%s is an unknow method to restart job (OPT.TYPE_RESTART)',opt.type_restart)
        end
        if flag_force&&~flag_restart(num_j)
            if flag_verbose
                fprintf(' %s%s(manual restart)\n',name_job,repmat(' ',[1 lmax-length(name_job)]))
            end
            flag_restart_job = true;
        end
    end

    %% If the job is restarted, iteratively restart all its children
    if flag_restart_job
        mask_new_restart = false(size(flag_restart));
        mask_new_restart(num_j) = true;
        flag_restart(num_j) = true;
        while any(mask_new_restart)
            %% restart the children of the restarted jobs that were not
            %% already planned to be restarted
            mask_new_restart2 = sub_find_children(mask_new_restart,graph_deps);
            mask_new_restart2 = mask_new_restart2 & ~flag_restart & ~flag_start;
            list_add = find(mask_new_restart2&~mask_new_restart);
            if ~isempty(list_add)
                for num_a = 1:length(list_add)
                    if flag_verbose>1
                        fprintf(' %s%s(child of a restarted job)\n',list_jobs{list_add(num_a)},repmat(' ',[1 lmax-length(list_jobs{list_add(num_a)})]))
                    end
                end
            end
            %% Iterate the process on the children that were newly assigned
            %% a restart flag
            flag_restart(mask_new_restart2) = true;
            mask_new_restart = mask_new_restart2&~mask_new_restart;
        end
    end
end

%% iteratively restart jobs that produce necessary and missing input files
flag_miss = true;
while flag_miss
    list_idle = list_jobs(~flag_restart);
    ind_idle = find(~flag_restart);

    % Inputs of the restarted jobs that are not generated by a restarted
    % job and cannot be found on the disk
    files_re_in = unique(psom_files2cell(rmfield(files_in,list_idle)));
    files_re_out = psom_files2cell(rmfield(files_out,list_idle));
    files_nec = files_re_in(~ismember(files_re_in,files_re_out));
    mask_nec = true(size(files_nec));
    for num_n = 1:length(files_nec)
        mask_nec(num_n) = ~psom_exist(files_nec{num_n});
    end
    files_nec = files_nec(mask_nec);
    if isempty(files_nec)
        mask_new_restart = false;
        flag_miss = false;
        continue
    end

    % Restart the idle jobs that generate some of the missing files
    mask_new_restart = false(size(flag_restart));
    for num_j = 1:length(list_idle)
        if any(ismember(files_out.(list_idle{num_j}),files_nec))
            mask_new_restart(ind_idle(num_j)) = true;
            flag_restart(ind_idle(num_j)) = true;
        end
    end
    if (flag_verbose>1) && any(mask_new_restart)
        list_new = find(mask_new_restart);
        for num_n = 1:length(list_new)
            fprintf(' %s%s(produce necessary files)\n',list_jobs{list_new(num_n)},repmat(' ',[1 lmax-length(list_jobs{list_new(num_n)})]))
        end
    end
    flag_miss = any(mask_new_restart);

    while any(mask_new_restart)
        %% restart the children of the restarted jobs that were not
        %% already planned to be restarted
        mask_new_restart2 = sub_find_children(mask_new_restart,graph_deps);
        mask_new_restart2 = mask_new_restart2 & ~flag_restart & ~flag_start;
        list_add = find(mask_new_restart2&~mask_new_restart);
        if ~isempty(list_add)
            for num_a = 1:length(list_add)
                if flag_verbose>1
                    fprintf(' %s%s(child of a restarted job)\n',list_jobs{list_add(num_a)},repmat(' ',[1 lmax-length(list_jobs{list_add(num_a)})]))
                end
            end
        end
        %% Iterate the process on the children that were newly assigned
        %% a restart flag
        flag_restart(mask_new_restart2) = true;
        mask_new_restart = mask_new_restart2&~mask_new_restart;
    end
end

%% Initialize the status :
%% Everything goes to 'none', except jobs that have a 'finished' status and
%% no restart tag
job_status = repmat({'none'},[nb_jobs 1]);
if flag_old_pipeline
    if flag_verbose>1
        fprintf('Initializing the new status (keeping finished jobs "as is")...\n');
    end
end
flag_finished = ismember(job_status_old,'finished');
flag_finished = flag_finished(:)';
flag_finished = flag_finished & ~flag_restart;
job_status(flag_finished) = repmat({'finished'},[sum(flag_finished) 1]);

if ~any(ismember(job_status,'none'))
    fprintf('All jobs are already completed. Bye for now !\n')
    flag_start = false;
    return
else
    flag_start = true;
    if flag_verbose
        if any(flag_finished)
            fprintf(' I found %i job(s) to do, and %i job(s) already completed.\n',sum(ismember(job_status,'none')),sum(ismember(job_status,'finished')));
        else
            fprintf(' I found %i job(s) to do.\n',sum(ismember(job_status,'none')));
        end
    end
end

%% Check if all the files necessary to complete each job of the pipeline
%% can be found
if flag_verbose>1
    fprintf('Checking if all the files necessary to complete the pipeline can be found ...\n');
end
all_in = psom_files2cell(rmfield(files_in,list_jobs(flag_finished)));
all_out = psom_files2cell(files_out);
files_necessary = all_in(~ismember(all_in,all_out));
mask_missing = false(length(files_necessary),1);
flag_OK = true;
files_necessary = unique(files_necessary);
for num_f = 1:length(files_necessary)
    if ~psom_exist(files_necessary{num_f})
        if flag_OK
            fprintf('The following file(s) are missing to process the pipeline : %s',files_necessary{num_f});
        else
            fprintf(' , %s',files_necessary{num_f});
        end
        flag_OK = false;
    end
end
if ~flag_OK
    fprintf('\n!!! The input files of some jobs were found missing.\n');
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Stage 4: Save the pipeline description in the logs folder %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if flag_pause||~flag_OK
    fprintf('Press CTRL-C now to cancel or press any key to continue.\n');
    pause
end
if flag_verbose>1
    fprintf('\nSaving the pipeline description in the logs folder ...\n');
end

%% Create logs folder
if ~exist(path_logs,'dir')
    if flag_verbose>1
        fprintf(' Creating the logs folder ...\n');
    end
    [succ,messg,messgid] = psom_mkdir(path_logs);
    if succ == 0
        warning(messgid,messg);
    end
end

%% Creating output folders
if flag_verbose>1
    fprintf(' Creating output folders ...\n')
end
path_all = psom_files2cell(files_out);
path_all = cellfun (@fileparts,path_all,'UniformOutput',false);
path_all = unique(path_all);
for num_p = 1:length(path_all)
    path_f = path_all{num_p};
    [succ,messg,messgid] = psom_mkdir(path_f);
    if succ == 0
        warning(messgid,messg);
    end
end

%% Removing old outputs
if flag_verbose>1
    fprintf(' Removing old outputs ...\n')
end
for num_j = 1:length(list_jobs)
    job_name = list_jobs{num_j};
    list_files = unique(files_out.(job_name));
    % The outputs of finished jobs, and of jobs flagged with a true
    % ISPIPELINE field, are left untouched
    if ~flag_finished(num_j)&&(~isfield(pipeline.(job_name),'ispipeline')||~pipeline.(job_name).ispipeline)
        for num_f = 1:length(list_files)
            if psom_exist(list_files{num_f})
                psom_clean(list_files{num_f},struct('flag_verbose',false));
            end
        end
    end
end

%% Create a .lock file
if flag_verbose>1
    fprintf(' Creating a ''lock'' file %s ...\n',file_pipe_running);
end
str_now = datestr(clock);
save(file_pipe_running,'str_now');

%% Save the jobs
if flag_verbose>1
    fprintf(' Saving the individual ''jobs'' file %s ...\n',file_jobs);
end
save(file_jobs,'-struct','pipeline');

%% Save the dependencies
if flag_verbose>1
    fprintf(' Saving the pipeline dependencies in %s...\n',file_pipeline);
end
if flag_old_pipeline
    try
        load(file_pipeline,'history');
        history = char(history,[datestr(now) ' ' gb_psom_user ' on a ' gb_psom_OS ' system used PSOM v' gb_psom_version '>>>> The pipeline was restarted']);
    catch
        history = [datestr(now) ' ' gb_psom_user ' on a ' gb_psom_OS ' system used PSOM v' gb_psom_version '>>>> Created a pipeline !'];
    end
else
    history = [datestr(now) ' ' gb_psom_user ' on a ' gb_psom_OS ' system used PSOM v' gb_psom_version '>>>> Created a pipeline !'];
end
path_work = path_search;
save(file_pipeline,'history','graph_deps','list_jobs','files_in','files_out','path_work')

%% Save the status
if flag_verbose>1
    fprintf(' Saving the ''status'' file %s ...\n',file_status);
end
flag_failed = ismember(job_status,'failed');
all_status = struct();
for num_j = 1:nb_jobs
    name_job = list_jobs{num_j};
    all_status.(name_job) = job_status{num_j};
end
save(file_status,'-struct','all_status');
copyfile(file_status,file_status_backup,'f');
save(file_status_init,'-struct','all_status');

%% Save the logs
if flag_verbose>1
    fprintf(' Saving the ''logs'' file %s ...\n',file_logs);
end
% Old logs are kept for the jobs that are not going to be processed again,
% and every other job starts with an empty log.
all_logs = struct();
for num_j = 1:nb_jobs
    name_job = list_jobs{num_j};
    if (flag_finished(num_j)||flag_failed(num_j))&&isfield(all_logs_old,name_job)
        all_logs.(name_job) = all_logs_old.(name_job);
    else
        all_logs.(name_job) = '';
    end
end
save(file_logs,'-struct','all_logs');
copyfile(file_logs,file_logs_backup,'f');

%% Save the profile
if flag_verbose>1
    fprintf(' Saving the ''profile'' file %s ...\n',file_profile);
end
profile = struct();
for num_j = 1:nb_jobs
    name_job = list_jobs{num_j};
    if (flag_finished(num_j)||flag_failed(num_j))&&isfield(profile_old,name_job)
        profile.(name_job) = profile_old.(name_job);
    else
        profile.(name_job) = '';
    end
end
save(file_profile,'-struct','profile');
copyfile(file_profile,file_profile_backup,'f');

%% Clean up the log folders from old tag and log files
if flag_verbose>1
    fprintf(' Cleaning up old tags and logs from the logs folders ...\n')
end
if psom_exist(file_news_feed)
    psom_clean(file_news_feed,struct('flag_verbose',false));
end
psom_clean_logs(path_logs);
if exist([path_logs 'tmp'],'dir')
    [status,msg] = psom_clean([path_logs 'tmp'],struct('flag_verbose',false));
    if status
        warning('Could not remove the temporary folder %s. Check for permissions.',[path_logs 'tmp']);
    end
end
if exist([path_logs 'worker'],'dir')
    [status,msg] = psom_clean([path_logs 'worker'],struct('flag_verbose',false));
    if status
        warning('Could not remove the folder %s. Check for permissions.',[path_logs 'worker']);
    end
end

%% Done !
if flag_verbose>1
    fprintf('\nThe pipeline has been successfully initialized !\n')
end
%%%%%%%%%%%%%%%%%%
%% Subfunctions %%
%%%%%%%%%%%%%%%%%%
%% Read a text file
function str_txt = sub_read_txt(file_name)
% Read the whole content of a file as a row of characters.
%
% FILE_NAME (string) the name of the file to read.
% STR_TXT (string) the content of the file, read byte by byte as chars.
%    An empty string is returned if PSOM_EXIST does not find the file.
%
% An error is issued if the file is found but cannot be opened.
str_txt = '';
if psom_exist(file_name)
    [hf,msg] = fopen(file_name,'r');
    if hf < 0
        % Fail here with the reason given by FOPEN, rather than letting
        % FREAD choke on an invalid file identifier
        error('Could not open the file %s : %s',file_name,msg);
    end
    str_txt = fread(hf,Inf,'uint8=>char')';
    fclose(hf);
end
%% find all the jobs that depend on a set of jobs
function mask_child = sub_find_children(mask,graph_deps)
% Find the jobs that are direct children of a set of jobs.
%
% MASK (boolean vector) selects the rows of GRAPH_DEPS (the parent jobs).
% GRAPH_DEPS (matrix) the dependency graph, indexed as GRAPH_DEPS(MASK,:).
% MASK_CHILD (vector) the column-wise maximum of the selected rows, i.e.
%    non-zero for every column with a non-zero entry in a selected row.
%    All false (same size as MASK) if no job is selected.

% Default answer: no children
mask_child = false(size(mask));
is_selection = max(double(mask))>0;
if is_selection
    mask_child = max(graph_deps(mask,:),[],1);
end
%% Test if the inputs of some jobs are missing, and set restart
%% flags on the jobs that can produce those inputs.
function flag_parent = sub_restart_parents(flag_restart_new,flag_restart,flag_start,list_jobs,files_in,files_out,graph_deps,flag_verbose)
% Flag the parents that can re-generate the missing inputs of restarted jobs.
%
% FLAG_RESTART_NEW (boolean vector) the jobs that were newly restarted.
% FLAG_RESTART, FLAG_START (boolean vectors) jobs that are already
%    scheduled, and are therefore not considered as candidate parents.
% LIST_JOBS (cell of strings) the job names, in the order of the masks.
% FILES_IN, FILES_OUT (structures) for each job name, a cell of strings
%    with the input (resp. output) files of the job.
% GRAPH_DEPS (matrix) GRAPH_DEPS(I,J) is non-zero if job I is a parent
%    of job J.
% FLAG_VERBOSE is currently unused.
%
% FLAG_PARENT (boolean vector, same size as FLAG_RESTART_NEW) true for
%    every candidate parent that lists at least one missing input of a
%    newly restarted job in its outputs.

% Force a row vector, otherwise the FOR loop would run once on the whole list
list_restart = find(flag_restart_new);
list_restart = list_restart(:)';
flag_parent = false(size(flag_restart_new));

for num_j = list_restart % loop over jobs that need to be restarted
    name_job = list_jobs{num_j};

    % Test which input files of the job can be found
    flag_files = false([length(files_in.(name_job)) 1]);
    for num_f = 1:length(files_in.(name_job))
        flag_files(num_f) = psom_exist(files_in.(name_job){num_f});
    end
    if ~any(~flag_files)
        continue
    end
    target_files = files_in.(name_job)(~flag_files);

    % Pick up parents that are not already scheduled to be restarted
    list_num_parent = find(graph_deps(:,num_j)&~flag_restart_new(:)&~flag_restart(:)&~flag_start(:));

    % Each parent is tested (and flagged) on its own: any parent that
    % generates a missing input needs to be restarted.
    for num_l = list_num_parent(:)'
        name_job2 = list_jobs{num_l};
        if any(ismember(files_out.(name_job2),target_files))
            flag_parent(num_l) = true;
        end
    end
end
%% Load a previous version of the pipeline. If the file is corrupted, load the backup and copy over the original file_jobs
function pipeline_str = sub_load_old(file_name)
% Load the variables of a .mat file as a structure, with a backup fallback.
%
% FILE_NAME (string) the file to load.
% PIPELINE_STR (structure) the output of LOAD on FILE_NAME. If PSOM_EXIST
%    does not find the file, an empty structure is returned.
%
% If loading FILE_NAME fails, a warning is issued, the file
% <path>/<name>_backup<ext> is loaded instead, and then copied over
% FILE_NAME. An error on the backup itself is not caught.

% Default: nothing to load
pipeline_str = struct();
if psom_exist(file_name)
    try
        pipeline_str = load(file_name);
    catch
        % Build the name of the backup next to the original file
        [dir_f,base_f,suffix_f] = fileparts(file_name);
        backup_name = [dir_f filesep base_f '_backup' suffix_f];
        warning('There was something wrong when loading the file %s, I''ll try loading the backup instead',file_name)
        pipeline_str = load(backup_name);
        copyfile(backup_name,file_name,'f');
    end
end