# Origin: forked from chaolinzhanglab/czplib
# File:   SGE.pm (167 lines, 112 loc, 3 KB)
# NOTE: repository-viewer navigation text and the line-number gutter that
#       preceded the module source were extraction residue, not Perl code.
#
#===============================================================================
#
# FILE: SGE.pm
#
# DESCRIPTION: interface of SGE
# BUGS: ---
# NOTES: The package is spun off from the Common.pm
# AUTHOR: Chaolin Zhang (cz), [email protected]
# COMPANY: Rockefeller University
# VERSION: 1.0
# CREATED: 12/17/10
# REVISION: ---
#===============================================================================
package SGE;

require Exporter;

our $VERSION = 1.01;

# Declared with "our" so that the package header is valid under
# "use strict" regardless of where the pragma is enabled in the file.
our @ISA = qw(Exporter);

# Subroutines exported into the caller's namespace by default.
our @EXPORT = qw(
    checkSGEJobStatus
    waitUntilSGEJobsDone
);
=head1 NAME

SGE - subroutines to handle Sun Grid Engine (SGE)

=cut
use strict;
use warnings;
use Data::Dumper;
use Carp;
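=head2 waitUntilSGEJobsDone

Wait until none of the specified jobs is reported by qstat any more.

 waitUntilSGEJobsDone ($jobIds, $verbose=0, $user="");

 $jobIds : reference to an array of job ids to monitor
 $verbose: if true, print a progress message once a minute
 $user   : if specified, passed on to checkSGEJobStatus so that only
           jobs of this user are considered

Returns 1 when no monitored job is left; croaks if a monitored job is
found in a state other than r, t or qw.

=cut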
# Poll the queue (through checkSGEJobStatus) every 10 seconds until none of
# the given jobs is listed any more.
#
# Arguments:
#   $jobIds  - array reference of job ids to monitor
#   $verbose - if true, print a progress line to STDOUT once a minute
#   $user    - optional user name, handed on to checkSGEJobStatus
#
# Returns 1 once no monitored job/task remains.
# Croaks when a monitored job is in any state other than 'r', 't' or 'qw'.
sub waitUntilSGEJobsDone
{
    my ($jobIds, $verbose, $user) = @_;

    my $total = @$jobIds;
    my $secondSlept = 0;

    while (1)
    {
        my $status = checkSGEJobStatus ($jobIds, $user);

        # checkSGEJobStatus returns an empty hash when qstat printed nothing
        return 1 unless keys %$status > 0;

        my $summary = $status->{'summary'};
        my $njobs = 0;
        foreach my $stat (keys %$summary)
        {
            # only running, transferring and queued jobs are considered healthy
            Carp::croak "detect failed jobs: ", Dumper ($status), "\n"
                unless $stat eq 'r' || $stat eq 't' || $stat eq 'qw';
            $njobs += $summary->{$stat};
        }
        return 1 if $njobs == 0;

        #bug fix. different programs/runs might interfere with each other
        #04/06/2014 Chaolin Zhang
        #TODO: check whether each job finished correctly by running qacct

        # Report once a minute. The timestamp is taken only when a line is
        # actually printed, so silent polling does not fork a "date" process
        # every 10 seconds.
        if ($verbose && $secondSlept % 60 == 0)
        {
            my $date = `date`;
            chomp $date;
            print "$njobs tasks of $total jobs are not finished yet at $date ...\n";
        }

        sleep (10); #10 seconds
        $secondSlept += 10;
    }
}
=head2 checkSGEJobStatus

 my $status = checkSGEJobStatus ($jobIds, $user);

 $status->{$jobId}{$taskId}      : the status of a specific task of a job;
                                   $taskId is assigned for array jobs, and is otherwise 1
 $status->{'summary'}->{$status} : the number of jobs/tasks with status $status

=cut
# Run qstat and report the state of the requested jobs.
#
# Arguments:
#   $jobIds - array reference of job ids to look for (must not be empty)
#   $user   - optional user name; when given, qstat is run with "-u $user"
#             and rows owned by other users are ignored
#
# Returns a hash reference:
#   {}                            if qstat printed nothing at all
#   $status->{$jobId}{$taskId}    state string of that job/task; $taskId is
#                                 the ja-task-ID column for array jobs and
#                                 1 otherwise
#   $status->{'summary'}{$state}  number of tasks found in $state
#
# Croaks if no job id is given, or if qstat cannot be started or exits with
# a non-zero status (a failed query must not look like "no jobs left").
sub checkSGEJobStatus
{
    my ($jobIds, $user) = @_;

    Carp::croak "no job id specified in:", Dumper ($jobIds), "\n" unless @$jobIds > 0;

    my %jobHash = map {$_=> 1} @$jobIds;

    # List-form pipe open: no shell is involved, so the content of $user
    # cannot be interpreted as shell syntax.
    my @cmd = ('qstat');
    push @cmd, '-u', $user if $user;

    open (my $fh, '-|', @cmd)
        or Carp::croak "failed to run '@cmd': $!\n";
    my @qstat = <$fh>;
    close ($fh)
        or Carp::croak "command '@cmd' failed: ", ($! ? $! : "exit status " . ($? >> 8)), "\n";

    return {} unless @qstat > 0;

    #remove title rows
    splice (@qstat, 0, 2);

    my %jobStatus;
    my %summary;
    foreach my $line (@qstat)
    {
        chomp $line;
        $line =~ s/^\s+//;
        my @cols = split (/\s+/, $line);

        # blank or truncated rows carry no job id / user / state
        next unless @cols >= 5;

        my ($id, $u, $status) = @cols[0, 3, 4];

        if ($user)
        {
            # qstat prints at most 12 characters of the user name, so a
            # 12-character prefix of the requested name counts as a match
            next unless $u eq $user
                || (length ($u) == 12 && index ($user, $u) == 0);
        }
        next unless exists $jobHash {$id};

        # Columns after "submit/start at" (date and time) are
        #   [queue] slots [ja-task-ID]
        # where queue is empty for pending jobs and ja-task-ID is only
        # present for array jobs.
        my @rest = @cols > 7 ? @cols[7 .. $#cols] : ();
        shift @rest if @rest && $rest[0] !~ /^\d+$/; # drop the queue instance name
        my $taskId = @rest > 1 ? $rest[-1] : undef;

        my $ntask = $taskId ? _countSGETasks ($taskId) : 1;
        $taskId = 1 unless $taskId;

        $summary{$status} += $ntask;
        $jobStatus{$id}{$taskId} = $status;
    }

    $jobStatus{'summary'} = \%summary;
    return \%jobStatus;
}

# Count the tasks described by a ja-task-ID value such as "7", "1-10:1",
# "2-10:2" or a comma separated list of those. Always returns at least 1.
sub _countSGETasks
{
    my ($taskSpec) = @_;

    my $n = 0;
    foreach my $part (split (/,/, $taskSpec))
    {
        if ($part =~ /^(\d+)-(\d+)(?::(\d+))?$/)
        {
            my ($first, $last, $step) = ($1, $2, $3);
            $step = 1 unless $step;
            $n += $last >= $first ? int (($last - $first) / $step) + 1 : 1;
        }
        else
        {
            $n++;
        }
    }
    return $n > 0 ? $n : 1;
}
1;