-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpisces_Tumour_parallel.wdl
442 lines (366 loc) · 14 KB
/
pisces_Tumour_parallel.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
## NOTE 29/12/22: To run pisces in paired tumour mode, both normal and the tumour needs to be run individually. The result of both of these are to be merged together
## A module needs to be created in this pipeline to merge the two together
version 1.0
workflow pisces_workflow {
input {
File refFasta
File refFastaIdx
File refFastaDict
File? pisces_reference
File tumorBam
File tumorBai
File? normalBam
File? normalBai
String pairName
File? InputtargetInterval
String runMode
Array[File]? bed_list_in
Array[Int]? scatterIndices_in
String gatk_docker
}
Boolean buildIndices = if defined(bed_list_in) then false else true
File targetIntervals = select_first([InputtargetInterval, "NULL"])
if (buildIndices){
call CallSomaticMutations_Prepare_Task {
input:
refFasta=refFasta,
refFastaIdx=refFastaIdx,
refFastaDict=refFastaDict,
targetIntervals=targetIntervals,
gatk_docker=gatk_docker # takes padded interval file (10bp on each side)
}
}
Array[File] bed_list=select_first([bed_list_in, CallSomaticMutations_Prepare_Task.bed_list])
Array[Int] scatterIndices=select_first([scatterIndices_in, CallSomaticMutations_Prepare_Task.scatterIndices])
scatter (idx in scatterIndices){
call runpiscesSomatic {
input:
refFasta=refFasta,
refFastaFai=refFastaIdx,
refFastaDict=refFastaDict,
tumorBam=tumorBam,
normalBam=normalBam,
normalBai=normalBai,
tumorBai=tumorBai,
pairName=pairName,
pisces_reference=pisces_reference,
interval=bed_list[idx],
runMode=runMode
}
}
call UpdateHeaders as TumHead {
input:
input_vcfs = runpiscesSomatic.tumor_unique_variants,
ref_dict=refFastaDict,
gatk_docker = gatk_docker,
caller = "Pisces_Tum"
}
call CombineVariants as TumComb {
input:
input_header=TumHead.head_vcf,
ref_fasta = refFasta,
ref_fai = refFastaIdx,
ref_dict = refFastaDict,
gatk_docker = gatk_docker,
sample_name = pairName,
caller="Pisces"
}
if (runMode=="Paired"){
Array[File] normalSamples=select_first([runpiscesSomatic.normal_variants_same_site, "NULL"])
call UpdateHeaders as NormHead {
input:
input_vcfs = normalSamples,
ref_dict=refFastaDict,
gatk_docker = gatk_docker,
caller = "Pisces_Tum"
}
call CombineVariants as NormComb {
input:
input_header=NormHead.head_vcf,
ref_fasta = refFasta,
ref_fai = refFastaIdx,
ref_dict = refFastaDict,
gatk_docker = gatk_docker,
sample_name = pairName,
caller="Pisces"
}
call MergeFinal {
input:
tumor = TumComb.merged_vcf,
tumorTbi= TumComb.merged_vcf_tbi,
normal = NormComb.merged_vcf,
normalTbi = NormComb.merged_vcf_tbi,
pairName=pairName
}
}
output {
File tumor_variants=select_first([MergeFinal.vcf, TumComb.merged_vcf])
File tumor_variants_tbi=select_first([MergeFinal.vcf_tbi, TumComb.merged_vcf_tbi])
}
}
task runpiscesSomatic {
input {
File? refFasta
File? refFastaFai
File? refFastaDict
File? pisces_reference
File? tumorBam
File? tumorBai
File? normalBam
File? normalBai
String pairName
File? interval
Boolean MNVcall = true
Boolean scatterchr = true
Int? nthreads =2
String mem =8
Int preemptible =3
String saveDict = "0"
String runMode
String runScylla = "0"
}
String normP = select_first([ normalBam ,""])
String tumP = select_first([ tumorBam ,""])
String tumPrefix=if (tumP!="") then basename(sub(tumP,"\\.bam$", "")) else ""
String normPrefix= if (normP!="") then basename(sub(normP,"\\.bam$", "")) else ""
String buildRef = if defined(pisces_reference) then "0" else "1"
String runTum = if (runMode!="Germline") then "1" else "0"
String runGerm = if (runMode!="TumOnly" || defined(normalBam)) then "1" else "0"
String matchPair = if (runMode=="Paired" || ( defined(normalBam) && defined(tumorBam))) then "1" else "0"
String tOnly = if (runMode=="TumOnly" || ( !defined(normalBam) && defined(tumorBam))) then "1" else "0"
Int tumBamSize = select_first([ceil(size(tumorBam, "G")+size(tumorBai, "G")), 0])
Int normBamSize = select_first([ceil(size(normalBam, "G")+size(normalBai, "G")), 0])
Int disk_size=2*(tumBamSize+ normBamSize+3)
command <<<
set -e
mkdir somatic_~{pairName}
sname=~{pisces_reference}
## Build the reference libraries here if they do not exist
if [ ~{buildRef} -eq "1" ];
then
echo 'create the reference'
sname=`basename ~{refFasta}`
sname=$(echo $sname| cut -f 1 -d '.')
echo $sname
mkdir $sname
cp ~{refFasta} $sname
cp ~{refFastaFai} $sname
cp ~{refFastaDict} $sname
dotnet /app/CreateGenomeSizeFile_5.2.10.49/CreateGenomeSizeFile.dll -g $sname -s "Homo sapien $sname" -o $sname
fi
if [ ~{buildRef} -eq "0" ];
then
## to unpack the tar file
tar xvzf ~{pisces_reference}
## use this when using a reference file
## cp -r ~{pisces_reference} .
sname=`basename ~{pisces_reference}`
sname=$(echo $sname| cut -f 1 -d '.')
fi
## Run the tumor based calling of variants
if [ ~{runTum} -eq "1" ];
then
## run the variant calling: this works
dotnet /app/Pisces_5.2.10.49/Pisces.dll -g $sname -b ~{tumorBam} -CallMNVs ~{MNVcall} -gVCF false --threadbychr ~{scatterchr} --collapse true -c 5 \
--filterduplicates true --maxthreads ~{nthreads} -o somatic_~{pairName} ~{"-i " + interval}
## VSQR
dotnet /app/VariantQualityRecalibration_5.2.10.49/VariantQualityRecalibration.dll --vcf somatic_~{pairName}/~{tumPrefix}.vcf --out somatic_~{pairName}
## check that this is for
if [[ -f somatic_~{pairName}/~{tumPrefix}.vcf.recal ]];
then
cp somatic_~{pairName}/~{tumPrefix}.vcf.recal somatic_~{pairName}/~{tumPrefix}.recal.vcf
elif [[ -f somatic_~{pairName}/~{tumPrefix}.vcf ]];
then
cp somatic_~{pairName}/~{tumPrefix}.vcf somatic_~{pairName}/~{tumPrefix}.recal.vcf
fi
fi
## Run germline based calling
if [ ~{runGerm} -eq "1" ];
then
dotnet /app/Pisces_5.2.10.49/Pisces.dll -g $sname -b ~{normalBam} -CallMNVs ~{MNVcall} -gVCF false --threadbychr ~{scatterchr} --collapse true -ploidy diploid \
--filterduplicates true --maxthreads ~{nthreads} -o somatic_~{pairName} ~{"-i " + interval}
dotnet /app/VariantQualityRecalibration_5.2.10.49/VariantQualityRecalibration.dll --vcf somatic_~{pairName}/~{normPrefix}.vcf --out somatic_~{pairName}
if [[ -f somatic_~{pairName}/~{normPrefix}.vcf.recal ]];
then
cp somatic_~{pairName}/~{normPrefix}.vcf.recal somatic_~{pairName}/~{normPrefix}.recal.vcf
elif [[ -f somatic_~{pairName}/~{normPrefix}.vcf ]];
then
cp somatic_~{pairName}/~{normPrefix}.vcf somatic_~{pairName}/~{normPrefix}.recal.vcf
fi
## dotnet /app/Scylla_5.2.10.49/Scylla.dll -g $sname --vcf somatic_~{pairName}/~{normPrefix}.recal.vcf --bam ~{normalBam}
fi
if [ ~{matchPair} -eq "1" ];
then
dotnet /app/VennVcf_5.2.10.49/VennVcf.dll -if somatic_~{pairName}/~{tumPrefix}.recal.vcf,somatic_~{pairName}/~{normPrefix}.recal.vcf -o venn
tar -zvcf ~{pairName}_venn_pisces.tar.gz venn
sampID="venn/~{tumPrefix}.recal_not_~{normPrefix}.recal.vcf"
awk '! /\#/' $sampID | awk '{if(length($4) > length($5)) print $1"\t"($2-1)"\t"($2+length($4)-1); else print $1"\t"($2-1)"\t"($2+length($5)-1)}' > output.bed
dotnet /app/Pisces_5.2.10.49/Pisces.dll -g $sname -b ~{normalBam} -CallMNVs ~{MNVcall} -gVCF true --threadbychr ~{scatterchr} --collapse true -ploidy diploid --filterduplicates true --maxthreads ~{nthreads} -o variant2_~{pairName} -i output.bed
dotnet /app/VariantQualityRecalibration_5.2.10.49/VariantQualityRecalibration.dll --vcf variant2_~{pairName}/~{normPrefix}.genome.vcf --out variant2_~{pairName}
if [[ -f variant2_~{pairName}/~{normPrefix}.genome.vcf.recal ]];
then
mv variant2_~{pairName}/~{normPrefix}.genome.vcf.recal ~{normPrefix}.genome.recal.vcf
elif [[ -f variant2_~{pairName}/~{normPrefix}.genome.vcf ]];
then
cp variant2_~{pairName}/~{normPrefix}.genome.vcf ~{normPrefix}.genome.recal.vcf
fi
mv venn/~{tumPrefix}.recal_not_~{normPrefix}.recal.vcf ~{tumPrefix}.somatic.unique.recal.vcf
elif [ ~{tOnly} -eq "1" ];
then
mv somatic_~{pairName}/~{tumPrefix}.recal.vcf ~{tumPrefix}.somatic.unique.recal.vcf
fi;
if [ ~{runScylla} -eq "1" ];
then
## perform phasing here
dotnet /app/Scylla_5.2.10.49/Scylla.dll -g $sname --vcf ~{tumPrefix}.somatic.unique.recal.vcf --bam ~{tumorBam}
fi
>>>
output {
File tumor_unique_variants= select_first(["~{tumPrefix}.somatic.unique.recal.vcf", "somatic_~{pairName}/~{tumPrefix}.recal.vcf" ])
File? normal_variants_same_site = "~{normPrefix}.genome.recal.vcf"
}
runtime {
docker: "trinhanne/pisces:5.2.10"
cpu: nthreads
preemptible: preemptible
memory: "${mem} GB"
disks: "local-disk ${disk_size} HDD"
}
}
task CombineVariants {
input {
Array[File] input_header
String caller
File ref_fasta
File ref_fai
File ref_dict
# runtime
String gatk_docker
String sample_name
Int mem_gb= 6
}
Int diskGB = 4*ceil(size(ref_fasta, "GB")+size(input_header, "GB"))
command <<<
gatk GatherVcfs -I ~{sep=' -I ' input_header} -R ~{ref_fasta} -O ~{sample_name}.~{caller}.vcf.gz
>>>
runtime {
docker: gatk_docker
memory: "~{mem_gb} GB"
disk_space: "local-disk ~{diskGB} HDD"
}
output {
File merged_vcf = "~{sample_name}.~{caller}.vcf.gz"
File merged_vcf_tbi = "~{sample_name}.~{caller}.vcf.gz.tbi"
}
}
task UpdateHeaders {
input {
Array[File] input_vcfs
File ref_dict
# runtime
String gatk_docker
String caller
Int mem_gb=6
}
Int diskGB = 4*ceil(size(ref_dict, "GB"))
command <<<
count=0
for i in ~{sep=' ' input_vcfs};
do
newstr=`basename $i`
gatk UpdateVCFSequenceDictionary \
-V $i \
--source-dictionary ~{ref_dict} \
--output $newstr.$count.reheader.~{caller}.vcf \
--replace true
count+=1
done
>>>
runtime {
docker: gatk_docker
memory: "~{mem_gb} GB"
disk_space: "local-disk ~{diskGB} HDD"
}
output {
Array[File] head_vcf = glob("*.reheader.~{caller}.vcf")
}
}
task CallSomaticMutations_Prepare_Task {
input {
# TASK INPUT PARAMS
File targetIntervals
File refFasta
File refFastaIdx
File refFastaDict
String nWay = "10"
# RUNTIME INPUT PARAMS
String preemptible = "1"
String diskGB_boot = "15"
String gatk_docker
}
parameter_meta {
nWay : "Number of ways to scatter (MuTect1 and MuTect2)"
targetIntervals : "a list of genomic intervals over which MuTect1 will operate"
refFasta : "FASTA file for the appropriate genome build (Reference sequence file)"
refFastaIdx : "FASTA file index for the reference genome"
refFastaDict : "FASTA file dictionary for the reference genome"
}
command {
set -euxo pipefail
seq 0 $((~{nWay}-1)) > indices.dat
# create a list of intervalfiles
mkdir intervalfolder
gatk SplitIntervals -R ~{refFasta} -L ~{targetIntervals} --scatter-count ~{nWay} --subdivision-mode BALANCING_WITHOUT_INTERVAL_SUBDIVISION -O intervalfolder
cp intervalfolder/*.interval_list .
## make the list of bed files
mkdir bedfolder
for file in *.interval_list;
do
gatk IntervalListToBed -I $file -O bedfolder/$file.bed
##small hack to subtract 1 from the bed file
done
cp bedfolder/*.bed .
}
runtime {
docker : gatk_docker
bootDiskSizeGb : diskGB_boot
preemptible : preemptible
memory : "1 GB"
}
output {
Array[File] interval_files=glob("*.interval_list")
Array[Int] scatterIndices=read_lines("indices.dat")
Array[File] bed_list=glob("*.bed")
}
}
task MergeFinal {
input {
File tumor
File tumorTbi
File normal
File normalTbi
String pairName
}
command <<<
# list names
PISCES_MERGE="~{pairName}.Pisces.call_stats.tum.norm.vcf.gz"
PISCES_Filt="~{pairName}.Pisces.call_stats.tum.norm.filt.vcf"
# merge the tumor normal in pisces together
bcftools merge ~{tumor} ~{normal} -O vcf -o $PISCES_MERGE
tabix -p vcf $PISCES_MERGE
# not sure whether to run this - needs to find an intersect?
## bcftools isec -p test -n=2 $PISCES_MERGE ~{tumor}
##awk '{gsub(/SB/, "SBP")}1' test/0000.vcf > test/0000.mod.vcf
##mv test/0000.mod.vcf $PISCES_Filt
##bgzip $PISCES_MERGEu
##tabix -p vcf $PISCES_MERGEu.gz
>>>
runtime {
docker : "trinhanne/sambcfhts:v1.13.3"
memory : "1 GB"
}
output {
File vcf = "~{pairName}.Pisces.call_stats.tum.norm.vcf.gz"
File vcf_tbi = "~{pairName}.Pisces.call_stats.tum.norm.vcf.gz.tbi"
}
}