-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathwgsmetrics.wdl
139 lines (121 loc) · 3.11 KB
/
wgsmetrics.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
## CollectWgsmetrics By interval or WGS
##
## This workflow runs CollectWgsMetrics either once over the whole genome or,
## in scatter mode, once per interval file. The Intervals text file must list
## the paths of all interval files to be run in parallel, one path per line.
##
## Required:
## - bam and bam index
## - sampleName
## - reference genome and index
## - Intervals: txt file containing list of regions to assess in scatter mode.
## Default: gs://kccg-cb-annotation-files/BedIntervals/hg38/wgs_contigs/wgs_1_22_chrom_list
##
## Optional:
## - ByChromosome: whether to run on each chr individually (true) or whole genome (false)
## - FastMode: turn on the fast algorithm (--USE_FAST_ALGORITHM) for CollectWgsMetrics; can be used for WGS analysis
## - GATK docker image
version 1.0
# Workflow WgsMetrics
#
# Runs the runWgsMetrics task on a BAM in one of two modes, selected by ByChromosome:
#   - true : Intervals is read line by line, one runWgsMetrics call is scattered
#            per listed interval file, and the per-interval stats files are
#            bundled into a single tarball by compressFiles.
#   - false: a single runWgsMetrics call is made without an interval file,
#            using the region label "wgs".
#
# Inputs:
#   bam, bam_index        - alignment file and its index
#   sampleName            - used as the prefix of every output file name
#   refFasta, refFastaIdx - reference genome and its index
#   Intervals             - text file with one interval-file path per line
#                           (only read when ByChromosome is true)
#   ByChromosome          - scatter per interval file (true) or single run (false)
#   FastMode              - forwarded to runWgsMetrics as --USE_FAST_ALGORITHM
#   docker                - container image used by both tasks
#   opts                  - extra command-line arguments appended to the gatk call
#
# Output:
#   outputs - the tarball of per-interval stats when ByChromosome is true,
#             otherwise the single stats file.
workflow WgsMetrics {
    input {
        File bam
        File bam_index
        String sampleName
        File refFasta
        File refFastaIdx
        File Intervals
        Boolean ByChromosome = false
        Boolean FastMode = false
        String docker
        String opts = ""
    }

    if (ByChromosome) {
        Array[File] intervals_list = read_lines(Intervals)

        scatter (chromList in intervals_list) {
            # Region label = file name without a trailing ".interval_list".
            # The two-argument basename only strips the suffix at the end of the
            # name, unlike an unanchored regex substitution over the whole path.
            String chr = basename(chromList, ".interval_list")

            call runWgsMetrics as runWgsMetricsChrom {
                input:
                    bam = bam,
                    bam_index = bam_index,
                    sampleName = sampleName,
                    refFasta = refFasta,
                    refFastaIdx = refFastaIdx,
                    Intervals = chromList,
                    FastMode = FastMode,
                    docker = docker,
                    region = chr,
                    opts = opts
            }
        }

        # Bundle all per-interval stats files into one archive.
        call compressFiles {
            input:
                outputsChrom = runWgsMetricsChrom.stats,
                sampleName = sampleName,
                docker = docker
        }
    }

    if (!ByChromosome) {
        # No Intervals passed: the task omits the --INTERVALS argument.
        call runWgsMetrics as runWgsMetricsAll {
            input:
                bam = bam,
                bam_index = bam_index,
                sampleName = sampleName,
                refFasta = refFasta,
                refFastaIdx = refFastaIdx,
                FastMode = FastMode,
                docker = docker,
                region = "wgs",
                opts = opts
        }
    }

    # Exactly one of the two conditional branches runs, so exactly one of these
    # optional values is defined; select_first picks it.
    File Output = select_first([compressFiles.outputsTar, runWgsMetricsAll.stats])

    output {
        File outputs = Output
    }
}
# Task compressFiles
#
# Packs a list of files into a gzip-compressed tar archive named
# "<sampleName>.wgs.by.chrom.tar.gz".
#
# Inputs:
#   outputsChrom - files to archive; their paths are written one per line to a
#                  temporary file that is handed to tar via --files-from
#   sampleName   - prefix of the archive file name
#   docker       - container image to run in
#
# Output:
#   outputsTar - the created archive
task compressFiles {
    input {
        Array[File] outputsChrom
        String sampleName
        String docker
    }

    # Interpolated values are double-quoted so that a sample name or path
    # containing whitespace is passed to tar as a single argument.
    command <<<
        tar -czvf "~{sampleName}.wgs.by.chrom.tar.gz" --files-from="~{write_lines(outputsChrom)}"
    >>>

    output {
        File outputsTar = "~{sampleName}.wgs.by.chrom.tar.gz"
    }

    runtime {
        docker: docker
        preemptible: "3"
        memory: "2 GB"
        disks: "local-disk 10 HDD"
    }
}
# Task runWgsMetrics
#
# Runs `gatk CollectWgsMetrics` on a BAM and writes the metrics to
# "<sampleName>.<region>.txt".
#
# Inputs:
#   bam         - alignment file passed to -I
#   bam_index   - index of the BAM; not referenced in the command itself
#                 (presumably declared so it is localized next to the BAM - confirm)
#   sampleName  - prefix of the output file name
#   refFasta    - reference passed to -R
#   refFastaIdx - reference index; not referenced in the command itself
#   Intervals   - optional interval file; when absent, --INTERVALS is omitted
#   FastMode    - value passed to --USE_FAST_ALGORITHM
#   docker      - container image to run in
#   region      - label inserted into the output file name
#   opts        - extra arguments appended verbatim to the command line
#
# Output:
#   stats - the metrics text file
task runWgsMetrics {
    input {
        File bam
        File bam_index
        String sampleName
        File refFasta
        File refFastaIdx
        File? Intervals
        Boolean FastMode = false
        String docker
        String region
        String opts
    }

    # Disk request in GB: twice the rounded-up combined size of all input files.
    Int diskspace = 2*ceil(size(bam, "GB")+size(bam_index, "GB")+size(refFasta, "GB")+size(refFastaIdx, "GB"))

    # File paths and the output name are double-quoted so whitespace in a path
    # or sample name does not split the argument. The optional --INTERVALS
    # placeholder and opts stay unquoted on purpose: the former must vanish
    # entirely when Intervals is undefined, and the latter must be word-split
    # into separate arguments.
    command <<<
        gatk CollectWgsMetrics -I "~{bam}" -O "~{sampleName}.~{region}.txt" -R "~{refFasta}" \
        --USE_FAST_ALGORITHM ~{FastMode} ~{"--INTERVALS " + Intervals} ~{opts}
    >>>

    output {
        File stats = "~{sampleName}.~{region}.txt"
    }

    runtime {
        docker: docker
        preemptible: "3"
        memory: "5 GB"
        disks: "local-disk ~{diskspace} HDD"
    }
}