-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSTARfusion.wdl
236 lines (204 loc) · 6.92 KB
/
STARfusion.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
## STAR fusion pipeline, Jan 2022
## This workflow is an adaptation of the workflow found here: https://github.com/STAR-Fusion/STAR-Fusion
## specifically, using this workflow: https://raw.githubusercontent.com/STAR-Fusion/STAR-Fusion/Terra-1.10.1/WDL/star_fusion_workflow.wdl
##
## The inputs are:
## - raw fastq files
##
## The outputs are:
## - fastqc html
## - Aligned bam file
## - HTSeq counts table
## - STARFusion list of fusions
##
## Key steps:
## 1. Fastqc of files
## 2. Trimmomatic of fastqs
## 3. Run STAR fusion on trimmed fastq
## 4. Index and sort output bam file
## 5. HTSeq on bam file
version 1.0
import "star_fusion_workflow.wdl" as starfusion
import "FastQC.wdl" as FastQC
# STARfusion: end-to-end fusion-calling workflow.
#
# Steps, in call order:
#   1. FastQC.Fastqc      - QC of the raw fastq files, with optional Trimmomatic trimming.
#   2. star_fusion        - STAR-Fusion on the trimmed reads when available, raw reads otherwise.
#   3. samtoolsSortTask   - sort and index the BAM produced by STAR-Fusion.
#   4. HTSeqCount         - per-gene read counts from the sorted BAM and the supplied GTF.
workflow STARfusion {
    input {
        String sample_id
        File genome_plug_n_play_tar_gz

        # Inputs required for full pipeline
        File left_fq
        File? right_fq                      # omit for single-end data
        String? fusion_inspector
        File gtf                            # for annotation of genecounts
        Boolean examine_coding_effect

        # Optional inputs for fastqc
        File? adap                          # input fasta file for adapter sequences, optional
        Boolean runTrimmomatic = true       # run trimmomatic on the fastq files
        String? trimmomaticSettings = ":2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"

        # runtime params
        String FusionDocker = "trinityctat/starfusion:1.10.1"
        String samtoolsDocker = "trinhanne/sambcfhts:v1.13.3"
        Int num_cpu = 12
        Float fastq_disk_space_multiplier = 5
        String memory = "50G"
        Float genome_disk_space_multiplier = 5
        Int preemptible = 2
        Float extra_disk_space = 10
        Boolean use_ssd = true

        # small tasks eg. samtools, fastqc etc
        Int smallCPU = 1
        Int smallMem = 12
    }

    call FastQC.Fastqc as fastqc {
        input:
            f1 = left_fq,
            f2 = right_fq,
            sampleName = sample_id,
            adap = adap,
            runTrimmomatic = runTrimmomatic,
            settings = trimmomaticSettings,
            cpu = smallCPU
    }

    # Prefer the trimmed reads when the QC step produced them, otherwise fall
    # back to the raw input.
    File f1 = select_first([fastqc.trim_f1, left_fq])

    # The second mate is optional. select_first() is a fatal error when every
    # candidate is undefined, which is exactly the single-end case (no
    # right_fq and therefore no trimmed mate), so pick with defined() and keep
    # the value optional instead.
    # NOTE(review): assumes star_fusion declares right_fq as File? - confirm
    # against star_fusion_workflow.wdl.
    File? f2 = if defined(fastqc.trim_f2) then fastqc.trim_f2 else right_fq

    call starfusion.star_fusion as star_fusion {
        input:
            left_fq = f1,
            right_fq = f2,
            genome = genome_plug_n_play_tar_gz,
            sample_id = sample_id,
            examine_coding_effect = examine_coding_effect,
            preemptible = preemptible,
            docker = FusionDocker,
            cpu = num_cpu,
            memory = memory,
            extra_disk_space = extra_disk_space,
            fastq_disk_space_multiplier = fastq_disk_space_multiplier,
            genome_disk_space_multiplier = genome_disk_space_multiplier,
            fusion_inspector = fusion_inspector,
            use_ssd = use_ssd
    }

    call samtoolsSortTask {
        input:
            inputBAM = star_fusion.bam,
            sample_id = sample_id,
            dockerImage = samtoolsDocker,
            preemptible = preemptible,
            cpu = smallCPU,
            memoryGB = smallMem
    }

    call HTSeqCount {
        input:
            inputBAM = samtoolsSortTask.outputBAM,
            gtf = gtf,
            preemptible = preemptible,
            sample_id = sample_id,
            cpu = smallCPU,
            memoryGB = smallMem
    }

    output {
        File fusion_predictions = star_fusion.fusion_predictions
        File fusion_predictions_abridged = star_fusion.fusion_predictions_abridged
        File bam = samtoolsSortTask.outputBAM
        File bai = samtoolsSortTask.outputBAI
        File? junction = star_fusion.junction
        File? sj = star_fusion.sj
        File? coding_effect = star_fusion.coding_effect
        Array[File]? extract_fusion_reads = star_fusion.extract_fusion_reads
        File? star_log_final = star_fusion.star_log_final
        File? fusion_inspector_validate_fusions_abridged = star_fusion.fusion_inspector_validate_fusions_abridged
        File? fusion_inspector_validate_web = star_fusion.fusion_inspector_validate_web
        File? fusion_inspector_inspect_fusions_abridged = star_fusion.fusion_inspector_inspect_fusions_abridged
        File? fusion_inspector_inspect_web = star_fusion.fusion_inspector_inspect_web
        File geneCounts = HTSeqCount.ReadCounts
    }
}
# HTSeqCount: run htseq-count on a BAM against a GTF annotation and capture
# the counts table written to stdout.
#
# Inputs:
#   inputBAM    - alignments to count.
#   gtf         - annotation passed to htseq-count.
#   sample_id   - prefix of the output file name.
#   order       - sort order of inputBAM as understood by htseq-count
#                 ("pos" or "name"). Defaults to "pos" because the workflow in
#                 this file feeds the task a BAM produced by `samtools sort`
#                 without -n; htseq-count's own default ("name") expects mates
#                 on adjacent records.
#   preemptible, memoryGB, cpu - runtime settings.
#
# Output:
#   ReadCounts  - "<sample_id>.ReadCounts.txt".
task HTSeqCount {
    input {
        File inputBAM
        File gtf
        String sample_id
        String order = "pos"
        Int preemptible = 1
        Int memoryGB = 8
        Int cpu = 1
    }

    # Three times the combined (rounded-up) size of the BAM and the GTF, in GB.
    Int diskSpace = 3 * ceil(size(inputBAM, "GB") + size(gtf, "GB"))

    command <<<
        htseq-count -f bam -r ~{order} ~{inputBAM} ~{gtf} > ~{sample_id}.ReadCounts.txt
    >>>

    runtime {
        docker: "biocontainers/htseq:v0.11.2-1-deb-py3_cv1"
        disks: "local-disk ~{diskSpace} HDD"
        memory: memoryGB + "GB"
        cpu: cpu
        preemptible: preemptible
    }

    output {
        File ReadCounts = "~{sample_id}.ReadCounts.txt"
    }
}
# samtoolsSortTask: sort a BAM with samtools and index the sorted result.
#
# Inputs:
#   inputBAM    - BAM file to sort.
#   sample_id   - prefix of the output file names.
#   dockerImage - container image that provides samtools.
#   memoryGB, cpu, preemptible - runtime settings.
#
# Outputs:
#   outputBAM   - "<sample_id>.sorted.bam"
#   outputBAI   - "<sample_id>.sorted.bam.bai", the index written by
#                 `samtools index` next to the sorted BAM.
task samtoolsSortTask {
    input {
        File inputBAM
        String sample_id
        String dockerImage
        Int memoryGB = 16
        Int cpu = 1
        Int preemptible = 1
    }

    # Three times the (rounded-up) input size, in GB.
    Int diskSpace = 3 * ceil(size(inputBAM, "GB"))

    command <<<
        samtools sort -o ~{sample_id}.sorted.bam ~{inputBAM} && \
        samtools index ~{sample_id}.sorted.bam
    >>>

    runtime {
        docker: dockerImage
        disks: "local-disk ~{diskSpace} HDD"
        memory: memoryGB + "GB"
        cpu: cpu
        preemptible: preemptible
    }

    output {
        File outputBAM = "~{sample_id}.sorted.bam"
        # Previously pointed at the BAM itself; the index carries the
        # additional ".bai" suffix.
        File outputBAI = "~{sample_id}.sorted.bam.bai"
    }
}
# ChimFusion: run STAR-Fusion directly from an existing Chimeric.out.junction
# file (plain or gzip-compressed) instead of from fastq files.
#
# Inputs:
#   ChimericJunction              - junction file; a name ending in ".gz" is
#                                   decompressed, anything else is copied.
#   genome                        - tarball unpacked into ./genome_dir.
#   docker                        - container image that provides STAR-Fusion.
#   sample_id                     - name of the STAR-Fusion output directory.
#   num_cpu                       - value passed to --CPU and to the runtime.
#   fastq_disk_space_multiplier   - multiplier applied to the junction file size.
#   genome_disk_space_multiplier  - multiplier applied to the genome tarball size.
#   machine_mem_gb                - runtime memory; "50G" when not supplied.
#   preemptible                   - preemptible attempts; 3 when not supplied.
#   examine_coding_effect         - adds --examine_coding_effect when true.
#
# Outputs:
#   fusions          - "<sample_id>/star-fusion.fusion_predictions.tsv"
#   fusionsAbridged  - "<sample_id>/star-fusion.fusion_predictions.abridged.tsv"
task ChimFusion {
    input {
        File ChimericJunction
        File genome
        String docker
        String sample_id
        Int num_cpu
        Float fastq_disk_space_multiplier
        Float genome_disk_space_multiplier
        String? machine_mem_gb
        String? preemptible
        Boolean examine_coding_effect
    }

    Int disk_space_gb = ceil(genome_disk_space_multiplier * size(genome, "GB") + fastq_disk_space_multiplier * size(ChimericJunction, "GB"))

    command <<<
        # Stop at the first failing step so STAR-Fusion never runs against a
        # missing or truncated junction file.
        set -euo pipefail

        mkdir -p genome_dir
        tar xf "~{genome}" -C genome_dir --strip-components 1

        Input="ChimericInput.Chimeric.out.junction"
        if [[ "~{ChimericJunction}" == *.gz ]]; then
            gunzip -c "~{ChimericJunction}" > "$Input"
        else
            # cp takes the destination as an operand; the earlier form
            # redirected cp's (empty) stdout and never copied the file.
            cp "~{ChimericJunction}" "$Input"
        fi

        # Identify the fusions from chimericjunction file
        /usr/local/src/STAR-Fusion/STAR-Fusion --genome_lib_dir "$(pwd)/genome_dir/ctat_genome_lib_build_dir" \
            -J "$Input" \
            --output_dir ~{sample_id} --CPU ~{num_cpu} ~{true='--examine_coding_effect' false='' examine_coding_effect}
    >>>

    output {
        File fusions = "~{sample_id}/star-fusion.fusion_predictions.tsv"
        File fusionsAbridged = "~{sample_id}/star-fusion.fusion_predictions.abridged.tsv"
    }

    runtime {
        docker: docker
        memory: select_first([machine_mem_gb, "50G"])
        cpu: num_cpu
        disks: "local-disk " + disk_space_gb + " HDD"
        preemptible: select_first([preemptible, 3])
    }
}