-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstar.wdl
156 lines (140 loc) · 5.77 KB
/
star.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
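### WDL task and workflow for aligning RNA-seq reads with STAR
###
### Inputs:
### fastq1: FASTQ file, or a .tar/.tar.gz archive containing the FASTQs
### fastq2 (optional): second-mate FASTQ for paired-end data
### prefix: used to name all output files
### star_index: STAR index archive (see gs://kccg-cb-tools for index.tar.gz)
### docker: container image to run the task in
### disk_space: in GB; defaults to an estimate based on the input sizes (~100 works)
### memory: in GB; try 40
### num_preempt: number of preemptible attempts; set to 0 if you do not want the job to fail due to preemption
### num_threads: e.g. 10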
# Aligns RNA-seq reads with STAR via the run_STAR.py wrapper and collects the
# resulting BAMs, junction tables, read counts and logs.
#
# Inputs:
#   fastq1       FASTQ file, or a .tar/.tar.gz archive holding *_1.fastq*/*_2.fastq*
#                (paired-end) or *.fastq* (single-end) files
#   fastq2       optional second-mate FASTQ
#   prefix       prefix used to name every output file
#   star_index   tar archive of the STAR index; its top-level directory is stripped
#   docker       optional container image; defaults to broadinstitute/gtex_rnaseq:V10
#   memory       memory request in GB
#   disk_space   local disk in GB; defaults to an estimate derived from the input sizes
#   num_threads  CPU request, also forwarded as the aligner thread count
#   num_preempt  value of the `preemptible` runtime attribute
# Every optional STAR option below is forwarded on the command line only when set.
task star {

    File fastq1
    File? fastq2
    String prefix
    File star_index

    # STAR options
    Int? outFilterMultimapNmax
    Int? alignSJoverhangMin
    Int? alignSJDBoverhangMin
    Int? outFilterMismatchNmax
    Float? outFilterMismatchNoverLmax
    Int? alignIntronMin
    Int? alignIntronMax
    Int? alignMatesGapMax
    String? outFilterType
    Float? outFilterScoreMinOverLread
    Float? outFilterMatchNminOverLread
    Int? limitSjdbInsertNsj
    String? outSAMstrandField
    String? outFilterIntronMotifs
    String? alignSoftClipAtReferenceEnds
    String? quantMode
    String? outSAMattrRGline
    String? outSAMattributes
    File? varVCFfile
    String? waspOutputMode
    Int? chimSegmentMin
    Int? chimJunctionOverhangMin
    String? chimOutType
    Int? chimMainSegmentMultNmax
    Int? chimOutJunctionFormat
    File? sjdbFileChrStartEnd
    String? readFilesCommand

    # Optional so that the select_first() fallback in the runtime section can
    # actually take effect when no image is supplied.
    String? docker

    Int memory
    # Default: 9x the rounded-up combined FASTQ size plus 3x the rounded-up
    # index size, in GB. Can be overridden like any other input.
    Int disk_space = 9*ceil(size(fastq1, "GB")+size(fastq2, "GB"))+3*ceil(size(star_index, "GB"))
    Int num_threads
    Int num_preempt

    # NOTE(review): the positional arguments plus --output_dir/--threads are the
    # command line of the run_STAR.py wrapper, not of the STAR binary (STAR itself
    # uses --outFileNamePrefix/--runThreadN and would reject these flags). The
    # gzipped/indexed files declared in the output section are also produced by
    # that wrapper. Assumes the image provides it at /src/run_STAR.py - confirm
    # for custom images, and confirm the wrapper accepts --readFilesCommand
    # before setting that input.
    command {
        set -euo pipefail

        if [[ ${fastq1} == *".tar" || ${fastq1} == *".tar.gz" ]]; then
            tar -xvvf ${fastq1}
            fastq1_abs=$(for f in *_1.fastq*; do echo "$(pwd)/$f"; done | paste -s -d ',')
            fastq2_abs=$(for f in *_2.fastq*; do echo "$(pwd)/$f"; done | paste -s -d ',')
            if [[ $fastq1_abs == *"*_1.fastq*" ]]; then  # no paired-end FASTQs found; check for single-end FASTQ
                fastq1_abs=$(for f in *.fastq*; do echo "$(pwd)/$f"; done | paste -s -d ',')
                fastq2_abs=''
            fi
        else
            # make sure paths are absolute
            fastq1_abs=${fastq1}
            fastq2_abs=${fastq2}
            if [[ $fastq1_abs != /* ]]; then
                fastq1_abs=$PWD/$fastq1_abs
            fi
            # only touch the mate-2 path when one was supplied; an empty value
            # must stay empty so it drops out of the argument list below
            if [[ -n $fastq2_abs && $fastq2_abs != /* ]]; then
                fastq2_abs=$PWD/$fastq2_abs
            fi
        fi

        echo "FASTQs:"
        echo $fastq1_abs
        echo $fastq2_abs

        # extract index
        echo $(date +"[%b %d %H:%M:%S] Extracting STAR index")
        mkdir star_index
        tar -xvvf ${star_index} -C star_index --strip-components=1

        mkdir star_out
        # placeholders for optional outputs
        touch star_out/${prefix}.Aligned.toTranscriptome.out.bam
        touch star_out/${prefix}.Chimeric.out.sorted.bam
        touch star_out/${prefix}.Chimeric.out.sorted.bam.bai
        touch star_out/${prefix}.ReadsPerGene.out.tab  # run_STAR.py will gzip

        # fastq2_abs is intentionally unquoted: it expands to nothing for single-end data
        /src/run_STAR.py \
            star_index $fastq1_abs $fastq2_abs ${prefix} \
            --output_dir star_out \
            ${"--outFilterMultimapNmax " + outFilterMultimapNmax} \
            ${"--alignSJoverhangMin " + alignSJoverhangMin} \
            ${"--alignSJDBoverhangMin " + alignSJDBoverhangMin} \
            ${"--outFilterMismatchNmax " + outFilterMismatchNmax} \
            ${"--outFilterMismatchNoverLmax " + outFilterMismatchNoverLmax} \
            ${"--alignIntronMin " + alignIntronMin} \
            ${"--alignIntronMax " + alignIntronMax} \
            ${"--alignMatesGapMax " + alignMatesGapMax} \
            ${"--outFilterType " + outFilterType} \
            ${"--outFilterScoreMinOverLread " + outFilterScoreMinOverLread} \
            ${"--outFilterMatchNminOverLread " + outFilterMatchNminOverLread} \
            ${"--limitSjdbInsertNsj " + limitSjdbInsertNsj} \
            ${"--outSAMstrandField " + outSAMstrandField} \
            ${"--outFilterIntronMotifs " + outFilterIntronMotifs} \
            ${"--alignSoftClipAtReferenceEnds " + alignSoftClipAtReferenceEnds} \
            ${"--quantMode " + quantMode} \
            ${"--outSAMattrRGline " + outSAMattrRGline} \
            ${"--outSAMattributes " + outSAMattributes} \
            ${"--varVCFfile " + varVCFfile} \
            ${"--waspOutputMode " + waspOutputMode} \
            ${"--chimSegmentMin " + chimSegmentMin} \
            ${"--chimJunctionOverhangMin " + chimJunctionOverhangMin} \
            ${"--chimOutType " + chimOutType} \
            ${"--chimMainSegmentMultNmax " + chimMainSegmentMultNmax} \
            ${"--chimOutJunctionFormat " + chimOutJunctionFormat} \
            ${"--sjdbFileChrStartEnd " + sjdbFileChrStartEnd} \
            ${"--readFilesCommand " + readFilesCommand} \
            --threads ${num_threads}
    }

    output {
        File bam_file = "star_out/${prefix}.Aligned.sortedByCoord.out.bam"
        File bam_index = "star_out/${prefix}.Aligned.sortedByCoord.out.bam.bai"
        File transcriptome_bam = "star_out/${prefix}.Aligned.toTranscriptome.out.bam"
        File chimeric_junctions = "star_out/${prefix}.Chimeric.out.junction.gz"
        File chimeric_bam_file = "star_out/${prefix}.Chimeric.out.sorted.bam"
        File chimeric_bam_index = "star_out/${prefix}.Chimeric.out.sorted.bam.bai"
        File read_counts = "star_out/${prefix}.ReadsPerGene.out.tab.gz"
        File junctions = "star_out/${prefix}.SJ.out.tab.gz"
        File junctions_pass1 = "star_out/${prefix}._STARpass1/${prefix}.SJ.pass1.out.tab.gz"
        Array[File] logs = ["star_out/${prefix}.Log.final.out", "star_out/${prefix}.Log.out", "star_out/${prefix}.Log.progress.out"]
    }

    runtime {
        # falls back to the GTEx RNA-seq image when no docker input is given
        docker: select_first([docker, "broadinstitute/gtex_rnaseq:V10"])
        memory: "${memory}GB"
        disks: "local-disk ${disk_space} HDD"
        cpu: "${num_threads}"
        preemptible: "${num_preempt}"
    }

    meta {
        author: "Francois Aguet"
    }
}
# Entry-point workflow: a single invocation of the `star` task.
# The call sets no inputs itself, so every task input is provided by the caller.
workflow star_workflow {
    call star
}