BamToCram.wdl
version 1.0

import "https://raw.githubusercontent.com/trinhan/wgsAlignment/main/Utilities.wdl?token=ABVSYKBEMUCIWY36QNLGO23A6EGVK" as Utils
import "https://raw.githubusercontent.com/trinhan/wgsAlignment/main/Qc.wdl?token=ABVSYKAUTHJEFZZC72KDE4DA6EGTI" as QC

workflow BamToCram {

  input {
    File input_bam
    File ref_fasta
    File ref_fasta_index
    File ref_dict
    File duplication_metrics
    File chimerism_metrics
    String base_file_name
    Int agg_preemptible_tries
  }

  # ValidateSamFile runs out of memory in mate validation on crazy edge case data, so we want to skip the mate validation
  # in those cases. These values set the thresholds for what is considered outside the normal realm of "reasonable" data.
  Float max_duplication_in_reasonable_sample = 0.30
  Float max_chimerism_in_reasonable_sample = 0.15

  # Convert the final merged recalibrated BAM file to CRAM format
  call Utils.ConvertToCram as ConvertToCram {
    input:
      input_bam = input_bam,
      ref_fasta = ref_fasta,
      ref_fasta_index = ref_fasta_index,
      output_basename = base_file_name,
      preemptible_tries = agg_preemptible_tries
  }

  # Check whether the data has massively high duplication or chimerism rates
  call QC.CheckPreValidation as CheckPreValidation {
    input:
      duplication_metrics = duplication_metrics,
      chimerism_metrics = chimerism_metrics,
      max_duplication_in_reasonable_sample = max_duplication_in_reasonable_sample,
      max_chimerism_in_reasonable_sample = max_chimerism_in_reasonable_sample,
      preemptible_tries = agg_preemptible_tries
  }

  # Validate the CRAM file
  call QC.ValidateSamFile as ValidateCram {
    input:
      input_bam = ConvertToCram.output_cram,
      input_bam_index = ConvertToCram.output_cram_index,
      report_filename = base_file_name + ".cram.validation_report",
      ref_dict = ref_dict,
      ref_fasta = ref_fasta,
      ref_fasta_index = ref_fasta_index,
      ignore = ["MISSING_TAG_NM"],
      max_output = 1000000000,
      is_outlier_data = CheckPreValidation.is_outlier_data,
      preemptible_tries = agg_preemptible_tries
  }

  output {
    File output_cram = ConvertToCram.output_cram
    File output_cram_index = ConvertToCram.output_cram_index
    File output_cram_md5 = ConvertToCram.output_cram_md5
    File validate_cram_file_report = ValidateCram.report
  }

  meta {
    allowNestedInputs: true
  }
}
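
For reference, the sketch below shows one way this workflow might be imported and called as a sub-workflow from another WDL. It is an illustration under assumptions, not part of this repository: the wrapper name TestBamToCram, the relative import path, and every input value are hypothetical placeholders.

version 1.0

# Hypothetical wrapper illustrating how BamToCram could be called as a sub-workflow.
# The import path, workflow name, and all input values are placeholders for illustration only.
import "BamToCram.wdl" as ToCram

workflow TestBamToCram {

  input {
    File test_bam
    File test_ref_fasta
    File test_ref_fasta_index
    File test_ref_dict
    File test_duplication_metrics
    File test_chimerism_metrics
  }

  # Hand the BAM plus its reference and pre-computed metrics to BamToCram
  call ToCram.BamToCram as BamToCram {
    input:
      input_bam = test_bam,
      ref_fasta = test_ref_fasta,
      ref_fasta_index = test_ref_fasta_index,
      ref_dict = test_ref_dict,
      duplication_metrics = test_duplication_metrics,
      chimerism_metrics = test_chimerism_metrics,
      base_file_name = "sample_test",
      agg_preemptible_tries = 3
  }

  # Surface the CRAM and its index from the sub-workflow
  output {
    File cram = BamToCram.output_cram
    File cram_index = BamToCram.output_cram_index
    File cram_validation_report = BamToCram.validate_cram_file_report
  }
}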