Skip to content

Commit

Permalink
Starting to generate separated config files.
Browse files Browse the repository at this point in the history
  • Loading branch information
RaqManzano committed Aug 21, 2023
1 parent fac25ea commit 770495e
Show file tree
Hide file tree
Showing 4 changed files with 518 additions and 0 deletions.
174 changes: 174 additions & 0 deletions conf/modules/bam_align/bam_align.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.prefix = File name prefix for output files.
ext.when = When to run the module.
----------------------------------------------------------------------------------------
*/

// BAM_ALIGN config

process {

    // The aligner-specific options in this branch are only set when the pipeline starts at the mapping step
    if (params.step == 'mapping') {

        // DNA aligners

        // Each aligner module runs only when it matches the value of params.aligner
        withName: "BWAMEM1_MEM" {
            ext.when = { params.aligner == "bwa-mem" }
        }

        withName: "BWAMEM2_MEM" {
            ext.when = { params.aligner == "bwa-mem2" }
        }

        withName: "DRAGMAP_ALIGN" {
            ext.when = { params.aligner == "dragmap" }
            ext.args = { "--RGSM ${meta.patient}_${meta.sample} --RGID ${meta.read_group}" }
        }

        withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" {
            // When FASTQs are split, the prefix is the sample id plus the first dot-separated
            // token of the first read file name; otherwise it is "<id>.sorted"
            ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" }
            publishDir = [
                mode: params.publish_dir_mode,
                path: { "${params.outdir}/preprocessing/" },
                pattern: "*bam",
                // Only save if save_output_as_bam AND
                // (save_mapped OR markduplicates is skipped) AND
                // only a single BAM file per sample
                saveAs: {
                    if (params.save_output_as_bam &&
                        (
                            params.save_mapped ||
                            (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))
                        ) && (meta.size * meta.num_lanes == 1)
                    ) { "mapped/${meta.id}/${it}" }
                    else { null }
                }
            ]
        }

        withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" {
            // Pass '-n' as the second argument set whenever markduplicates is NOT skipped;
            // otherwise pass nothing.
            // NOTE(review): presumably '-n' requests name-sorting for duplicate marking and the
            // default is coordinate-sorting - confirm against the module. The previous comment
            // tied this to Markduplicates Spark, but no Spark parameter is checked here.
            ext.args2 = { (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' }
        }

        withName: "BWAMEM.*_MEM|SENTIEON_BWAMEM" {
            // Using -B 3 for tumor samples (meta.status == 1)
            ext.args = { meta.status == 1 ? "-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" }
        }
    }


    withName: 'MERGE_BAM|INDEX_MERGE_BAM' {
        publishDir = [
            mode: params.publish_dir_mode,
            path: { "${params.outdir}/preprocessing/" },
            pattern: "*{bam,bai}",
            // Only save if (save_output_as_bam AND (no_markduplicates OR save_mapped ))
            saveAs: { (params.save_output_as_bam && (params.save_mapped || params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) ? "mapped/${meta.id}/${it}" : null }
        ]
    }

    withName: 'MERGE_BAM' {
        ext.prefix = { "${meta.id}.sorted" }
    }


    // RNA aligners
    withName: 'STAR_GENOMEGENERATE' {
        // --sjdbOverhang is set to read_length - 1, and only when a read length is given
        ext.args = params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : ''
    }

    withName: 'UNTAR_.*|STAR_GENOMEGENERATE|HISAT2_BUILD|HISAT2_EXTRACTSPLICESITES' {
        publishDir = [
            enabled: params.save_reference,
            mode: params.publish_dir_mode,
            path: { "${params.outdir}/reference/index" },
            // Publish everything except versions.yml
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

    withName: 'STAR_ALIGN' {
        // Fixed STAR options followed by optional ones; unset options contribute an empty string
        ext.args = [
            '--outSAMtype BAM SortedByCoordinate',
            '--readFilesCommand zcat',
            '--outFilterMultimapScoreRange 1',
            '--outFilterMultimapNmax 20',
            '--outFilterMismatchNmax 10',
            '--alignMatesGapMax 1000000',
            '--sjdbScore 2',
            '--alignSJDBoverhangMin 1',
            '--genomeLoad NoSharedMemory',
            '--outFilterMatchNminOverLread 0.33',
            '--outFilterScoreMinOverLread 0.33',
            '--twopass1readsN -1',
            params.save_unaligned ? '--outReadsUnmapped Fastx' : '',
            params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '',
            params.star_twopass ? '--twopassMode Basic' : '',
            params.star_max_memory_bamsort > 0 ? "--limitBAMsortRAM ${params.star_max_memory_bamsort}" : "",
            params.star_bins_bamsort > 0 ? "--outBAMsortingBinsN ${params.star_bins_bamsort}" : "",
            params.star_max_collapsed_junc > 0 ? "--limitOutSJcollapsed ${params.star_max_collapsed_junc}" : ""
        ].join(' ').trim()
        ext.args2 = { "--outSAMattrRGline ${meta.read_group}" }
        // Same split-FASTQ prefix rule as the DNA aligners, but an empty prefix when not split
        ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "" }
        publishDir = [
            [
                path: { "${params.outdir}/reports/star/${meta.patient}/${meta.id}/" },
                mode: params.publish_dir_mode,
                pattern: '*.out,tab'.replace('out,tab', '{out,tab}'),
                enabled: params.save_align_intermeds
            ],
            [
                path: { "${params.outdir}/preprocessing/star/${meta.patient}/${meta.id}/mapped/" },
                mode: params.publish_dir_mode,
                pattern: '*.bam',
                enabled: params.save_align_intermeds
            ],
            [
                path: { "${params.outdir}/preprocessing/star/${meta.patient}/${meta.id}/unmapped/" },
                mode: params.publish_dir_mode,
                pattern: '*.fastq.gz',
                // Unmapped reads are only written when save_unaligned adds --outReadsUnmapped above,
                // so publishing is gated on the same parameter (as for HISAT2 below)
                enabled: params.save_unaligned
            ]
        ]
    }

    // HISAT2 for second run
    withName: '.*:ALIGN_HISAT2:HISAT2_ALIGN' {
        ext.args = '--met-stderr --new-summary'
        publishDir = [
            [
                // Logs go under reports/, the same top-level directory the STAR logs use
                path: { "${params.outdir}/reports/hisat2/${meta.patient}/${meta.id}" },
                mode: params.publish_dir_mode,
                pattern: '*.log',
                enabled: params.save_align_intermeds
            ],
            [
                path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" },
                mode: params.publish_dir_mode,
                pattern: '*.bam',
                enabled: params.save_align_intermeds
            ],
            [
                path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/unmapped" },
                mode: params.publish_dir_mode,
                pattern: '*.fastq.gz',
                enabled: params.save_unaligned
            ]
        ]
    }

}
138 changes: 138 additions & 0 deletions conf/modules/prepare_genome_and_intervals/prepare_genome.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.prefix = File name prefix for output files.
ext.when = When to run the module.
----------------------------------------------------------------------------------------
*/

// PREPARE_GENOME
// TODO: add any remaining module options and remove redundant code

process {

    // Every publishDir below that has an `enabled` expression is switched on by
    // params.save_reference or params.build_only_index, except where noted.

    // Index/hashtable builders: run only at the mapping step, for the matching aligner,
    // and only when the corresponding params value (bwa / bwamem2 / dragmap) is not set.
    withName: 'BWAMEM1_INDEX' {
        ext.when   = {
            !params.bwa &&
            params.step == 'mapping' &&
            (params.aligner == 'bwa-mem' || params.aligner == 'sentieon-bwamem')
        }
        publishDir = [
            path: { "${params.outdir}/reference" },
            pattern: 'bwa',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    withName: 'BWAMEM2_INDEX' {
        ext.when   = {
            !params.bwamem2 &&
            params.step == 'mapping' &&
            params.aligner == 'bwa-mem2'
        }
        publishDir = [
            path: { "${params.outdir}/reference" },
            pattern: 'bwamem2',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    withName: 'DRAGMAP_HASHTABLE' {
        ext.when   = {
            !params.dragmap &&
            params.step == 'mapping' &&
            params.aligner == 'dragmap'
        }
        publishDir = [
            path: { "${params.outdir}/reference" },
            pattern: 'dragmap',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    // Sequence dictionary: skipped when params.dict is set or when starting at annotate/controlfreec
    withName: 'GATK4_CREATESEQUENCEDICTIONARY' {
        ext.when   = {
            !params.dict &&
            params.step != 'annotate' &&
            params.step != 'controlfreec'
        }
        publishDir = [
            path: { "${params.outdir}/reference/dict" },
            pattern: '*dict',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    // Runs only when 'msisensorpro' is one of the comma-separated params.tools
    withName: 'MSISENSORPRO_SCAN' {
        ext.when   = { params.tools && params.tools.split(',').contains('msisensorpro') }
        publishDir = [
            path: { "${params.outdir}/reference/msi" },
            pattern: '*list',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    // FASTA index: skipped when params.fasta_fai is set or when starting at annotate
    withName: 'SAMTOOLS_FAIDX' {
        ext.when   = { !params.fasta_fai && params.step != 'annotate' }
        publishDir = [
            path: { "${params.outdir}/reference/fai" },
            pattern: '*fai',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    // Tabix indexing of resource VCFs: each runs only when the resource is given,
    // its *_tbi parameter is not, and a listed step or tool is selected.
    withName: 'TABIX_DBSNP' {
        ext.when   = {
            !params.dbsnp_tbi &&
            params.dbsnp &&
            (
                params.step in ['mapping', 'markduplicates', 'prepare_recalibration'] ||
                (params.tools && params.tools.split(',').any { tool -> tool in ['controlfreec', 'haplotypecaller', 'sentieon_haplotyper', 'mutect2'] })
            )
        }
        publishDir = [
            path: { "${params.outdir}/reference/dbsnp" },
            pattern: '*vcf.gz.tbi',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    withName: 'TABIX_GERMLINE_RESOURCE' {
        ext.when   = {
            !params.germline_resource_tbi &&
            params.germline_resource &&
            params.tools &&
            params.tools.split(',').contains('mutect2')
        }
        publishDir = [
            path: { "${params.outdir}/reference/germline_resource" },
            pattern: '*vcf.gz.tbi',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    withName: 'TABIX_KNOWN_INDELS' {
        ext.when   = {
            !params.known_indels_tbi &&
            params.known_indels &&
            (
                params.step in ['mapping', 'markduplicates', 'prepare_recalibration'] ||
                (params.tools && params.tools.split(',').any { tool -> tool in ['haplotypecaller', 'sentieon_haplotyper'] })
            )
        }
        publishDir = [
            path: { "${params.outdir}/reference/known_indels" },
            pattern: '*vcf.gz.tbi',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    withName: 'TABIX_KNOWN_SNPS' {
        ext.when   = {
            !params.known_snps_tbi &&
            params.known_snps &&
            (
                params.step in ['mapping', 'markduplicates', 'prepare_recalibration'] ||
                (params.tools && params.tools.split(',').any { tool -> tool in ['haplotypecaller', 'sentieon_haplotyper'] })
            )
        }
        publishDir = [
            path: { "${params.outdir}/reference/known_snps" },
            pattern: '*vcf.gz.tbi',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    withName: 'TABIX_PON' {
        ext.when   = {
            !params.pon_tbi &&
            params.pon &&
            params.tools &&
            params.tools.split(',').contains('mutect2')
        }
        publishDir = [
            path: { "${params.outdir}/reference/pon" },
            pattern: '*vcf.gz.tbi',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    // Run only when 'ascat' is among params.tools; outputs are never published
    withName: 'UNZIP_ALLELES|UNZIP_LOCI|UNZIP_GC|UNZIP_RT' {
        ext.when   = { params.tools && params.tools.split(',').contains('ascat') }
        publishDir = [
            enabled: false
        ]
    }

    // Runs only when 'controlfreec' is among params.tools
    withName: 'UNTAR_CHR_DIR' {
        ext.when = { params.tools && params.tools.split(',').contains('controlfreec') }
    }
}
45 changes: 45 additions & 0 deletions conf/modules/prepare_genome_and_intervals/prepare_intervals.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.prefix = File name prefix for output files.
ext.when = When to run the module.
----------------------------------------------------------------------------------------
*/

// PREPARE INTERVALS

process {

    // All three modules publish into <outdir>/reference/intervals, and only when
    // params.save_reference or params.build_only_index is set.

    withName: 'CREATE_INTERVALS_BED' {
        publishDir = [
            path: { "${params.outdir}/reference/intervals" },
            pattern: '*bed',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    withName: 'GATK4_INTERVALLISTTOBED' {
        publishDir = [
            path: { "${params.outdir}/reference/intervals" },
            pattern: '*bed',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }

    withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT' {
        // Output files are named after the meta id
        ext.prefix = { "${meta.id}" }
        publishDir = [
            path: { "${params.outdir}/reference/intervals" },
            pattern: '*bed.gz',
            mode: params.publish_dir_mode,
            enabled: params.save_reference || params.build_only_index
        ]
    }
}
Loading

0 comments on commit 770495e

Please sign in to comment.