diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index f0b69bdcd5..a2737a1c95 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,14 +1,19 @@ -# 3.4.5 +# 5.0.0 +2024-05-20 (Date of Last Commit) + +* Updated SnapATAC2 docker to SnapATAC2 v2.6.3; this impacts the workflow output metrics + +# 4.0.2 2024-05-14 (Date of Last Commit) * Updated the Paired-tag Demultiplex task so that some intermediate input names have been renamed; this change does not impact the Multiome workflow -# 3.4.4 +# 4.0.1 2024-05-10 (Date of Last Commit) * Updated the Paired-tag Demultiplex task; this change does not impact the Multiome workflow -# 3.4.3 +# 4.0.0 2024-04-24 (Date of Last Commit) * Updated the input parameters for STARsolo in STARsoloFastq task. These include the parameters: soloCBmatchWLtype, soloUMIdedup and soloUMIfiltering diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 526381b707..e6cd1017a0 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -7,7 +7,7 @@ import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/c workflow Multiome { - String pipeline_version = "3.4.5" + String pipeline_version = "5.0.0" input { String input_id @@ -79,10 +79,10 @@ workflow Multiome { read3_fastq_gzipped = atac_r3_fastq, input_id = input_id + "_atac", tar_bwa_reference = tar_bwa_reference, - annotations_gtf = annotations_gtf, chrom_sizes = chrom_sizes, whitelist = atac_whitelist, adapter_seq_read1 = adapter_seq_read1, + annotations_gtf = annotations_gtf, adapter_seq_read3 = adapter_seq_read3 } call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes { diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md index 54948b799f..7478a49e0d 100644 --- a/pipelines/skylab/multiome/atac.changelog.md +++ 
b/pipelines/skylab/multiome/atac.changelog.md @@ -1,3 +1,8 @@ +# 2.0.0 +2024-05-20 (Date of Last Commit) + +* Updated SnapATAC2 docker to SnapATAC2 v2.6.3; this impacts the workflow output metrics + # 1.2.3 2024-05-14 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index e8fa467264..e3a63d7e4c 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -29,10 +29,10 @@ workflow ATAC { Int mem_size_bwa = 512 String cpu_platform_bwa = "Intel Ice Lake" - # GTF for SnapATAC2 to calculate TSS sites of fragment file - File annotations_gtf # Text file containing chrom_sizes for genome build (i.e. hg38) File chrom_sizes + # GTF annotations file for calculating ATAC TSSE + File annotations_gtf # Whitelist File whitelist @@ -41,7 +41,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "1.2.3" + String pipeline_version = "2.0.0" parameter_meta { read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads" @@ -436,12 +436,12 @@ task BWAPairedEndAlignment { task CreateFragmentFile { input { File bam - File annotations_gtf File chrom_sizes + File annotations_gtf Boolean preindex Int disk_size = 500 Int mem_size = 16 - Int nthreads = 1 + Int nthreads = 4 String cpuPlatform = "Intel Cascade Lake" } @@ -449,8 +449,8 @@ task CreateFragmentFile { parameter_meta { bam: "Aligned bam with CB in CB tag. This is the output of the BWAPairedEndAlignment task." - annotations_gtf: "GTF for SnapATAC2 to calculate TSS sites of fragment file." chrom_sizes: "Text file containing chrom_sizes for genome build (i.e. hg38)." + annotations_gtf: "GTF for SnapATAC2 to calculate TSS sites of fragment file." disk_size: "Disk size used in create fragment file step." mem_size: "The size of memory used in create fragment file." 
} @@ -461,10 +461,10 @@ task CreateFragmentFile { python3 <>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1" + docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.9-2.6.3-1715865353" disks: "local-disk ${disk_size} SSD" memory: "${mem_size} GiB" cpu: nthreads diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index 1b6844d493..dbbf6814ab 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,4 +1,9 @@ -# 6.6.2 +# 7.1.0 +2024-05-20 (Date of Last Commit) + +* Updated SnapATAC2 docker to SnapATAC2 v2.6.3; this does not impact the Optimus workflow + +# 7.0.0 2024-04-24 (Date of Last Commit) * Updated the input parameters for STARsolo in STARsoloFastq task. These include the parameters: soloCBmatchWLtype, soloUMIdedup and soloUMIfiltering diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 59c2610b97..2163740eb9 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -65,7 +65,7 @@ workflow Optimus { # version of this pipeline - String pipeline_version = "6.6.2" + String pipeline_version = "7.1.0" # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index 04597010ea..3638009b7d 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,13 +1,16 @@ -# 0.7.0 -2024-05016 (Date of Last Commit) +# 0.7.0 +2024-05-20 (Date of Last Commit) + +* Updated SnapATAC2 docker and tasks to run SnapATAC2 v2.6.3 +* Added testing infrastructure for paired-tag plumbing data and example data sets -* Added Paired-tag testing infrastructure and example test inputs # 0.6.1 2024-05-14 (Date of Last Commit) * Updated the demultiplex task so that some intermediate input names have been renamed. 
There is no change to the outputs. + # 0.6.0 2024-05-10 (Date) diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index c19dd233fa..5e5a0d667b 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -86,11 +86,11 @@ workflow PairedTag { read3_fastq_gzipped = demultiplex.fastq3, input_id = input_id + "_atac", tar_bwa_reference = tar_bwa_reference, - annotations_gtf = annotations_gtf, chrom_sizes = chrom_sizes, whitelist = atac_whitelist, adapter_seq_read1 = adapter_seq_read1, adapter_seq_read3 = adapter_seq_read3, + annotations_gtf = annotations_gtf, preindex = preindex } diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index 3910f51df0..52672034ec 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,3 +1,8 @@ +# 3.1.6 +2024-05-20 (Date of Last Commit) + +* Updated SnapATAC2 docker to SnapATAC2 v2.6.3; this does not impact the SlideSeq workflow + # 3.1.5 2024-04-12 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index ca41374852..2e9346de98 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -23,7 +23,7 @@ import "../../../tasks/skylab/MergeSortBam.wdl" as Merge workflow SlideSeq { - String pipeline_version = "3.1.5" + String pipeline_version = "3.1.6" input { Array[File] r1_fastq diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index 18fed45fc1..924b19a770 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -223,6 +223,7 @@ task JoinMultiomeBarcodes { # import anndata to manipulate h5ad files import anndata as ad import pandas as pd + import snapatac2 as snap print("Reading ATAC h5ad:") print("~{atac_h5ad}") print("Read ATAC fragment file:") @@ -234,7 +235,7 @@ task 
JoinMultiomeBarcodes { atac_tsv = pd.read_csv("~{atac_fragment}", sep="\t", names=['chr','start', 'stop', 'barcode','n_reads']) whitelist_gex = pd.read_csv("~{gex_whitelist}", header=None, names=["gex_barcodes"]) whitelist_atac = pd.read_csv("~{atac_whitelist}", header=None, names=["atac_barcodes"]) - + # get dataframes df_atac = atac_data.obs df_gex = gex_data.obs @@ -261,6 +262,7 @@ task JoinMultiomeBarcodes { # set gene_data.obs to new dataframe print("Setting Optimus obs to new dataframe") gex_data.obs = df_gex + # write out the files gex_data.write("~{gex_base_name}.h5ad") atac_data.write_h5ad("~{atac_base_name}.h5ad") @@ -277,7 +279,7 @@ task JoinMultiomeBarcodes { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1-1700590229" + docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.9-2.6.3-1715865353" disks: "local-disk ~{disk} HDD" memory: "${machine_mem_mb} MiB" cpu: nthreads diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl index ca5b6cf885..e11a5e9d3d 100644 --- a/tasks/skylab/PairedTagUtils.wdl +++ b/tasks/skylab/PairedTagUtils.wdl @@ -227,11 +227,13 @@ task ParseBarcodes { # import anndata to manipulate h5ad files import anndata as ad import pandas as pd + import snapatac2 as snap print("Reading ATAC h5ad:") atac_data = ad.read_h5ad("~{atac_h5ad}") print("Reading ATAC fragment file:") test_fragment = pd.read_csv("~{atac_fragment}", sep="\t", names=['chr','start', 'stop', 'barcode','n_reads']) - + + # Separate out CB and preindex in the h5ad and identify sample barcodes assigned to more than one cell barcode print("Setting preindex and CB columns in h5ad") df_h5ad = atac_data.obs @@ -271,7 +273,7 @@ task ParseBarcodes { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1-1700590229" + docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.9-2.6.3-1715865353" disks: "local-disk ~{disk} HDD" memory: "${machine_mem_mb} MiB" cpu: nthreads