Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PD-2863: Run one star instance #1479

Open
wants to merge 17 commits into
base: develop
Choose a base branch
from
8 changes: 4 additions & 4 deletions pipelines/skylab/multiome/Multiome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,10 @@ workflow Multiome {
File gene_metrics_gex = Optimus.gene_metrics
File? cell_calls_gex = Optimus.cell_calls
File h5ad_output_file_gex = JoinBarcodes.gex_h5ad_file
Array[File?] multimappers_EM_matrix = Optimus.multimappers_EM_matrix
Array[File?] multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix
Array[File?] multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix
Array[File?] multimappers_PropUnique_matrix = Optimus.multimappers_PropUnique_matrix
File? multimappers_EM_matrix = Optimus.multimappers_EM_matrix
File? multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix
File? multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix
File? multimappers_PropUnique_matrix = Optimus.multimappers_PropUnique_matrix
File? gex_aligner_metrics = Optimus.aligner_metrics
File? library_metrics = Optimus.library_metrics
File? mtx_files = Optimus.mtx_files
Expand Down
73 changes: 26 additions & 47 deletions pipelines/skylab/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -166,45 +166,25 @@ workflow Optimus {
ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker
}

call FastqProcessing.FastqProcessing as SplitFastq {
input:
i1_fastq = i1_fastq,
r1_fastq = r1_fastq,
r2_fastq = r2_fastq,
whitelist = whitelist,
chemistry = tenx_chemistry_version,
sample_id = input_id,
read_struct = read_struct,
warp_tools_docker_path = docker_prefix + warp_tools_docker
}

scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) {
call StarAlign.STARsoloFastq as STARsoloFastq {
call StarAlign.STARsoloFastq as STARsoloFastq {
input:
r1_fastq = [SplitFastq.fastq_R1_output_array[idx]],
r2_fastq = [SplitFastq.fastq_R2_output_array[idx]],
r1_fastq = r1_fastq,
r2_fastq = r2_fastq,
star_strand_mode = star_strand_mode,
white_list = whitelist,
tar_star_reference = tar_star_reference,
chemistry = tenx_chemistry_version,
counting_mode = counting_mode,
count_exons = count_exons,
output_bam_basename = output_bam_basename + "_" + idx,
output_bam_basename = output_bam_basename,
soloMultiMappers = soloMultiMappers,
samtools_star_docker_path = docker_prefix + samtools_star,
is_slidetags = is_slidetags
}
}
call Merge.MergeSortBamFiles as MergeBam {
input:
bam_inputs = STARsoloFastq.bam_output,
output_bam_filename = output_bam_basename + ".bam",
sort_order = "coordinate",
picard_cloud_docker_path = docker_prefix + picard_cloud_docker
}

call Metrics.CalculateGeneMetrics as GeneMetrics {
input:
bam_input = MergeBam.output_bam,
bam_input = STARsoloFastq.bam_output,
mt_genes = mt_genes,
original_gtf = annotations_gtf,
input_id = input_id,
Expand All @@ -213,7 +193,7 @@ workflow Optimus {

call Metrics.CalculateCellMetrics as CellMetrics {
input:
bam_input = MergeBam.output_bam,
bam_input = STARsoloFastq.bam_output,
mt_genes = mt_genes,
original_gtf = annotations_gtf,
input_id = input_id,
Expand All @@ -222,13 +202,13 @@ workflow Optimus {

call StarAlign.MergeStarOutput as MergeStarOutputs {
input:
barcodes = STARsoloFastq.barcodes,
features = STARsoloFastq.features,
matrix = STARsoloFastq.matrix,
cell_reads = STARsoloFastq.cell_reads,
summary = STARsoloFastq.summary,
align_features = STARsoloFastq.align_features,
umipercell = STARsoloFastq.umipercell,
barcodes = [STARsoloFastq.barcodes],
features = [STARsoloFastq.features],
matrix = [STARsoloFastq.matrix],
cell_reads = [STARsoloFastq.cell_reads],
summary = [STARsoloFastq.summary],
align_features = [STARsoloFastq.align_features],
umipercell = [STARsoloFastq.umipercell],
input_id = input_id,
counting_mode = counting_mode,
star_merge_docker_path = docker_prefix + star_merge_docker,
Expand Down Expand Up @@ -272,15 +252,15 @@ workflow Optimus {
if (count_exons && counting_mode=="sn_rna") {
call StarAlign.MergeStarOutput as MergeStarOutputsExons {
input:
barcodes = STARsoloFastq.barcodes_sn_rna,
features = STARsoloFastq.features_sn_rna,
matrix = STARsoloFastq.matrix_sn_rna,
cell_reads = STARsoloFastq.cell_reads_sn_rna,
barcodes = [STARsoloFastq.barcodes_sn_rna],
features = [STARsoloFastq.features_sn_rna],
matrix = [STARsoloFastq.matrix_sn_rna],
cell_reads = [STARsoloFastq.cell_reads_sn_rna],
input_id = input_id,
counting_mode = "sc_rna",
summary = STARsoloFastq.summary_sn_rna,
align_features = STARsoloFastq.align_features_sn_rna,
umipercell = STARsoloFastq.umipercell_sn_rna,
summary = [STARsoloFastq.summary_sn_rna],
align_features = [STARsoloFastq.align_features_sn_rna],
umipercell = [STARsoloFastq.umipercell_sn_rna],
star_merge_docker_path = docker_prefix + star_merge_docker,
gex_nhash_id = gex_nhash_id
}
Expand Down Expand Up @@ -351,7 +331,7 @@ workflow Optimus {
# version of this pipeline
String pipeline_version_out = pipeline_version
File genomic_reference_version = ReferenceCheck.genomic_ref_version
File bam = MergeBam.output_bam
File bam = STARsoloFastq.bam_output
File matrix = MergeStarOutputs.sparse_counts
File matrix_row_index = MergeStarOutputs.row_index
File matrix_col_index = MergeStarOutputs.col_index
Expand All @@ -363,12 +343,11 @@ workflow Optimus {
File? mtx_files = MergeStarOutputs.mtx_files
File? filtered_mtx_files = MergeStarOutputs.filtered_mtx_files

Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix
Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix
Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix
Array[File?] multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix
File? multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix
File? multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix
File? multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix
File? multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix


# h5ad
File h5ad_output_file = final_h5ad_output

Expand Down
8 changes: 4 additions & 4 deletions pipelines/skylab/paired_tag/PairedTag.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,10 @@ workflow PairedTag {
File? cell_calls_gex = Optimus.cell_calls
File h5ad_output_file_gex = Optimus.h5ad_output_file
File? library_metrics = Optimus.library_metrics
Array[File?] multimappers_EM_matrix = Optimus.multimappers_EM_matrix
Array[File?] multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix
Array[File?] multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix
Array[File?] multimappers_PropUnique_matrix = Optimus.multimappers_PropUnique_matrix
File? multimappers_EM_matrix = Optimus.multimappers_EM_matrix
File? multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix
File? multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix
File? multimappers_PropUnique_matrix = Optimus.multimappers_PropUnique_matrix
File? cell_barcodes_csv = Optimus.cell_barcodes_csv
File? checkpoint_file = Optimus.checkpoint_file
Array[File]? h5_array = Optimus.h5_array
Expand Down
31 changes: 16 additions & 15 deletions tasks/skylab/StarAlign.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -224,21 +224,23 @@ task STARsoloFastq {
String output_bam_basename
Boolean? count_exons
String? soloMultiMappers
String soloCBmatchWLtype = "1MM_multi" #"1MM_multi_Nbase_pseudocounts"

# runtime values
String samtools_star_docker_path
Int machine_mem_mb = 64000
Int cpu = 8
# by default request non preemptible machine to make sure the slow star alignment step completes
Int preemptible = 3

# If is_slidetags is true, set disk to 1000; otherwise allocate disk dynamically based on input size.
# Dynamic allocation is 3x the size of the STAR reference tarball plus 20x the size of each of the R1 and R2 FASTQ inputs (each term rounded up).
Boolean is_slidetags
Int disk = if is_slidetags then 1000 else
ceil(size(tar_star_reference, "Gi") * 3) +
ceil(size(r1_fastq, "Gi") * 20) +
ceil(size(r2_fastq, "Gi") * 20)

# runtime values
String cpu_platform = "Intel Ice Lake"
Int mem_size = 512
Int machine_mem_mb = 512000
Int cpu = 128
Int disk = 2000
# by default request non preemptible machine to make sure the slow star alignment step completes
Int preemptible = 1
}

meta {
Expand Down Expand Up @@ -314,7 +316,7 @@ task STARsoloFastq {
echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna.
exit 1;
fi

# RAM limit 33195969137
STAR \
--soloType Droplet \
--soloStrand ~{star_strand_mode} \
Expand All @@ -327,11 +329,12 @@ task STARsoloFastq {
--soloFeatures $COUNTING_MODE \
--clipAdapterType CellRanger4 \
--outFilterScoreMin 30 \
--soloCBmatchWLtype 1MM_multi \
--soloCBmatchWLtype ~{soloCBmatchWLtype} \
--soloUMIdedup 1MM_CR \
--outSAMtype BAM SortedByCoordinate \
--outSAMattributes UB UR UY CR CB CY NH GX GN sF cN \
--soloBarcodeReadLength 0 \
--limitBAMsortRAM 33195969137 \
--soloCellReadStats Standard \
~{"--soloMultiMappers " + soloMultiMappers} \
--soloUMIfiltering MultiGeneUMI_CR \
Expand All @@ -340,9 +343,7 @@ task STARsoloFastq {
# validate the bam with samtools quickcheck
samtools quickcheck -v Aligned.sortedByCoord.out.bam


echo "UMI LEN " $UMILen

touch barcodes_sn_rna.tsv
touch features_sn_rna.tsv
touch matrix_sn_rna.mtx
Expand All @@ -351,7 +352,6 @@ task STARsoloFastq {
touch Summary_sn_rna.csv
touch UMIperCellSorted_sn_rna.txt


if [[ "~{counting_mode}" == "sc_rna" ]]
then
SoloDirectory="Solo.out/Gene/raw"
Expand Down Expand Up @@ -425,10 +425,11 @@ task STARsoloFastq {

runtime {
docker: samtools_star_docker_path
memory: "~{machine_mem_mb} MiB"
disks: "local-disk ~{disk} HDD"
memory: "~{mem_size} GiB"
disks: "local-disk ~{disk} SSD"
disk: disk + " GB" # TES
cpu: cpu
cpuPlatform: cpu_platform
preemptible: preemptible
}

Expand Down
Loading