Merge branch 'CW-3930' into 'dev'

Actually output the dexseq file. Closes CW-3930. See merge request epi2melabs/workflows/wf-transcriptomes!188
epi2me-labs · Nov 5, 2024 · b3fbb11 · b3fbb11
2 parents 45a717a + 465d060
commit b3fbb11
Show file tree

Hide file tree

Showing 8 changed files with 9 additions and 8 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
           - epi2melabs==0.0.57
       - id: build_models
         name: build_models
-        entry: datamodel-codegen --strict-nullable --base-class workflow_glue.results_schema_helpers.BaseModel --use-schema-description --disable-timestamp --input results_schema.yml --input-file-type openapi --output bin/workflow_glue/results_schema.py
+        entry: datamodel-codegen --strict-nullable --base-class workflow_glue.results_schema_helpers.BaseModel --use-subclass-enum --use-schema-description --disable-timestamp --input results_schema.yml --input-file-type openapi --output bin/workflow_glue/results_schema.py
         language: python
         files: 'results_schema.yml'
         pass_filenames: false

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,11 +4,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [v1.5.0]
 ### Updated
 - Workflow report updated to use `ezcharts`.
 ### Fixed
 - Exons per isoforms histogram reporting incorrect numbers.
+- Output the `results_dexseq.tsv` file when `--de_analysis` is enabled.
 ### Removed
 - Per-class gffcompare tracking files, as a combined tracking file already exists.
 

diff --git a/README.md b/README.md
@@ -232,7 +232,7 @@ Output files may be aggregated including information for all samples or provided
 | Transcript per million counts | de_analysis/unfiltered_tpm_transcript_counts.tsv | This file shows transcripts per million (TPM) of the raw counts to facilitate comparisons across samples. | aggregated |
 | Transcript counts filtered | de_analysis/filtered_transcript_counts_with_genes.tsv | Filtered transcript counts, used for differential transcript usage analysis. Includes a reference to the associated gene ID. | aggregated |
 | Transcript info table | {{ alias }}_transcripts_table.tsv | This file details each isoform that was reconstructed from the input reads. It contains a subset of columns from the .tmap output from [gffcompare](https://ccb.jhu.edu/software/stringtie/gffcompare.shtml) | per-sample |
-| Final non redundant transcriptome | de_analysis/final_non_redundant_transcriptome.fasta | Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. | aggregated |
+| Final non redundant transcriptome | de_analysis/final_non_redundant_transcriptome.fasta | Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. Only applicable when the ref_transcriptome parameter is not provided. | aggregated |
 | Index of reference FASTA file | igv_reference/{{ ref_genome file }}.fai | Reference genome index of the FASTA file required for IGV config. | aggregated |
 | GZI index of the reference FASTA file | igv_reference/{{ ref_genome file }}.gzi | GZI Index of the reference FASTA file. | aggregated |
 | JSON configuration file for IGV browser | igv.json | JSON configuration file to be loaded in IGV for visualising alignments against the reference. | aggregated |

diff --git a/docs/07_outputs.md b/docs/07_outputs.md
@@ -25,7 +25,7 @@ Output files may be aggregated including information for all samples or provided
 | Transcript per million counts | de_analysis/unfiltered_tpm_transcript_counts.tsv | This file shows transcripts per million (TPM) of the raw counts to facilitate comparisons across samples. | aggregated |
 | Transcript counts filtered | de_analysis/filtered_transcript_counts_with_genes.tsv | Filtered transcript counts, used for differential transcript usage analysis. Includes a reference to the associated gene ID. | aggregated |
 | Transcript info table | {{ alias }}_transcripts_table.tsv | This file details each isoform that was reconstructed from the input reads. It contains a subset of columns from the .tmap output from [gffcompare](https://ccb.jhu.edu/software/stringtie/gffcompare.shtml) | per-sample |
-| Final non redundant transcriptome | de_analysis/final_non_redundant_transcriptome.fasta | Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. | aggregated |
+| Final non redundant transcriptome | de_analysis/final_non_redundant_transcriptome.fasta | Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. Only applicable when the ref_transcriptome parameter is not provided. | aggregated |
 | Index of reference FASTA file | igv_reference/{{ ref_genome file }}.fai | Reference genome index of the FASTA file required for IGV config. | aggregated |
 | GZI index of the reference FASTA file | igv_reference/{{ ref_genome file }}.gzi | GZI Index of the reference FASTA file. | aggregated |
 | JSON configuration file for IGV browser | igv.json | JSON configuration file to be loaded in IGV for visualising alignments against the reference. | aggregated |

diff --git a/main.nf b/main.nf
@@ -257,7 +257,7 @@ process assemble_transcripts{
 
     """
     stringtie --rf ${G_FLAG} -L -v -p ${task.cpus} ${params.stringtie_opts} \
-    -o  ${prefix}.gff -l ${prefix} ${bam} 2>/dev/null
+    -o  ${prefix}.gff -l ${prefix} ${bam}
      """
 }
 

diff --git a/nextflow.config b/nextflow.config
@@ -106,7 +106,7 @@ manifest {
     description     = 'Transcriptome analysis including differential expression as well as assembly and annotation of cDNA and direct RNA sequencing data.'
     mainScript      = 'main.nf'
     nextflowVersion = '>=23.04.2'
-    version         = 'v1.4.0'
+    version         = 'v1.5.0'
 }
 
 epi2melabs {

diff --git a/output_definition.json b/output_definition.json
@@ -187,7 +187,7 @@
     "final_non_redundant_transcriptome": {
       "filepath": "de_analysis/final_non_redundant_transcriptome.fasta",
       "title": "Final non redundant transcriptome",
-      "description": "Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis.",
+      "description": "Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. Only applicable when the ref_transcriptome parameter is not provided.",
       "mime-type": "text/x-fasta",
       "optional": true,
       "type": "aggregated"

diff --git a/subworkflows/differential_expression.nf b/subworkflows/differential_expression.nf
@@ -165,7 +165,7 @@ workflow differential_expression {
         de_report = analysis.flt_counts.concat(analysis.gene_counts, analysis.dge, analysis.dexseq,
         analysis.stageR, sample_sheet, merged, ref_annotation, merged_TPM, analysis.unflt_counts).collect()
         // Concat files required to be output to user without any changes
-        de_outputs_concat = analysis.cpm.concat(plotResults.out.dtu_plots, analysis.dge_pdf, analysis.dge_tsv,
+        de_outputs_concat = analysis.cpm.concat(plotResults.out.dtu_plots, analysis.dge_pdf, analysis.dge_tsv, analysis.dexseq,
         analysis.dtu_gene, analysis.dtu_transcript, analysis.dtu_stageR, analysis.dtu_pdf, analysis.flt_counts, analysis.gene_counts, merged_TPM).collect()
         count_transcripts_file = count_transcripts.out.seqkit_stats.collect()
 emit:
diff --git a/main.nf b/main.nf
@@ -257,7 +257,7 @@ process assemble_transcripts{
         """
         stringtie --rf ${G_FLAG} -L -v -p ${task.cpus} ${params.stringtie_opts} \
-        -o  ${prefix}.gff -l ${prefix} ${bam} 2>/dev/null
+        -o  ${prefix}.gff -l ${prefix} ${bam}
          """
     }
@@ Expand Down @@