diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e950f47..4626336 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ repos: - epi2melabs==0.0.57 - id: build_models name: build_models - entry: datamodel-codegen --strict-nullable --base-class workflow_glue.results_schema_helpers.BaseModel --use-schema-description --disable-timestamp --input results_schema.yml --input-file-type openapi --output bin/workflow_glue/results_schema.py + entry: datamodel-codegen --strict-nullable --base-class workflow_glue.results_schema_helpers.BaseModel --use-subclass-enum --use-schema-description --disable-timestamp --input results_schema.yml --input-file-type openapi --output bin/workflow_glue/results_schema.py language: python files: 'results_schema.yml' pass_filenames: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 315a68c..b6e84c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,11 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [v1.5.0] ### Updated - Workflow report updated to use `ezcharts`. ### Fixed - Exons per isoforms histogram reporting incorrect numbers. +- Output the `results_dexseq.tsv` file when `--de_analysis` is enabled. ### Removed - per-class gffcompare tracking files as there exists a combined tracking file. diff --git a/README.md b/README.md index bd8af7b..b51688c 100644 --- a/README.md +++ b/README.md @@ -232,7 +232,7 @@ Output files may be aggregated including information for all samples or provided | Transcript per million counts | de_analysis/unfiltered_tpm_transcript_counts.tsv | This file shows transcripts per million (TPM) of the raw counts to facilitate comparisons across samples. 
| aggregated | | Transcript counts filtered | de_analysis/filtered_transcript_counts_with_genes.tsv | Filtered transcript counts, used for differential transcript usage analysis. Includes a reference to the associated gene ID. | aggregated | | Transcript info table | {{ alias }}_transcripts_table.tsv | This file details each isoform that was reconstructed from the input reads. It contains a subset of columns from the .tmap output from [gffcompare](https://ccb.jhu.edu/software/stringtie/gffcompare.shtml) | per-sample | -| Final non redundant transcriptome | de_analysis/final_non_redundant_transcriptome.fasta | Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. | aggregated | +| Final non redundant transcriptome | de_analysis/final_non_redundant_transcriptome.fasta | Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. Only applicable when the ref_transcriptome parameter is not provided. | aggregated | | Index of reference FASTA file | igv_reference/{{ ref_genome file }}.fai | Reference genome index of the FASTA file required for IGV config. | aggregated | | GZI index of the reference FASTA file | igv_reference/{{ ref_genome file }}.gzi | GZI Index of the reference FASTA file. | aggregated | | JSON configuration file for IGV browser | igv.json | JSON configuration file to be loaded in IGV for visualising alignments against the reference. | aggregated | diff --git a/docs/07_outputs.md b/docs/07_outputs.md index 9a46245..2cf4632 100644 --- a/docs/07_outputs.md +++ b/docs/07_outputs.md @@ -25,7 +25,7 @@ Output files may be aggregated including information for all samples or provided | Transcript per million counts | de_analysis/unfiltered_tpm_transcript_counts.tsv | This file shows transcripts per million (TPM) of the raw counts to facilitate comparisons across samples. 
| aggregated | | Transcript counts filtered | de_analysis/filtered_transcript_counts_with_genes.tsv | Filtered transcript counts, used for differential transcript usage analysis. Includes a reference to the associated gene ID. | aggregated | | Transcript info table | {{ alias }}_transcripts_table.tsv | This file details each isoform that was reconstructed from the input reads. It contains a subset of columns from the .tmap output from [gffcompare](https://ccb.jhu.edu/software/stringtie/gffcompare.shtml) | per-sample | -| Final non redundant transcriptome | de_analysis/final_non_redundant_transcriptome.fasta | Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. | aggregated | +| Final non redundant transcriptome | de_analysis/final_non_redundant_transcriptome.fasta | Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. Only applicable when the ref_transcriptome parameter is not provided. | aggregated | | Index of reference FASTA file | igv_reference/{{ ref_genome file }}.fai | Reference genome index of the FASTA file required for IGV config. | aggregated | | GZI index of the reference FASTA file | igv_reference/{{ ref_genome file }}.gzi | GZI Index of the reference FASTA file. | aggregated | | JSON configuration file for IGV browser | igv.json | JSON configuration file to be loaded in IGV for visualising alignments against the reference. 
| aggregated | diff --git a/main.nf b/main.nf index ba2ac2c..228903e 100644 --- a/main.nf +++ b/main.nf @@ -257,7 +257,7 @@ process assemble_transcripts{ """ stringtie --rf ${G_FLAG} -L -v -p ${task.cpus} ${params.stringtie_opts} \ - -o ${prefix}.gff -l ${prefix} ${bam} 2>/dev/null + -o ${prefix}.gff -l ${prefix} ${bam} """ } diff --git a/nextflow.config b/nextflow.config index c13c030..2714539 100644 --- a/nextflow.config +++ b/nextflow.config @@ -106,7 +106,7 @@ manifest { description = 'Transcriptome analysis including differential expression as well as assembly and annotation of cDNA and direct RNA sequencing data.' mainScript = 'main.nf' nextflowVersion = '>=23.04.2' - version = 'v1.4.0' + version = 'v1.5.0' } epi2melabs { diff --git a/output_definition.json b/output_definition.json index 1aedcb3..ff783c1 100644 --- a/output_definition.json +++ b/output_definition.json @@ -187,7 +187,7 @@ "final_non_redundant_transcriptome": { "filepath": "de_analysis/final_non_redundant_transcriptome.fasta", "title": "Final non redundant transcriptome", - "description": "Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis.", + "description": "Transcripts that were used for differential expression analysis including novel transcripts with the identifiers used for DE analysis. 
Only applicable when the ref_transcriptome parameter is not provided.", "mime-type": "text/x-fasta", "optional": true, "type": "aggregated" diff --git a/subworkflows/differential_expression.nf b/subworkflows/differential_expression.nf index f21d1b5..aa0d732 100644 --- a/subworkflows/differential_expression.nf +++ b/subworkflows/differential_expression.nf @@ -165,7 +165,7 @@ workflow differential_expression { de_report = analysis.flt_counts.concat(analysis.gene_counts, analysis.dge, analysis.dexseq, analysis.stageR, sample_sheet, merged, ref_annotation, merged_TPM, analysis.unflt_counts).collect() // Concat files required to be output to user without any changes - de_outputs_concat = analysis.cpm.concat(plotResults.out.dtu_plots, analysis.dge_pdf, analysis.dge_tsv, + de_outputs_concat = analysis.cpm.concat(plotResults.out.dtu_plots, analysis.dge_pdf, analysis.dge_tsv, analysis.dexseq, analysis.dtu_gene, analysis.dtu_transcript, analysis.dtu_stageR, analysis.dtu_pdf, analysis.flt_counts, analysis.gene_counts, merged_TPM).collect() count_transcripts_file = count_transcripts.out.seqkit_stats.collect() emit: