diff --git a/docs/CHANGELOG.rst b/docs/CHANGELOG.rst index 5638fdc8e..63e03435d 100644 --- a/docs/CHANGELOG.rst +++ b/docs/CHANGELOG.rst @@ -35,6 +35,12 @@ Added - Add ``--bam-output`` input argument to ``vc-gatk4-hc`` - Add ``--max-mnp-distance`` input argument to ``vc-gatk4-hc`` +Changed +------- +- Change output data object name in ``gtf-to-bed`` process, + add geneset as a required field + and hide canonical transcripts table if gene feature type is selected + =================== 61.0.0 - 2024-11-21 diff --git a/resolwe_bio/processes/support_processors/gtf_to_bed.py b/resolwe_bio/processes/support_processors/gtf_to_bed.py index 6ca07e58a..b6d084173 100644 --- a/resolwe_bio/processes/support_processors/gtf_to_bed.py +++ b/resolwe_bio/processes/support_processors/gtf_to_bed.py @@ -15,7 +15,7 @@ class GTFtoBED(Process): - """GTF to BED conversion. + """GTF to BED conversion for predefined genes and feature types. Note that this process only works with ENSEMBL annotations. """ @@ -23,9 +23,9 @@ class GTFtoBED(Process): slug = "gtf-to-bed" name = "GTF to BED" process_type = "data:bed" - version = "1.1.0" + version = "1.2.0" category = "Other" - data_name = "Converted GTF to BED file" + data_name = "{{ geneset|name|default('?') }}" scheduling_class = SchedulingClass.BATCH persistence = Persistence.CACHED @@ -100,7 +100,7 @@ class Input: "geneset", label="Gene set", description="Gene set to use for filtering.", - required=False, + required=True, ) canonical_transcripts = DataField( @@ -108,6 +108,7 @@ class Input: label="Canonical transcripts", description="Canonical transcripts to use for filtering. Only used for transcript and exon feature types.", required=False, + disabled="feature_type == 'gene'", ) output_strand = BooleanField( @@ -180,17 +181,16 @@ def run(self, inputs, outputs): gtf = gtf[gtf["source"].isin(inputs.annotation_source)] gtf = gtf[gtf["feature_type"] == feature_type] - if inputs.geneset: - if inputs.annotation.output.species != inputs.geneset.output.species: - self.error( - "Gene set data object species does not match the annotation species." - ) - geneset = pd.read_csv( - inputs.geneset.output.geneset.path, - delimiter="\t", - names=["ID"], + if inputs.annotation.output.species != inputs.geneset.output.species: + self.error( + "Species of the gene set data object does not match the species of the annotation data object." ) - gtf = gtf[gtf["gene_id"].isin(geneset["ID"])] + geneset = pd.read_csv( + inputs.geneset.output.geneset.path, + delimiter="\t", + names=["ID"], + ) + gtf = gtf[gtf["gene_id"].isin(geneset["ID"])] if inputs.canonical_transcripts and not feature_type == "gene": if ( diff --git a/resolwe_bio/tests/processes/test_support_processors.py b/resolwe_bio/tests/processes/test_support_processors.py index 09783c676..2e7a64d30 100644 --- a/resolwe_bio/tests/processes/test_support_processors.py +++ b/resolwe_bio/tests/processes/test_support_processors.py @@ -1789,6 +1789,7 @@ def test_gtf_to_bed(self): "gtf-to-bed", { "annotation": gtf.id, + "geneset": geneset.id, "annotation_field": "gene_name", }, ) @@ -1798,6 +1799,7 @@ def test_gtf_to_bed(self): "gtf-to-bed", { "annotation": gtf.id, + "geneset": geneset.id, "annotation_field": "gene_id_feature_id", "feature_type": "exon", },