Make changes to gtf-to-bed process

genialis · Dec 20, 2024 · commit 5b21b53
1 parent 2510fb1
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 14 deletions.
diff --git a/docs/CHANGELOG.rst b/docs/CHANGELOG.rst
@@ -35,6 +35,12 @@ Added
 - Add ``--bam-output`` input argument to ``vc-gatk4-hc``
 - Add ``--max-mnp-distance`` input argument to ``vc-gatk4-hc``
 
+Changed
+-------
+- Change output data object name in ``gtf-to-bed`` process,
+  make the gene set input required
+  and disable the canonical transcripts input if gene feature type is selected
+
 
 ===================
 61.0.0 - 2024-11-21

diff --git a/resolwe_bio/processes/support_processors/gtf_to_bed.py b/resolwe_bio/processes/support_processors/gtf_to_bed.py
@@ -15,17 +15,17 @@
 
 
 class GTFtoBED(Process):
-    """GTF to BED conversion.
+    """GTF to BED conversion for predefined genes and feature types.
 
     Note that this process only works with ENSEMBL annotations.
     """
 
     slug = "gtf-to-bed"
     name = "GTF to BED"
     process_type = "data:bed"
-    version = "1.1.0"
+    version = "1.2.0"
     category = "Other"
-    data_name = "Converted GTF to BED file"
+    data_name = "{{ geneset|name|default('?') }}"
     scheduling_class = SchedulingClass.BATCH
     persistence = Persistence.CACHED
 
@@ -100,14 +100,15 @@ class Input:
             "geneset",
             label="Gene set",
             description="Gene set to use for filtering.",
-            required=False,
+            required=True,
         )
 
         canonical_transcripts = DataField(
             "geneset",
             label="Canonical transcripts",
             description="Canonical transcripts to use for filtering. Only used for transcript and exon feature types.",
             required=False,
+            disabled="feature_type == 'gene'",
         )
 
         output_strand = BooleanField(
@@ -180,17 +181,16 @@ def run(self, inputs, outputs):
         gtf = gtf[gtf["source"].isin(inputs.annotation_source)]
         gtf = gtf[gtf["feature_type"] == feature_type]
 
-        if inputs.geneset:
-            if inputs.annotation.output.species != inputs.geneset.output.species:
-                self.error(
-                    "Gene set data object species does not match the annotation species."
-                )
-            geneset = pd.read_csv(
-                inputs.geneset.output.geneset.path,
-                delimiter="\t",
-                names=["ID"],
+        if inputs.annotation.output.species != inputs.geneset.output.species:
+            self.error(
+                "Species of the gene set data object does not match the species of the annotation data object."
             )
-            gtf = gtf[gtf["gene_id"].isin(geneset["ID"])]
+        geneset = pd.read_csv(
+            inputs.geneset.output.geneset.path,
+            delimiter="\t",
+            names=["ID"],
+        )
+        gtf = gtf[gtf["gene_id"].isin(geneset["ID"])]
 
         if inputs.canonical_transcripts and not feature_type == "gene":
             if (

diff --git a/resolwe_bio/tests/processes/test_support_processors.py b/resolwe_bio/tests/processes/test_support_processors.py
@@ -1789,6 +1789,7 @@ def test_gtf_to_bed(self):
             "gtf-to-bed",
             {
                 "annotation": gtf.id,
+                "geneset": geneset.id,
                 "annotation_field": "gene_name",
             },
         )
@@ -1798,6 +1799,7 @@ def test_gtf_to_bed(self):
             "gtf-to-bed",
             {
                 "annotation": gtf.id,
+                "geneset": geneset.id,
                 "annotation_field": "gene_id_feature_id",
                 "feature_type": "exon",
             },