Skip to content

Commit

Permalink
Make changes to gtf-to-bed process
Browse files Browse the repository at this point in the history
  • Loading branch information
marcellevstek committed Dec 20, 2024
1 parent 2510fb1 commit 5b21b53
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 14 deletions.
6 changes: 6 additions & 0 deletions docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ Added
- Add ``--bam-output`` input argument to ``vc-gatk4-hc``
- Add ``--max-mnp-distance`` input argument to ``vc-gatk4-hc``

Changed
-------
- Change output data object name in ``gtf-to-bed`` process,
  make the gene set input a required field
  and disable the canonical transcripts input if the gene feature type is selected


===================
61.0.0 - 2024-11-21
Expand Down
28 changes: 14 additions & 14 deletions resolwe_bio/processes/support_processors/gtf_to_bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@


class GTFtoBED(Process):
"""GTF to BED conversion.
"""GTF to BED conversion for predefined genes and feature types.
Note that this process only works with ENSEMBL annotations.
"""

slug = "gtf-to-bed"
name = "GTF to BED"
process_type = "data:bed"
version = "1.1.0"
version = "1.2.0"
category = "Other"
data_name = "Converted GTF to BED file"
data_name = "{{ geneset|name|default('?') }}"
scheduling_class = SchedulingClass.BATCH
persistence = Persistence.CACHED

Expand Down Expand Up @@ -100,14 +100,15 @@ class Input:
"geneset",
label="Gene set",
description="Gene set to use for filtering.",
required=False,
required=True,
)

canonical_transcripts = DataField(
"geneset",
label="Canonical transcripts",
description="Canonical transcripts to use for filtering. Only used for transcript and exon feature types.",
required=False,
disabled="feature_type == 'gene'",
)

output_strand = BooleanField(
Expand Down Expand Up @@ -180,17 +181,16 @@ def run(self, inputs, outputs):
gtf = gtf[gtf["source"].isin(inputs.annotation_source)]
gtf = gtf[gtf["feature_type"] == feature_type]

if inputs.geneset:
if inputs.annotation.output.species != inputs.geneset.output.species:
self.error(
"Gene set data object species does not match the annotation species."
)
geneset = pd.read_csv(
inputs.geneset.output.geneset.path,
delimiter="\t",
names=["ID"],
if inputs.annotation.output.species != inputs.geneset.output.species:
self.error(
"Species of the gene set data object does not match the species of the annotation data object."
)
gtf = gtf[gtf["gene_id"].isin(geneset["ID"])]
geneset = pd.read_csv(
inputs.geneset.output.geneset.path,
delimiter="\t",
names=["ID"],
)
gtf = gtf[gtf["gene_id"].isin(geneset["ID"])]

if inputs.canonical_transcripts and not feature_type == "gene":
if (
Expand Down
2 changes: 2 additions & 0 deletions resolwe_bio/tests/processes/test_support_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1789,6 +1789,7 @@ def test_gtf_to_bed(self):
"gtf-to-bed",
{
"annotation": gtf.id,
"geneset": geneset.id,
"annotation_field": "gene_name",
},
)
Expand All @@ -1798,6 +1799,7 @@ def test_gtf_to_bed(self):
"gtf-to-bed",
{
"annotation": gtf.id,
"geneset": geneset.id,
"annotation_field": "gene_id_feature_id",
"feature_type": "exon",
},
Expand Down

0 comments on commit 5b21b53

Please sign in to comment.