Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: download alphamissense scores #307

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,7 @@ annotations:
# For integration of SpliceAI, preprocessed snv and indel scores (including index files) are required.
# Scores are available on https://basespace.illumina.com/s/otSPW8hnhaZR
#- SpliceAI,snv=<path/to/spliceai_scores.raw.snv.hg38.vcf.gz>,indel=<path/to/spliceai_scores.raw.indel.hg38.vcf.gz>
# For annotation of AlphaMissense scores a tsv-file containing processed scores is required.
# Scores are available on https://zenodo.org/records/10813168
# A tabix index is required and can be created by calling `tabix -s 1 -b 2 -e 2 -f -S 1 AlphaMissense_hg38.tsv.gz`
#- AlphaMissense,file=<path/to/AlphaMissense_hg38.tsv.gz>
#- AlphaMissense

# printing of variants in a table format (might be deprecated soon)
tables:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ views:
ensembl:
url: https://www.ensembl.org/Homo_sapiens/Transcript/Summary?t={feature}
hgvsc:
optional: true
custom: ?read_file(input.linkouts)
consequence:
plot:
Expand Down
2 changes: 2 additions & 0 deletions workflow/rules/annotation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ rule annotate_variants:
plugins="resources/vep/plugins",
revel=lambda wc: get_plugin_aux("REVEL"),
revel_tbi=lambda wc: get_plugin_aux("REVEL", True),
alphamissense=lambda wc: get_plugin_aux("AlphaMissense"),
alphamissense_tbi=lambda wc: get_plugin_aux("AlphaMissense", True),
fasta=genome,
fai=genome_fai,
output:
Expand Down
34 changes: 29 additions & 5 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ import yaml
import pandas as pd
from snakemake.utils import validate


ruleorder: tabix_plugin_scores > tabix_known_variants


validate(config, schema="../schemas/config.schema.yaml")

samples = (
Expand Down Expand Up @@ -516,7 +520,7 @@ def get_markduplicates_extra(wc):
def get_group_bams(wildcards, bai=False):
ext = "bai" if bai else "bam"
if is_activated("primers/trimming") and not group_is_paired_end(wildcards.group):
WorkflowError("Primer trimming is only available for paired end data.")
raise WorkflowError("Primer trimming is only available for paired end data.")
return expand(
"results/recal/{sample}.{ext}",
sample=get_group_samples(wildcards.group),
Expand Down Expand Up @@ -725,9 +729,12 @@ def get_vep_threads():

def get_plugin_aux(plugin, index=False):
    """Return the auxiliary score file (or its tabix index) for a VEP plugin.

    Args:
        plugin: Plugin name as it appears in the config list
            ``annotations/vep/final_calls/plugins``.
        index: If true, return the path of the ``.tbi`` index instead of the
            score file itself.

    Returns:
        The resource path for ``REVEL`` or ``AlphaMissense`` when that plugin
        is listed in the config; an empty list in every other case, so the
        result can be used directly as an (optionally empty) rule input.
    """
    # A plugin that is not listed in the config contributes no input files.
    if plugin not in config["annotations"]["vep"]["final_calls"]["plugins"]:
        return []

    # Computed once and shared by all plugins with a score file.
    suffix = ".tbi" if index else ""
    if plugin == "REVEL":
        return f"resources/revel_scores.tsv.gz{suffix}"
    if plugin == "AlphaMissense":
        return f"resources/alphamissense_scores.tsv.gz{suffix}"
    # Listed plugin without an auxiliary score file.
    return []


Expand Down Expand Up @@ -982,10 +989,27 @@ def get_tabix_params(wildcards):
raise ValueError("Invalid format for tabix: {}".format(wildcards.format))


def get_tabix_revel_params():
    """Build the tabix command-line flags for indexing the REVEL score file.

    The position column of the REVEL table depends on the configured
    reference build: column 2 for GRCh37, column 3 for anything else.
    """
    if config["ref"]["build"] == "GRCh37":
        position_column = 2
    else:
        position_column = 3
    return "-f -s 1 -b {col} -e {col}".format(col=position_column)
def get_tabix_plugin_params(plugin):
    """Return the tabix command-line flags for indexing a plugin score file.

    Args:
        plugin: Lower-case plugin key, either ``"revel"`` or
            ``"alphamissense"``.

    Returns:
        A string of tabix options selecting the sequence and position columns
        of the respective score table.

    Raises:
        WorkflowError: If ``plugin`` is not one of the supported keys.
    """
    if plugin == "revel":
        # Indexing of REVEL-score file where the column depends on the reference
        column = 2 if config["ref"]["build"] == "GRCh37" else 3
        return f"-f -s 1 -b {column} -e {column}"
    if plugin == "alphamissense":
        # Sequence name in column 1, position in column 2, one header line to
        # skip (-S 1). The force flag is passed only once.
        return "-f -s 1 -b 2 -e 2 -S 1"
    raise WorkflowError(
        f"Unsupported plugin for obtaining tabix parameters: {plugin}"
    )


def get_alphamissense_url(wc):
    """Return the Zenodo download URL of the AlphaMissense score table.

    The file name is chosen from the configured reference build: GRCh37 maps
    to the ``hg19`` file and GRCh38 to the ``hg38`` file. The ``wc`` argument
    is accepted but not used by the function body.

    Raises:
        WorkflowError: If the configured build is neither GRCh37 nor GRCh38.
    """
    url_template = "https://zenodo.org/records/10813168/files/AlphaMissense_{}.tsv.gz"
    if config["ref"]["build"] == "GRCh37":
        return url_template.format("hg19")
    if config["ref"]["build"] == "GRCh38":
        return url_template.format("hg38")
    raise WorkflowError(
        "Invalid reference for AlphaMissense annotation. Only GRCh37 and GRCh38 supported."
    )


def get_untrimmed_fastqs(wc):
Expand Down
25 changes: 20 additions & 5 deletions workflow/rules/plugins.smk
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,27 @@ rule process_revel_scores:
"""


use rule tabix_known_variants as tabix_revel_scores with:
# Download the AlphaMissense score table for the configured reference build.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as the score file; --location follows redirects in case
# the host answers with one.
rule download_alphamissense_scores:
    output:
        "resources/alphamissense_scores.tsv.gz",
    params:
        url=get_alphamissense_url,
    log:
        "logs/vep_plugins/download_alphamissense.log",
    conda:
        "../envs/curl.yaml"
    shell:
        """
        curl --fail --location {params.url} -o {output} &> {log}
        """


use rule tabix_known_variants as tabix_plugin_scores with:
input:
"resources/revel_scores.tsv.gz",
"resources/{plugin}_scores.tsv.gz",
output:
"resources/revel_scores.tsv.gz.tbi",
"resources/{plugin}_scores.tsv.gz.tbi",
params:
get_tabix_revel_params(),
lambda wc: get_tabix_plugin_params(wc.plugin),
log:
"logs/tabix/revel.log",
"logs/tabix/{plugin}.log",
Loading