diff --git a/cre.panel_filter.R b/cre.panel_filter.R index de9de4a..70157d3 100644 --- a/cre.panel_filter.R +++ b/cre.panel_filter.R @@ -5,7 +5,7 @@ # variant_report.csv is the output of cre has Ensembl_gene_id, # panel.csv = gene panel with ensembl_gene_id column filter_variants <- function(variant_report.csv, panel.csv, output.csv){ - variants <- read.csv(variant_report.csv, stringsAsFactors = F) + variants <- read.csv(variant_report.csv, stringsAsFactors = F) panel <- read.csv(panel.csv, stringsAsFactors = F) variants.panel <- variants[variants$Ensembl_gene_id %in% panel$ensembl_gene_id,] write.csv(variants.panel, output.csv, row.names = F) @@ -36,8 +36,8 @@ filter_variants_genomics_england_panel <- function(variant_report.csv, panel.tsv args = commandArgs(trailingOnly = T) -if (is.null(args[4])){ - filter_variants(args[1], args[2], args[3]) -}else{ - filter_variants_genomics_england_panel(args[1], args[2], args[3]) -} \ No newline at end of file +#if (is.null(args[4])){ +filter_variants(args[1], args[2], args[3]) +#}else{ +# filter_variants_genomics_england_panel(args[1], args[2], args[3]) +#} \ No newline at end of file diff --git a/cre.panel_filter.tcag.R b/cre.panel_filter.tcag.R index 772bf62..2f485f6 100644 --- a/cre.panel_filter.tcag.R +++ b/cre.panel_filter.tcag.R @@ -6,6 +6,6 @@ library(tidyverse) args = commandArgs(trailingOnly = T) input_table <- read_tsv(args[1]) panel <- read_csv(args[2]) -filtered_table <- input_table %>% filter(FILTER=="PASS", gene_symbol %in% panel$Gene_name, +filtered_table <- input_table %>% filter(FILTER=="PASS", gene_symbol %in% panel$external_gene_name, !typeseq_priority %in% c("intronic")) write_excel_csv(filtered_table, args[3]) diff --git a/cre.panel_filter.tcag.sv.R b/cre.panel_filter.tcag.sv.R new file mode 100644 index 0000000..f6b1e0f --- /dev/null +++ b/cre.panel_filter.tcag.sv.R @@ -0,0 +1,10 @@ +############################################################################### +# Filter TCAG small variant report +############################################################################### +library(tidyverse) + +args = commandArgs(trailingOnly = T) +input_table <- read_tsv(args[1]) +panel <- read_csv(args[2]) +filtered_table <- input_table %>% filter(FILTER=="PASS", gene_symbol %in% panel$external_gene_name) +write_excel_csv(filtered_table, args[3]) diff --git a/cre.vcf2db.sh b/cre.vcf2db.sh new file mode 100755 index 0000000..aba345f --- /dev/null +++ b/cre.vcf2db.sh @@ -0,0 +1,54 @@ +#!/bin/bash +#PBS -l walltime=23:00:00,nodes=1:ppn=1 +#PBS -joe . +#PBS -d . +#PBS -l vmem=50g,mem=50g + +# 50g is crucial -20,30 crashes sometimes + +# load annotated vcf to gemini.db +# parameters: +# vcf = file.vcf.gz, no tabix index is needed in the dir +# project = case = family = S11 (example) + +# qsub ~/cre/cre.vcf2cre.sh -v vcf=file.vcf.gz,project=412 + +. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.test_profile + +bname=`basename $vcf .vcf.gz` + +bcftools query -l $vcf > samples.txt +> $project.ped +for sample in `cat samples.txt` +do + echo -e "1\t"$sample"\t0\t0\t0\t0" >> $project.ped +done +ped=$project.ped + +vcf2db.py $vcf.vcf.gz $ped ${project}-ensemble.db + +mkdir $project + +mv ${project}-ensemble.db $project + +mv $project.sorted.decomposed.vepeffects.annotated.vcf.gz ${project}/${project}-ensemble-annotated-decomposed.vcf.gz +mv $project.sorted.decomposed.vepeffects.annotated.vcf.gz.tbi ${project}/${project}-ensemble-annotated-decomposed.vcf.gz.tbi + +cd $project +ln -s ${project}-ensemble-annotated-decomposed.vcf.gz ${project}-gatk-haplotype-annotated-decomposed.vcf.gz +ln -s ${project}-ensemble-annotated-decomposed.vcf.gz.tbi ${project}-gatk-haplotype-annotated-decomposed.vcf.gz.tbi + +cd .. + +rm $bname.no_anno.vcf.gz +rm $bname.no_anno.vcf.gz.tbi + +rm $project.vcf.gz +rm $project.vcf.gz.tbi + +rm $project.sorted*.vcf.gz +rm $project.sorted*.vcf.gz.tbi + +echo "#####################################################" +echo `date` " DONE" +echo "#####################################################"