From 5da3f021e8e914955d647dc7436cce083dac233d Mon Sep 17 00:00:00 2001 From: naumenko-sa Date: Sat, 11 May 2019 00:04:09 -0400 Subject: [PATCH] test for ggd recipe --- bcbio.pbs | 2 +- cre.bcbio.upgrade.sh | 4 +-- cre.cloudbiolinux.prepare_gnomad_test.sh | 10 ++++++++ cre.immunopanels.R | 32 ------------------------ cre.rohet.naive.sh | 5 ++-- 5 files changed, 16 insertions(+), 37 deletions(-) create mode 100755 cre.cloudbiolinux.prepare_gnomad_test.sh delete mode 100644 cre.immunopanels.R diff --git a/bcbio.pbs b/bcbio.pbs index cb61e72..c7c1f74 100644 --- a/bcbio.pbs +++ b/bcbio.pbs @@ -32,7 +32,7 @@ then fi # testing environment of bcbio -if [ -n $testing ] && [ $testing == "T" ] +if [ -n $testing ] && [ "$testing" == "T" ] then . /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_testing/.test_profile echo "Using test bcbio instance" diff --git a/cre.bcbio.upgrade.sh b/cre.bcbio.upgrade.sh index cb2f503..b939657 100755 --- a/cre.bcbio.upgrade.sh +++ b/cre.bcbio.upgrade.sh @@ -63,11 +63,11 @@ which bcbio_nextgen.py # --genomes hg38 # gnomad 14h -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget gnomad +bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget gnomad # bcbio_nextgen.py upgrade -u skip --genomes hg38 --datatarget gnomad # dbnsfp -bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget dbnsfp +# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget dbnsfp # rnaseq # bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget rnaseq diff --git a/cre.cloudbiolinux.prepare_gnomad_test.sh b/cre.cloudbiolinux.prepare_gnomad_test.sh new file mode 100755 index 0000000..31561bd --- /dev/null +++ b/cre.cloudbiolinux.prepare_gnomad_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# prepares full chromosome test set for gnomad to test ggd recipe +prefix=gnomad.exomes.r2.1.sites.grch38.chr +for chrom in $(seq 1 22;echo X Y) +do + curl -r 0-1000000 -O 
http://ftp.ensemblorg.ebi.ac.uk/pub/data_files/homo_sapiens/GRCh38/variation_genotype/gnomad/r2.1/exomes/${prefix}${chrom}_noVEP.vcf.gz + gunzip -c ${prefix}${chrom}_noVEP.vcf.gz | head -n 1400 > ${prefix}${chrom}_noVEP.vcf + bgzip -f ${prefix}${chrom}_noVEP.vcf + tabix ${prefix}${chrom}_noVEP.vcf.gz +done diff --git a/cre.immunopanels.R b/cre.immunopanels.R deleted file mode 100644 index 5df143d..0000000 --- a/cre.immunopanels.R +++ /dev/null @@ -1,32 +0,0 @@ -args <- commandArgs(trailingOnly = T) - -#input_report = "182208.wes.2018-11-30.csv" - -input_report <- args[1] - -variants <- read.csv(input_report, stringsAsFactors = F) - -lupus <- read.csv("~/cre/data/lupus.csv", stringsAsFactors = F) - -lupus.langefeld.gwas <- read.csv("~/cre/data/lupus.langefeld.gwas.csv", stringsAsFactors = F) - -lupus.eastasians.gwas <- read.csv("~/cre/data/lupus.eastasians.gwas.csv", stringsAsFactors = F) -primary_immunodeficiency <- read.csv("~/cre/data/primary_immunodeficiency.csv", stringsAsFactors = F) - -periodic_fever <- read.csv("~/cre/data/periodic_fever_syndromes.csv", stringsAsFactors = F) - -mas <- read.csv("~/cre/data/mas.csv") - -recurrent_fever_SK <- read.csv("~/cre/data/recurrent_fever_syndrome_panel_from_crm.csv") - -variants$Lupus_panel <- ifelse(variants$Ensembl_gene_id %in% lupus$ensembl_gene_id,"Lupus_panel",NA) -variants$Lupus_langefeld_gwas <- ifelse(variants$Ensembl_gene_id %in% lupus.langefeld.gwas$ensembl_gene_id,"Lupus_langefeld_gwas",NA) -variants$Lupus_eastasians_gwas <- ifelse(variants$Ensembl_gene_id %in% lupus.eastasians.gwas$ensembl_gene_id,"Lupus_eastasians_gwas",NA) -variants$Primary_immunodeficiency_panel <- ifelse(variants$Ensembl_gene_id %in% primary_immunodeficiency$PanelAPP.EnsemblId.GRch37,"Primary_immunodeficiency_panel",NA) -variants$Periodic_fever_panel <- ifelse(variants$Ensembl_gene_id %in% periodic_fever$PanelAPP.EnsemblId.GRch37,"Periodic_fever",NA) -variants$MAS <- ifelse(variants$Ensembl_gene_id %in% mas$ensembl_gene_id,"MAS",NA) 
-variants$Recurrent_fever_SK <- ifelse(variants$Ensembl_gene_id %in% recurrent_fever_SK$ensembl_gene_id, "Recurrent_fever_SK",NA) - -output_report <- sub(".csv", ".immunopanels.csv", input_report) - -write.csv(variants, output_report, row.names = F) \ No newline at end of file diff --git a/cre.rohet.naive.sh b/cre.rohet.naive.sh index 81ff09e..2d34e4d 100755 --- a/cre.rohet.naive.sh +++ b/cre.rohet.naive.sh @@ -19,8 +19,9 @@ then maf=$3 fi #depth is for 3 samples -gemini query -q "select chrom,start+1 as pos, ref, alt,impact,qual,depth, gene, max_aaf_all as maf, gts."$sample",gt_types."$sample",gt_alt_depths."$sample" from variants where -type='snp' and depth>=10 and qual>=500 and max_aaf_all<="$maf --gt-filter "gt_types."$sample" != 2" $2 | grep -v chrGL | sed s/chr// | sed s/"\t"/","/g | sort -t "," -k1,1 -k2,2n \ +# depth is in old databases +gemini query -q "select chrom,start+1 as pos, ref, alt,impact,qual,dp, gene, max_aaf_all as maf, gts."$sample",gt_types."$sample",gt_alt_depths."$sample" from variants where +type='snp' and dp>=10 and qual>=500 and max_aaf_all<="$maf --gt-filter "gt_types."$sample" != 2" $2 | grep -v chrGL | sed s/chr// | sed s/"\t"/","/g | sort -t "," -k1,1 -k2,2n \ | tee -a $sample.rohet_variants.csv | awk -F "," ' BEGIN{ prev_genotype=0;