From cc9b1934dd2f978fcbc1b9571940544dbdd97dca Mon Sep 17 00:00:00 2001
From: naumenko-sa
Date: Fri, 12 Apr 2019 23:51:38 -0400
Subject: [PATCH] ROH, ROHET naive analysis

---
 cre.roh.naive.sh   |  6 ++--
 cre.rohet.naive.sh | 79 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100755 cre.rohet.naive.sh

diff --git a/cre.roh.naive.sh b/cre.roh.naive.sh
index d9d8b0b..cb27163 100755
--- a/cre.roh.naive.sh
+++ b/cre.roh.naive.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # runs of HOM MAF 5% variant in proband (CH0620) affected sib (CH0621) and in both
 # filters DP=50, QUAL>=500
-# gt_types=0=HOM_REF or 3 = HOM_ALT, 1 = HET
+# gt_types=0=HOM_REF or 3 = HOM_ALT, 1 = HET, 2 - no call
 # usage:
 # cre.roh.naive.sh sample gemini.db [maf=0.05]
 # output:
@@ -11,7 +11,7 @@
 sample=$1
 echo "chrom,pos,ref,alt,impact,qual,dp,gene,maf,gts."$sample",gt_types."$sample",gt_alt_depths."$sample",stretch_length_variants,stretch_length_bp,stretch_id,stretch_genes" | tee $sample.roh_variants.tsv
 
-maf=1
+maf=0.05
 if [ -n "$3" ]
 then
     maf=$3
@@ -60,7 +60,7 @@ BEGIN{
     prev=genotype;
     prev_chrom=$1;
     print $0","stretch_length_variants","stretch_length_bp","stretch_id",\""stretch_genes"\"";
-}' | grep -v "0$" | awk -F ',' '{ if ($13>9) print $0;}' \
+}' | grep -v "0$" | awk -F ',' '{ if ($13>5) print $0;}' \
 | sort -t "," -k1,1 -k15,15n \
 | awk -F "," '
 BEGIN{
diff --git a/cre.rohet.naive.sh b/cre.rohet.naive.sh
new file mode 100755
index 0000000..6e55a66
--- /dev/null
+++ b/cre.rohet.naive.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Naive detection of runs of heterozygous (HET) SNPs for one sample of a GEMINI
+# database. A run is a series of consecutive called variants on one chromosome
+# that are all HET (gt_types == 1); a HOM_REF (0) or HOM_ALT (3) call ends it.
+# Query filters: type='snp', depth>=10, qual>=100, max_aaf_all<=maf; no-calls
+# (gt_types == 2) are excluded by the genotype filter.
+# usage:
+#   cre.rohet.naive.sh sample gemini.db [maf=0.05]
+# output:
+#   - sample.roh_variants.tsv - all queried variants, written with tee for debugging
+#     NOTE(review): cre.roh.naive.sh writes the same file name - confirm intended
+#   - stdout: one row per run of at least 10 HET variants (last variant of the run)
+
+set -euo pipefail
+
+[ "$#" -ge 2 ] || { echo "usage: $0 sample gemini.db [maf=0.05]" >&2; exit 1; }
+
+sample=$1
+db=$2
+maf=${3:-0.05}
+variants_file="${sample}.roh_variants.tsv"
+
+echo "chrom,pos,ref,alt,impact,qual,dp,gene,maf,gts.${sample},gt_types.${sample},gt_alt_depths.${sample},stretch_length_variants,stretch_length_bp,stretch_id,stretch_genes" | tee "$variants_file"
+
+# Pipeline: query -> csv -> sort by chrom,pos -> scan for HET runs -> keep the
+# longest row of each run.
+# chrom is sorted as text: a numeric key ranks every non-numeric name (chr1, X)
+# as 0, which interleaves chromosomes and breaks the per-chromosome run scan.
+# The second sort adds the run length (field 13) as the last key so that the
+# final row of each (chrom, stretch_id) group is the complete run.
+gemini query -q "select chrom,start+1 as pos, ref, alt,impact,qual,depth, gene, max_aaf_all as maf, gts.${sample},gt_types.${sample},gt_alt_depths.${sample} from variants where
+type='snp' and depth>=10 and qual>=100 and max_aaf_all<=${maf}" --gt-filter "gt_types.${sample} != 2" "$db" \
+| tr '\t' ',' \
+| sort -t "," -k1,1 -k2,2n \
+| tee -a "$variants_file" \
+| awk -F "," -v min_variants=10 '
+BEGIN{ prev=0; prev_chrom=""; stretch_length_variants=0; }
+{
+    genotype=$11;
+    if (genotype != 1){
+        # a non-HET call ends the current run
+        stretch_length_variants=0;
+        stretch_length_bp=0;
+        stretch_id=0;
+        stretch_genes="";
+        prev_gene="";
+    }else if (prev == 1 && $1 == prev_chrom){
+        # previous row was HET on the same chromosome: extend the run
+        stretch_length_variants=stretch_length_variants+1;
+        stretch_length_bp=$2-stretch_id+1;
+        if ($8 != prev_gene && $8 != ""){
+            stretch_genes=(stretch_genes == "" ? $8 : stretch_genes","$8);
+        }
+        prev_gene=$8;
+    }else{
+        # first HET of a run; its position doubles as the run id
+        stretch_length_variants=1;
+        stretch_id=$2;
+        stretch_length_bp=1;
+        stretch_genes=$8;
+        prev_gene=$8;
+    }
+    prev=genotype;
+    prev_chrom=$1;
+    # rows of runs shorter than min_variants are not emitted
+    if (stretch_length_variants >= min_variants)
+        print $0","stretch_length_variants","stretch_length_bp","stretch_id",\""stretch_genes"\"";
+}' \
+| sort -t "," -k1,1 -k15,15n -k13,13n \
+| awk -F "," '
+{
+    run=$1","$15;
+    # a new key means the previous row closed its run
+    if (NR > 1 && run != prev_run){ print prev_row; }
+    prev_run=run;
+    prev_row=$0;
+}
+# the last run has no following row, so flush it here
+END{ if (NR > 0){ print prev_row; } }'