Skip to content

Commit

Permalink
modified: cre.bcbio.upgrade.sh
Browse files Browse the repository at this point in the history
	new file:   cre.vcf.has2dp.sh
	modified:   cre.vcf2cre.sh
  • Loading branch information
naumenko-sa committed Oct 25, 2018
1 parent 8ac8286 commit 19cc1d9
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 2 deletions.
2 changes: 1 addition & 1 deletion cre.bcbio.upgrade.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@
#bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget rnaseq

#mouse
bcbio_nextgen.py upgrade -u skip --genomes mm10 --datatarget rnaseq --aligners star
bcbio_nextgen.py upgrade -u skip --genomes mm10 --datatarget rnaseq --aligners star --cores 5

28 changes: 28 additions & 0 deletions cre.vcf.has2dp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash

# if input vcf is from TCAG (HAS) it does not have DP INFO field, we need to fake it from FORMAT DP for SNVs and from DPI for indels:

bname=`basename $1 .vcf.gz`

# remove chr
gunzip -c $1 | sed s/"ID=chrM"/"ID=MT"/ | sed s/"^chrM"/MT/ | sed s/"ID=chr"/"ID="/ | sed s/"^chr"// > $bname.nochr.vcf
bgzip $bname.nochr.vcf
tabix $bname.nochr.vcf.gz

gunzip -c $bname.nochr.vcf.gz | grep "^#" | head -n1 > $bname.dp.vcf
echo "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth; some reads may have been filtered\">" >> $bname.dp.vcf
gunzip -c $bname.nochr.vcf.gz | grep "^#" | sed 1d >> $bname.dp.vcf

#process SNVs
gunzip -c $bname.nochr.vcf.gz | grep -v "^#" | grep PASS | grep ":DP:" | awk -F ':' '{print $0"\tDP="$9}' | awk -F "\t" '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$11";"$8"\t"$9"\t"$10}' >> $bname.dp.vcf

#process indels
gunzip -c $bname.nochr.vcf.gz | grep -v "^#" | grep PASS | grep ":DPI:" | awk -F ':' '{print $0"\tDP="$8}' | awk -F "\t" '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$11";"$8"\t"$9"\t"$10}' | sed s/":DPI:"/":DP:"/ >> $bname.dp.vcf

bgzip $bname.dp.vcf
tabix $bname.dp.vcf.gz

bcftools sort -o $bname.dp.sorted.vcf.gz -Oz $bname.dp.vcf.gz
tabix $bname.dp.sorted.vcf.gz

rm $bname.nochr.vcf.gz $bname.dp.vcf.gz
8 changes: 7 additions & 1 deletion cre.vcf2cre.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/bin/bash

#PBS -l walltime=23:00:00,nodes=1:ppn=1
#PBS -joe .
#PBS -d .
Expand All @@ -19,6 +18,13 @@
# AC, AF, MLEAC, MLEAF Number=1 not A. Because of that vt is not properly decomposing multiallelic variants, and vcf2db can't create gemini database
# solution is to put Number=A in the vcf header or rerun variant calling with the latest bcbio

# if input is from TCAG (HAS) it does not have DP INFO field, we need to fake it from FORMAT DP for SNVs and from DPI for indels:
# cre.vcf.has2dp.sh
# gunzip -c 331606_S1.flt.nochr.vcf.gz | grep "^#" > 331606.vcf
# add to header:
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
# gunzip -c 331606_S1.flt.nochr.vcf.gz | grep -v "^#" | grep PASS | sed s/":DPI:"/":DP:"awk -F ':' '{print $0"\tDP="$9}' | awk -F "\t" '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$11";"$8"\t"$9"\t"$10}' >> 331606.vcf

bname=`basename $original_vcf .vcf.gz`

echo "###############################################"
Expand Down

0 comments on commit 19cc1d9

Please sign in to comment.