-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
new file: cre.vcf.has2dp.sh modified: cre.vcf2cre.sh
- Loading branch information
1 parent
8ac8286
commit 19cc1d9
Showing
3 changed files
with
36 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/bash
#
# Add an INFO/DP field to a single-sample VCF that lacks one.
#
# If the input VCF is from TCAG (HAS) it does not have a DP INFO field, so it
# is faked from FORMAT DP for SNVs and from FORMAT DPI for indels.
#
# Usage:    <this script> <name>.vcf.gz
# Output:   <name>.dp.sorted.vcf.gz plus its tabix index, written to the
#           current directory.
# Requires: gunzip, sed, awk, bgzip, tabix and bcftools on PATH.
#
# Steps:
#   1. strip the "chr" prefix from contig names (chrM becomes MT);
#   2. declare INFO/DP in the header, right after the first header line;
#   3. keep only records whose FILTER is exactly PASS and whose FORMAT has a
#      DP or DPI key, and prepend DP=<depth> to INFO (DPI is renamed to DP in
#      FORMAT so downstream tools see a single depth key);
#   4. sort and index the result.
#
# Only columns 1-10 are written, i.e. the first sample column.

set -euo pipefail

if [[ $# -lt 1 ]]; then
  printf 'Usage: %s <input.vcf.gz>\n' "${0##*/}" >&2
  exit 2
fi

input=$1
if [[ ! -r "$input" ]]; then
  printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
  exit 1
fi

bname=$(basename -- "$input" .vcf.gz)
out="${bname}.dp.sorted.vcf.gz"

# Intermediate data lives in a private directory next to the output (the
# working directory, as before) and is removed on every exit path.
tmpdir=$(mktemp -d "./${bname}.has2dp.XXXXXX")
trap 'rm -rf -- "${tmpdir:?}"' EXIT
unsorted="${tmpdir}/dp.vcf.gz"

# The sed expressions are applied in this order to every line (header and
# records alike); the chrM rules must run before the generic chr rules.
gunzip -c -- "$input" \
  | sed -e 's/ID=chrM/ID=MT/' \
        -e 's/^chrM/MT/' \
        -e 's/ID=chr/ID=/' \
        -e 's/^chr//' \
  | awk '
      BEGIN { FS = OFS = "\t" }

      # Header lines pass through; INFO/DP is declared after the first one.
      /^#/ {
        print
        if (!declared) {
          print "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth; some reads may have been filtered\">"
          declared = 1
        }
        next
      }

      # FILTER must be exactly PASS (not merely contain the text somewhere).
      $7 != "PASS" { next }

      {
        # Locate the depth by key in FORMAT instead of by a fixed position,
        # so extra FORMAT keys or colons in INFO cannot shift the lookup.
        nkeys = split($9, keys, ":")
        split($10, vals, ":")
        col = 0
        for (i = 1; i <= nkeys; i++) {
          if (keys[i] == "DP") { col = i; break }   # DP wins over DPI
          if (keys[i] == "DPI" && !col) col = i
        }

        # No depth key, or the sample column ends before the depth value.
        if (!col || vals[col] == "") next
        depth = vals[col]

        if (keys[col] == "DPI") {
          keys[col] = "DP"
          fmt = keys[1]
          for (i = 2; i <= nkeys; i++) fmt = fmt ":" keys[i]
          $9 = fmt
        }

        # An empty INFO (".") is replaced rather than extended to "DP=n;.".
        info = ($8 == "." || $8 == "") ? ("DP=" depth) : ("DP=" depth ";" $8)

        print $1, $2, $3, $4, $5, $6, $7, info, $9, $10
      }
    ' \
  | bgzip -c > "$unsorted"

bcftools sort -Oz -o "$out" "$unsorted"

# -f: the output was just rewritten, so a stale index from an earlier run
# must be replaced rather than kept.
tabix -f -p vcf "$out"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters