Skip to content

Commit

Permalink
new file: cre.annotate_str.sh
Browse files Browse the repository at this point in the history
	new file:   cre.annotate_str.toml
	new file:   protein_coding_genes.bed.gz
	new file:   protein_coding_genes.bed.gz.tbi
  • Loading branch information
naumenko-sa committed Oct 20, 2017
1 parent caffd79 commit 37106fc
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 0 deletions.
19 changes: 19 additions & 0 deletions cre.annotate_str.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash

#use cre.annotate_str.sh vcf_from_lobstr.vcf

bname=`echo $1 | sed s/.vcf//`

cat $1 | grep "^#" > $bname.sorted.vcf
#sometimes lobSTR gives you unsorted vcf for some reason
cat $1 | grep -v "^#" | sort -t $'\t' -k1,1 -k2,2n >> $bname.sorted.vcf

bgzip $bname.sorted.vcf
tabix $bname.sorted.vcf.gz

#there may be no gene
vcfanno -base-path ~/cre/ ~/cre/cre.annotate_str.toml $bname.sorted.vcf.gz > $bname.annotated.vcf
rm $bname.insertions.txt
echo -e "CHR\tPOS\tREF\tALT\tGENE\tGENOTYPE\tINSERTION_LENGTH_BP" > $bname.insertions.txt
cat $bname.annotated.vcf | grep -v "^#" | sed s/"gene="/"\t"/g | grep "0/1:" | awk '{print $1"\t"$2"\t"$4"\t"$5"\t"$9"\t"$NF}' | sed s/":.*"// | \
awk '{if (length($3)<length($4)) print $0"\t"length($4)-length($3);}' | sort -k7,7 -rn >> $bname.insertions.txt
5 changes: 5 additions & 0 deletions cre.annotate_str.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[[annotation]]
file="protein_coding_genes.bed.gz"
columns = [4]
names=["gene"]
ops=["self"]
Binary file added protein_coding_genes.bed.gz
Binary file not shown.
Binary file added protein_coding_genes.bed.gz.tbi
Binary file not shown.

0 comments on commit 37106fc

Please sign in to comment.