Note: the abbreviated blob ids on the "index" lines predate the script
revisions below and no longer match the post-image content.

diff --git a/bcbio.prepare_families.sh b/bcbio.prepare_families.sh
index a13c609..9745853 100755
--- a/bcbio.prepare_families.sh
+++ b/bcbio.prepare_families.sh
@@ -2,7 +2,8 @@
 
 # prepares a run of multiples families to run variant calling, one family may have several samples
 # $1 - a file table.txt in the format
-# sample_id family_id absolute_path_to_bam_file
+# sample_id family_id absolute_path_to_bam_file, e.g.
+# 531_IN0067 531 /hpf/largeprojects/ccm_dccforge/dccdipg/dccc4r/c4r_wes/bam_files/531_IN0067.bam
 # creates one project per family
 #
 # run with
diff --git a/fixit.sh b/fixit.sh
new file mode 100755
index 0000000..47cde58
--- /dev/null
+++ b/fixit.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Prefixes sample names, BAMs and BAM indexes of one family with the family id,
+# rewrites the VCF headers to the new sample names and splits the family VCF
+# into per-sample VCFs. Run inside the directory holding samples.txt, the
+# *.bam/*.bam.bai files and the *.vcf.gz files.
+#
+# $1 = family_id
+
+set -euo pipefail
+# An unmatched glob must expand to nothing rather than to the literal pattern.
+shopt -s nullglob
+
+family=${1:?usage: fixit.sh family_id}
+
+# Prefix the first column of samples.txt with the family id. The result goes
+# to a scratch file first so samples.txt is only replaced when awk succeeded.
+awk -v fam="$family" '{print fam"_"$1}' samples.txt > samples.txt.fixed
+mv -- samples.txt.fixed samples.txt
+
+# Strip the first "-ready" from BAM index and BAM names.
+for f in *ready.bam.bai *ready.bam; do
+  stripped=${f/-ready/}
+  if [[ "$stripped" != "$f" ]]; then
+    mv -- "$f" "$stripped"
+  fi
+done
+
+# Prefix BAMs and their indexes with the family id.
+for f in *.bam *.bam.bai; do
+  mv -- "$f" "${family}_${f}"
+done
+
+# Rename the samples inside every VCF. The original is replaced only after
+# bcftools succeeded (set -e aborts before the mv otherwise), so a failed
+# reheader cannot destroy the input.
+for f in *.vcf.gz; do
+  bcftools reheader -s samples.txt "$f" > "$f.reheader"
+  mv -- "$f.reheader" "$f"
+  # -f: overwrite an index left over from before the reheader.
+  tabix -f -p vcf "$f"
+done
+
+vcf.split_multi.sh "${family}.vcf.gz"
+
+# run gemini db gemini.vep2gemini.sh ${family}-ensemble-annotated-decomposed.vcf.gz
+# rerun cre.sh
diff --git a/vcf.split_multi.sh b/vcf.split_multi.sh
new file mode 100755
index 0000000..7e54d22
--- /dev/null
+++ b/vcf.split_multi.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# Extracts every sample listed in ./samples.txt from a multi-sample VCF into
+# its own uncompressed <sample>.vcf. bcftools is run with -c1, i.e. a minimum
+# allele count of 1 for the sites that are written.
+#
+# $1 = family.vcf.gz
+
+set -euo pipefail
+
+family_vcf=${1:?usage: vcf.split_multi.sh family.vcf.gz}
+
+if [[ ! -r samples.txt ]]; then
+  echo "vcf.split_multi.sh: cannot read samples.txt in ${PWD}" >&2
+  exit 1
+fi
+
+# Every whitespace-separated token of samples.txt is one sample name.
+# read -d '' consumes the whole file and returns non-zero at end-of-file,
+# which is expected, hence the "|| true".
+samples=()
+read -r -d '' -a samples < samples.txt || true
+
+if (( ${#samples[@]} == 0 )); then
+  echo "vcf.split_multi.sh: samples.txt lists no samples" >&2
+  exit 0
+fi
+
+for sample in "${samples[@]}"; do
+  bcftools view -c1 -Ov -s "$sample" -o "${sample}.vcf" "$family_vcf"
+done