Skip to content

Commit

Permalink
modified: README.md
Browse files Browse the repository at this point in the history
	new file:   bcbio.prepare_families.sh
	new file:   bcbio.sample_sheet_header.csv
	new file:   bcbio.templates.exome.yaml
  • Loading branch information
naumenko-sa committed Apr 18, 2017
1 parent ee9f1b7 commit 8a20793
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 1 deletion.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
clinical research exome - excel report generation using data from [bcbio variant2](https://bcbio-nextgen.readthedocs.io/en/latest/contents/pipelines.html#germline-variant-calling)
germline variant calling pipeline.

1. Create a project to run with bcbio.
# 1. Create a project to run with bcbio.

## 1a. If you start from bam files.
Suppose you have a trio, each sample is a bam file.
59 changes: 59 additions & 0 deletions bcbio.prepare_families.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash

# prepares a run of multiple families for variant calling; one family may have several samples
# $1 - a file table.txt in the format
# sample_id family_id absolute_path_to_bam_file
# creates one project per family
#
# run with
# bcbio.prepare_families.sh table.txt &> file.log to track failed bams
# or
# qsub ~/bioscripts/bcbio.prepare_families.sh -v project_list=table.txt

# the script assumes it is installed in ~/cre/ and that bcbio is installed and available in the PATH
# uses
# bcbio.sample_sheet_header.csv
# bcbio.templates.exome.yaml

# to create table.txt from a directory of bam files with names family_sample.yyy.bam
# for f in *.bam;do echo $f | awk -F "." '{print $1"\t"$0}' | awk -F '_' '{print $2"\t"$0}' | awk -v dir=`pwd` '{print $1"\t"$2"\t"dir"/"$4}' >> ~/table.txt;done;

#PBS -l walltime=20:00:00,nodes=1:ppn=1
#PBS -joe .
#PBS -d .
#PBS -l vmem=10g,mem=10g

#######################################
# Builds the bcbio project skeleton for one family.
#
# Reads ${family}.txt from the current directory; each line is
#   sample_id family_id absolute_path_to_bam_file
# For every line the bam is symlinked to ${family}/input/${sample}.bam and a
# row "sample,sample,family,,," is appended to a temporary sample sheet
# ${family}.csv (seeded from ~/cre/bcbio.sample_sheet_header.csv). The sheet
# and the linked bams are then passed to `bcbio_nextgen.py -w template`
# together with ~/cre/bcbio.templates.exome.yaml, and the sheet is removed.
#
# Arguments: $1 - family id (also used as the project directory name)
# Outputs:   ${family}/input, ${family}/work, whatever bcbio_nextgen.py writes
#######################################
prepare_family()
{
  local family=$1
  # keep the loop variables out of the caller's scope
  local sample fam bam

  # -p for both directories, so a rerun over an existing project does not fail
  mkdir -p "${family}/input" "${family}/work"

  cp ~/cre/bcbio.sample_sheet_header.csv "${family}.csv"

  # -r: do not let backslashes in a path be eaten by read;
  # bam receives the rest of the line, so paths with spaces stay intact
  while read -r sample fam bam
  do
    ln -s "$bam" "${family}/input/${sample}.bam"
    printf '%s,%s,%s,,,\n' "$sample" "$sample" "$family" >> "${family}.csv"
  done < "${family}.txt"

  # the glob is deliberately left outside the quotes so it expands to all linked bams
  bcbio_nextgen.py -w template ~/cre/bcbio.templates.exome.yaml "${family}.csv" "${family}"/input/*.bam

  rm "${family}.csv"
}

# Entry point: split the sample table into one table per family and build a
# bcbio project for each family.
# The table is taken from $project_list (set by `qsub -v project_list=...`)
# or, when that is empty, from the first positional argument.
if [[ -z "${project_list:-}" ]]
then
  project_list=${1:-}
fi

# fail early instead of letting the pipeline below wait on stdin
if [[ -z "$project_list" || ! -r "$project_list" ]]
then
  printf 'Usage: %s table.txt   (or: qsub -v project_list=table.txt %s)\n' "${0##*/}" "${0##*/}" >&2
  exit 2
fi

# unique family ids = second column of the table
awk '{print $2}' "$project_list" | sort | uniq > families.txt

# the list is read on fd 3 so that stdin stays untouched for the commands run in the loop
while IFS= read -r family <&3
do
  # a table line with fewer than two columns yields an empty id - nothing to prepare
  [[ -n "$family" ]] || continue

  # exact match on column 2 instead of grep, because one family id may be a
  # substring of another; appending "" forces a string comparison, so
  # numeric-looking ids such as 166 and 166.0 are not treated as equal
  awk -v fam="$family" '($2 "") == (fam "") {print $0}' "$project_list" > "${family}.txt"
  prepare_family "$family"
  rm "${family}.txt"
done 3< families.txt
1 change: 1 addition & 0 deletions bcbio.sample_sheet_header.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
samplename,description,batch,phenotype,sex,variant_regions
35 changes: 35 additions & 0 deletions bcbio.templates.exome.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Template handed to `bcbio_nextgen.py -w template` by bcbio.prepare_families.sh.
# NOTE(review): nesting restored from a flattened copy - confirm against the bcbio
# configuration documentation before use.
# NOTE(review): description, files and metadata.batch look like leftovers of one
# concrete sample; presumably the template step replaces them per sample - confirm.
details:
- algorithm:
    aligner: bwa
    effects: vep
    effects_transcripts: all
    ensemble:
      numpass: 2
      use_filtered: false
    realign: true
    recalibrate: true
    save_diskspace: true
    tools_on:
    - svplots
    - qualimap
    variantcaller:
    - gatk-haplotype
    - samtools
    - platypus
    - freebayes
  analysis: variant2
  description: '166.3_5'
  files:
  - /hpf/largeprojects/ccmbio/naumenko/project_c4r_run10/input/166.3_5.bam
  genome_build: GRCh37
  metadata:
    batch: 166
resources:
  default:
    cores: 5
    jvm_opts:
    - -Xms750m
    - -Xmx7000m
    memory: 7G
upload:
  dir: ../final

0 comments on commit 8a20793

Please sign in to comment.