modified: README.md

new file: bcbio.prepare_families.sh new file: bcbio.sample_sheet_header.csv new file: bcbio.templates.exome.yaml
naumenko-sa · Apr 18, 2017 · 8a20793 · 8a20793
1 parent ee9f1b7
commit 8a20793
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -2,4 +2,7 @@
 clinical research exome - excel report generation using data from [bcbio variant2](https://bcbio-nextgen.readthedocs.io/en/latest/contents/pipelines.html#germline-variant-calling) 
 germline variant calling pipeline.
 
-1. Create a project to run with bcbio.
+#1. Create a project to run with bcbio.
+
+##1a. If you start from bam files.
+Suppose you have a trio, each sample is a bam file. 
diff --git a/bcbio.prepare_families.sh b/bcbio.prepare_families.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# prepares a run of multiples families to run variant calling, one family may have several samples
+# $1 - a file table.txt in the format
+# sample_id	family_id	absolute_path_to_bam_file
+# creates one project per family
+# 
+# run with 
+# bcbio.prepare_families.sh table.txt &> file.log to track failed bams
+# or
+# qsub ~/bioscripts/bcbio.prepare_families.sh -v project_list=table.txt 
+
+# the scripts supposes it is install in ~/cre/ and bcbio is installed and available in the PATH
+# uses 
+# bcbio.sample_sheet_header.csv
+# bcbio.templates.exome.yaml
+
+# to create table.txt from a directory of bam files with names family_sample.yyy.bam
+# for f in *.bam;do echo $f | awk -F "." '{print $1"\t"$0}' | awk -F '_' '{print $2"\t"$0}' | awk -v dir=`pwd` '{print $1"\t"$2"\t"dir"/"$4}' >> ~/table.txt;done;
+
+#PBS -l walltime=20:00:00,nodes=1:ppn=1
+#PBS -joe .
+#PBS -d .
+#PBS -l vmem=10g,mem=10g
+
+prepare_family()
+{
+    local family=$1
+
+    mkdir -p ${family}/input
+    mkdir ${family}/work
+
+    cp ~/cre/bcbio.sample_sheet_header.csv $family.csv
+
+    while read sample fam bam
+    do
+	ln -s $bam ${family}/input/${sample}.bam
+        echo $sample","$sample","$family",,," >> $family.csv
+    done < $family.txt
+
+    bcbio_nextgen.py -w template ~/cre/bcbio.templates.exome.yaml $family.csv ${family}/input/*.bam
+
+    rm $family.csv
+}
+
+if [ -z $project_list ];
+then
+    project_list=$1
+fi
+
+cat $project_list | awk '{print $2}' | sort | uniq >  families.txt
+
+for family in `cat families.txt`
+do
+    # not grep because two family names may overlap
+    cat $project_list | awk -v fam=$family '{if ($2==fam) print $0}' > ${family}.txt
+    prepare_family $family
+    rm $family.txt
+done
diff --git a/bcbio.sample_sheet_header.csv b/bcbio.sample_sheet_header.csv
@@ -0,0 +1 @@
+samplename,description,batch,phenotype,sex,variant_regions
diff --git a/bcbio.templates.exome.yaml b/bcbio.templates.exome.yaml
@@ -0,0 +1,35 @@
+details:
+- algorithm:
+    aligner: bwa
+    effects: vep
+    effects_transcripts: all
+    ensemble:
+      numpass: 2
+      use_filtered: false
+    realign: true
+    recalibrate: true
+    save_diskspace: true
+    tools_on:
+    - svplots
+    - qualimap
+    variantcaller:
+    - gatk-haplotype
+    - samtools
+    - platypus
+    - freebayes
+  analysis: variant2
+  description: '166.3_5'
+  files:
+  - /hpf/largeprojects/ccmbio/naumenko/project_c4r_run10/input/166.3_5.bam
+  genome_build: GRCh37
+  metadata:
+    batch: 166
+resources:
+  default:
+    cores: 5
+    jvm_opts:
+    - -Xms750m
+    - -Xmx7000m
+    memory: 7G
+upload:
+  dir: ../final
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		samplename,description,batch,phenotype,sex,variant_regions