-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
modified: README.md new file: cre.bam.validate.sh new file: cre.database.header new file: cre.database.py new file: cre.database_merge.py new file: cre.fixit.sh new file: cre.gemini_load.sh new file: cre.locate_sample.py modified: cre.package.sh new file: cre.phenomecentral.upload_vcf_attachment.prototype.py deleted: bam.validate.sh deleted: fixit.sh deleted: gemini.vep2gemini.sh
- Loading branch information
naumenko-sa
committed
Jul 13, 2017
1 parent
63eace9
commit dac884d
Showing
11 changed files
with
399 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
- 2017-07-13: added cre.package.sh. It packages reports to send. | ||
- 2017-07-07: added bam.validate.sh, it produces bam.check file with picard's validation status. | ||
- 2017-07-04: bcbio uses vep-merged, a merged cache of refseq and vep, no need to annotate with refseq anymore. | ||
- 2017-07-04: bcbio uses vep-merged, a merged cache of refseq and vep, no need to annotate with refseq anymore, removed refseq from cre.sh. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash | ||
|
||
#PBS -l walltime=5:00:00,nodes=1:ppn=1 | ||
#PBS -joe . | ||
#PBS -d . | ||
#PBS -l vmem=20g,mem=20g | ||
|
||
if [ -z $bam ] | ||
then | ||
bam=$1 | ||
fi | ||
|
||
#if file is truncated, it would be the first error message in $bam.check | ||
#uses picard wrapper from bcbio | ||
|
||
picard ValidateSamFile I=$bam > $bam.check |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Position,Ref,Alt,Variation,Zygosity,Protein_change_ensembl,Gene,Conserved_in_29_mammals,Sift_score,Polyphen_score,Cadd_score,Sample |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/hpf/largeprojects/ccmbio/naumenko/tools/bcbio/anaconda/bin/python | ||
|
||
import csv | ||
import sys | ||
import re | ||
|
||
family = sys.argv[1] | ||
|
||
with open('samples.txt','rb') as f_samples: | ||
samples = f_samples.readlines() | ||
samples = [x.strip() for x in samples] | ||
|
||
n_samples = len(samples) | ||
|
||
for sample in samples: | ||
with open(family+".csv",'rb') as f_csv: | ||
reader = csv.DictReader(f_csv) | ||
with open(family+'.'+sample+'.c4r','w') as f_sample: | ||
zygosity_field = 'Zygosity.'+sample | ||
#because R when building report substitutes - with . in column names | ||
zygosity_field = zygosity_field.replace("-",".") | ||
fieldnames=['Position','Ref','Alt','Variation',zygosity_field,'Protein_change_ensembl','Gene','Conserved_in_29_mammals','Sift_score','Polyphen_score','Cadd_score'] | ||
for row in reader: | ||
l = [] | ||
for key in fieldnames: | ||
l.append(row[key]) | ||
zygosity=row[zygosity_field] | ||
if (zygosity != '-' and not re.search('Insufficient',zygosity)): | ||
f_sample.write(','.join(l)+','+sample) | ||
f_sample.write('\n') | ||
|
||
f_sample.close() | ||
f_samples.close() | ||
f_csv.close() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#!/hpf/largeprojects/ccmbio/naumenko/tools/bcbio/anaconda/bin/python | ||
|
||
import csv | ||
import sys | ||
from collections import defaultdict | ||
|
||
fieldnames=['Position','Ref','Alt','Variation','Zygosity','Protein_change_ensembl','Gene','Conserved_in_29_mammals','Sift_score','Polyphen_score','Cadd_score'] | ||
#Frequency','Samples'] | ||
|
||
frequencies = defaultdict(list) | ||
samples = defaultdict(list) | ||
annotations = defaultdict(list) | ||
|
||
with open(sys.argv[1],'r') as f_csv: | ||
reader = csv.DictReader(f_csv) | ||
for row in reader: | ||
superkey=row['Position']+'-'+row['Ref']+'-'+row['Alt'] | ||
if superkey in frequencies: | ||
frequencies[superkey] += 1 | ||
samples[superkey].append(row['Sample']) | ||
else: | ||
frequencies[superkey] = 1 | ||
l = [] | ||
for key in fieldnames: | ||
l.append(row[key]) | ||
annotations[superkey] = ','.join(l) | ||
ll = [] | ||
ll.append(row['Sample']) | ||
samples[superkey] = ll | ||
|
||
with open(sys.argv[2],'w') as f_out: | ||
f_out.write(','.join(fieldnames)+',Frequency,Samples') | ||
f_out.write('\n') | ||
for key in sorted(frequencies.keys()): | ||
f_out.write(annotations[key]+','+str(frequencies[key])+','+';'.join(samples[key])) | ||
f_out.write('\n') | ||
|
||
f_out.close() | ||
f_csv.close() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash | ||
|
||
#fixes sample names to family_sample | ||
|
||
#$1 = family_id | ||
|
||
cat samples.txt | awk -v fam=$1 '{print fam"_"$1}' > samples.txt.fixed | ||
rm samples.txt | ||
mv samples.txt.fixed samples.txt | ||
|
||
for f in *ready.bam.bai;do mv $f `echo $f | sed s/"-ready"//`;done; | ||
for f in *ready.bam;do mv $f `echo $f | sed s/"-ready"//`;done; | ||
for f in *.bam;do mv $f `echo ${1}_${f}`;done; | ||
for f in *.bam.bai;do mv $f `echo ${1}_${f}`;done; | ||
|
||
|
||
for f in *.vcf.gz;do bcftools reheader -s samples.txt $f > $f.reheader;done; | ||
rm *.vcf.gz | ||
for f in *.reheader;do mv $f `echo $f | sed s/.reheader//`;done; | ||
for f in *.vcf.gz; do tabix $f;done; | ||
|
||
vcf.split_multi.sh $1.vcf.gz | ||
|
||
# rerun cre.gemini_load.sh | ||
# rerun cre.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#/bin/bash | ||
|
||
#vep2gemini - loads vep annotated vcf file to gemini database | ||
#based on bcbio.log | ||
|
||
#PBS -l walltime=23:00:00,nodes=1:ppn=16 | ||
#PBS -joe . | ||
#PBS -d . | ||
#PBS -l vmem=50g,mem=50g | ||
|
||
if [ -z $vcf ] | ||
then | ||
vcf=$1 | ||
fi | ||
|
||
bname=`echo $vcf | sed s/.vcf.gz//` | ||
|
||
#--skip-cadd if no cadd | ||
#remove GL chromosomes - sometimes they cause problems | ||
mv $vcf $bname.tmp.vcf.gz | ||
gunzip -c $bname.tmp.vcf.gz | grep "^#" > $bname.vcf | ||
gunzip -c $bname.tmp.vcf.gz | grep -v "^#" | grep -v -i "^GL00" >> $bname.vcf | ||
bgzip $bname.vcf | ||
tabix $bname.vcf.gz | ||
rm $bname.tmp.vcf.gz | ||
|
||
#remove --passonly to load all variants | ||
#add --skip-cadd to remove cadd | ||
gemini load --passonly --skip-gerp-bp -v $vcf -t VEP --cores 16 --tempdir . $bname.db | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#!/hpf/largeprojects/ccmbio/naumenko/tools/bcbio/anaconda/bin/python | ||
|
||
""" | ||
Looks for a specific sample | ||
""" | ||
|
||
import re | ||
import sys | ||
import os | ||
import os.path | ||
|
||
sample = sys.argv[1] | ||
|
||
family,sample_only = sample.split("_") | ||
|
||
match = re.match('\d*',family) | ||
|
||
if match: | ||
prefix=str(int(match.group(0))/100) | ||
report_path = prefix+'x/'+family+'/'+family | ||
|
||
report=0 | ||
bam=0 | ||
|
||
if os.path.isfile(report_path+'.csv'): | ||
#print("Report exists") | ||
report=1 | ||
else: | ||
print("Error: no report") | ||
|
||
if os.path.isfile(report_path+'_'+sample_only+'.bam'): | ||
#print("Bam exists") | ||
bam=1 | ||
else: | ||
print("ERROR: no bam") | ||
if (bam==1 and report==1): | ||
print(os.getcwd()+"/"+family+"\t"+os.getcwd()+"/"+report_path+'.bam') | ||
else: | ||
print("Family ID is not starting with digital") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.