diff --git a/HISTORY.md b/HISTORY.md index a11623d..c72c923 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,6 @@ +## 0.0.4 (23 May 2019) +- fixed a bug in clinvar column + ## 0.0.3 (21 May 2019) - bug fixes, WES report generated for NA12878 looks ok diff --git a/bcbio.pbs b/bcbio.pbs index d300a0b..248717a 100644 --- a/bcbio.pbs +++ b/bcbio.pbs @@ -11,6 +11,8 @@ # experience is that the best combination for WES and SK HPC is 7cores/50G # wgs alignment: -v threads=40 -l mem=150G,vmem=150G, bigmem nodes=512G RAM + 64 cores, 2 nodes - crashes w memory error, 40 threads is too much +# 30G min RAM for STAR + #PBS -l walltime=240:00:00,nodes=1:ppn=7 #PBS -joe . #PBS -d . diff --git a/cre.bcbio.upgrade.sh b/cre.bcbio.upgrade.sh index cf1b91e..06598aa 100755 --- a/cre.bcbio.upgrade.sh +++ b/cre.bcbio.upgrade.sh @@ -34,7 +34,7 @@ which bcbio_nextgen.py ###################################################################### # 4. Install indices # bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners bwa --cores 10 -bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners star --cores 10 +# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners star --cores 10 # bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners hisat2 --cores 10 # bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners rtg --cores 10 ######################################################################### @@ -70,7 +70,7 @@ bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners star --cores 10 # bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget dbnsfp # rnaseq -bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget rnaseq +# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget rnaseq ###################################################################### # fresh installation for Sam with human and mouse genome diff --git a/cre.gemini2txt.vcf2db.sh b/cre.gemini2txt.vcf2db.sh index 1d04b53..af72ddb 100755 --- a/cre.gemini2txt.vcf2db.sh +++ b/cre.gemini2txt.vcf2db.sh @@ -38,7 +38,7 @@ sQuery="select \ dp as Depth,\ qual as Quality,\ gene as Gene,\ - clinvar_sig as Clinvar,\ + clinvar_pathogenic as Clinvar,\ ensembl_gene_id as Ensembl_gene_id,\ transcript as Ensembl_transcript_id,\ aa_length as AA_position,\ diff --git a/cre.vcf2cre.sh b/cre.vcf2cre.sh index 9590912..9487c4d 100755 --- a/cre.vcf2cre.sh +++ b/cre.vcf2cre.sh @@ -25,6 +25,8 @@ ##INFO= # gunzip -c 331606_S1.flt.nochr.vcf.gz | grep -v "^#" | grep PASS | sed s/":DPI:"/":DP:"awk -F ':' '{print $0"\tDP="$9}' | awk -F "\t" '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$11";"$8"\t"$9"\t"$10}' >> 331606.vcf +. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.test_profile + bname=`basename $original_vcf .vcf.gz` echo "###############################################" diff --git a/cre.vcfanno.conf b/cre.vcfanno.conf index 2ddcc21..c364aa1 100644 --- a/cre.vcfanno.conf +++ b/cre.vcfanno.conf @@ -51,13 +51,6 @@ file="variation/clinvar.vcf.gz" fields=["CLNSIG"] names=["clinvar_pathogenic"] ops=["concat"] - -# convert 5 to 'pathogenic', 255 to 'unknown', etc. -[[postannotation]] -fields=["clinvar_pathogenic"] -op="lua:clinvar_sig(clinvar_pathogenic)" -name="clinvar_sig" -type="String" #dbNSFP v3.4 [[annotation]] diff --git a/cre.vep.sh b/cre.vep.sh index 547aae6..95bf1ee 100755 --- a/cre.vep.sh +++ b/cre.vep.sh @@ -20,6 +20,8 @@ then threads=5 fi +. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.test_profile + bname=`basename $vcf .vcf.gz` #find reference @@ -38,14 +40,12 @@ echo "Threads:" $threads # --plugin SpliceRegion --sift b --polyphen b --hgvs --shift_hgvs 1 --merged \ # | sed '/^#/! s/;;/;/g' | bgzip -c > $bname.vepeffects.vcf.gz -unset PERL5LIB && export PATH=/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/bin:"$PATH" && \ - /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/bin/vep --vcf -o stdout \ +unset PERL5LIB && vep --vcf -o stdout \ -i $vcf --fork $threads --species homo_sapiens --no_stats --cache --offline --dir ${reference}/vep --symbol --numbers --biotype --total_length \ --canonical --gene_phenotype --ccds --uniprot --domains --regulatory --protein --tsl --appris --af --max_af --af_1kg --af_esp --af_gnomad --pubmed --variant_class \ --allele_number \ --fasta ${reference}/seq/GRCh37.fq.gz \ --plugin LoF,human_ancestor_fa:${reference}/human_ancestor.fa.gz,loftee_path:$vep_reference \ - --plugin G2P,file:/hpf/largeprojects/ccmbio/naumenko/validation/test_bcbio_runs/WES/variation/G2P.csv \ --plugin MaxEntScan,/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/share/maxentscan-0_2004.04.21-1 \ --plugin SpliceRegion --sift b --polyphen b --hgvsg --hgvs --shift_hgvs 1 --merged \ | sed '/^#/! s/;;/;/g' | bgzip -c > $bname.vepeffects.vcf.gz