Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
naumenko-sa committed May 27, 2019
1 parent 6b3ac62 commit 98e8a1a
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 10 deletions.
9 changes: 5 additions & 4 deletions cre.bcbio.upgrade.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,18 @@ date
# create a .test_profile:
# export PATH=$HOME/cre:$HOME/crt:$HOME/crg:/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/bin:$HOME/tools/mc-4.8.16/bin:$HOME/jkent_tools:$HOME/bioscripts:.:/usr/local/bin:/opt/moab/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin
# export PYTHONPATH=/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/lib/python3.6
. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.test_profile
. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.profile115
which python
echo $PYTHONPATH
######################################################################
# 3. Upgrade tools. If tooldir was set before, no need to specify it again
which bcbio_nextgen.py
# bcbio_nextgen.py upgrade -u skip --tools
bcbio_nextgen.py upgrade -u skip --tools
#--tooldir $1
######################################################################
# 4. Install indices
# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners bwa --cores 10
# genomes = {GRCh37, hg38}
# bcbio_nextgen.py upgrade -u skip --genomes hg38 --aligners bwa --cores 10
# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners star --cores 10
# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners hisat2 --cores 10
# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners rtg --cores 10
Expand All @@ -49,7 +50,7 @@ which bcbio_nextgen.py
# 1.1.5 - a huge update to python3, installed from scratch

# upgrades data installed before (gemini, cadd) for all references
# bcbio_nextgen.py upgrade --data
bcbio_nextgen.py upgrade --data --genomes GRCh37

# VEP is upgraded quite often (every ~2-3 months) - when upgrading tools it looks for a new VEP cache
# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget vep
Expand Down
13 changes: 8 additions & 5 deletions cre.bcbio.upgrade.star.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!/bin/bash
#PBS -l walltime=240:00:00,nodes=1:ppn=30
#PBS -l walltime=150:00:00,nodes=1:ppn=30
#PBS -joe .
#PBS -d .
#PBS -l vmem=100g,mem=100g
#PBS -l vmem=50g,mem=50g

. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.test_profile
. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.profile115
which bcbio_nextgen.py

hostname

bcbio_path=/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5

#GRCh37
reference="hg38"

# sometimes STAR cannot be upgraded on data nodes - memory is low, or it cannot get enough CPUs
# can take more than a day
export PATH=${bcbio_path}/bin:$PATH && STAR \
--genomeDir ${bcbio_path}/genomes/Hsapiens/GRCh37/star \
--genomeFastaFiles ${bcbio_path}/genomes/Hsapiens/GRCh37/seq/GRCh37.fa \
--genomeDir ${bcbio_path}/genomes/Hsapiens/${reference}/star \
--genomeFastaFiles ${bcbio_path}/genomes/Hsapiens/${reference}/seq/${reference}.fa \
--runThreadN 30 --limitGenomeGenerateRAM 30000000000 --genomeChrBinNbits 14 --runMode genomeGenerate --genomeSAindexNbases 14
14 changes: 14 additions & 0 deletions cre.bcbio.upgrade.star.sh.o47757114
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/bin/bcbio_nextgen.py
node080
May 25 22:55:29 ..... started STAR run
May 25 22:55:30 ... starting to generate Genome files
May 25 22:56:48 ... starting to sort Suffix Array. This may take a long time...
May 25 22:57:14 ... sorting Suffix Array chunks and saving them to disk...
May 25 23:22:50 ... loading chunks from disk, packing SA...
May 25 23:30:16 ... finished generating suffix array
May 25 23:30:16 ... generating Suffix Array index
May 25 23:35:50 ... completed Suffix Array index
May 25 23:35:50 ... writing Genome to disk ...
May 25 23:36:04 ... writing Suffix Array to disk ...
May 25 23:39:16 ... writing SAindex to disk
May 25 23:39:41 ..... finished successfully
2 changes: 1 addition & 1 deletion cre.gemini2txt.vcf2db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# when using v.chr = g.chr AND v.gene = g.gene it becomes very slow
# by default bcbio writes PASS only variants to the database

# example call: cre.gemini2txt.sh S28-ensemble.db 5 ALL
# example call: cre.gemini2txt.sh S28-ensemble.db 5 ALL 0.01
# when using the vcfanno/vcf2db loader some fields are different
# for some reason \n in the query string does not work here

Expand Down

0 comments on commit 98e8a1a

Please sign in to comment.