diff --git a/cre.bcbio.custom_genome.sh b/cre.bcbio.custom_genome.sh deleted file mode 100755 index efb464c..0000000 --- a/cre.bcbio.custom_genome.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -# crashes on qlogin node, building indexes takes time - -#PBS -l walltime=48:00:00,nodes=1:ppn=20 -#PBS -joe . -#PBS -d . -#PBS -l vmem=50g,mem=50g - -hostname -echo $PATH -echo $PYTHONPATH - -wget ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz - -gunzip hs37d5.fa.gz - -bcbio_setup_genome.py -f hs37d5.fa -g /hpf/largeprojects/ccmbio/naumenko/tools/bcbio/genomes/Hsapiens/GRCh37/rnaseq/ref-transcripts.gtf -n Hsapiens -b GRCh37d5 -i bwa star rtg -c 20 - diff --git a/cre.bcbio.upgrade.sh b/cre.bcbio.upgrade.sh deleted file mode 100755 index 4f2bf77..0000000 --- a/cre.bcbio.upgrade.sh +++ /dev/null @@ -1,95 +0,0 @@ -#!/bin/bash -#PBS -l walltime=23:00:00,nodes=1:ppn=10 -#PBS -joe . -#PBS -d . -#PBS -l vmem=10g,mem=10g - -date -# nohups are dying on qlogin nodes, data nodes are better for long data installation runs -# data2 has modules, data7 does not. -###################################################################### -# fresh install of new bcbio instance: -# 1. Don't mix with old environments -# mv ~/.conda/environments.txt ~/.conda/environments.default.txt - move back -# export PATH=/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin -# export PYTHONPATH= -# wget https://raw.github.com/bcbio/bcbio-nextgen/master/scripts/bcbio_nextgen_install.py -# echo "Installing to " $1 -# module load python/2.7.12 - not working, just plain python from the system is better -# python bcbio_nextgen_install.py $1 --tooldir $1 --genomes GRCh37 --aligners bwa - #--isolate --nodata -###################################################################### -# 2. Use the new environment: -# create a .test_profile: -# export PATH=$HOME/cre:$HOME/crt:$HOME/crg:/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/bin:$HOME/tools/mc-4.8.16/bin:$HOME/jkent_tools:$HOME/bioscripts:.:/usr/local/bin:/opt/moab/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin -# export PYTHONPATH=/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/lib/python3.6 -. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.profile115 -which python -echo $PYTHONPATH -###################################################################### -# 3. Upgrade tools. If tooldir was set before, no need to specify it again -which bcbio_nextgen.py -bcbio_nextgen.py upgrade -u skip --tools -#--tooldir $1 -###################################################################### -# 4. Install indices -# genomes = {GRCh37, hg38} -# bcbio_nextgen.py upgrade -u skip --genomes hg38 --aligners bwa --cores 10 -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners star --cores 10 -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners hisat2 --cores 10 -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners rtg --cores 10 -######################################################################### -# 5. upgrade code to the latest stable version -# bcbio_nextgen.py upgrade -u stable -# upgrade code to development -# bcbio_nextgen.py upgrade -u development -######################################################################### -# 6. data installation/upgrade -# GRCh37, hg38, mm10 -# data installation takes a lot of time (gnomad, dbnsfp) it is better to have data and just upgrade bcbio code -# 1.1.5 - a huge update to python3, installed from scratch - -# upgrades data installed before (gemini, cadd) for all references -bcbio_nextgen.py upgrade --data --genomes GRCh37 - -# VEP is upgraded quite often ~2-3 months - when upgrading tools it looks for new VEP cache -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget vep - -# gemini ~3h for GRCh37 -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget gemini -# --genomes hg38 - -# cadd is in dbnsfp -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget cadd -# --genomes hg38 - -# gnomad 14h -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget gnomad -# bcbio_nextgen.py upgrade -u skip --genomes hg38 --datatarget gnomad - -# dbnsfp -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget dbnsfp - -# rnaseq -# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget rnaseq - -###################################################################### -# fresh installation for Sam with human and mouse genome - -# to check what enviroments were picked up during the installation -# conda info --envs --json -# check file ~/.conda/environments.txt - if it has environments from all installations they could interfere -# wget https://raw.github.com/bcbio/bcbio-nextgen/master/scripts/bcbio_nextgen_install.py -# export PYTHONPATH=/hpf/largeprojects/lauryl/bcbio110/anaconda/lib/python2.7 - -# PATH=/hpf/largeprojects/lauryl/bcbio110/anaconda/bin -# PATH=${PATH}:/usr/local/bin:/opt/moab/bin:/home/naumenko/cre:/home/naumenko/crt:/home/naumenko/crg:/home/naumenko/tools/mc-4.8.16/bin:/home/naumenko/jkent_tools -# PATH=${PATH}:/home/naumenko/bioscripts:.:/home/naumenko/.aspera/connect/bin:/usr/local/bin:/usr/lib64/qt-3.3/bin:/opt/moab/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin -# PATH=${PATH}:/opt/ibutils/bin:/sbin:/usr/sbin:/sbin:/usr/sbin -# export PATH - -# python bcbio_nextgen_install.py /hpf/largeprojects/lauryl/bcbio110 --tooldir=/hpf/largeprojects/lauryl/bcbio110 --genomes mm10 --aligners bwa --isolate -# bcbio_nextgen.py upgrade -u skip --tools --tooldir /hpf/largeprojects/lauryl/bcbio110 -# bcbio_nextgen.py upgrade -u skip --data --genomes mm10 --datatarget variation --datatarget vep - -date diff --git a/cre.bcbio.upgrade.star.sh b/cre.bcbio.upgrade.star.sh deleted file mode 100755 index 324ffb5..0000000 --- a/cre.bcbio.upgrade.star.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -#PBS -l walltime=150:00:00,nodes=1:ppn=30 -#PBS -joe . -#PBS -d . -#PBS -l vmem=50g,mem=50g - -. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.profile115 -which bcbio_nextgen.py - -hostname - -bcbio_path=/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5 - -#GRCh37 -reference="hg38" - -# sometimes cannot upgrade STAR on data nodes - memory is low, or cannot take much CPUs -# can take more than a day -export PATH=${bcbio_path}/bin:$PATH && STAR \ ---genomeDir ${bcbio_path}/genomes/Hsapiens/${reference}/star \ ---genomeFastaFiles ${bcbio_path}/genomes/Hsapiens/${reference}/seq/${reference}.fa \ ---runThreadN 30 --limitGenomeGenerateRAM 30000000000 --genomeChrBinNbits 14 --runMode genomeGenerate --genomeSAindexNbases 14 diff --git a/cre.bcbio.upgrade.star.sh.o47757114 b/cre.bcbio.upgrade.star.sh.o47757114 deleted file mode 100644 index ee4aa55..0000000 --- a/cre.bcbio.upgrade.star.sh.o47757114 +++ /dev/null @@ -1,14 +0,0 @@ -/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/bin/bcbio_nextgen.py -node080 -May 25 22:55:29 ..... started STAR run -May 25 22:55:30 ... starting to generate Genome files -May 25 22:56:48 ... starting to sort Suffix Array. This may take a long time... -May 25 22:57:14 ... sorting Suffix Array chunks and saving them to disk... -May 25 23:22:50 ... loading chunks from disk, packing SA... -May 25 23:30:16 ... finished generating suffix array -May 25 23:30:16 ... generating Suffix Array index -May 25 23:35:50 ... completed Suffix Array index -May 25 23:35:50 ... writing Genome to disk ... -May 25 23:36:04 ... writing Suffix Array to disk ... -May 25 23:39:16 ... writing SAindex to disk -May 25 23:39:41 ..... finished successfully diff --git a/cre.vcf2cre.sh b/cre.vcf2cre.sh index 9487c4d..bcba477 100755 --- a/cre.vcf2cre.sh +++ b/cre.vcf2cre.sh @@ -25,7 +25,7 @@ ##INFO= # gunzip -c 331606_S1.flt.nochr.vcf.gz | grep -v "^#" | grep PASS | sed s/":DPI:"/":DP:"awk -F ':' '{print $0"\tDP="$9}' | awk -F "\t" '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$11";"$8"\t"$9"\t"$10}' >> 331606.vcf -. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.test_profile +. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.profile115 bname=`basename $original_vcf .vcf.gz`