From 2648968c549e5b67d383b2af5d76fd3a1a9b9977 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 24 Oct 2023 21:29:05 -0400 Subject: [PATCH] Release pangene-1.0 (r183) --- cmd-log.sh | 17 +++++++++++++++++ pangene.1 | 2 +- pangene.h | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/cmd-log.sh b/cmd-log.sh index a73963e..4bcf66e 100644 --- a/cmd-log.sh +++ b/cmd-log.sh @@ -37,3 +37,20 @@ pangene -P Mtb.H37Rv.txt -p.001 aln/*.paf.gz 2> Mtb-merge-p0.r177.gfa.log | gzip # run panaroo runlog panaroo -i *.gff -o panaroo.strict -t 16 --clean-mode strict > panaroo.strict.log 2>&1 runlog panaroo -i *.gff -o panaroo.strict-merge -t 16 --clean-mode strict --merge_paralogs > panaroo.strict-merge.log 2>&1 + +############# +### Human ### +############# + +ls *.fa.gz | sed s,.fa.gz,, | xargs -i echo miniprot --outs=0.97 --no-cs -Iut16 {}.fa.gz ../HUMAN.faa.gz \| gzip \> {}.paf.gz|asub -j run-mp -M36 -n16 +../pangene -a2 -I@gape.huamn.incl -X@gape.human.excl aln-human/[14]*.paf.gz 2> human-human98-r182.gfa.log | gzip > human-human98-r182.gfa.gz + +################## +### Great apes ### +################## + +# generate proteins +cat HUMAN.faa.gz PANTR.faa.gz PANPA.faa.gz GROGO.faa.gz PONAB.faa.gz > merged.faa.gz +ls *.fa.gz|sed s,.fa.gz,,|xargs -i echo miniprot --outs=0.97 --no-cs -Iut16 {}.fa.gz merged.faa.gz \| gzip \> {}.paf.gz | asub -j run-mp -M40 -n16 +../pangene 1100_GRCh38.0.paf.gz 7031_chimpanzee.paf.gz 7032_bonobo.paf.gz 7033_gorilla.paf.gz 7034_orangutan.paf.gz > 5a.gfa +(zcat ../proteins/HUMAN.faa.gz; seqtk comp merged.faa.gz|cut -f1|grep -wFf <(../pangene.js gfa2matrix 5a.gfa|grep _|awk '$2==0'|cut -f1) | seqtk subseq -l80 merged.faa.gz -) | gzip > human+other.faa.gz diff --git a/pangene.1 b/pangene.1 index b213233..f775f3d 100644 --- a/pangene.1 +++ b/pangene.1 @@ -1,4 +1,4 @@ -.TH pangene 1 "19 October 2023" "pangene-0.0-dirty (r177)" "Bioinformatics tools" +.TH pangene 1 "24 October 2023" "pangene-1.0 (r183)" "Bioinformatics tools" .SH NAME .PP pangene - building pangenome gene graphs diff --git a/pangene.h b/pangene.h index 0f8217f..2affc3c 100644 --- a/pangene.h +++ b/pangene.h @@ -3,7 +3,7 @@ #include -#define PG_VERSION "0.0-r182-dirty" +#define PG_VERSION "1.0-r183-dirty" #define PG_F_WRITE_BED_RAW 0x1 #define PG_F_WRITE_BED_WALK 0x2