From 6afd6b259623371b72ca83da77b4dfb268e60a49 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 16 Jan 2025 17:59:26 +0100 Subject: [PATCH 1/9] vcf2table --- plugins/vcf2table.c | 611 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 611 insertions(+) create mode 100644 plugins/vcf2table.c diff --git a/plugins/vcf2table.c b/plugins/vcf2table.c new file mode 100644 index 00000000..9fdce43a --- /dev/null +++ b/plugins/vcf2table.c @@ -0,0 +1,611 @@ +/* The MIT License + + Copyright (c) 2019-2025 Genome Research Ltd. + + Author: Pierre Lindenbaum PhD Institut-du-Thorax. U1087. Nantes. France + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ + */ +#include +#include +#include +#include +#include +#include +#include // for isatty +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_NOT_NULL(a) do {if(a==NULL) {fprintf(stderr,"[%s:%d]NULL Ptr exception\n",__FILE__,__LINE__);abort();}} while(0) +#define WHERE fprintf(stderr,"[%s:%s:%d]\n",__FUNCTION__,__FILE__,__LINE__) +typedef unsigned char color_t; +typedef struct RGB { + color_t r,g,b,a; +} RGB; +typedef struct Cell { + kstring_t text; + kstring_t url; + RGB color; +} Cell,*CellPtr; + +typedef struct Row { + unsigned int size; + CellPtr* cells; +} Row,*RowPtr; + +typedef struct Table { + RowPtr header; + unsigned int size; + RowPtr* rows; +} Table,*TablePtr; + + +typedef struct KStringArray { + unsigned int size; + kstring_t* strings; +} KStringArray,*KStringArrayPtr; + + +typedef struct +{ +bcf_hdr_t* header; +FILE* out; +int ascii; +unsigned long n_variants; +} args_t; + +static args_t args; + +/** build a new Cell */ +static CellPtr CellNew() { + CellPtr ptr = (CellPtr)calloc(1UL,sizeof(Cell)); + ASSERT_NOT_NULL(ptr); + ks_initialize(&(ptr->text)); + ks_initialize(&(ptr->url)); + return ptr; + } +static CellPtr CellClear(CellPtr ptr) { + ASSERT_NOT_NULL(ptr); + ks_clear(&(ptr->text)); + return ptr; + } + static CellPtr CellAppendText(CellPtr ptr, const char* s) { + ASSERT_NOT_NULL(ptr); + if(s!=NULL) kputs(s,&(ptr->text)); + return ptr; + } + + static CellPtr CellAppendTextN(CellPtr ptr, const char* s,unsigned int n) { + ASSERT_NOT_NULL(ptr); + if(s!=NULL) kputsn(s,n,&(ptr->text)); + return ptr; + } + + +/** build a new Cell */ +static CellPtr CellSetText(CellPtr ptr, const char* s) { + CellClear(ptr); + CellAppendText(ptr,s); + return ptr; + } +static CellPtr CellSetLL(CellPtr ptr, long long v) { + CellClear(ptr); + kputll(v,&(ptr->text)); + return ptr; + } +static CellPtr CellSetD(CellPtr ptr, double v) { + CellClear(ptr); + kputd(v,&(ptr->text)); + return ptr; + } + + +/** build a new Cell with string content 
*/ +static CellPtr CellNewStr(const char* s) { + CellPtr ptr = CellNew(); + ASSERT_NOT_NULL(ptr); + return CellSetText(ptr,s); + } +static unsigned int CellWidth(CellPtr ptr) { + ASSERT_NOT_NULL(ptr); + return ks_len(&(ptr->text)); + } + +static void CellPrint(CellPtr ptr,args_t* args) { + ASSERT_NOT_NULL(ptr); + if(args->ascii) { + fwrite((void*)ks_c_str(&(ptr->text)), sizeof(char), ks_len(&(ptr->text)), args->out); + } + else + { + fwprintf(args->out,L"%s",ks_c_str(&(ptr->text))); + } + } +static void CellFree(CellPtr ptr) { + if(ptr==NULL) return; + ks_free(&(ptr->text)); + ks_free(&(ptr->url)); + free(ptr); + } + +static unsigned int RowSize(RowPtr row) { + return row->size; + } + +static void RowFree(RowPtr ptr) { + if(ptr==NULL) return; + if(ptr->cells!=NULL) { + unsigned int i; + for(i=0;i< ptr->size;++i) { + CellFree(ptr->cells[i]); + } + free(ptr->cells); + } + free(ptr); + } + + +static RowPtr RowNew(unsigned int size) { + unsigned int i; + RowPtr ptr = (RowPtr)calloc(1UL,sizeof(Row)); + ASSERT_NOT_NULL(ptr); + ptr->cells = (CellPtr*)calloc(size,sizeof(CellPtr)); + ASSERT_NOT_NULL(ptr->cells); + ptr->size = size; + for(i=0;i< size;++i) { + ptr->cells[i] = CellNew(); + ASSERT_NOT_NULL(ptr->cells[i]); + } + return ptr; + } + +static RowPtr RowAppend(RowPtr ptr,CellPtr cell) { + ASSERT_NOT_NULL(ptr); + ASSERT_NOT_NULL(cell); + ptr->cells = (CellPtr*)realloc(ptr->cells,(ptr->size+1)*sizeof(CellPtr)); + ASSERT_NOT_NULL(ptr->cells); + ptr->cells[ptr->size] = cell; + ptr->size++; + return ptr; + } + +static RowPtr RowAppendStr(RowPtr row,const char* s) { + return RowAppend(row,CellNewStr(s)); + } + + +static CellPtr RowGet(RowPtr row,unsigned int idx) { + assert(idx < RowSize(row)); + return row->cells[idx]; + } + +static unsigned int TableNCols(TablePtr t) { +return RowSize(t->header); +} + +static unsigned int TableNRows(TablePtr t) { +ASSERT_NOT_NULL(t); +return t->size; +} + +static void TableFree(TablePtr ptr) { + if(ptr==NULL) return; + 
RowFree(ptr->header); + if(ptr->rows!=NULL) { + unsigned int i; + for(i=0;i< ptr->size;++i) { + RowFree(ptr->rows[i]); + } + free(ptr->rows); + } + free(ptr); + } + +static TablePtr TableNew(unsigned int ncols) { + TablePtr ptr = (TablePtr)(calloc(1UL,sizeof(Table))); + ASSERT_NOT_NULL(ptr); + ptr->size = 0UL; + ptr->rows = NULL; + ptr->header= RowNew(ncols); + ASSERT_NOT_NULL(ptr->header); + return ptr; + } + +static RowPtr TableRowAt(TablePtr ptr,unsigned int y) { +ASSERT_NOT_NULL(ptr); +assert(y < TableNRows(ptr)); +ASSERT_NOT_NULL(ptr->rows); +ASSERT_NOT_NULL(ptr->rows[y]); +return ptr->rows[y]; +} + +static TablePtr TableAppendColumn(TablePtr ptr,const char* title) { + unsigned int y; + ASSERT_NOT_NULL(ptr); + RowAppendStr(ptr->header,title); + for(y=0;y< TableNRows(ptr);++y) { + RowAppendStr(TableRowAt(ptr,y),""); + } + return ptr; + } + +static TablePtr TableNewStr(const char* str,...) { + va_list arg; + TablePtr ptr = TableNew(0UL); + ASSERT_NOT_NULL(ptr); + va_start(arg, str); + while (str) { + TableAppendColumn(ptr,str); + str = va_arg(arg, const char *); + } + va_end(arg); + return ptr; + } + + + + + +static CellPtr TableAt(TablePtr ptr,unsigned int x,unsigned int y) { +RowPtr row = TableRowAt(ptr,y); +return RowGet(row,x); +} + +static RowPtr TableNewRow(TablePtr ptr) { + RowPtr row = RowNew(TableNCols(ptr)); + ptr->rows = (RowPtr*)realloc(ptr->rows,(ptr->size+1)*sizeof(RowPtr)); + ASSERT_NOT_NULL(ptr->rows); + ptr->rows[ptr->size] = row; + ptr->size++; + return row; + } + +static void printSymbol(args_t* args,unsigned int repeat, wchar_t wc, char c) { +unsigned int i; +if(args->ascii==1) { + for(i=0;i< repeat;i++) { + fputc(c,args->out); + } + } +else + { + for(i=0;i< repeat;i++) { + fputwc(wc,args->out); + } + } +} +#define FPUTC(C) do { if(args->ascii) fputc(C,args->out); else fputwc(C,args->out);} while(0) + +static void TablePrint(TablePtr ptr,args_t* args) { +unsigned int y,x; +unsigned int* widths = calloc(TableNCols(ptr),sizeof(unsigned int)); 
+ASSERT_NOT_NULL(ptr); + +for(x=0; xheader,x)); + if(width>widths[x]) widths[x] = width; + } + +for(y=0;y< TableNRows(ptr);++y) { + for(x=0; xwidths[x]) widths[x] = width; + } + } + + //print header + + // line 1 of header + for(x=0;xheader,x),args); + printSymbol(args,widths[x]-CellWidth(RowGet(ptr->header,x)),' ',' '); + FPUTC(' '); + } + printSymbol(args,1,L'\u2502','|'); + FPUTC('\n'); + + //line 3 of header + for(int x=0;x0) + { + for(x=0;xstrings=(kstring_t*)realloc(ptr->strings,sizeof(kstring_t)*(ptr->size+1)); + ASSERT_NOT_NULL(ptr->strings); + ks_initialize(&ptr->strings[ptr->size]); + kputsn(prev,p-prev,&ptr->strings[ptr->size]); + ptr->size++; + if(*p==0) break; + prev=p+1; + } + p++; + } +return ptr; +} +void KStringArrayFree(KStringArray* ptr) { +unsigned int i; +if(ptr==NULL) return; +for(i=0;i< ptr->size;++i) { + ks_free(&ptr->strings[i]); + } +free(ptr); +} + + +/* + Called for each VCF record. Return rec to output the line or NULL + to suppress output. +*/ +bcf1_t *process(bcf1_t *v) { +unsigned int i; +args.n_variants++; +bcf_hdr_t *h = args.header; +kstring_t vcf_line = KS_INITIALIZE; +vcf_format(args.header, v, &vcf_line); +//remove last CR/LF +if(vcf_line.s[vcf_line.l-1]=='\n') { + vcf_line.s[vcf_line.l-1]=0; + vcf_line.l--; + } +KStringArrayPtr tokens = KStringArrayNew(vcf_line.s,'\t'); + +TablePtr p = TableNewStr("KEY","VALUE",NULL); + +RowPtr row = TableNewRow(p); +CellSetText(RowGet(row,0), "CHROM"); +CellSetText(RowGet(row,1), bcf_seqname(h, v)); + +row = TableNewRow(p); +CellSetText(RowGet(row,0), "POS"); +CellSetLL(RowGet(row,1), v->pos + 1); + +row = TableNewRow(p); +CellSetText(RowGet(row,0), "ID"); +CellSetText(RowGet(row,1),tokens->strings[2].s); + +row = TableNewRow(p); +CellSetText(RowGet(row,0), "REF"); +CellSetText(RowGet(row,1),tokens->strings[3].s); + +row = TableNewRow(p); +CellSetText(RowGet(row,0), "ALT"); +CellSetText(RowGet(row,1),tokens->strings[4].s); + +row = TableNewRow(p); +CellSetText(RowGet(row,0), "QUAL"); 
+CellSetText(RowGet(row,1),tokens->strings[5].s); + +row = TableNewRow(p); +CellSetText(RowGet(row,0), "FILTER"); +CellSetText(RowGet(row,1),tokens->strings[6].s); + + +TablePrint(p,&args); +TableFree(p); + +if(tokens->size>7 && strcmp(tokens->strings[7].s,".")!=0) { + KStringArrayPtr infos = KStringArrayNew(tokens->strings[7].s,';'); + TablePtr p = TableNewStr("KEY","IDX","VALUE",NULL); + for(i=0;i< infos->size;i++) { + unsigned int j; + char* info = infos->strings[i].s; + char* eq = strchr(info,'='); + if(eq==NULL || eq==info) continue; + KStringArrayPtr values = KStringArrayNew(eq+1,','); + for(j=0;j< values->size;j++) { + row = TableNewRow(p); + CellAppendTextN(RowGet(row,0),info,eq-info); + if(values->size>1) CellSetD(RowGet(row,1),(int)(j+1)); + CellSetText(RowGet(row,2),values->strings[j].s); + } + KStringArrayFree(values); + } + TablePrint(p,&args); + TableFree(p); + KStringArrayFree(infos); + } + + +//fputc('\n',args.out); + +if(tokens->size>9) { + + KStringArrayPtr formats = KStringArrayNew(tokens->strings[8].s,':'); + TablePtr p = TableNewStr("SAMPLE",NULL); + TableAppendColumn(p, "GTYPE"); + int gt_col = -1; + for(i=0; isize;i++) { + TableAppendColumn(p, formats->strings[i].s); + if(strcmp("GT",formats->strings[i].s)==0) gt_col=(int)i; + } + + for(i=9;i< tokens->size;i++) { + kstring_t gtype_name = KS_INITIALIZE; + int count_allele_0=0; + int count_allele_1=0; + int count_allele_missing=0; + int count_allele_other=0; + int print_it = 1; + KStringArrayPtr values = KStringArrayNew(tokens->strings[i].s,':'); + unsigned int j; + if(gt_col!=-1) { + char* gt = strdup(values->strings[gt_col].s); + for(j=0;gt[j]!=0;j++) { + if(gt[j]=='|') gt[j]='/'; + } + KStringArrayPtr alleles = KStringArrayNew(gt,'/'); + for(j=0;j< alleles->size;++j) { + char* allele = alleles->strings[j].s; + if(strcmp(allele,"0")==0) count_allele_0++; + else if(strcmp(allele,"1")==0) count_allele_1++; + else if(strcmp(allele,".")==0) count_allele_missing++; + else count_allele_other++; + } 
+ if(alleles->size==2) { + if(count_allele_0==0 && count_allele_1==0 && count_allele_other==0) kputs("NO_CALL",>ype_name); + else if(count_allele_0==2) kputs("HOM_REF",>ype_name); + else if(count_allele_1==2) kputs("HOM_VAR",>ype_name); + else if(count_allele_0==1 && count_allele_1==1) kputs("HET",>ype_name); + } + else if(alleles->size==1) { + if(count_allele_0==1) kputs("REF",>ype_name); + else if(count_allele_1==1) kputs("VAR",>ype_name); + else if(count_allele_missing==1) kputs("NO_CALL",>ype_name); + } + KStringArrayFree(alleles); + free(gt); + } + if(print_it) { + row = TableNewRow(p); + CellSetText(RowGet(row,0),args.header->samples[i-9]); + CellSetText(RowGet(row,1),gtype_name.s); + + for(j=0; j< values->size;j++) { + CellSetText(RowGet(row,j+2), values->strings[j].s); + } + } + KStringArrayFree(values); + ks_free(>ype_name); + } + TablePrint(p,&args); + TableFree(p); + KStringArrayFree(formats); + } + + +fflush(args.out); +fputc('\n',args.out); + + + + + +for(i=0;i< tokens->size;i++) { + fprintf(stderr,"[%d] = %s\n",i,tokens->strings[i].s); + } +fflush(args.out); +fprintf(args.out,"<<< %s:%s:%s\n",tokens->strings[0].s,tokens->strings[1].s,tokens->strings[3].s); +fflush(args.out); + +ks_free(&vcf_line); +KStringArrayFree(tokens); +return NULL; +} + +void destroy(void) +{ +} + + From 8bba9598dbc202588c00ca5556f4a2182c249868 Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum Date: Fri, 17 Jan 2025 00:30:52 +0100 Subject: [PATCH 2/9] cont --- plugins/vcf2table.c | 665 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 520 insertions(+), 145 deletions(-) diff --git a/plugins/vcf2table.c b/plugins/vcf2table.c index 9fdce43a..4e041f51 100644 --- a/plugins/vcf2table.c +++ b/plugins/vcf2table.c @@ -25,11 +25,13 @@ */ #include #include -#include + #include #include +#include #include #include #include // for isatty +#include "hts_internal.h" #include #include #include @@ -37,17 +39,40 @@ #include #include #include +#include +#include "../bcftools.h" #define 
ASSERT_NOT_NULL(a) do {if(a==NULL) {fprintf(stderr,"[%s:%d]NULL Ptr exception\n",__FILE__,__LINE__);abort();}} while(0) #define WHERE fprintf(stderr,"[%s:%s:%d]\n",__FUNCTION__,__FILE__,__LINE__) + +#define DEFINE_ANSI_IOMANIP(NAME,OPCODE) const char* COLOR_##NAME="\033[" #OPCODE "m"; + +DEFINE_ANSI_IOMANIP(RESET,0) +DEFINE_ANSI_IOMANIP(BLACK,30) +DEFINE_ANSI_IOMANIP(RED,31) +DEFINE_ANSI_IOMANIP(GREEN,32) +DEFINE_ANSI_IOMANIP(YELLOW,33) +DEFINE_ANSI_IOMANIP(BLUE,34) +DEFINE_ANSI_IOMANIP(MAGENTA,35) +DEFINE_ANSI_IOMANIP(CYAN,36) +DEFINE_ANSI_IOMANIP(WHITE,37) + + + +KHASH_MAP_INIT_STR(vdict, bcf_idinfo_t) +typedef khash_t(vdict) vdict_t; + +KHASH_MAP_INIT_STR(hdict, bcf_hrec_t*) +typedef khash_t(hdict) hdict_t; + + + typedef unsigned char color_t; -typedef struct RGB { - color_t r,g,b,a; -} RGB; + typedef struct Cell { kstring_t text; kstring_t url; - RGB color; + const char* color; } Cell,*CellPtr; typedef struct Row { @@ -62,10 +87,18 @@ typedef struct Table { } Table,*TablePtr; -typedef struct KStringArray { +enum build_t { + undefined, + human_hg19, + human_hg38 + }; + +typedef struct StringList_t { unsigned int size; - kstring_t* strings; -} KStringArray,*KStringArrayPtr; + char** strings; +} StringList,*StringListPtr; + + typedef struct @@ -73,7 +106,13 @@ typedef struct bcf_hdr_t* header; FILE* out; int ascii; +StringListPtr vepTokens; +StringListPtr bcsqTokens; +regex_t regex_rsid; unsigned long n_variants; +enum build_t build; +int hide_HOM_REF; +int hide_NO_CALL; } args_t; static args_t args; @@ -82,6 +121,7 @@ static args_t args; static CellPtr CellNew() { CellPtr ptr = (CellPtr)calloc(1UL,sizeof(Cell)); ASSERT_NOT_NULL(ptr); + ptr->color = COLOR_BLACK; ks_initialize(&(ptr->text)); ks_initialize(&(ptr->url)); return ptr; @@ -133,15 +173,20 @@ static unsigned int CellWidth(CellPtr ptr) { return ks_len(&(ptr->text)); } + static const char* CellCStr(CellPtr ptr) { + ASSERT_NOT_NULL(ptr); + return ks_c_str(&(ptr->text)); + } + static void CellPrint(CellPtr 
ptr,args_t* args) { ASSERT_NOT_NULL(ptr); - if(args->ascii) { - fwrite((void*)ks_c_str(&(ptr->text)), sizeof(char), ks_len(&(ptr->text)), args->out); - } - else - { - fwprintf(args->out,L"%s",ks_c_str(&(ptr->text))); - } + if(args->ascii==0 && ptr->color!=NULL && ptr->color!=COLOR_BLACK) { + fputs(ptr->color,args->out); + } + fwrite((void*)ks_c_str(&(ptr->text)), sizeof(char), ks_len(&(ptr->text)), args->out); + if(args->ascii==0 && ptr->color!=NULL && ptr->color!=COLOR_BLACK) { + fputs(COLOR_RESET,args->out); + } } static void CellFree(CellPtr ptr) { if(ptr==NULL) return; @@ -191,12 +236,21 @@ static RowPtr RowAppend(RowPtr ptr,CellPtr cell) { return ptr; } +static RowPtr RowRemoveAt(RowPtr ptr,unsigned int idx) { + ASSERT_NOT_NULL(ptr); + assert(idx < ptr->size); + CellFree(ptr->cells[idx]); + memmove(&ptr->cells[idx], &ptr->cells[idx+1], sizeof(CellPtr)*((ptr->size-1)-idx)); + ptr->size--; + return ptr; + } + static RowPtr RowAppendStr(RowPtr row,const char* s) { return RowAppend(row,CellNewStr(s)); } -static CellPtr RowGet(RowPtr row,unsigned int idx) { +static CellPtr RowAt(RowPtr row,unsigned int idx) { assert(idx < RowSize(row)); return row->cells[idx]; } @@ -234,12 +288,12 @@ static TablePtr TableNew(unsigned int ncols) { } static RowPtr TableRowAt(TablePtr ptr,unsigned int y) { -ASSERT_NOT_NULL(ptr); -assert(y < TableNRows(ptr)); -ASSERT_NOT_NULL(ptr->rows); -ASSERT_NOT_NULL(ptr->rows[y]); -return ptr->rows[y]; -} + ASSERT_NOT_NULL(ptr); + assert(y < TableNRows(ptr)); + ASSERT_NOT_NULL(ptr->rows); + ASSERT_NOT_NULL(ptr->rows[y]); + return ptr->rows[y]; + } static TablePtr TableAppendColumn(TablePtr ptr,const char* title) { unsigned int y; @@ -266,12 +320,45 @@ static TablePtr TableNewStr(const char* str,...) 
{ +static CellPtr TableAt(TablePtr ptr,unsigned int x,unsigned int y) { + RowPtr row = TableRowAt(ptr,y); + return RowAt(row,x); + } +static int TableIsColumnEmpty(TablePtr ptr,unsigned int x) { + unsigned int y; + ASSERT_NOT_NULL(ptr); + assert(x < TableNCols(ptr)); + for(y=0;y < TableNRows(ptr);++y) { + CellPtr cell = TableAt(ptr,x,y); + if(CellWidth(cell)!=0) return 0; + } + return 1; + } + +static TablePtr TableRemoveColumn(TablePtr ptr,unsigned int x) { + RowRemoveAt(ptr->header,x); + for(int i=0;i< ptr->size;i++) { + RowRemoveAt(ptr->rows[i],x); + } + return ptr; + } + +static TablePtr TableRemoveEmptyColumns(TablePtr ptr) { + unsigned int x=0; + ASSERT_NOT_NULL(ptr); + while(x strings=(char**)realloc(ptr->strings,sizeof(char*)*(ptr->size+1)); + ASSERT_NOT_NULL(ptr->strings); + ptr->strings[ptr->size] = strndup(prev,p-prev); + ASSERT_NOT_NULL( ptr->strings[ptr->size]); + ptr->size++; + if(*p==0) break; + prev=p+1; + } + p++; + } +return ptr; +} +/** +Dispose list of String +*/ + + +void StringListFree(StringList* ptr) { +unsigned int i; +if(ptr==NULL) return; +for(i=0;i< ptr->size;++i) { + free(ptr->strings[i]); + } +free(ptr); +} + +const char* StringListAt(StringList* ptr,unsigned int idx) { +ASSERT_NOT_NULL(ptr); +assert(idx < ptr->size); +return ptr->strings[idx]; +} + + +/** +print symbol +*/ + + +static void printSymbol(args_t* args,unsigned int repeat, const char* wc, char c) { unsigned int i; if(args->ascii==1) { for(i=0;i< repeat;i++) { @@ -292,11 +425,11 @@ if(args->ascii==1) { else { for(i=0;i< repeat;i++) { - fputwc(wc,args->out); + fputs(wc,args->out); } } } -#define FPUTC(C) do { if(args->ascii) fputc(C,args->out); else fputwc(C,args->out);} while(0) + static void TablePrint(TablePtr ptr,args_t* args) { unsigned int y,x; @@ -304,7 +437,7 @@ unsigned int* widths = calloc(TableNCols(ptr),sizeof(unsigned int)); ASSERT_NOT_NULL(ptr); for(x=0; xheader,x)); + unsigned int width = CellWidth(RowAt(ptr->header,x)); if(width>widths[x]) widths[x] = width; 
} @@ -320,73 +453,101 @@ for(y=0;y< TableNRows(ptr);++y) { // line 1 of header for(x=0;xout); //line 2 of header for(int x=0;xheader,x),args); - printSymbol(args,widths[x]-CellWidth(RowGet(ptr->header,x)),' ',' '); - FPUTC(' '); + printSymbol(args,1,"\u2502",'|'); + fputc(' ',args->out); + CellPrint(RowAt(ptr->header,x),args); + printSymbol(args,widths[x]-CellWidth(RowAt(ptr->header,x))," ",' '); + fputc(' ',args->out); } - printSymbol(args,1,L'\u2502','|'); - FPUTC('\n'); + printSymbol(args,1,"\u2502",'|'); + fputc('\n',args->out); //line 3 of header for(int x=0;xout); //print body for(y=0;y< TableNRows(ptr);++y) { RowPtr row = TableRowAt(ptr,y); //line of data for(x=0;xout); CellPrint(cell,args); - printSymbol(args,widths[x]-CellWidth(cell),' ',' '); - FPUTC(' '); + printSymbol(args,widths[x]-CellWidth(cell)," ",' '); + fputc(' ',args->out); } - printSymbol(args,1,L'\u2502','|'); - FPUTC('\n'); + printSymbol(args,1,"\u2502",'|'); + fputc('\n',args->out); } //last line if(TableNRows(ptr)>0) { for(x=0;xout); } free(widths); } +static void HyperLinkTableAdd(TablePtr table, const char* allele, const char* key, const char* value) { +if(value==NULL || strcmp(value,"")==0) return; +RowPtr row = TableNewRow(table); +CellSetText(RowAt(row,0),value); +} + +static int findContigs(bcf_hdr_t *hdr_in, const char* ctg1a, uint64_t len1, const char* ctg2a, uint64_t len2) { + char ctg1b[10]; + char ctg2b[10]; + sprintf(ctg1b, "chr%s", ctg1a); + sprintf(ctg2b, "chr%s", ctg2a); + int found=0; + int i,n_contigs= hdr_in->n[BCF_DT_CTG]; + for(i=0;i< n_contigs && found<2 ;i++) { + uint64_t len; + bcf_idpair_t c = hdr_in->id[BCF_DT_CTG][i]; + len = c.val->info[0]; + const char* contig_name = c.key; + if(len == len1 && (strcmp(ctg1a,contig_name)==0 || strcmp(ctg1b,contig_name)==0)) { + found++; + } + else if(len == len2 && (strcmp(ctg2a,contig_name)==0 || strcmp(ctg2b,contig_name)==0)) { + found++; + } + } + return found==2; + } + const char *about(void) { return "Convert VCF to 
table.\n"; @@ -396,13 +557,56 @@ const char *about(void) Called once at startup, it initializes local variables. Return 1 to suppress VCF/BCF header from printing, 0 otherwise. */ -int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out) +int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) { - args.header = in; + int c; + args.header = hdr_in; args.ascii = 0; args.out = stdout; args.n_variants=0L; + args.vepTokens = NULL; + args.bcsqTokens = NULL; + args.hide_HOM_REF = 0; + args.hide_NO_CALL = 0; + c = regcomp(&args.regex_rsid,"rs[0-9]+",REG_EXTENDED|REG_ICASE|REG_NOSUB); + assert(c==0); + static struct option loptions[] = + { + {"hide",required_argument,NULL,'x'}, + {0,0,0,0} + }; + + while ((c = getopt_long(argc, argv, "hx:",loptions,NULL)) >= 0) + { + switch (c) + { + case 'x': + { + int i; + StringListPtr hide = StringListNew(optarg,','); + for(i=0; i< hide->size;++i) { + char* hidden = StringListAt(hide,i); + if(strcasecmp(hidden, "HOM_REF")==0) { + args.hide_HOM_REF = 1; + } + else if(strcasecmp(hidden, "NO_CALL")==0 || strcasecmp(hidden, "MISSING")==0) { + args.hide_NO_CALL = 1; + } + } + StringListFree(hide); + break; + } + case 'h': + case '?': + default: error("wrong arguments"); break; + } + } + + if ( !isatty(fileno((FILE *)stdout)) ) { + args.ascii=1; + } + if(args.ascii==0) { if (setlocale(LC_CTYPE, "") == NULL) { @@ -410,45 +614,84 @@ int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out) args.ascii=1; } } + /* guess the build ?*/ + { + if( findContigs(hdr_in,"1",249250621,"2",243199373)) { + args.build = human_hg19; + } + else if( findContigs(hdr_in,"1",248956422,"2",242193529)) { + args.build = human_hg38; + } + else { + args.build = undefined; + } + } + /** find INFO/CSQ and decode it */ + bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr_in, BCF_HL_INFO, NULL, "CSQ", NULL); + if(hrec!=NULL) { + int ret = bcf_hrec_find_key(hrec, "Description"); + char *format = ret < 0 ? 
NULL: strstr(hrec->vals[ret], "Format: "); + if(format!=NULL) { + format += 8; + char* vep_format = strdup(format); + //remove trailing quote + if(vep_format[strlen(vep_format)-1]=='"') { + vep_format[strlen(vep_format)-1] = 0; + } + args.vepTokens = StringListNew(vep_format,'|'); + free(vep_format); + } + } + /** find INFO/BCSQ */ + hrec = bcf_hdr_get_hrec(hdr_in, BCF_HL_INFO, NULL, "BCSQ", NULL); + if(hrec!=NULL) { + int ret = bcf_hrec_find_key(hrec, "Description"); + char *format = ret < 0 ? NULL: strstr(hrec->vals[ret], "Format: "); + if(format!=NULL) { + format += 8; + args.bcsqTokens = StringListNew(format,'|'); + } + } + + return 1;//suppress VCF/BCF header } -static KStringArrayPtr KStringArrayNew(const char* str,char delim) { -KStringArrayPtr ptr = calloc(1UL,sizeof(KStringArray)); -ASSERT_NOT_NULL(ptr); -char* prev=(char*)str; -char* p =(char*)str; -for(;;) { - if(*p==delim || *p==0) { - ptr->strings=(kstring_t*)realloc(ptr->strings,sizeof(kstring_t)*(ptr->size+1)); - ASSERT_NOT_NULL(ptr->strings); - ks_initialize(&ptr->strings[ptr->size]); - kputsn(prev,p-prev,&ptr->strings[ptr->size]); - ptr->size++; - if(*p==0) break; - prev=p+1; - } - p++; - } -return ptr; -} -void KStringArrayFree(KStringArray* ptr) { -unsigned int i; -if(ptr==NULL) return; -for(i=0;i< ptr->size;++i) { - ks_free(&ptr->strings[i]); - } -free(ptr); + +static void escapeHttp(kstring_t* k,const char* s) { +char* p=(char*)s; +while(*p!=0) { + if(isalnum(*p)) { + kputc(*p,k); + } + else + { + kputc(*p,k); + //ksprintf(k,"%%%02x", (unsigned char)*p); + } + p++ ; + } } +#define PRINT_HEADER \ + switch(args.build) {\ + case human_hg19 : fputs(" GRCh37 : ",args.out); break;\ + case human_hg38 : fputs(" GRCh38 : ",args.out); break;\ + default:break;\ + }\ + fprintf(args.out,"%s:%s:%s (%ld)\n",tokens->strings[0],tokens->strings[1],tokens->strings[3], args.n_variants) + /* Called for each VCF record. Return rec to output the line or NULL to suppress output. 
*/ bcf1_t *process(bcf1_t *v) { +TablePtr vepTable = NULL; +TablePtr bcsqTable = NULL; +TablePtr hyperlinksTable = TableNewStr("DB","Allele","URL",NULL); unsigned int i; args.n_variants++; bcf_hdr_t *h = args.header; @@ -459,76 +702,200 @@ if(vcf_line.s[vcf_line.l-1]=='\n') { vcf_line.s[vcf_line.l-1]=0; vcf_line.l--; } -KStringArrayPtr tokens = KStringArrayNew(vcf_line.s,'\t'); + + + +StringListPtr tokens = StringListNew(vcf_line.s,'\t'); +StringListPtr alt_alleles = StringListNew(StringListAt(tokens,4),','); + + +fputs("<<<",args.out); +PRINT_HEADER; TablePtr p = TableNewStr("KEY","VALUE",NULL); RowPtr row = TableNewRow(p); -CellSetText(RowGet(row,0), "CHROM"); -CellSetText(RowGet(row,1), bcf_seqname(h, v)); +CellSetText(RowAt(row,0), "CHROM"); +CellSetText(RowAt(row,1),StringListAt(tokens,0)); row = TableNewRow(p); -CellSetText(RowGet(row,0), "POS"); -CellSetLL(RowGet(row,1), v->pos + 1); +CellSetText(RowAt(row,0), "POS"); +CellSetText(RowAt(row,1),StringListAt(tokens,1)); row = TableNewRow(p); -CellSetText(RowGet(row,0), "ID"); -CellSetText(RowGet(row,1),tokens->strings[2].s); +CellSetText(RowAt(row,0), "ID"); +CellSetText(RowAt(row,1),StringListAt(tokens,2)); +if(regexec(&args.regex_rsid, StringListAt(tokens,2),0,NULL,0)==0) { + HyperLinkTableAdd(hyperlinksTable,NULL, "RSID", StringListAt(tokens,2)); + } row = TableNewRow(p); -CellSetText(RowGet(row,0), "REF"); -CellSetText(RowGet(row,1),tokens->strings[3].s); +CellSetText(RowAt(row,0), "REF"); +CellSetText(RowAt(row,1),StringListAt(tokens,3)); row = TableNewRow(p); -CellSetText(RowGet(row,0), "ALT"); -CellSetText(RowGet(row,1),tokens->strings[4].s); +CellSetText(RowAt(row,0), "ALT"); +CellSetText(RowAt(row,1),StringListAt(tokens,4)); row = TableNewRow(p); -CellSetText(RowGet(row,0), "QUAL"); -CellSetText(RowGet(row,1),tokens->strings[5].s); +CellSetText(RowAt(row,0), "QUAL"); +CellSetText(RowAt(row,1),tokens->strings[5]); row = TableNewRow(p); -CellSetText(RowGet(row,0), "FILTER"); 
-CellSetText(RowGet(row,1),tokens->strings[6].s); - +CellSetText(RowAt(row,0), "FILTER"); +CellSetText(RowAt(row,1),StringListAt(tokens,6)); +if(strcmp(StringListAt(tokens,6),".")!=0 && strcmp(StringListAt(tokens,6),"PASS")!=0) { + RowAt(row,1)->color = COLOR_RED; + } + fprintf(args.out, "# Variant\n"); TablePrint(p,&args); TableFree(p); +fputc('\n',args.out); -if(tokens->size>7 && strcmp(tokens->strings[7].s,".")!=0) { - KStringArrayPtr infos = KStringArrayNew(tokens->strings[7].s,';'); + +/* ADD HYPERLINKS */ +if(args.build == human_hg19 || args.build==human_hg38) { + kstring_t url = KS_INITIALIZE; + for(i=0;i< alt_alleles->size;++i) { + ks_clear(&url); + const char* alt_allele= StringListAt(alt_alleles,i); + + + RowPtr annot = TableNewRow(hyperlinksTable); + CellSetText(RowAt(annot,0), "GNOMAD"); + CellSetText(RowAt(annot,1), alt_allele); + kputs("https://gnomad.broadinstitute.org/variant/",&url); + escapeHttp(&url,StringListAt(tokens,0)); + escapeHttp(&url,"-"); + kputs(StringListAt(tokens,1),&url); + escapeHttp(&url,"-"); + escapeHttp(&url,StringListAt(tokens,3)); + escapeHttp(&url,"-"); + escapeHttp(&url,alt_allele); + + + CellSetText(RowAt(annot,2), url.s); + } + // StringUtils.escapeHttp(ensemblCtg) + "-" + ctx.getStart() +"-"+ctx.getReference().getDisplayString()+"-"+alt.getDisplayString()+"?dataset=gnomad_r2_1" + //HyperLinkTableAdd(hyperlinksTable,NULL, "RSID",url.s); + + + + ks_free(&url); + } + + +if(tokens->size>7 && strcmp(tokens->strings[7],".")!=0) { + StringListPtr infos = StringListNew(tokens->strings[7],';'); TablePtr p = TableNewStr("KEY","IDX","VALUE",NULL); for(i=0;i< infos->size;i++) { unsigned int j; - char* info = infos->strings[i].s; + const char* info = StringListAt(infos,i); char* eq = strchr(info,'='); if(eq==NULL || eq==info) continue; - KStringArrayPtr values = KStringArrayNew(eq+1,','); + + + + StringListPtr values = StringListNew(eq+1,','); for(j=0;j< values->size;j++) { + //skip CSQ + if(args.vepTokens!=NULL && 
strncmp(info,"CSQ=",4)==0) { + unsigned int k; + //build VEP table if needed + if(vepTable==NULL) { + vepTable = TableNew(0); + for(k=0;k< args.vepTokens->size;++k) { + TableAppendColumn(vepTable,StringListAt( args.vepTokens,k)); + } + } + // fill VEP table + row = TableNewRow(vepTable); + StringListPtr veps = StringListNew( StringListAt(values,j),'|'); + for(k=0;k< args.vepTokens->size && k < veps->size;++k) { + CellSetText(RowAt(row,k),StringListAt( veps,k)); + + if(strcmp(StringListAt(args.vepTokens,k), "SYMBOL")==0) { + HyperLinkTableAdd(hyperlinksTable, NULL,StringListAt(args.vepTokens,k), StringListAt( veps,k)); + } + + } + StringListFree(veps); + continue; + } + + //skip BCSQ + if(args.bcsqTokens!=NULL && strncmp(info,"BCSQ=",5)==0) {WHERE; + unsigned int k; + //build BCSQ table if needed + if(bcsqTable==NULL) { + bcsqTable = TableNew(0); + for(k=0;k< args.bcsqTokens->size;++k) { + TableAppendColumn(bcsqTable,StringListAt( args.bcsqTokens,k)); + } + } + + // fill BCSQ table + row = TableNewRow(bcsqTable); + StringListPtr bcsq = StringListNew( StringListAt(values,j),'|'); + for(k=0;k< args.bcsqTokens->size && k < bcsq->size;++k) { + CellSetText(RowAt(row,k),StringListAt( bcsq,k)); + + if(strcmp(StringListAt(args.bcsqTokens,k), "SYMBOL")==0) { + HyperLinkTableAdd(hyperlinksTable, NULL,StringListAt(args.bcsqTokens,k), StringListAt( bcsq,k)); + } + + } + StringListFree(bcsq); + continue; + } + row = TableNewRow(p); - CellAppendTextN(RowGet(row,0),info,eq-info); - if(values->size>1) CellSetD(RowGet(row,1),(int)(j+1)); - CellSetText(RowGet(row,2),values->strings[j].s); + CellAppendTextN(RowAt(row,0),info,eq-info); + if(values->size>1) CellSetD(RowAt(row,1),(int)(j+1)); + CellSetText(RowAt(row,2),values->strings[j]); } - KStringArrayFree(values); + StringListFree(values); } + fprintf(args.out, "# INFO\n"); TablePrint(p,&args); - TableFree(p); - KStringArrayFree(infos); + TableFree(p); + StringListFree(infos); + fputc('\n',args.out); } -//fputc('\n',args.out); 
+if(TableNRows(hyperlinksTable)>0) { + fprintf(args.out, "# HYPERLINKS\n"); + TablePrint(hyperlinksTable,&args); + fputc('\n',args.out); + } + + + +if(vepTable!=NULL && TableNRows(vepTable)>0) { + fprintf(args.out, "# VEP/CSQ\n"); + TableRemoveEmptyColumns(vepTable); + TablePrint(vepTable,&args); + fputc('\n',args.out); + } + +if(bcsqTable!=NULL && TableNRows(bcsqTable)>0) { + fprintf(args.out, "# BCSQ\n"); + TableRemoveEmptyColumns(bcsqTable); + TablePrint(bcsqTable,&args); + fputc('\n',args.out); + } if(tokens->size>9) { - - KStringArrayPtr formats = KStringArrayNew(tokens->strings[8].s,':'); + + StringListPtr formats = StringListNew(tokens->strings[8],':'); TablePtr p = TableNewStr("SAMPLE",NULL); TableAppendColumn(p, "GTYPE"); int gt_col = -1; for(i=0; isize;i++) { - TableAppendColumn(p, formats->strings[i].s); - if(strcmp("GT",formats->strings[i].s)==0) gt_col=(int)i; + TableAppendColumn(p, formats->strings[i]); + if(strcmp("GT",formats->strings[i])==0) gt_col=(int)i; } for(i=9;i< tokens->size;i++) { @@ -538,74 +905,82 @@ if(tokens->size>9) { int count_allele_missing=0; int count_allele_other=0; int print_it = 1; - KStringArrayPtr values = KStringArrayNew(tokens->strings[i].s,':'); + StringListPtr values = StringListNew(tokens->strings[i],':'); + const char* color = COLOR_BLACK; unsigned int j; if(gt_col!=-1) { - char* gt = strdup(values->strings[gt_col].s); + char* gt = strdup(values->strings[gt_col]); for(j=0;gt[j]!=0;j++) { if(gt[j]=='|') gt[j]='/'; } - KStringArrayPtr alleles = KStringArrayNew(gt,'/'); + StringListPtr alleles = StringListNew(gt,'/'); for(j=0;j< alleles->size;++j) { - char* allele = alleles->strings[j].s; + char* allele = alleles->strings[j]; if(strcmp(allele,"0")==0) count_allele_0++; else if(strcmp(allele,"1")==0) count_allele_1++; else if(strcmp(allele,".")==0) count_allele_missing++; else count_allele_other++; } if(alleles->size==2) { - if(count_allele_0==0 && count_allele_1==0 && count_allele_other==0) kputs("NO_CALL",>ype_name); - 
else if(count_allele_0==2) kputs("HOM_REF",>ype_name); - else if(count_allele_1==2) kputs("HOM_VAR",>ype_name); - else if(count_allele_0==1 && count_allele_1==1) kputs("HET",>ype_name); + if(count_allele_0==0 && count_allele_1==0 && count_allele_other==0) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} + else if(count_allele_0==2) { kputs("HOM_REF",>ype_name); color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} + else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))==0) {kputs("HOM_VAR",>ype_name); color=COLOR_RED;} + else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))!=0) {kputs("HET",>ype_name); color=COLOR_CYAN;} } else if(alleles->size==1) { - if(count_allele_0==1) kputs("REF",>ype_name); - else if(count_allele_1==1) kputs("VAR",>ype_name); - else if(count_allele_missing==1) kputs("NO_CALL",>ype_name); + if(count_allele_0==1) {kputs("REF",>ype_name);color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} + else if(count_allele_1==1) {kputs("ALT",>ype_name);color=COLOR_RED;} + else if(count_allele_missing==1) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} } - KStringArrayFree(alleles); + else + { + if(count_allele_0==alleles->size) {kputs("HOM_REF",>ype_name);; color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} + else if(count_allele_missing==alleles->size) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} + } + StringListFree(alleles); free(gt); } if(print_it) { row = TableNewRow(p); - CellSetText(RowGet(row,0),args.header->samples[i-9]); - CellSetText(RowGet(row,1),gtype_name.s); + CellSetText(RowAt(row,0),args.header->samples[i-9]); + CellSetText(RowAt(row,1),gtype_name.s); + RowAt(row,1)->color = color; for(j=0; j< values->size;j++) { - CellSetText(RowGet(row,j+2), values->strings[j].s); + CellSetText(RowAt(row,j+2), values->strings[j]); } } - KStringArrayFree(values); + StringListFree(values); ks_free(>ype_name); } + fprintf(args.out, "# 
GENOTYPES\n"); TablePrint(p,&args); TableFree(p); - KStringArrayFree(formats); + StringListFree(formats); } -fflush(args.out); -fputc('\n',args.out); - +fputs(">>>",args.out); +PRINT_HEADER; - -for(i=0;i< tokens->size;i++) { - fprintf(stderr,"[%d] = %s\n",i,tokens->strings[i].s); - } -fflush(args.out); -fprintf(args.out,"<<< %s:%s:%s\n",tokens->strings[0].s,tokens->strings[1].s,tokens->strings[3].s); -fflush(args.out); +fputc('\n',args.out); ks_free(&vcf_line); -KStringArrayFree(tokens); +StringListFree(tokens); +StringListFree(alt_alleles); +TableFree(hyperlinksTable); +TableFree(bcsqTable); +TableFree(vepTable); return NULL; } void destroy(void) { +regfree(&args.regex_rsid); +StringListFree(args.vepTokens); +StringListFree(args.bcsqTokens); } From c052538e1e4f0ba68871e71645183869c3536c43 Mon Sep 17 00:00:00 2001 From: lindenb Date: Fri, 17 Jan 2025 10:11:58 +0100 Subject: [PATCH 3/9] cont --- plugins/vcf2table.c | 611 ++++++++++++++++++++++++-------------------- 1 file changed, 329 insertions(+), 282 deletions(-) diff --git a/plugins/vcf2table.c b/plugins/vcf2table.c index 4e041f51..80fcbb8b 100644 --- a/plugins/vcf2table.c +++ b/plugins/vcf2table.c @@ -101,19 +101,22 @@ typedef struct StringList_t { -typedef struct -{ -bcf_hdr_t* header; -FILE* out; -int ascii; -StringListPtr vepTokens; -StringListPtr bcsqTokens; -regex_t regex_rsid; -unsigned long n_variants; -enum build_t build; -int hide_HOM_REF; -int hide_NO_CALL; -} args_t; +typedef struct { + bcf_hdr_t* header; + FILE* out; + int ascii; + /** columns for VEP predictions */ + StringListPtr vepTokens; + /** columns for bcftools csq predictions */ + StringListPtr bcsqTokens; + /** columns for SNPEFF ANN predictions */ + StringListPtr annTokens; + regex_t regex_rsid; + unsigned long n_variants; + enum build_t build; + int hide_HOM_REF; + int hide_NO_CALL; + } args_t; static args_t args; @@ -150,11 +153,13 @@ static CellPtr CellSetText(CellPtr ptr, const char* s) { CellAppendText(ptr,s); return ptr; } +/* 
static CellPtr CellSetLL(CellPtr ptr, long long v) { CellClear(ptr); kputll(v,&(ptr->text)); return ptr; - } + }*/ + static CellPtr CellSetD(CellPtr ptr, double v) { CellClear(ptr); kputd(v,&(ptr->text)); @@ -173,10 +178,12 @@ static unsigned int CellWidth(CellPtr ptr) { return ks_len(&(ptr->text)); } + /* static const char* CellCStr(CellPtr ptr) { ASSERT_NOT_NULL(ptr); return ks_c_str(&(ptr->text)); } + */ static void CellPrint(CellPtr ptr,args_t* args) { ASSERT_NOT_NULL(ptr); @@ -255,6 +262,11 @@ static CellPtr RowAt(RowPtr row,unsigned int idx) { return row->cells[idx]; } +/** set content of idx-th column */ +static CellPtr RowSetText(RowPtr row,unsigned int idx,const char* value) { + return CellSetText(RowAt(row,idx),value); + } + static unsigned int TableNCols(TablePtr t) { return RowSize(t->header); } @@ -568,6 +580,13 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) args.bcsqTokens = NULL; args.hide_HOM_REF = 0; args.hide_NO_CALL = 0; + args.annTokens = StringListNew( + "Allele,Annotation,Annotation_Impact,Gene_Name,Gene_ID," + "Feature_Type,Feature_ID,Transcript_BioType,Rank," + "HGVS.c,HGVS.p,cDNA.pos/length,CDS.pos/length,AA.pos/length,Distance,Message", + ','); + + c = regcomp(&args.regex_rsid,"rs[0-9]+",REG_EXTENDED|REG_ICASE|REG_NOSUB); assert(c==0); @@ -586,7 +605,7 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) int i; StringListPtr hide = StringListNew(optarg,','); for(i=0; i< hide->size;++i) { - char* hidden = StringListAt(hide,i); + const char* hidden = StringListAt(hide,i); if(strcasecmp(hidden, "HOM_REF")==0) { args.hide_HOM_REF = 1; } @@ -689,298 +708,326 @@ while(*p!=0) { to suppress output. 
*/ bcf1_t *process(bcf1_t *v) { -TablePtr vepTable = NULL; -TablePtr bcsqTable = NULL; -TablePtr hyperlinksTable = TableNewStr("DB","Allele","URL",NULL); -unsigned int i; -args.n_variants++; -bcf_hdr_t *h = args.header; -kstring_t vcf_line = KS_INITIALIZE; -vcf_format(args.header, v, &vcf_line); -//remove last CR/LF -if(vcf_line.s[vcf_line.l-1]=='\n') { - vcf_line.s[vcf_line.l-1]=0; - vcf_line.l--; - } - - - -StringListPtr tokens = StringListNew(vcf_line.s,'\t'); -StringListPtr alt_alleles = StringListNew(StringListAt(tokens,4),','); - - -fputs("<<<",args.out); -PRINT_HEADER; - -TablePtr p = TableNewStr("KEY","VALUE",NULL); - -RowPtr row = TableNewRow(p); -CellSetText(RowAt(row,0), "CHROM"); -CellSetText(RowAt(row,1),StringListAt(tokens,0)); - -row = TableNewRow(p); -CellSetText(RowAt(row,0), "POS"); -CellSetText(RowAt(row,1),StringListAt(tokens,1)); - -row = TableNewRow(p); -CellSetText(RowAt(row,0), "ID"); -CellSetText(RowAt(row,1),StringListAt(tokens,2)); -if(regexec(&args.regex_rsid, StringListAt(tokens,2),0,NULL,0)==0) { - HyperLinkTableAdd(hyperlinksTable,NULL, "RSID", StringListAt(tokens,2)); - } - -row = TableNewRow(p); -CellSetText(RowAt(row,0), "REF"); -CellSetText(RowAt(row,1),StringListAt(tokens,3)); - -row = TableNewRow(p); -CellSetText(RowAt(row,0), "ALT"); -CellSetText(RowAt(row,1),StringListAt(tokens,4)); + TablePtr vepTable = NULL; + TablePtr bcsqTable = NULL; + TablePtr annTable = NULL; + TablePtr hyperlinksTable = TableNewStr("DB","Allele","URL",NULL); + unsigned int i; + args.n_variants++; + kstring_t vcf_line = KS_INITIALIZE; + vcf_format(args.header, v, &vcf_line); + //remove last CR/LF + if(vcf_line.s[vcf_line.l-1]=='\n') { + vcf_line.s[vcf_line.l-1]=0; + vcf_line.l--; + } -row = TableNewRow(p); -CellSetText(RowAt(row,0), "QUAL"); -CellSetText(RowAt(row,1),tokens->strings[5]); -row = TableNewRow(p); -CellSetText(RowAt(row,0), "FILTER"); -CellSetText(RowAt(row,1),StringListAt(tokens,6)); -if(strcmp(StringListAt(tokens,6),".")!=0 && 
strcmp(StringListAt(tokens,6),"PASS")!=0) { - RowAt(row,1)->color = COLOR_RED; - } - fprintf(args.out, "# Variant\n"); -TablePrint(p,&args); -TableFree(p); -fputc('\n',args.out); + StringListPtr tokens = StringListNew(vcf_line.s,'\t'); + StringListPtr alt_alleles = StringListNew(StringListAt(tokens,4),','); -/* ADD HYPERLINKS */ -if(args.build == human_hg19 || args.build==human_hg38) { - kstring_t url = KS_INITIALIZE; - for(i=0;i< alt_alleles->size;++i) { - ks_clear(&url); - const char* alt_allele= StringListAt(alt_alleles,i); - - - RowPtr annot = TableNewRow(hyperlinksTable); - CellSetText(RowAt(annot,0), "GNOMAD"); - CellSetText(RowAt(annot,1), alt_allele); - kputs("https://gnomad.broadinstitute.org/variant/",&url); - escapeHttp(&url,StringListAt(tokens,0)); - escapeHttp(&url,"-"); - kputs(StringListAt(tokens,1),&url); - escapeHttp(&url,"-"); - escapeHttp(&url,StringListAt(tokens,3)); - escapeHttp(&url,"-"); - escapeHttp(&url,alt_allele); - - - CellSetText(RowAt(annot,2), url.s); - } - // StringUtils.escapeHttp(ensemblCtg) + "-" + ctx.getStart() +"-"+ctx.getReference().getDisplayString()+"-"+alt.getDisplayString()+"?dataset=gnomad_r2_1" - //HyperLinkTableAdd(hyperlinksTable,NULL, "RSID",url.s); - - - - ks_free(&url); - } + fputs("<<<",args.out); + PRINT_HEADER; + + TablePtr p = TableNewStr("KEY","VALUE",NULL); + + RowPtr row = TableNewRow(p); + CellSetText(RowAt(row,0), "CHROM"); + CellSetText(RowAt(row,1),StringListAt(tokens,0)); + + row = TableNewRow(p); + CellSetText(RowAt(row,0), "POS"); + CellSetText(RowAt(row,1),StringListAt(tokens,1)); + + row = TableNewRow(p); + CellSetText(RowAt(row,0), "ID"); + CellSetText(RowAt(row,1),StringListAt(tokens,2)); + if(regexec(&args.regex_rsid, StringListAt(tokens,2),0,NULL,0)==0) { + HyperLinkTableAdd(hyperlinksTable,NULL, "RSID", StringListAt(tokens,2)); + } + + row = TableNewRow(p); + CellSetText(RowAt(row,0), "REF"); + CellSetText(RowAt(row,1),StringListAt(tokens,3)); + + row = TableNewRow(p); + 
CellSetText(RowAt(row,0), "ALT"); + CellSetText(RowAt(row,1),StringListAt(tokens,4)); + + row = TableNewRow(p); + CellSetText(RowAt(row,0), "QUAL"); + CellSetText(RowAt(row,1),tokens->strings[5]); + + row = TableNewRow(p); + CellSetText(RowAt(row,0), "FILTER"); + CellSetText(RowAt(row,1),StringListAt(tokens,6)); + if(strcmp(StringListAt(tokens,6),".")!=0 && strcmp(StringListAt(tokens,6),"PASS")!=0) { + RowAt(row,1)->color = COLOR_RED; + } + + fprintf(args.out, "# Variant\n"); + TablePrint(p,&args); + TableFree(p); + fputc('\n',args.out); -if(tokens->size>7 && strcmp(tokens->strings[7],".")!=0) { - StringListPtr infos = StringListNew(tokens->strings[7],';'); - TablePtr p = TableNewStr("KEY","IDX","VALUE",NULL); - for(i=0;i< infos->size;i++) { - unsigned int j; - const char* info = StringListAt(infos,i); - char* eq = strchr(info,'='); - if(eq==NULL || eq==info) continue; - - - - StringListPtr values = StringListNew(eq+1,','); - for(j=0;j< values->size;j++) { - //skip CSQ - if(args.vepTokens!=NULL && strncmp(info,"CSQ=",4)==0) { - unsigned int k; - //build VEP table if needed - if(vepTable==NULL) { - vepTable = TableNew(0); - for(k=0;k< args.vepTokens->size;++k) { - TableAppendColumn(vepTable,StringListAt( args.vepTokens,k)); - } - } - // fill VEP table - row = TableNewRow(vepTable); - StringListPtr veps = StringListNew( StringListAt(values,j),'|'); - for(k=0;k< args.vepTokens->size && k < veps->size;++k) { - CellSetText(RowAt(row,k),StringListAt( veps,k)); - - if(strcmp(StringListAt(args.vepTokens,k), "SYMBOL")==0) { - HyperLinkTableAdd(hyperlinksTable, NULL,StringListAt(args.vepTokens,k), StringListAt( veps,k)); - } - - } - StringListFree(veps); - continue; - } - - //skip BCSQ - if(args.bcsqTokens!=NULL && strncmp(info,"BCSQ=",5)==0) {WHERE; - unsigned int k; - //build BCSQ table if needed - if(bcsqTable==NULL) { - bcsqTable = TableNew(0); - for(k=0;k< args.bcsqTokens->size;++k) { - TableAppendColumn(bcsqTable,StringListAt( args.bcsqTokens,k)); - } - } + /* ADD 
HYPERLINKS */ + if(args.build == human_hg19 || args.build==human_hg38) { + kstring_t url = KS_INITIALIZE; + for(i=0;i< alt_alleles->size;++i) { + ks_clear(&url); + const char* alt_allele= StringListAt(alt_alleles,i); + - // fill BCSQ table - row = TableNewRow(bcsqTable); - StringListPtr bcsq = StringListNew( StringListAt(values,j),'|'); - for(k=0;k< args.bcsqTokens->size && k < bcsq->size;++k) { - CellSetText(RowAt(row,k),StringListAt( bcsq,k)); - - if(strcmp(StringListAt(args.bcsqTokens,k), "SYMBOL")==0) { - HyperLinkTableAdd(hyperlinksTable, NULL,StringListAt(args.bcsqTokens,k), StringListAt( bcsq,k)); - } - - } - StringListFree(bcsq); - continue; - } + RowPtr annot = TableNewRow(hyperlinksTable); + CellSetText(RowAt(annot,0), "GNOMAD"); + CellSetText(RowAt(annot,1), alt_allele); + kputs("https://gnomad.broadinstitute.org/variant/",&url); + escapeHttp(&url,StringListAt(tokens,0)); + escapeHttp(&url,"-"); + kputs(StringListAt(tokens,1),&url); + escapeHttp(&url,"-"); + escapeHttp(&url,StringListAt(tokens,3)); + escapeHttp(&url,"-"); + escapeHttp(&url,alt_allele); + + + CellSetText(RowAt(annot,2), url.s); + } + // StringUtils.escapeHttp(ensemblCtg) + "-" + ctx.getStart() +"-"+ctx.getReference().getDisplayString()+"-"+alt.getDisplayString()+"?dataset=gnomad_r2_1" + //HyperLinkTableAdd(hyperlinksTable,NULL, "RSID",url.s); + + + + ks_free(&url); + } + + + if(tokens->size>7 && strcmp(tokens->strings[7],".")!=0) { + StringListPtr infos = StringListNew(tokens->strings[7],';'); + TablePtr p = TableNewStr("KEY","IDX","VALUE",NULL); + for(i=0;i< infos->size;i++) { + unsigned int j; + const char* info = StringListAt(infos,i); + char* eq = strchr(info,'='); + if(eq==NULL || eq==info) continue; + - row = TableNewRow(p); - CellAppendTextN(RowAt(row,0),info,eq-info); - if(values->size>1) CellSetD(RowAt(row,1),(int)(j+1)); - CellSetText(RowAt(row,2),values->strings[j]); + + StringListPtr values = StringListNew(eq+1,','); + for(j=0;j< values->size;j++) { + //skip CSQ + 
if(args.vepTokens!=NULL && strncmp(info,"CSQ=",4)==0) { + unsigned int k; + //build VEP table if needed + if(vepTable==NULL) { + vepTable = TableNew(0); + for(k=0;k< args.vepTokens->size;++k) { + TableAppendColumn(vepTable,StringListAt( args.vepTokens,k)); + } + } + // fill VEP table + row = TableNewRow(vepTable); + StringListPtr veps = StringListNew( StringListAt(values,j),'|'); + for(k=0;k< args.vepTokens->size && k < veps->size;++k) { + CellSetText(RowAt(row,k),StringListAt( veps,k)); + + if(strcmp(StringListAt(args.vepTokens,k), "SYMBOL")==0) { + HyperLinkTableAdd(hyperlinksTable, NULL,StringListAt(args.vepTokens,k), StringListAt( veps,k)); + } + + } + StringListFree(veps); + continue; + } + + //skip BCSQ + if(args.bcsqTokens!=NULL && strncmp(info,"BCSQ=",5)==0) { + unsigned int k; + //build BCSQ table if needed + if(bcsqTable==NULL) { + bcsqTable = TableNew(0); + for(k=0;k< args.bcsqTokens->size;++k) { + TableAppendColumn(bcsqTable,StringListAt( args.bcsqTokens,k)); + } + } + + // fill BCSQ table + row = TableNewRow(bcsqTable); + StringListPtr bcsq = StringListNew( StringListAt(values,j),'|'); + for(k=0;k< args.bcsqTokens->size && k < bcsq->size;++k) { + CellSetText(RowAt(row,k),StringListAt( bcsq,k)); + + if(strcmp(StringListAt(args.bcsqTokens,k), "SYMBOL")==0) { + HyperLinkTableAdd(hyperlinksTable, NULL,StringListAt(args.bcsqTokens,k), StringListAt( bcsq,k)); + } + + } + StringListFree(bcsq); + continue; + } + + //skip SNPEFF/ANN + if(args.annTokens!=NULL && strncmp(info,"ANN=",4)==0) { + unsigned int k; + //build BCSQ table if needed + if(annTable==NULL) { + annTable = TableNew(0); + for(k=0;k< args.annTokens->size;++k) { + TableAppendColumn(annTable,StringListAt( args.annTokens,k)); + } + } + // fill ANN table + row = TableNewRow(annTable); + StringListPtr ann = StringListNew( StringListAt(values,j),'|'); + for(k=0;k< args.annTokens->size && k < ann->size;++k) { + RowSetText(row,k,StringListAt(ann,k)); + } + StringListFree(ann); + continue; + } + + row = 
TableNewRow(p); + CellAppendTextN(RowAt(row,0),info,eq-info); + if(values->size>1) CellSetD(RowAt(row,1),(int)(j+1)); + RowSetText(row,2,values->strings[j]); + } + StringListFree(values); } - StringListFree(values); + fprintf(args.out, "# INFO\n"); + TablePrint(p,&args); + TableFree(p); + StringListFree(infos); + fputc('\n',args.out); } - fprintf(args.out, "# INFO\n"); - TablePrint(p,&args); - TableFree(p); - StringListFree(infos); - fputc('\n',args.out); - } -if(TableNRows(hyperlinksTable)>0) { - fprintf(args.out, "# HYPERLINKS\n"); - TablePrint(hyperlinksTable,&args); - fputc('\n',args.out); - } + if(TableNRows(hyperlinksTable)>0) { + fprintf(args.out, "# HYPERLINKS\n"); + TablePrint(hyperlinksTable,&args); + fputc('\n',args.out); + } -if(vepTable!=NULL && TableNRows(vepTable)>0) { - fprintf(args.out, "# VEP/CSQ\n"); - TableRemoveEmptyColumns(vepTable); - TablePrint(vepTable,&args); - fputc('\n',args.out); - } + if(vepTable!=NULL && TableNRows(vepTable)>0) { + fprintf(args.out, "# VEP/CSQ\n"); + TableRemoveEmptyColumns(vepTable); + TablePrint(vepTable,&args); + fputc('\n',args.out); + } -if(bcsqTable!=NULL && TableNRows(bcsqTable)>0) { - fprintf(args.out, "# BCSQ\n"); - TableRemoveEmptyColumns(bcsqTable); - TablePrint(bcsqTable,&args); - fputc('\n',args.out); - } - -if(tokens->size>9) { - - StringListPtr formats = StringListNew(tokens->strings[8],':'); - TablePtr p = TableNewStr("SAMPLE",NULL); - TableAppendColumn(p, "GTYPE"); - int gt_col = -1; - for(i=0; isize;i++) { - TableAppendColumn(p, formats->strings[i]); - if(strcmp("GT",formats->strings[i])==0) gt_col=(int)i; - } - - for(i=9;i< tokens->size;i++) { - kstring_t gtype_name = KS_INITIALIZE; - int count_allele_0=0; - int count_allele_1=0; - int count_allele_missing=0; - int count_allele_other=0; - int print_it = 1; - StringListPtr values = StringListNew(tokens->strings[i],':'); - const char* color = COLOR_BLACK; - unsigned int j; - if(gt_col!=-1) { - char* gt = strdup(values->strings[gt_col]); - 
for(j=0;gt[j]!=0;j++) { - if(gt[j]=='|') gt[j]='/'; - } - StringListPtr alleles = StringListNew(gt,'/'); - for(j=0;j< alleles->size;++j) { - char* allele = alleles->strings[j]; - if(strcmp(allele,"0")==0) count_allele_0++; - else if(strcmp(allele,"1")==0) count_allele_1++; - else if(strcmp(allele,".")==0) count_allele_missing++; - else count_allele_other++; - } - if(alleles->size==2) { - if(count_allele_0==0 && count_allele_1==0 && count_allele_other==0) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} - else if(count_allele_0==2) { kputs("HOM_REF",>ype_name); color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} - else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))==0) {kputs("HOM_VAR",>ype_name); color=COLOR_RED;} - else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))!=0) {kputs("HET",>ype_name); color=COLOR_CYAN;} - } - else if(alleles->size==1) { - if(count_allele_0==1) {kputs("REF",>ype_name);color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} - else if(count_allele_1==1) {kputs("ALT",>ype_name);color=COLOR_RED;} - else if(count_allele_missing==1) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} + if(bcsqTable!=NULL && TableNRows(bcsqTable)>0) { + fprintf(args.out, "# BCSQ\n"); + TableRemoveEmptyColumns(bcsqTable); + TablePrint(bcsqTable,&args); + fputc('\n',args.out); + } + + if(annTable!=NULL && TableNRows(annTable)>0) { + fprintf(args.out, "# ANN/SNPEFF\n"); + TableRemoveEmptyColumns(annTable); + TablePrint(annTable,&args); + fputc('\n',args.out); + } + + if(tokens->size>9) { + + StringListPtr formats = StringListNew(tokens->strings[8],':'); + TablePtr p = TableNewStr("SAMPLE",NULL); + TableAppendColumn(p, "GTYPE"); + int gt_col = -1; + for(i=0; isize;i++) { + TableAppendColumn(p, formats->strings[i]); + if(strcmp("GT",formats->strings[i])==0) gt_col=(int)i; + } + + for(i=9;i< tokens->size;i++) { + kstring_t gtype_name = KS_INITIALIZE; + int 
count_allele_0=0; + int count_allele_1=0; + int count_allele_missing=0; + int count_allele_other=0; + int print_it = 1; + StringListPtr values = StringListNew(tokens->strings[i],':'); + const char* color = COLOR_BLACK; + unsigned int j; + if(gt_col!=-1) { + char* gt = strdup(values->strings[gt_col]); + for(j=0;gt[j]!=0;j++) { + if(gt[j]=='|') gt[j]='/'; + } + StringListPtr alleles = StringListNew(gt,'/'); + for(j=0;j< alleles->size;++j) { + char* allele = alleles->strings[j]; + if(strcmp(allele,"0")==0) count_allele_0++; + else if(strcmp(allele,"1")==0) count_allele_1++; + else if(strcmp(allele,".")==0) count_allele_missing++; + else count_allele_other++; + } + if(alleles->size==2) { + if(count_allele_0==0 && count_allele_1==0 && count_allele_other==0) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} + else if(count_allele_0==2) { kputs("HOM_REF",>ype_name); color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} + else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))==0) {kputs("HOM_VAR",>ype_name); color=COLOR_RED;} + else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))!=0) {kputs("HET",>ype_name); color=COLOR_CYAN;} + } + else if(alleles->size==1) { + if(count_allele_0==1) {kputs("REF",>ype_name);color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} + else if(count_allele_1==1) {kputs("ALT",>ype_name);color=COLOR_RED;} + else if(count_allele_missing==1) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} + } + else + { + if(count_allele_0==alleles->size) {kputs("HOM_REF",>ype_name);; color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} + else if(count_allele_missing==alleles->size) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} + } + StringListFree(alleles); + free(gt); } - else - { - if(count_allele_0==alleles->size) {kputs("HOM_REF",>ype_name);; color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} - else if(count_allele_missing==alleles->size) 
{kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} - } - StringListFree(alleles); - free(gt); - } - if(print_it) { - row = TableNewRow(p); - CellSetText(RowAt(row,0),args.header->samples[i-9]); - CellSetText(RowAt(row,1),gtype_name.s); - RowAt(row,1)->color = color; - - for(j=0; j< values->size;j++) { - CellSetText(RowAt(row,j+2), values->strings[j]); - } + if(print_it) { + row = TableNewRow(p); + CellSetText(RowAt(row,0),args.header->samples[i-9]); + CellSetText(RowAt(row,1),gtype_name.s); + RowAt(row,1)->color = color; + + for(j=0; j< values->size;j++) { + CellSetText(RowAt(row,j+2), values->strings[j]); + } + } + StringListFree(values); + ks_free(>ype_name); } - StringListFree(values); - ks_free(>ype_name); - } - fprintf(args.out, "# GENOTYPES\n"); - TablePrint(p,&args); - TableFree(p); - StringListFree(formats); - } + fprintf(args.out, "# GENOTYPES\n"); + TablePrint(p,&args); + TableFree(p); + StringListFree(formats); + } -fputs(">>>",args.out); -PRINT_HEADER; + fputs(">>>",args.out); + PRINT_HEADER; -fputc('\n',args.out); + fputc('\n',args.out); -ks_free(&vcf_line); -StringListFree(tokens); -StringListFree(alt_alleles); -TableFree(hyperlinksTable); -TableFree(bcsqTable); -TableFree(vepTable); -return NULL; -} + ks_free(&vcf_line); + StringListFree(tokens); + StringListFree(alt_alleles); + TableFree(hyperlinksTable); + TableFree(bcsqTable); + TableFree(vepTable); + TableFree(annTable); + return NULL;/* suppress bcf output */ + } -void destroy(void) -{ -regfree(&args.regex_rsid); -StringListFree(args.vepTokens); -StringListFree(args.bcsqTokens); -} +void destroy(void) { + regfree(&args.regex_rsid); + StringListFree(args.vepTokens); + StringListFree(args.bcsqTokens); + StringListFree(args.annTokens); + } From 4fe18d1b2e0b2e42371c2d1c82dbcc4a7b182d93 Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum Date: Fri, 17 Jan 2025 16:27:37 +0100 Subject: [PATCH 4/9] avant suppression urls --- plugins/vcf2table.c | 660 ++++++++++++++++++++++++++++---------------- 
1 file changed, 427 insertions(+), 233 deletions(-) diff --git a/plugins/vcf2table.c b/plugins/vcf2table.c index 80fcbb8b..8e6d6e5a 100644 --- a/plugins/vcf2table.c +++ b/plugins/vcf2table.c @@ -109,8 +109,16 @@ typedef struct { StringListPtr vepTokens; /** columns for bcftools csq predictions */ StringListPtr bcsqTokens; - /** columns for SNPEFF ANN predictions */ - StringListPtr annTokens; + /** table for SNPEFF ANN predictions */ + TablePtr annTable; + /** general info about the variant */ + TablePtr vcTable; + /** table for spliceai */ + TablePtr spliceaiTable; + /** table for INFO col */ + TablePtr infoTable; + /** table for hyperlinks */ + TablePtr hyperlinksTable; regex_t regex_rsid; unsigned long n_variants; enum build_t build; @@ -178,12 +186,12 @@ static unsigned int CellWidth(CellPtr ptr) { return ks_len(&(ptr->text)); } - /* - static const char* CellCStr(CellPtr ptr) { +/** get the content of a cell as a const char* */ +static const char* CellCStr(CellPtr ptr) { ASSERT_NOT_NULL(ptr); return ks_c_str(&(ptr->text)); } - */ + static void CellPrint(CellPtr ptr,args_t* args) { ASSERT_NOT_NULL(ptr); @@ -276,16 +284,24 @@ ASSERT_NOT_NULL(t); return t->size; } +/** remove all lines of a TablePtr */ +static TablePtr TableClear(TablePtr ptr) { + unsigned int i; + ASSERT_NOT_NULL(ptr); + for(i=0;i< ptr->size;++i) { + RowFree(ptr->rows[i]); + ptr->rows[i]= NULL; + } + ptr->size=0UL; + return ptr; + } + +/** dispose a TablePtr */ static void TableFree(TablePtr ptr) { if(ptr==NULL) return; + TableClear(ptr); + free(ptr->rows); RowFree(ptr->header); - if(ptr->rows!=NULL) { - unsigned int i; - for(i=0;i< ptr->size;++i) { - RowFree(ptr->rows[i]); - } - free(ptr->rows); - } free(ptr); } @@ -381,52 +397,126 @@ static RowPtr TableNewRow(TablePtr ptr) { return row; } +/** + * Create a new empty StringList + */ +static StringListPtr StringListNew() { + StringListPtr ptr = (StringListPtr)calloc(1UL,sizeof(StringList)); + ASSERT_NOT_NULL(ptr); + return ptr; + } -static 
StringListPtr StringListNew(const char* str,char delim) { -StringListPtr ptr = calloc(1UL,sizeof(StringList)); -ASSERT_NOT_NULL(ptr); -char* prev=(char*)str; -char* p =(char*)str; -for(;;) { - if(*p==delim || *p==0) { - ptr->strings=(char**)realloc(ptr->strings,sizeof(char*)*(ptr->size+1)); - ASSERT_NOT_NULL(ptr->strings); - ptr->strings[ptr->size] = strndup(prev,p-prev); - ASSERT_NOT_NULL( ptr->strings[ptr->size]); - ptr->size++; - if(*p==0) break; - prev=p+1; +/** add a string to the StringList */ +static StringListPtr StringListAdd(StringListPtr ptr, const char* str) { + ASSERT_NOT_NULL(ptr); + ASSERT_NOT_NULL(str); + ptr->strings=(char**)realloc(ptr->strings,sizeof(char*)*(ptr->size+1)); + ASSERT_NOT_NULL(ptr->strings); + ptr->strings[ptr->size] = strdup(str); + ptr->size++; + return ptr; + } + +/** treat StringList as a Key/Value associative array. return value associate to key or NULL if key is missing */ +static const char* StringListGet(StringListPtr ptr, const char* key) { + unsigned int i; + ASSERT_NOT_NULL(ptr); + ASSERT_NOT_NULL(key); + assert(ptr->size%2==0); + for(i=0;i+1< ptr->size;i+=2) { + /* key exists */ + if(strcmp(key,ptr->strings[i])==0) { + return ptr->strings[i+1]; + } + } + return NULL; + } + +/** treat StringList as a Key/Value associative array. Add a key, value. 
Replace value if key exists */ +static StringListPtr StringListPut(StringListPtr ptr, const char* key,const char* value) { + unsigned int i; + ASSERT_NOT_NULL(ptr); + ASSERT_NOT_NULL(key); + ASSERT_NOT_NULL(value); + assert(ptr->size%2==0); + for(i=0;i+1< ptr->size;i+=2) { + /* key exists */ + if(strcmp(key,ptr->strings[i])==0) { + if(strcmp(key,ptr->strings[i+1])!=0) { + free(ptr->strings[i+1]); + ptr->strings[i+1] = strdup(value); + ASSERT_NOT_NULL(ptr->strings[i+1]); + } + return ptr; + } + } + /* key missing */ + StringListAdd(ptr,key); + StringListAdd(ptr,value); + return ptr; + } + +/** + * Create a new StringList by splitting 'str' with 'delim' + */ +static StringListPtr StringListMake(const char* str,char delim) { + StringListPtr ptr = StringListNew(); + char* prev=(char*)str; + char* p =(char*)str; + for(;;) { + if(*p==delim || *p==0) { + ptr->strings=(char**)realloc(ptr->strings,sizeof(char*)*(ptr->size+1)); + ASSERT_NOT_NULL(ptr->strings); + ptr->strings[ptr->size] = strndup(prev,p-prev); + ASSERT_NOT_NULL( ptr->strings[ptr->size]); + ptr->size++; + if(*p==0) break; + prev=p+1; + } + p++; + } + return ptr; + } + +/** + * Create a new StringList until the last argument is empty + */ +static StringListPtr StringListCreate(const char* str,...) 
{ + va_list arg; + StringListPtr ptr = StringListNew(); + va_start(arg, str); + while (str) { + StringListAdd(ptr,str); + str = va_arg(arg, const char *); } - p++; - } -return ptr; -} + va_end(arg); + return ptr; + } + /** Dispose list of String */ - - void StringListFree(StringList* ptr) { -unsigned int i; -if(ptr==NULL) return; -for(i=0;i< ptr->size;++i) { - free(ptr->strings[i]); - } -free(ptr); -} + unsigned int i; + if(ptr==NULL) return; + for(i=0;i< ptr->size;++i) { + free(ptr->strings[i]); + } + free(ptr); + } +/** return content of idx-th item as a const char* */ const char* StringListAt(StringList* ptr,unsigned int idx) { -ASSERT_NOT_NULL(ptr); -assert(idx < ptr->size); -return ptr->strings[idx]; -} + ASSERT_NOT_NULL(ptr); + assert(idx < ptr->size); + return ptr->strings[idx]; + } + /** -print symbol +print symbol used by TablePrint to print multiple unicode/plain characters */ - - static void printSymbol(args_t* args,unsigned int repeat, const char* wc, char c) { unsigned int i; if(args->ascii==1) { @@ -442,95 +532,95 @@ else } } - +/** print the content of a table */ static void TablePrint(TablePtr ptr,args_t* args) { -unsigned int y,x; -unsigned int* widths = calloc(TableNCols(ptr),sizeof(unsigned int)); -ASSERT_NOT_NULL(ptr); + unsigned int y,x; + unsigned int* widths = calloc(TableNCols(ptr),sizeof(unsigned int)); + ASSERT_NOT_NULL(ptr); -for(x=0; xheader,x)); - if(width>widths[x]) widths[x] = width; - } + for(x=0; xheader,x)); + if(width>widths[x]) widths[x] = width; + } -for(y=0;y< TableNRows(ptr);++y) { - for(x=0; xwidths[x]) widths[x] = width; - } - } + for(y=0;y< TableNRows(ptr);++y) { + for(x=0; xwidths[x]) widths[x] = width; + } + } - //print header - - // line 1 of header - for(x=0;xout); + + //line 2 of header + for(int x=0;xout); + CellPrint(RowAt(ptr->header,x),args); + printSymbol(args,widths[x]-CellWidth(RowAt(ptr->header,x))," ",' '); + fputc(' ',args->out); + } + printSymbol(args,1,"\u2502",'|'); + fputc('\n',args->out); + + //line 3 of 
header + for(int x=0;xout); - - //line 2 of header - for(int x=0;xout); + + //print body + for(y=0;y< TableNRows(ptr);++y) { + RowPtr row = TableRowAt(ptr,y); + //line of data + for(x=0;xout); - CellPrint(RowAt(ptr->header,x),args); - printSymbol(args,widths[x]-CellWidth(RowAt(ptr->header,x))," ",' '); + CellPrint(cell,args); + printSymbol(args,widths[x]-CellWidth(cell)," ",' '); fputc(' ',args->out); } printSymbol(args,1,"\u2502",'|'); fputc('\n',args->out); - - //line 3 of header - for(int x=0;x0) + { + for(x=0;xout); - - //print body - for(y=0;y< TableNRows(ptr);++y) { - RowPtr row = TableRowAt(ptr,y); - //line of data - for(x=0;xout); - CellPrint(cell,args); - printSymbol(args,widths[x]-CellWidth(cell)," ",' '); - fputc(' ',args->out); - } - printSymbol(args,1,"\u2502",'|'); - fputc('\n',args->out); - } - //last line - if(TableNRows(ptr)>0) - { - for(x=0;xout); - } + } -free(widths); -} + free(widths); + } static void HyperLinkTableAdd(TablePtr table, const char* allele, const char* key, const char* value) { if(value==NULL || strcmp(value,"")==0) return; @@ -538,9 +628,14 @@ RowPtr row = TableNewRow(table); CellSetText(RowAt(row,0),value); } +/** + * This method is used to find if a dictionary contains two known contig (name/length) in + * order to identify the build : hg19, hg38, etc... 
+ */ static int findContigs(bcf_hdr_t *hdr_in, const char* ctg1a, uint64_t len1, const char* ctg2a, uint64_t len2) { char ctg1b[10]; char ctg2b[10]; + // try to add a 'chr' prefix to the chromosome name sprintf(ctg1b, "chr%s", ctg1a); sprintf(ctg2b, "chr%s", ctg2a); int found=0; @@ -548,6 +643,8 @@ static int findContigs(bcf_hdr_t *hdr_in, const char* ctg1a, uint64_t len1, con for(i=0;i< n_contigs && found<2 ;i++) { uint64_t len; bcf_idpair_t c = hdr_in->id[BCF_DT_CTG][i]; + if(c.val==NULL) continue; + if(c.val->info==NULL) continue; len = c.val->info[0]; const char* contig_name = c.key; if(len == len1 && (strcmp(ctg1a,contig_name)==0 || strcmp(ctg1b,contig_name)==0)) { @@ -560,17 +657,18 @@ static int findContigs(bcf_hdr_t *hdr_in, const char* ctg1a, uint64_t len1, con return found==2; } -const char *about(void) -{ - return "Convert VCF to table.\n"; -} +const char *about(void) { + return "Convert VCF to tables in the terminal.\n" + "Author Pierre Lindenbaum PhD. Institut-du-Thorax. U1087. Nantes/France\n" + ; + } /* Called once at startup, it initializes local variables. Return 1 to suppress VCF/BCF header from printing, 0 otherwise. 
*/ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) -{ + { int c; args.header = hdr_in; args.ascii = 0; @@ -580,12 +678,16 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) args.bcsqTokens = NULL; args.hide_HOM_REF = 0; args.hide_NO_CALL = 0; - args.annTokens = StringListNew( - "Allele,Annotation,Annotation_Impact,Gene_Name,Gene_ID," - "Feature_Type,Feature_ID,Transcript_BioType,Rank," - "HGVS.c,HGVS.p,cDNA.pos/length,CDS.pos/length,AA.pos/length,Distance,Message", - ','); - + args.annTable = TableNewStr( + "Allele","Annotation","Annotation_Impact","Gene_Name","Gene_ID", + "Feature_Type","Feature_ID","Transcript_BioType","Rank", + "HGVS.c","HGVS.p","cDNA.pos/length","CDS.pos/length","AA.pos/length", + "Distance,Message",NULL + ); + args.spliceaiTable = TableNewStr("ALLELE","SYMBOL","DS_AG","DS_AL","DS_DG","DS_DL","DP_AG","DP_AL","DP_DG","DP_DL",NULL); + args.infoTable = TableNewStr("KEY","IDX","VALUE",NULL); + args.hyperlinksTable = TableNewStr("DB","Allele","URL",NULL); + args.vcTable = TableNewStr("KEY","VALUE",NULL); c = regcomp(&args.regex_rsid,"rs[0-9]+",REG_EXTENDED|REG_ICASE|REG_NOSUB); assert(c==0); @@ -603,10 +705,10 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) case 'x': { int i; - StringListPtr hide = StringListNew(optarg,','); + StringListPtr hide = StringListMake(optarg,','); for(i=0; i< hide->size;++i) { const char* hidden = StringListAt(hide,i); - if(strcasecmp(hidden, "HOM_REF")==0) { + if(strcasecmp(hidden, "HOM_REF")==0 || strcasecmp(hidden, "RR")==0) { args.hide_HOM_REF = 1; } else if(strcasecmp(hidden, "NO_CALL")==0 || strcasecmp(hidden, "MISSING")==0) { @@ -633,8 +735,7 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) args.ascii=1; } } - /* guess the build ?*/ - { + /* guess the build by finding signature of chromosome/length ?*/ if( findContigs(hdr_in,"1",249250621,"2",243199373)) { args.build = human_hg19; } @@ -644,7 +745,6 @@ int init(int argc, char **argv, 
bcf_hdr_t *hdr_in, bcf_hdr_t *out) else { args.build = undefined; } - } /** find INFO/CSQ and decode it */ bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr_in, BCF_HL_INFO, NULL, "CSQ", NULL); @@ -658,7 +758,7 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) if(vep_format[strlen(vep_format)-1]=='"') { vep_format[strlen(vep_format)-1] = 0; } - args.vepTokens = StringListNew(vep_format,'|'); + args.vepTokens = StringListMake(vep_format,'|'); free(vep_format); } } @@ -669,7 +769,7 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) char *format = ret < 0 ? NULL: strstr(hrec->vals[ret], "Format: "); if(format!=NULL) { format += 8; - args.bcsqTokens = StringListNew(format,'|'); + args.bcsqTokens = StringListMake(format,'|'); } } @@ -678,21 +778,103 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) } +#define ESCAPE_HTTP(SRC,DEST) \ + do {\ + char* p=(char*)SRC;\ + while(*p!=0) {\ + if(isalnum(*p)||*p=='.' || *p=='-') {\ + kputc(*p,&DEST);\ + }\ + else\ + {\ + ksprintf(&DEST,"%%%02x", (unsigned char)*p);\ + }\ + p++;\ + }\ + } while(0) + + +static void AddHyperlink2(const char* database, const char* name,const char* url) { + unsigned int i; + for(i=0;i< TableNRows(args.hyperlinksTable);++i) { + CellPtr cell = TableAt(args.hyperlinksTable,2,i); + if(strcmp(CellCStr(cell),url)==0) return; + } + RowPtr row = TableNewRow(args.hyperlinksTable); + RowSetText(row,0,database); + RowSetText(row,1,name); + RowSetText(row,2,url); + } + +static void AddHyperlinkForInterval(const char* chrom, hts_pos_t pos1, hts_pos_t end1) { + kstring_t url = KS_INITIALIZE; + kstring_t ensembl = KS_INITIALIZE; + kstring_t ucsc = KS_INITIALIZE; -static void escapeHttp(kstring_t* k,const char* s) { -char* p=(char*)s; -while(*p!=0) { - if(isalnum(*p)) { - kputc(*p,k); + + { + ks_clear(&url); + kputs("https://127.0.0.1:60151/goto?locus=",&url); ESCAPE_HTTP(chrom,&url); kputs("%3A",&url); kputll(pos1,&url); kputc('-',&url); kputll(end1,&url); + 
AddHyperlink2("IGV","",ks_str(&url)); } - else + + if(args.build==human_hg38) { - kputc(*p,k); - //ksprintf(k,"%%%02x", (unsigned char)*p); + ks_clear(&url); + + kputs("https://gnomad.broadinstitute.org/region/",&url); ESCAPE_HTTP(ks_str(&ucsc),&url); kputc('-',&url); kputll(pos1,&url); kputc('-',&url); kputll(end1,&url);kputs("?dataset=gnomad_r4"); + AddHyperlink2("Region Gnomad 3","",ks_str(&url)); } - p++ ; + + ks_free(&url); + ks_free(&ensembl); + ks_free(&ucsc); } -} + +static void AddHyperlink3(const char* chrom, hts_pos_t pos, hts_pos_t end, const char* ref, const char* alt) + { + + } + + + +#define ADD_HYPERLINK_1(DATABASE,BASE) \ + {\ + kstring_t url = KS_INITIALIZE;\ + kputs(BASE,&url);\ + kputs(ks_str(&escaped),&url);\ + AddHyperlink2(DATABASE,name,ks_str(&url));\ + ks_free(&url);\ + } +static void AddHyperlink1(const char* columnName, const char* name) { + if(name==NULL || strlen(name)==0) return; + kstring_t escaped = KS_INITIALIZE; + ESCAPE_HTTP(name,escaped); + + if( strcasecmp(columnName,"genename")==0 ||strcasecmp(columnName,"gene_name")==0 ||strcasecmp(columnName,"symbol")==0 ) { + ADD_HYPERLINK_1("Pharos","https://pharos.nih.gov/diseases?associatedTarget=") + ADD_HYPERLINK_1("NCBI gene","https://www.ncbi.nlm.nih.gov/gene/?term=") + ADD_HYPERLINK_1("OMIM","https://www.omim.org/search?search=") + ADD_HYPERLINK_1("hugeamp","https://cvd.hugeamp.org/gene.html?gene=") + ADD_HYPERLINK_1("Archs4","https://maayanlab.cloud/archs4/gene/") + ADD_HYPERLINK_1("Enrichr","https://maayanlab.cloud/Enrichr/#find!gene=") + ADD_HYPERLINK_1("Biogps","http://biogps.org/#goto=search=&query=") + ADD_HYPERLINK_1("Gene ResearchAllOfUs","https://databrowser.researchallofus.org/genomic-variants/") + ADD_HYPERLINK_1("IRAVs","https://iravdb.io/gene/") + ADD_HYPERLINK_1("Finngen","https://public-metaresults-fg-ukbb.finngen.fi/gene/") + 
ADD_HYPERLINK_1("hugeamp","https://hugeamp.org:8000/research.html?ancestry=mixed&cohort=AoU_250k&file=600Traits.csv&pageid=600_traits_app&phenotype=phecode_425.0&gene=") + ADD_HYPERLINK_1("intogen","https://www.intogen.org/search?gene=") + ADD_HYPERLINK_1("ComplexPortal","https://www.ebi.ac.uk/complexportal/complex/search?query=") + } + + if( strcasecmp(columnName,"rsid")==0 || strcasecmp(columnName,"dbsnp")==0) { + + + } + + ks_free(&escaped); + } + #define PRINT_HEADER \ switch(args.build) {\ @@ -708,12 +890,14 @@ while(*p!=0) { to suppress output. */ bcf1_t *process(bcf1_t *v) { + hts_pos_t end_pos = v->pos + v->rlen ; /* no minus 1, one based */ TablePtr vepTable = NULL; TablePtr bcsqTable = NULL; - TablePtr annTable = NULL; - TablePtr hyperlinksTable = TableNewStr("DB","Allele","URL",NULL); + + unsigned int i; args.n_variants++; + /* instead of re-inventing the wheel: the conversion of v to text, let's use htslib/vcf_format to convert the whole line to string */ kstring_t vcf_line = KS_INITIALIZE; vcf_format(args.header, v, &vcf_line); //remove last CR/LF @@ -721,102 +905,89 @@ bcf1_t *process(bcf1_t *v) { vcf_line.s[vcf_line.l-1]=0; vcf_line.l--; } + + - - - StringListPtr tokens = StringListNew(vcf_line.s,'\t'); - StringListPtr alt_alleles = StringListNew(StringListAt(tokens,4),','); + + + /* split the VCF line into a list of string */ + StringListPtr tokens = StringListMake(vcf_line.s,'\t'); + /* split the ALT alleles */ + StringListPtr alt_alleles = StringListMake(StringListAt(tokens,4),','); + + /* add interval hyperlink */ + AddHyperlinkForInterval(StringListAt(tokens,0),v->pos+1, end_pos); fputs("<<<",args.out); PRINT_HEADER; - TablePtr p = TableNewStr("KEY","VALUE",NULL); + + - RowPtr row = TableNewRow(p); + RowPtr row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "CHROM"); CellSetText(RowAt(row,1),StringListAt(tokens,0)); - row = TableNewRow(p); + row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "POS"); 
CellSetText(RowAt(row,1),StringListAt(tokens,1)); - row = TableNewRow(p); + row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "ID"); CellSetText(RowAt(row,1),StringListAt(tokens,2)); - if(regexec(&args.regex_rsid, StringListAt(tokens,2),0,NULL,0)==0) { - HyperLinkTableAdd(hyperlinksTable,NULL, "RSID", StringListAt(tokens,2)); - } + AddHyperlink1("rsid",StringListAt(tokens,2)); + - row = TableNewRow(p); + row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "REF"); CellSetText(RowAt(row,1),StringListAt(tokens,3)); - row = TableNewRow(p); + row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "ALT"); CellSetText(RowAt(row,1),StringListAt(tokens,4)); - row = TableNewRow(p); + row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "QUAL"); CellSetText(RowAt(row,1),tokens->strings[5]); - row = TableNewRow(p); + row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "FILTER"); CellSetText(RowAt(row,1),StringListAt(tokens,6)); - if(strcmp(StringListAt(tokens,6),".")!=0 && strcmp(StringListAt(tokens,6),"PASS")!=0) { + if(strcmp(StringListAt(tokens,6),".")==0 || strcmp(StringListAt(tokens,6),"PASS")==0) { + RowAt(row,1)->color = COLOR_GREEN; + } + else + { RowAt(row,1)->color = COLOR_RED; } - fprintf(args.out, "# Variant\n"); - TablePrint(p,&args); - TableFree(p); + fprintf(args.out, "# Variant\n"); + TablePrint(args.vcTable,&args); fputc('\n',args.out); /* ADD HYPERLINKS */ if(args.build == human_hg19 || args.build==human_hg38) { - kstring_t url = KS_INITIALIZE; for(i=0;i< alt_alleles->size;++i) { - ks_clear(&url); const char* alt_allele= StringListAt(alt_alleles,i); - - - RowPtr annot = TableNewRow(hyperlinksTable); - CellSetText(RowAt(annot,0), "GNOMAD"); - CellSetText(RowAt(annot,1), alt_allele); - kputs("https://gnomad.broadinstitute.org/variant/",&url); - escapeHttp(&url,StringListAt(tokens,0)); - escapeHttp(&url,"-"); - kputs(StringListAt(tokens,1),&url); - escapeHttp(&url,"-"); - escapeHttp(&url,StringListAt(tokens,3)); - 
escapeHttp(&url,"-"); - escapeHttp(&url,alt_allele); - - - CellSetText(RowAt(annot,2), url.s); + + } - // StringUtils.escapeHttp(ensemblCtg) + "-" + ctx.getStart() +"-"+ctx.getReference().getDisplayString()+"-"+alt.getDisplayString()+"?dataset=gnomad_r2_1" - //HyperLinkTableAdd(hyperlinksTable,NULL, "RSID",url.s); - - - - ks_free(&url); - } + } - if(tokens->size>7 && strcmp(tokens->strings[7],".")!=0) { - StringListPtr infos = StringListNew(tokens->strings[7],';'); - TablePtr p = TableNewStr("KEY","IDX","VALUE",NULL); + /* parse values in the INFO column */ + if(tokens->size>7 && strcmp(StringListAt(tokens,7),".")!=0) { + /* split INFO by semicolon */ + StringListPtr infos = StringListMake(StringListAt(tokens,7),';'); for(i=0;i< infos->size;i++) { unsigned int j; const char* info = StringListAt(infos,i); char* eq = strchr(info,'='); if(eq==NULL || eq==info) continue; - - - - StringListPtr values = StringListNew(eq+1,','); + /* split multiple values for this info using commas */ + StringListPtr values = StringListMake(eq+1,','); for(j=0;j< values->size;j++) { //skip CSQ if(args.vepTokens!=NULL && strncmp(info,"CSQ=",4)==0) { @@ -830,12 +1001,12 @@ bcf1_t *process(bcf1_t *v) { } // fill VEP table row = TableNewRow(vepTable); - StringListPtr veps = StringListNew( StringListAt(values,j),'|'); + StringListPtr veps = StringListMake( StringListAt(values,j),'|'); for(k=0;k< args.vepTokens->size && k < veps->size;++k) { CellSetText(RowAt(row,k),StringListAt( veps,k)); if(strcmp(StringListAt(args.vepTokens,k), "SYMBOL")==0) { - HyperLinkTableAdd(hyperlinksTable, NULL,StringListAt(args.vepTokens,k), StringListAt( veps,k)); + AddHyperlink1("gene_name",StringListAt( veps,k)); } } @@ -856,12 +1027,12 @@ bcf1_t *process(bcf1_t *v) { // fill BCSQ table row = TableNewRow(bcsqTable); - StringListPtr bcsq = StringListNew( StringListAt(values,j),'|'); + StringListPtr bcsq = StringListMake( StringListAt(values,j),'|'); for(k=0;k< args.bcsqTokens->size && k < bcsq->size;++k) { 
CellSetText(RowAt(row,k),StringListAt( bcsq,k)); if(strcmp(StringListAt(args.bcsqTokens,k), "SYMBOL")==0) { - HyperLinkTableAdd(hyperlinksTable, NULL,StringListAt(args.bcsqTokens,k), StringListAt( bcsq,k)); + HyperLinkTableAdd(args.hyperlinksTable, NULL,StringListAt(args.bcsqTokens,k), StringListAt( bcsq,k)); } } @@ -870,43 +1041,51 @@ bcf1_t *process(bcf1_t *v) { } //skip SNPEFF/ANN - if(args.annTokens!=NULL && strncmp(info,"ANN=",4)==0) { + if(strncmp(info,"ANN=",4)==0) { unsigned int k; - //build BCSQ table if needed - if(annTable==NULL) { - annTable = TableNew(0); - for(k=0;k< args.annTokens->size;++k) { - TableAppendColumn(annTable,StringListAt( args.annTokens,k)); - } - } // fill ANN table - row = TableNewRow(annTable); - StringListPtr ann = StringListNew( StringListAt(values,j),'|'); - for(k=0;k< args.annTokens->size && k < ann->size;++k) { + row = TableNewRow(args.annTable); + StringListPtr ann = StringListMake( StringListAt(values,j),'|'); + for(k=0;k< TableNCols(args.annTable) && k < ann->size;++k) { RowSetText(row,k,StringListAt(ann,k)); } StringListFree(ann); continue; } + //skip SpliceAI + if(strncmp(info,"SpliceAI=",4)==0) { + unsigned int k; + // fill ANN table + row = TableNewRow(args.spliceaiTable); + StringListPtr spliceai = StringListMake( StringListAt(values,j),'|'); + for(k=0;k< TableNCols(args.spliceaiTable) && k < spliceai->size;++k) { + RowSetText(row,k,StringListAt(spliceai,k)); + } + AddHyperlink1("gene_name",StringListAt( spliceai,1)); + StringListFree(spliceai); + continue; + } + - row = TableNewRow(p); + row = TableNewRow(args.infoTable); CellAppendTextN(RowAt(row,0),info,eq-info); if(values->size>1) CellSetD(RowAt(row,1),(int)(j+1)); RowSetText(row,2,values->strings[j]); } StringListFree(values); } - fprintf(args.out, "# INFO\n"); - TablePrint(p,&args); - TableFree(p); - StringListFree(infos); - fputc('\n',args.out); + if(TableNRows(args.infoTable)>0) { + fprintf(args.out, "# INFO\n"); + TablePrint(args.infoTable,&args); + 
fputc('\n',args.out); + } + StringListFree(infos); } - if(TableNRows(hyperlinksTable)>0) { + if(TableNRows(args.hyperlinksTable)>0) { fprintf(args.out, "# HYPERLINKS\n"); - TablePrint(hyperlinksTable,&args); + TablePrint(args.hyperlinksTable,&args); fputc('\n',args.out); } @@ -926,16 +1105,23 @@ bcf1_t *process(bcf1_t *v) { fputc('\n',args.out); } - if(annTable!=NULL && TableNRows(annTable)>0) { + if(TableNRows(args.annTable)>0) { fprintf(args.out, "# ANN/SNPEFF\n"); - TableRemoveEmptyColumns(annTable); - TablePrint(annTable,&args); + //no keep it inummutable TableRemoveEmptyColumns(args.annTable); + TablePrint(args.annTable,&args); + fputc('\n',args.out); + } + + if(TableNRows(args.spliceaiTable)>0) { + fprintf(args.out, "# SpliceAI\n"); + //no keep it inummutable TableRemoveEmptyColumns(args.annTable); + TablePrint(args.spliceaiTable,&args); fputc('\n',args.out); } if(tokens->size>9) { - StringListPtr formats = StringListNew(tokens->strings[8],':'); + StringListPtr formats = StringListMake(tokens->strings[8],':'); TablePtr p = TableNewStr("SAMPLE",NULL); TableAppendColumn(p, "GTYPE"); int gt_col = -1; @@ -951,7 +1137,7 @@ bcf1_t *process(bcf1_t *v) { int count_allele_missing=0; int count_allele_other=0; int print_it = 1; - StringListPtr values = StringListNew(tokens->strings[i],':'); + StringListPtr values = StringListMake(tokens->strings[i],':'); const char* color = COLOR_BLACK; unsigned int j; if(gt_col!=-1) { @@ -959,7 +1145,7 @@ bcf1_t *process(bcf1_t *v) { for(j=0;gt[j]!=0;j++) { if(gt[j]=='|') gt[j]='/'; } - StringListPtr alleles = StringListNew(gt,'/'); + StringListPtr alleles = StringListMake(gt,'/'); for(j=0;j< alleles->size;++j) { char* allele = alleles->strings[j]; if(strcmp(allele,"0")==0) count_allele_0++; @@ -1013,13 +1199,17 @@ bcf1_t *process(bcf1_t *v) { fputc('\n',args.out); + /** final cleanup */ ks_free(&vcf_line); StringListFree(tokens); StringListFree(alt_alleles); - TableFree(hyperlinksTable); TableFree(bcsqTable); TableFree(vepTable); - 
TableFree(annTable); + TableClear(args.hyperlinksTable); + TableClear(args.annTable); + TableClear(args.spliceaiTable); + TableClear(args.infoTable); + TableClear(args.vcTable); return NULL;/* suppress bcf output */ } @@ -1027,7 +1217,11 @@ void destroy(void) { regfree(&args.regex_rsid); StringListFree(args.vepTokens); StringListFree(args.bcsqTokens); - StringListFree(args.annTokens); + TableFree(args.spliceaiTable); + TableFree(args.annTable); + TableFree(args.infoTable); + TableFree(args.hyperlinksTable); + TableFree(args.vcTable); } From 2c5342f6ebf8d066050cb3696204609d08d3fcff Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum Date: Fri, 17 Jan 2025 17:37:20 +0100 Subject: [PATCH 5/9] cont --- plugins/vcf2table.c | 483 +++++++++++++++++++++----------------------- 1 file changed, 229 insertions(+), 254 deletions(-) diff --git a/plugins/vcf2table.c b/plugins/vcf2table.c index 8e6d6e5a..7e7be3af 100644 --- a/plugins/vcf2table.c +++ b/plugins/vcf2table.c @@ -71,7 +71,7 @@ typedef unsigned char color_t; typedef struct Cell { kstring_t text; - kstring_t url; + //kstring_t url; for future use const char* color; } Cell,*CellPtr; @@ -111,19 +111,35 @@ typedef struct { StringListPtr bcsqTokens; /** table for SNPEFF ANN predictions */ TablePtr annTable; + /** table for SNPEFF LOF predictions */ + TablePtr lofTable; /** general info about the variant */ TablePtr vcTable; /** table for spliceai */ TablePtr spliceaiTable; /** table for INFO col */ TablePtr infoTable; - /** table for hyperlinks */ - TablePtr hyperlinksTable; - regex_t regex_rsid; + /** table for genotype types */ + TablePtr gtypeTable; unsigned long n_variants; enum build_t build; + + /* show/hide flags */ int hide_HOM_REF; int hide_NO_CALL; + int hide_HET; + int hide_HOM_VAR; + int hide_OTHER; + int hide_VC_table; + int hide_INFO_table; + int hide_GT_table; + int hide_GTTYPE_table; + int hide_VEP_table; + int hide_BCSQ_table; + int hide_ANN_table; + int hide_LOF_table; + int hide_SPLICEAI_table; + int 
hide_colors; } args_t; static args_t args; @@ -134,7 +150,7 @@ static CellPtr CellNew() { ASSERT_NOT_NULL(ptr); ptr->color = COLOR_BLACK; ks_initialize(&(ptr->text)); - ks_initialize(&(ptr->url)); + //ks_initialize(&(ptr->url)); return ptr; } static CellPtr CellClear(CellPtr ptr) { @@ -154,19 +170,18 @@ static CellPtr CellClear(CellPtr ptr) { return ptr; } - /** build a new Cell */ static CellPtr CellSetText(CellPtr ptr, const char* s) { CellClear(ptr); CellAppendText(ptr,s); return ptr; } -/* + static CellPtr CellSetLL(CellPtr ptr, long long v) { CellClear(ptr); kputll(v,&(ptr->text)); return ptr; - }*/ + } static CellPtr CellSetD(CellPtr ptr, double v) { CellClear(ptr); @@ -185,17 +200,10 @@ static unsigned int CellWidth(CellPtr ptr) { ASSERT_NOT_NULL(ptr); return ks_len(&(ptr->text)); } - -/** get the content of a cell as a const char* */ -static const char* CellCStr(CellPtr ptr) { - ASSERT_NOT_NULL(ptr); - return ks_c_str(&(ptr->text)); - } - static void CellPrint(CellPtr ptr,args_t* args) { ASSERT_NOT_NULL(ptr); - if(args->ascii==0 && ptr->color!=NULL && ptr->color!=COLOR_BLACK) { + if(args->ascii==0 && ptr->color!=NULL && ptr->color!=COLOR_BLACK) { fputs(ptr->color,args->out); } fwrite((void*)ks_c_str(&(ptr->text)), sizeof(char), ks_len(&(ptr->text)), args->out); @@ -206,7 +214,7 @@ static void CellPrint(CellPtr ptr,args_t* args) { static void CellFree(CellPtr ptr) { if(ptr==NULL) return; ks_free(&(ptr->text)); - ks_free(&(ptr->url)); + //ks_free(&(ptr->url)); free(ptr); } @@ -406,56 +414,6 @@ static StringListPtr StringListNew() { return ptr; } -/** add a string to the StringList */ -static StringListPtr StringListAdd(StringListPtr ptr, const char* str) { - ASSERT_NOT_NULL(ptr); - ASSERT_NOT_NULL(str); - ptr->strings=(char**)realloc(ptr->strings,sizeof(char*)*(ptr->size+1)); - ASSERT_NOT_NULL(ptr->strings); - ptr->strings[ptr->size] = strdup(str); - ptr->size++; - return ptr; - } - -/** treat StringList as a Key/Value associative array. 
return value associate to key or NULL if key is missing */ -static const char* StringListGet(StringListPtr ptr, const char* key) { - unsigned int i; - ASSERT_NOT_NULL(ptr); - ASSERT_NOT_NULL(key); - assert(ptr->size%2==0); - for(i=0;i+1< ptr->size;i+=2) { - /* key exists */ - if(strcmp(key,ptr->strings[i])==0) { - return ptr->strings[i+1]; - } - } - return NULL; - } - -/** treat StringList as a Key/Value associative array. Add a key, value. Replace value if key exists */ -static StringListPtr StringListPut(StringListPtr ptr, const char* key,const char* value) { - unsigned int i; - ASSERT_NOT_NULL(ptr); - ASSERT_NOT_NULL(key); - ASSERT_NOT_NULL(value); - assert(ptr->size%2==0); - for(i=0;i+1< ptr->size;i+=2) { - /* key exists */ - if(strcmp(key,ptr->strings[i])==0) { - if(strcmp(key,ptr->strings[i+1])!=0) { - free(ptr->strings[i+1]); - ptr->strings[i+1] = strdup(value); - ASSERT_NOT_NULL(ptr->strings[i+1]); - } - return ptr; - } - } - /* key missing */ - StringListAdd(ptr,key); - StringListAdd(ptr,value); - return ptr; - } - /** * Create a new StringList by splitting 'str' with 'delim' */ @@ -478,21 +436,6 @@ static StringListPtr StringListMake(const char* str,char delim) { return ptr; } -/** - * Create a new StringList until the last argument is empty - */ -static StringListPtr StringListCreate(const char* str,...) 
{ - va_list arg; - StringListPtr ptr = StringListNew(); - va_start(arg, str); - while (str) { - StringListAdd(ptr,str); - str = va_arg(arg, const char *); - } - va_end(arg); - return ptr; - } - /** Dispose list of String */ @@ -622,12 +565,6 @@ static void TablePrint(TablePtr ptr,args_t* args) { free(widths); } -static void HyperLinkTableAdd(TablePtr table, const char* allele, const char* key, const char* value) { -if(value==NULL || strcmp(value,"")==0) return; -RowPtr row = TableNewRow(table); -CellSetText(RowAt(row,0),value); -} - /** * This method is used to find if a dictionary contains two known contig (name/length) in * order to identify the build : hg19, hg38, etc... @@ -670,6 +607,7 @@ const char *about(void) { int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) { int c; + memset((void*)&args,sizeof(args_t),1); args.header = hdr_in; args.ascii = 0; args.out = stdout; @@ -686,11 +624,9 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) ); args.spliceaiTable = TableNewStr("ALLELE","SYMBOL","DS_AG","DS_AL","DS_DG","DS_DL","DP_AG","DP_AL","DP_DG","DP_DL",NULL); args.infoTable = TableNewStr("KEY","IDX","VALUE",NULL); - args.hyperlinksTable = TableNewStr("DB","Allele","URL",NULL); args.vcTable = TableNewStr("KEY","VALUE",NULL); - - c = regcomp(&args.regex_rsid,"rs[0-9]+",REG_EXTENDED|REG_ICASE|REG_NOSUB); - assert(c==0); + args.lofTable = TableNewStr("Gene_Name","Gene_ID","Number_of_transcripts_in_gene","Percent_of_transcripts_affected",NULL); + args.gtypeTable = TableNewStr("Type","Count","%",NULL); static struct option loptions[] = { @@ -714,6 +650,42 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) else if(strcasecmp(hidden, "NO_CALL")==0 || strcasecmp(hidden, "MISSING")==0) { args.hide_NO_CALL = 1; } + else if(strcasecmp(hidden, "HOM_VAR")==0 || strcasecmp(hidden, "AA")==0) { + args.hide_HOM_VAR = 1; + } + else if(strcasecmp(hidden, "HET")==0 || strcasecmp(hidden, "AR")==0) { + args.hide_HET = 1; + } + 
else if(strcasecmp(hidden, "OTHER")==0 ) { + args.hide_OTHER = 1; + } + else if(strcasecmp(hidden, "ANN")==0 || strcasecmp(hidden, "SNPEFF")==0) { + args.hide_ANN_table = 1; + } + else if(strcasecmp(hidden, "CSQ")==0 || strcasecmp(hidden, "VEP")==0) { + args.hide_VEP_table = 1; + } + else if(strcasecmp(hidden, "BCSQ")==0 || strcasecmp(hidden, "BCFTOOLS")==0) { + args.hide_BCSQ_table = 1; + } + else if(strcasecmp(hidden, "SPLICEAI")==0) { + args.hide_SPLICEAI_table = 1; + } + else if(strcasecmp(hidden, "INFO")==0) { + args.hide_INFO_table = 1; + } + else if(strcasecmp(hidden, "VC")==0) { + args.hide_VC_table = 1; + } + else if(strcasecmp(hidden, "LOF")==0) { + args.hide_LOF_table = 1; + } + else if(strcasecmp(hidden, "GT")==0 || strcasecmp(hidden, "GENOTYPES")==0) { + args.hide_GT_table = 1; + } + else if(strcasecmp(hidden, "GTTYPES")==0) { + args.hide_GTTYPE_table = 1; + } } StringListFree(hide); break; @@ -777,105 +749,6 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) return 1;//suppress VCF/BCF header } - -#define ESCAPE_HTTP(SRC,DEST) \ - do {\ - char* p=(char*)SRC;\ - while(*p!=0) {\ - if(isalnum(*p)||*p=='.' 
|| *p=='-') {\ - kputc(*p,&DEST);\ - }\ - else\ - {\ - ksprintf(&DEST,"%%%02x", (unsigned char)*p);\ - }\ - p++;\ - }\ - } while(0) - - -static void AddHyperlink2(const char* database, const char* name,const char* url) { - unsigned int i; - for(i=0;i< TableNRows(args.hyperlinksTable);++i) { - CellPtr cell = TableAt(args.hyperlinksTable,2,i); - if(strcmp(CellCStr(cell),url)==0) return; - } - RowPtr row = TableNewRow(args.hyperlinksTable); - RowSetText(row,0,database); - RowSetText(row,1,name); - RowSetText(row,2,url); - } - -static void AddHyperlinkForInterval(const char* chrom, hts_pos_t pos1, hts_pos_t end1) { - kstring_t url = KS_INITIALIZE; - kstring_t ensembl = KS_INITIALIZE; - kstring_t ucsc = KS_INITIALIZE; - - - { - ks_clear(&url); - kputs("https://127.0.0.1:60151/goto?locus=",&url); ESCAPE_HTTP(chrom,&url); kputs("%3A",&url); kputll(pos1,&url); kputc('-',&url); kputll(end1,&url); - AddHyperlink2("IGV","",ks_str(&url)); - } - - if(args.build==human_hg38) - { - ks_clear(&url); - - kputs("https://gnomad.broadinstitute.org/region/",&url); ESCAPE_HTTP(ks_str(&ucsc),&url); kputc('-',&url); kputll(pos1,&url); kputc('-',&url); kputll(end1,&url);kputs("?dataset=gnomad_r4"); - AddHyperlink2("Region Gnomad 3","",ks_str(&url)); - } - - ks_free(&url); - ks_free(&ensembl); - ks_free(&ucsc); - } - -static void AddHyperlink3(const char* chrom, hts_pos_t pos, hts_pos_t end, const char* ref, const char* alt) - { - - } - - - -#define ADD_HYPERLINK_1(DATABASE,BASE) \ - {\ - kstring_t url = KS_INITIALIZE;\ - kputs(BASE,&url);\ - kputs(ks_str(&escaped),&url);\ - AddHyperlink2(DATABASE,name,ks_str(&url));\ - ks_free(&url);\ - } -static void AddHyperlink1(const char* columnName, const char* name) { - if(name==NULL || strlen(name)==0) return; - kstring_t escaped = KS_INITIALIZE; - ESCAPE_HTTP(name,escaped); - - if( strcasecmp(columnName,"genename")==0 ||strcasecmp(columnName,"gene_name")==0 ||strcasecmp(columnName,"symbol")==0 ) { - 
ADD_HYPERLINK_1("Pharos","https://pharos.nih.gov/diseases?associatedTarget=") - ADD_HYPERLINK_1("NCBI gene","https://www.ncbi.nlm.nih.gov/gene/?term=") - ADD_HYPERLINK_1("OMIM","https://www.omim.org/search?search=") - ADD_HYPERLINK_1("hugeamp","https://cvd.hugeamp.org/gene.html?gene=") - ADD_HYPERLINK_1("Archs4","https://maayanlab.cloud/archs4/gene/") - ADD_HYPERLINK_1("Enrichr","https://maayanlab.cloud/Enrichr/#find!gene=") - ADD_HYPERLINK_1("Biogps","http://biogps.org/#goto=search=&query=") - ADD_HYPERLINK_1("Gene ResearchAllOfUs","https://databrowser.researchallofus.org/genomic-variants/") - ADD_HYPERLINK_1("IRAVs","https://iravdb.io/gene/") - ADD_HYPERLINK_1("Finngen","https://public-metaresults-fg-ukbb.finngen.fi/gene/") - ADD_HYPERLINK_1("hugeamp","https://hugeamp.org:8000/research.html?ancestry=mixed&cohort=AoU_250k&file=600Traits.csv&pageid=600_traits_app&phenotype=phecode_425.0&gene=") - ADD_HYPERLINK_1("intogen","https://www.intogen.org/search?gene=") - ADD_HYPERLINK_1("ComplexPortal","https://www.ebi.ac.uk/complexportal/complex/search?query=") - } - - if( strcasecmp(columnName,"rsid")==0 || strcasecmp(columnName,"dbsnp")==0) { - - - } - - ks_free(&escaped); - } - - #define PRINT_HEADER \ switch(args.build) {\ case human_hg19 : fputs(" GRCh37 : ",args.out); break;\ @@ -890,10 +763,9 @@ static void AddHyperlink1(const char* columnName, const char* name) { to suppress output. 
*/ bcf1_t *process(bcf1_t *v) { - hts_pos_t end_pos = v->pos + v->rlen ; /* no minus 1, one based */ TablePtr vepTable = NULL; TablePtr bcsqTable = NULL; - + hts_pos_t variant_end = v->pos + v->rlen; unsigned int i; args.n_variants++; @@ -912,13 +784,7 @@ bcf1_t *process(bcf1_t *v) { /* split the VCF line into a list of string */ StringListPtr tokens = StringListMake(vcf_line.s,'\t'); - /* split the ALT alleles */ - StringListPtr alt_alleles = StringListMake(StringListAt(tokens,4),','); - /* add interval hyperlink */ - AddHyperlinkForInterval(StringListAt(tokens,0),v->pos+1, end_pos); - - fputs("<<<",args.out); PRINT_HEADER; @@ -932,11 +798,20 @@ bcf1_t *process(bcf1_t *v) { row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "POS"); CellSetText(RowAt(row,1),StringListAt(tokens,1)); + + if(v->pos +1 != variant_end) { + row = TableNewRow(args.vcTable); + CellSetText(RowAt(row,0), "end"); + CellSetLL(RowAt(row,1),variant_end); + + row = TableNewRow(args.vcTable); + CellSetText(RowAt(row,0), "length"); + CellSetLL(RowAt(row,1),variant_end-v->pos); + } row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "ID"); CellSetText(RowAt(row,1),StringListAt(tokens,2)); - AddHyperlink1("rsid",StringListAt(tokens,2)); row = TableNewRow(args.vcTable); @@ -962,21 +837,13 @@ bcf1_t *process(bcf1_t *v) { RowAt(row,1)->color = COLOR_RED; } - fprintf(args.out, "# Variant\n"); - TablePrint(args.vcTable,&args); - fputc('\n',args.out); - - - /* ADD HYPERLINKS */ - if(args.build == human_hg19 || args.build==human_hg38) { - for(i=0;i< alt_alleles->size;++i) { - const char* alt_allele= StringListAt(alt_alleles,i); - - - } - - } + if(!args.hide_VC_table) { + fprintf(args.out, "# Variant\n"); + TablePrint(args.vcTable,&args); + fputc('\n',args.out); + } + /* parse values in the INFO column */ if(tokens->size>7 && strcmp(StringListAt(tokens,7),".")!=0) { /* split INFO by semicolon */ @@ -991,6 +858,7 @@ bcf1_t *process(bcf1_t *v) { for(j=0;j< values->size;j++) { //skip CSQ 
if(args.vepTokens!=NULL && strncmp(info,"CSQ=",4)==0) { + if(args.hide_VEP_table) continue; unsigned int k; //build VEP table if needed if(vepTable==NULL) { @@ -1004,11 +872,6 @@ bcf1_t *process(bcf1_t *v) { StringListPtr veps = StringListMake( StringListAt(values,j),'|'); for(k=0;k< args.vepTokens->size && k < veps->size;++k) { CellSetText(RowAt(row,k),StringListAt( veps,k)); - - if(strcmp(StringListAt(args.vepTokens,k), "SYMBOL")==0) { - AddHyperlink1("gene_name",StringListAt( veps,k)); - } - } StringListFree(veps); continue; @@ -1016,6 +879,7 @@ bcf1_t *process(bcf1_t *v) { //skip BCSQ if(args.bcsqTokens!=NULL && strncmp(info,"BCSQ=",5)==0) { + if(args.hide_BCSQ_table) continue; unsigned int k; //build BCSQ table if needed if(bcsqTable==NULL) { @@ -1030,11 +894,6 @@ bcf1_t *process(bcf1_t *v) { StringListPtr bcsq = StringListMake( StringListAt(values,j),'|'); for(k=0;k< args.bcsqTokens->size && k < bcsq->size;++k) { CellSetText(RowAt(row,k),StringListAt( bcsq,k)); - - if(strcmp(StringListAt(args.bcsqTokens,k), "SYMBOL")==0) { - HyperLinkTableAdd(args.hyperlinksTable, NULL,StringListAt(args.bcsqTokens,k), StringListAt( bcsq,k)); - } - } StringListFree(bcsq); continue; @@ -1042,6 +901,7 @@ bcf1_t *process(bcf1_t *v) { //skip SNPEFF/ANN if(strncmp(info,"ANN=",4)==0) { + if(args.hide_ANN_table) continue; unsigned int k; // fill ANN table row = TableNewRow(args.annTable); @@ -1052,8 +912,30 @@ bcf1_t *process(bcf1_t *v) { StringListFree(ann); continue; } + //skip SNPEFF/LOF + if(strncmp(info,"LOF=",4)==0) { + if(args.hide_LOF_table) continue; + unsigned int k; + char * copy= strdup(StringListAt(values,j)); + ASSERT_NOT_NULL(copy); + //remove first & last char + if(copy[0]=='(') memmove((void*)&copy[0],&copy[1], strlen(copy)); + if(copy[strlen(copy)-1]==')') copy[strlen(copy)-1]=0; + + // fill ANN table + row = TableNewRow(args.lofTable); + StringListPtr ann = StringListMake(copy,'|'); + for(k=0;k< TableNCols(args.lofTable) && k < ann->size;++k) { +
RowSetText(row,k,StringListAt(ann,k)); + } + StringListFree(ann); + free(copy); + continue; + } + //skip SpliceAI if(strncmp(info,"SpliceAI=",4)==0) { + if(args.hide_SPLICEAI_table) continue; unsigned int k; // fill ANN table row = TableNewRow(args.spliceaiTable); @@ -1061,7 +943,6 @@ bcf1_t *process(bcf1_t *v) { for(k=0;k< TableNCols(args.spliceaiTable) && k < spliceai->size;++k) { RowSetText(row,k,StringListAt(spliceai,k)); } - AddHyperlink1("gene_name",StringListAt( spliceai,1)); StringListFree(spliceai); continue; } @@ -1074,7 +955,7 @@ bcf1_t *process(bcf1_t *v) { } StringListFree(values); } - if(TableNRows(args.infoTable)>0) { + if(!args.hide_INFO_table && TableNRows(args.infoTable)>0) { fprintf(args.out, "# INFO\n"); TablePrint(args.infoTable,&args); fputc('\n',args.out); @@ -1082,15 +963,6 @@ bcf1_t *process(bcf1_t *v) { StringListFree(infos); } - - if(TableNRows(args.hyperlinksTable)>0) { - fprintf(args.out, "# HYPERLINKS\n"); - TablePrint(args.hyperlinksTable,&args); - fputc('\n',args.out); - } - - - if(vepTable!=NULL && TableNRows(vepTable)>0) { fprintf(args.out, "# VEP/CSQ\n"); TableRemoveEmptyColumns(vepTable); @@ -1111,6 +983,12 @@ bcf1_t *process(bcf1_t *v) { TablePrint(args.annTable,&args); fputc('\n',args.out); } + + if(TableNRows(args.lofTable)>0) { + fprintf(args.out, "# LOF\n"); + TablePrint(args.lofTable,&args); + fputc('\n',args.out); + } if(TableNRows(args.spliceaiTable)>0) { fprintf(args.out, "# SpliceAI\n"); @@ -1120,7 +998,12 @@ bcf1_t *process(bcf1_t *v) { } if(tokens->size>9) { - + int count_hom_ref = 0; + int count_het = 0; + int count_hom_var = 0; + int count_missing = 0; + int count_other = 0; + StringListPtr formats = StringListMake(tokens->strings[8],':'); TablePtr p = TableNewStr("SAMPLE",NULL); TableAppendColumn(p, "GTYPE"); @@ -1137,14 +1020,18 @@ bcf1_t *process(bcf1_t *v) { int count_allele_missing=0; int count_allele_other=0; int print_it = 1; + // split Genotype components StringListPtr values = 
StringListMake(tokens->strings[i],':'); const char* color = COLOR_BLACK; unsigned int j; - if(gt_col!=-1) { - char* gt = strdup(values->strings[gt_col]); + if(gt_col!=-1 && gt_col < values->size) { + // clone the GT value + char* gt = strdup(StringListAt(values,gt_col)); + // remove phasing for(j=0;gt[j]!=0;j++) { if(gt[j]=='|') gt[j]='/'; } + //split the alleles in the GT StringListPtr alleles = StringListMake(gt,'/'); for(j=0;j< alleles->size;++j) { char* allele = alleles->strings[j]; @@ -1153,26 +1040,89 @@ bcf1_t *process(bcf1_t *v) { else if(strcmp(allele,".")==0) count_allele_missing++; else count_allele_other++; } + if(alleles->size==2) { - if(count_allele_0==0 && count_allele_1==0 && count_allele_other==0) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} - else if(count_allele_0==2) { kputs("HOM_REF",>ype_name); color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} - else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))==0) {kputs("HOM_VAR",>ype_name); color=COLOR_RED;} - else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))!=0) {kputs("HET",>ype_name); color=COLOR_CYAN;} + if(count_allele_0==0 && count_allele_1==0 && count_allele_other==0) { + kputs("NO_CALL",>ype_name); + if(args.hide_NO_CALL) print_it=0; + count_missing++; + } + else if(count_allele_0==2) { + kputs("HOM_REF",>ype_name); + color=COLOR_GREEN; + if(args.hide_HOM_REF) print_it=0; + count_hom_ref++; + } + else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))==0) { + kputs("HOM_VAR",>ype_name); + color=COLOR_RED; + if(args.hide_HOM_VAR) print_it=0; + count_hom_var++; + } + else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))!=0) { + kputs("HET",>ype_name); + color=COLOR_CYAN; + count_het++; + if(args.hide_HET) print_it=0; + } + else { + if(args.hide_OTHER) print_it=0; + count_other++; + } } else if(alleles->size==1) { - 
if(count_allele_0==1) {kputs("REF",>ype_name);color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} - else if(count_allele_1==1) {kputs("ALT",>ype_name);color=COLOR_RED;} - else if(count_allele_missing==1) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} + if(count_allele_0==1) { + kputs("REF",>ype_name); + color=COLOR_GREEN; + if(args.hide_HOM_REF) print_it=0; + count_hom_ref++; + } + else if(count_allele_1==1) { + kputs("ALT",>ype_name); + color=COLOR_RED; + count_hom_var++; + } + else if(count_allele_missing==1) { + kputs("NO_CALL",>ype_name); + if(args.hide_NO_CALL) print_it=0; + count_missing++; + } + else { + if(args.hide_OTHER) print_it=0; + count_other++; + } } else { - if(count_allele_0==alleles->size) {kputs("HOM_REF",>ype_name);; color=COLOR_GREEN;if(args.hide_HOM_REF) print_it=0;} - else if(count_allele_missing==alleles->size) {kputs("NO_CALL",>ype_name);if(args.hide_NO_CALL) print_it=0;} + if(count_allele_0==alleles->size) { + kputs("HOM_REF",>ype_name); + color=COLOR_GREEN; + if(args.hide_HOM_REF) print_it=0; + count_hom_ref++; + } + else if(count_allele_1==alleles->size) { + kputs("HOM_VAR",>ype_name); + color=COLOR_RED; + if(args.hide_HOM_VAR) print_it=0; + count_hom_ref++; + } + else if(count_allele_missing==alleles->size) { + kputs("NO_CALL",>ype_name); + if(args.hide_NO_CALL) print_it=0; + count_missing++; + } + else { + if(args.hide_OTHER) print_it=0; + count_other++; + } } StringListFree(alleles); free(gt); } - if(print_it) { + + + + if(print_it && !args.hide_GT_table) { row = TableNewRow(p); CellSetText(RowAt(row,0),args.header->samples[i-9]); CellSetText(RowAt(row,1),gtype_name.s); @@ -1185,8 +1135,33 @@ bcf1_t *process(bcf1_t *v) { StringListFree(values); ks_free(>ype_name); } - fprintf(args.out, "# GENOTYPES\n"); - TablePrint(p,&args); + #define ADD_GT(LABEL,COUNT) if(COUNT>0 && total>0) {\ + row = TableNewRow(args.gtypeTable);\ + RowSetText(row,0,LABEL);\ + CellSetLL(RowAt(row,1),COUNT);\ + 
CellSetD(RowAt(row,2),100.0*(COUNT/((float)total)));\ + } + if(!args.hide_GTTYPE_table) { + int total = count_hom_ref + count_het + count_hom_var + count_missing + count_other; + ADD_GT("REF only ",count_hom_ref) + ADD_GT("HET",count_het) + ADD_GT("ALT only",count_hom_var) + ADD_GT("MISSING",count_missing) + ADD_GT("OTHER",count_other) + + if(TableNRows(args.gtypeTable)>0) { + fprintf(args.out, "# GENOTYPE TYPES\n"); + TablePrint(args.gtypeTable,&args); + fputc('\n',args.out); + } + } + #undef ADD_GT + + if(!args.hide_GT_table && TableNRows(p)>0) { + fprintf(args.out, "# GENOTYPES\n"); + TablePrint(p,&args); + fputc('\n',args.out); + } TableFree(p); StringListFree(formats); } @@ -1202,26 +1177,26 @@ bcf1_t *process(bcf1_t *v) { /** final cleanup */ ks_free(&vcf_line); StringListFree(tokens); - StringListFree(alt_alleles); TableFree(bcsqTable); TableFree(vepTable); - TableClear(args.hyperlinksTable); TableClear(args.annTable); + TableClear(args.lofTable); TableClear(args.spliceaiTable); TableClear(args.infoTable); TableClear(args.vcTable); + TableClear(args.gtypeTable); return NULL;/* suppress bcf output */ } void destroy(void) { - regfree(&args.regex_rsid); StringListFree(args.vepTokens); StringListFree(args.bcsqTokens); TableFree(args.spliceaiTable); TableFree(args.annTable); + TableFree(args.lofTable); TableFree(args.infoTable); - TableFree(args.hyperlinksTable); TableFree(args.vcTable); + TableFree(args.gtypeTable); } From 087dbe8954e3efef7f365c09a82e6de68b125ae6 Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum Date: Fri, 17 Jan 2025 21:58:33 +0100 Subject: [PATCH 6/9] cont --- plugins/vcf2table.c | 344 +++++++++++++++++++++++++++++++++----------- 1 file changed, 257 insertions(+), 87 deletions(-) diff --git a/plugins/vcf2table.c b/plugins/vcf2table.c index 7e7be3af..a55c8b15 100644 --- a/plugins/vcf2table.c +++ b/plugins/vcf2table.c @@ -1,6 +1,6 @@ /* The MIT License - Copyright (c) 2019-2025 Genome Research Ltd. 
+ Copyright (c) 2019-2025 Pierre Lindenbaum Author: Pierre Lindenbaum PhD Institut-du-Thorax. U1087. Nantes. France @@ -25,11 +25,9 @@ */ #include #include - #include +#include #include -#include #include -#include #include // for isatty #include "hts_internal.h" #include @@ -39,7 +37,6 @@ #include #include #include -#include #include "../bcftools.h" #define ASSERT_NOT_NULL(a) do {if(a==NULL) {fprintf(stderr,"[%s:%d]NULL Ptr exception\n",__FILE__,__LINE__);abort();}} while(0) @@ -47,6 +44,7 @@ #define DEFINE_ANSI_IOMANIP(NAME,OPCODE) const char* COLOR_##NAME="\033[" #OPCODE "m"; +/** colors for ANSI output */ DEFINE_ANSI_IOMANIP(RESET,0) DEFINE_ANSI_IOMANIP(BLACK,30) DEFINE_ANSI_IOMANIP(RED,31) @@ -57,53 +55,57 @@ DEFINE_ANSI_IOMANIP(MAGENTA,35) DEFINE_ANSI_IOMANIP(CYAN,36) DEFINE_ANSI_IOMANIP(WHITE,37) - - -KHASH_MAP_INIT_STR(vdict, bcf_idinfo_t) -typedef khash_t(vdict) vdict_t; - -KHASH_MAP_INIT_STR(hdict, bcf_hrec_t*) -typedef khash_t(hdict) hdict_t; - - - -typedef unsigned char color_t; - +/** A Cell in a Row in a table */ typedef struct Cell { - kstring_t text; + kstring_t text; // text content //kstring_t url; for future use const char* color; } Cell,*CellPtr; +/** a Row in a table */ typedef struct Row { + // number of cells unsigned int size; + // malloc-ed cells CellPtr* cells; } Row,*RowPtr; +/** a table of data */ typedef struct Table { + // special row containing the table header = number of column RowPtr header; + // number of rows unsigned int size; + // malloc-ed rows RowPtr* rows; } Table,*TablePtr; -enum build_t { - undefined, - human_hg19, - human_hg38 - }; - +/** a list of C-strings */ typedef struct StringList_t { + // number of strings unsigned int size; + // malloc-ed strings char** strings; } StringList,*StringListPtr; +/** 'common' build */ +enum build_t { + undefined, + human_hg19, + human_hg38, + rotavirus_rf // for tests... 
+ }; +/** global arguments */ typedef struct { + /** vcf header in */ bcf_hdr_t* header; + /** output stream (stdout) */ FILE* out; + /** force ascii only */ int ascii; /** columns for VEP predictions */ StringListPtr vepTokens; @@ -115,13 +117,17 @@ typedef struct { TablePtr lofTable; /** general info about the variant */ TablePtr vcTable; + /** table for hyperlinks */ + TablePtr hyperlinksTable; /** table for spliceai */ TablePtr spliceaiTable; /** table for INFO col */ TablePtr infoTable; /** table for genotype types */ TablePtr gtypeTable; + /** number of variant seen so far */ unsigned long n_variants; + /** genome build */ enum build_t build; /* show/hide flags */ @@ -140,8 +146,10 @@ typedef struct { int hide_LOF_table; int hide_SPLICEAI_table; int hide_colors; + int hide_links; } args_t; +/** global arguments for this plugin */ static args_t args; /** build a new Cell */ @@ -150,78 +158,102 @@ static CellPtr CellNew() { ASSERT_NOT_NULL(ptr); ptr->color = COLOR_BLACK; ks_initialize(&(ptr->text)); - //ks_initialize(&(ptr->url)); + //ks_initialize(&(ptr->url)); future use return ptr; } + +/** clear the content of a cell */ static CellPtr CellClear(CellPtr ptr) { ASSERT_NOT_NULL(ptr); ks_clear(&(ptr->text)); return ptr; } - static CellPtr CellAppendText(CellPtr ptr, const char* s) { + +/** append the content of a cell */ +static CellPtr CellAppendText(CellPtr ptr, const char* s) { ASSERT_NOT_NULL(ptr); if(s!=NULL) kputs(s,&(ptr->text)); return ptr; } - - static CellPtr CellAppendTextN(CellPtr ptr, const char* s,unsigned int n) { + +/** append n bytes to the content of a cell */ +static CellPtr CellAppendTextN(CellPtr ptr, const char* s,unsigned int n) { ASSERT_NOT_NULL(ptr); if(s!=NULL) kputsn(s,n,&(ptr->text)); return ptr; } -/** build a new Cell */ +/** set content from a C string */ static CellPtr CellSetText(CellPtr ptr, const char* s) { CellClear(ptr); CellAppendText(ptr,s); return ptr; } +/** set content from an integer */ static CellPtr CellSetLL(CellPtr 
ptr, long long v) { CellClear(ptr); kputll(v,&(ptr->text)); return ptr; } - + +/** set content from an floating number */ static CellPtr CellSetD(CellPtr ptr, double v) { CellClear(ptr); kputd(v,&(ptr->text)); return ptr; } - /** build a new Cell with string content */ static CellPtr CellNewStr(const char* s) { CellPtr ptr = CellNew(); ASSERT_NOT_NULL(ptr); return CellSetText(ptr,s); } + +/** return the length of the content for this cell */ static unsigned int CellWidth(CellPtr ptr) { ASSERT_NOT_NULL(ptr); return ks_len(&(ptr->text)); } +/** print the content of a cell */ static void CellPrint(CellPtr ptr,args_t* args) { + int color_flag=0; ASSERT_NOT_NULL(ptr); + //begin color if(args->ascii==0 && ptr->color!=NULL && ptr->color!=COLOR_BLACK) { + color_flag = 1; fputs(ptr->color,args->out); } fwrite((void*)ks_c_str(&(ptr->text)), sizeof(char), ks_len(&(ptr->text)), args->out); - if(args->ascii==0 && ptr->color!=NULL && ptr->color!=COLOR_BLACK) { + + //end color + if(color_flag) { fputs(COLOR_RESET,args->out); } } + +/** return the content of a Cell as a C-string */ +static const char* CellCStr(CellPtr ptr) { + ASSERT_NOT_NULL(ptr); + return ks_c_str(&(ptr->text)); + } + +/** destroy a cell */ static void CellFree(CellPtr ptr) { if(ptr==NULL) return; ks_free(&(ptr->text)); //ks_free(&(ptr->url)); free(ptr); } - + +/** return the number of cell in a row */ static unsigned int RowSize(RowPtr row) { return row->size; } +/** destroy a RowPtr */ static void RowFree(RowPtr ptr) { if(ptr==NULL) return; if(ptr->cells!=NULL) { @@ -234,7 +266,7 @@ static void RowFree(RowPtr ptr) { free(ptr); } - +/** create a RowPtr with 'size' empty cells */ static RowPtr RowNew(unsigned int size) { unsigned int i; RowPtr ptr = (RowPtr)calloc(1UL,sizeof(Row)); @@ -249,6 +281,7 @@ static RowPtr RowNew(unsigned int size) { return ptr; } +/** append the cell to this Row */ static RowPtr RowAppend(RowPtr ptr,CellPtr cell) { ASSERT_NOT_NULL(ptr); ASSERT_NOT_NULL(cell); @@ -259,20 +292,22 @@ 
static RowPtr RowAppend(RowPtr ptr,CellPtr cell) { return ptr; } +/** remove the idx-th cell of a row */ static RowPtr RowRemoveAt(RowPtr ptr,unsigned int idx) { ASSERT_NOT_NULL(ptr); assert(idx < ptr->size); CellFree(ptr->cells[idx]); - memmove(&ptr->cells[idx], &ptr->cells[idx+1], sizeof(CellPtr)*((ptr->size-1)-idx)); + memmove((void*)&ptr->cells[idx], (void*)&ptr->cells[idx+1], sizeof(CellPtr)*((ptr->size-1)-idx)); ptr->size--; return ptr; } +/** append a new Cell with the content 's' */ static RowPtr RowAppendStr(RowPtr row,const char* s) { return RowAppend(row,CellNewStr(s)); } - +/** return the idx-th row */ static CellPtr RowAt(RowPtr row,unsigned int idx) { assert(idx < RowSize(row)); return row->cells[idx]; @@ -283,14 +318,16 @@ static CellPtr RowSetText(RowPtr row,unsigned int idx,const char* value) { return CellSetText(RowAt(row,idx),value); } +/** return the number of columns in a table */ static unsigned int TableNCols(TablePtr t) { -return RowSize(t->header); -} + return RowSize(t->header); + } +/** return the number of rows in a table */ static unsigned int TableNRows(TablePtr t) { -ASSERT_NOT_NULL(t); -return t->size; -} + ASSERT_NOT_NULL(t); + return t->size; + } /** remove all lines of a TablePtr */ static TablePtr TableClear(TablePtr ptr) { @@ -313,6 +350,7 @@ static void TableFree(TablePtr ptr) { free(ptr); } +/** create a new table with 'ncrols' columns */ static TablePtr TableNew(unsigned int ncols) { TablePtr ptr = (TablePtr)(calloc(1UL,sizeof(Table))); ASSERT_NOT_NULL(ptr); @@ -323,6 +361,7 @@ static TablePtr TableNew(unsigned int ncols) { return ptr; } + /** return the y-th row in the table */ static RowPtr TableRowAt(TablePtr ptr,unsigned int y) { ASSERT_NOT_NULL(ptr); assert(y < TableNRows(ptr)); @@ -331,6 +370,7 @@ static RowPtr TableRowAt(TablePtr ptr,unsigned int y) { return ptr->rows[y]; } +/** append a new column named 'title' in the table */ static TablePtr TableAppendColumn(TablePtr ptr,const char* title) { unsigned int y; 
ASSERT_NOT_NULL(ptr); @@ -340,7 +380,8 @@ static TablePtr TableAppendColumn(TablePtr ptr,const char* title) { } return ptr; } - + +/** create a new empty table with the header 'str' until NULL */ static TablePtr TableNewStr(const char* str,...) { va_list arg; TablePtr ptr = TableNew(0UL); @@ -354,12 +395,12 @@ static TablePtr TableNewStr(const char* str,...) { return ptr; } - - +/** return the x-th Cell in the y-th row */ static CellPtr TableAt(TablePtr ptr,unsigned int x,unsigned int y) { - RowPtr row = TableRowAt(ptr,y); - return RowAt(row,x); + return RowAt(TableRowAt(ptr,y),x); } + +/** test wether the content of a column is empty */ static int TableIsColumnEmpty(TablePtr ptr,unsigned int x) { unsigned int y; ASSERT_NOT_NULL(ptr); @@ -371,6 +412,7 @@ static int TableIsColumnEmpty(TablePtr ptr,unsigned int x) { return 1; } +/** remove the x-th column in the table */ static TablePtr TableRemoveColumn(TablePtr ptr,unsigned int x) { RowRemoveAt(ptr->header,x); for(int i=0;i< ptr->size;i++) { @@ -379,6 +421,7 @@ static TablePtr TableRemoveColumn(TablePtr ptr,unsigned int x) { return ptr; } +/** remove any empty column in the table */ static TablePtr TableRemoveEmptyColumns(TablePtr ptr) { unsigned int x=0; ASSERT_NOT_NULL(ptr); @@ -394,8 +437,7 @@ static TablePtr TableRemoveEmptyColumns(TablePtr ptr) { return ptr; } - - +/** create and insert a new row in the table, return this new row */ static RowPtr TableNewRow(TablePtr ptr) { RowPtr row = RowNew(TableNCols(ptr)); ptr->rows = (RowPtr*)realloc(ptr->rows,(ptr->size+1)*sizeof(RowPtr)); @@ -561,7 +603,7 @@ static void TablePrint(TablePtr ptr,args_t* args) { printSymbol(args,1,"\u2518",'+'); fputc('\n',args->out); } - + fputc('\n',args->out); free(widths); } @@ -594,9 +636,38 @@ static int findContigs(bcf_hdr_t *hdr_in, const char* ctg1a, uint64_t len1, con return found==2; } +/** return true if allele is of ATGC */ +static int is_ATGC(const char* s) { + char* p=(char*)s; + if(*p==0) return 0; + while(*p!=0) { + 
if(strchr("ATGCatgc",*p)==NULL) return 0; + p++; + } + + return 1; + } + +/** insert a new hyperlink in the url table, ignore duplicates */ +static void InsertHyperLink(const char* database, const char* label, const char* url) { + unsigned int i; + for(i=0;i< TableNRows(args.hyperlinksTable);++i) { + CellPtr cell = TableAt(args.hyperlinksTable,2,i); + if(strcmp(CellCStr(cell),url)==0) return; + } + RowPtr row = TableNewRow(args.hyperlinksTable); + RowSetText(row,0,database); + RowSetText(row,1,label); + RowSetText(row,2,url); + } + const char *about(void) { return "Convert VCF to tables in the terminal.\n" "Author Pierre Lindenbaum PhD. Institut-du-Thorax. U1087. Nantes/France\n" + "Options:\n" + " --hide (string) comma separated of features to hide:\n" + " HOM_REF or RR : genotypes with REF allele only\n" + " HET or AR : heterozygous genotypes\n" ; } @@ -606,16 +677,11 @@ const char *about(void) { */ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) { - int c; - memset((void*)&args,sizeof(args_t),1); + int c; + memset((void*)&args,sizeof(args_t),1); args.header = hdr_in; - args.ascii = 0; args.out = stdout; - args.n_variants=0L; - args.vepTokens = NULL; - args.bcsqTokens = NULL; - args.hide_HOM_REF = 0; - args.hide_NO_CALL = 0; + // initialize table that will not change args.annTable = TableNewStr( "Allele","Annotation","Annotation_Impact","Gene_Name","Gene_ID", "Feature_Type","Feature_ID","Transcript_BioType","Rank", @@ -627,7 +693,8 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) args.vcTable = TableNewStr("KEY","VALUE",NULL); args.lofTable = TableNewStr("Gene_Name","Gene_ID","Number_of_transcripts_in_gene","Percent_of_transcripts_affected",NULL); args.gtypeTable = TableNewStr("Type","Count","%",NULL); - + args.hyperlinksTable = TableNewStr("DB",""/* empty/misc */,"URL",NULL); + static struct option loptions[] = { {"hide",required_argument,NULL,'x'}, @@ -686,6 +753,9 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, 
bcf_hdr_t *out) else if(strcasecmp(hidden, "GTTYPES")==0) { args.hide_GTTYPE_table = 1; } + else if(strcasecmp(hidden, "URL")==0 || strcasecmp(hidden, "URLS")==0) { + args.hide_links = 1; + } } StringListFree(hide); break; @@ -714,6 +784,9 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) else if( findContigs(hdr_in,"1",248956422,"2",242193529)) { args.build = human_hg38; } + else if( findContigs(hdr_in,"RF01",3302,"RF02",2687)) { + args.build = rotavirus_rf; + } else { args.build = undefined; } @@ -753,9 +826,10 @@ int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) switch(args.build) {\ case human_hg19 : fputs(" GRCh37 : ",args.out); break;\ case human_hg38 : fputs(" GRCh38 : ",args.out); break;\ + case rotavirus_rf : fputs(" Rotavirus : ",args.out); break;\ default:break;\ }\ - fprintf(args.out,"%s:%s:%s (%ld)\n",tokens->strings[0],tokens->strings[1],tokens->strings[3], args.n_variants) + fprintf(args.out," %s:%s:%s (n. %ld)\n",tokens->strings[0],tokens->strings[1],tokens->strings[3], args.n_variants) /* @@ -777,19 +851,16 @@ bcf1_t *process(bcf1_t *v) { vcf_line.s[vcf_line.l-1]=0; vcf_line.l--; } - - - - /* split the VCF line into a list of string */ StringListPtr tokens = StringListMake(vcf_line.s,'\t'); + /* split the ALT alleles */ + StringListPtr alt_alleles = StringListMake(StringListAt(tokens,4),','); + fputs("<<<",args.out); PRINT_HEADER; - - - + fputc('\n',args.out); RowPtr row = TableNewRow(args.vcTable); CellSetText(RowAt(row,0), "CHROM"); @@ -840,7 +911,90 @@ bcf1_t *process(bcf1_t *v) { if(!args.hide_VC_table) { fprintf(args.out, "# Variant\n"); TablePrint(args.vcTable,&args); - fputc('\n',args.out); + } + + /** fill URL */ + if(!args.hide_links) { + hts_pos_t pos1= v->pos+1; + kstring_t url = KS_INITIALIZE; + + if((args.build==human_hg19 || args.build==human_hg38)) { + ks_clear(&url); + kputs("https://genome.ucsc.edu/cgi-bin/hgTracks?db=",&url); + kputs((args.build==human_hg38?"hg38":"hg19"),&url); + 
kputs("&position=",&url); + kputs(StringListAt(tokens,0),&url); + kputs("%3A",&url); + kputll(pos1,&url); + kputc('-',&url); + kputll(variant_end,&url); + InsertHyperLink("UCSC","",ks_str(&url)); + } + + for(i=0; i < alt_alleles->size; ++i) { + if((args.build==human_hg19 || args.build==human_hg38) && is_ATGC(StringListAt(tokens,3)) && is_ATGC(StringListAt(alt_alleles,i))) { + ks_clear(&url); + kputs("https://gnomad.broadinstitute.org/variant/",&url); + kputs(StringListAt(tokens,0),&url); + kputc('-',&url); + kputll(pos1,&url); + kputc('-',&url); + kputs(StringListAt(tokens,3),&url); + kputc('-',&url); + kputs(StringListAt(alt_alleles,i),&url); + kputs((args.build==human_hg38?"?dataset=gnomad_r4":"?dataset=gnomad_r2_1"),&url); + InsertHyperLink("Gnomad",StringListAt(alt_alleles,i),ks_str(&url)); + + ks_clear(&url); + kputs("https://spliceailookup.broadinstitute.org/#variant=",&url); + kputs(StringListAt(tokens,0),&url); + kputc('-',&url); + kputll(pos1,&url); + kputc('-',&url); + kputs(StringListAt(tokens,3),&url); + kputc('-',&url); + kputs(StringListAt(alt_alleles,i),&url); + kputs((args.build==human_hg38?"?hg=38":"?hg=19"),&url); + InsertHyperLink("SpliceAI",StringListAt(alt_alleles,i),ks_str(&url)); + } + if(args.build==human_hg38 && is_ATGC(StringListAt(tokens,3)) && is_ATGC(StringListAt(alt_alleles,i))) { + ks_clear(&url); + kputs("https://genetics.opentargets.org/variant/",&url); + kputs(StringListAt(tokens,0),&url); + kputc('_',&url); + kputll(pos1,&url); + kputc('_',&url); + kputs(StringListAt(tokens,3),&url); + kputc('_',&url); + kputs(StringListAt(alt_alleles,i),&url); + InsertHyperLink("OpenTargets",StringListAt(alt_alleles,i),ks_str(&url)); + + + ks_clear(&url); + kputs("https://afb.ukbiobank.ac.uk/variant/",&url); + kputs(StringListAt(tokens,0),&url); + kputc('_',&url); + kputll(pos1,&url); + kputc('_',&url); + kputs(StringListAt(tokens,3),&url); + kputc('_',&url); + kputs(StringListAt(alt_alleles,i),&url); + 
InsertHyperLink("AF.ukbiobank",StringListAt(alt_alleles,i),ks_str(&url)); + + ks_clear(&url); + kputs("https://genebe.net/variant/hg38/",&url); + kputs(StringListAt(tokens,0),&url); + kputc('-',&url); + kputll(pos1,&url); + kputc('-',&url); + kputs(StringListAt(tokens,3),&url); + kputc('-',&url); + kputs(StringListAt(alt_alleles,i),&url); + InsertHyperLink("Genebe",StringListAt(alt_alleles,i),ks_str(&url)); + } + } + + ks_free(&url); } @@ -912,6 +1066,7 @@ bcf1_t *process(bcf1_t *v) { StringListFree(ann); continue; } + //skip SNPEFF/LOF if(strncmp(info,"LOF=",4)==0) { if(args.hide_LOF_table) continue; @@ -958,63 +1113,67 @@ bcf1_t *process(bcf1_t *v) { if(!args.hide_INFO_table && TableNRows(args.infoTable)>0) { fprintf(args.out, "# INFO\n"); TablePrint(args.infoTable,&args); - fputc('\n',args.out); } StringListFree(infos); } - + + if(TableNRows(args.hyperlinksTable)>0) { + fprintf(args.out, "# HYPERLINKS\n"); + TablePrint(args.hyperlinksTable,&args); + } + if(vepTable!=NULL && TableNRows(vepTable)>0) { fprintf(args.out, "# VEP/CSQ\n"); TableRemoveEmptyColumns(vepTable); TablePrint(vepTable,&args); - fputc('\n',args.out); } if(bcsqTable!=NULL && TableNRows(bcsqTable)>0) { fprintf(args.out, "# BCSQ\n"); TableRemoveEmptyColumns(bcsqTable); TablePrint(bcsqTable,&args); - fputc('\n',args.out); } if(TableNRows(args.annTable)>0) { fprintf(args.out, "# ANN/SNPEFF\n"); //no keep it inummutable TableRemoveEmptyColumns(args.annTable); TablePrint(args.annTable,&args); - fputc('\n',args.out); } if(TableNRows(args.lofTable)>0) { fprintf(args.out, "# LOF\n"); TablePrint(args.lofTable,&args); - fputc('\n',args.out); } if(TableNRows(args.spliceaiTable)>0) { fprintf(args.out, "# SpliceAI\n"); //no keep it inummutable TableRemoveEmptyColumns(args.annTable); TablePrint(args.spliceaiTable,&args); - fputc('\n',args.out); } + // is there any genotype here ? 
if(tokens->size>9) { int count_hom_ref = 0; int count_het = 0; int count_hom_var = 0; int count_missing = 0; int count_other = 0; - - StringListPtr formats = StringListMake(tokens->strings[8],':'); - TablePtr p = TableNewStr("SAMPLE",NULL); - TableAppendColumn(p, "GTYPE"); + // column for genotype int gt_col = -1; + // column for filter FT + int ft_col = -1; + StringListPtr formats = StringListMake(tokens->strings[8],':'); + TablePtr genotypeTable = TableNewStr("SAMPLE",NULL); + TableAppendColumn(genotypeTable, "GTYPE"); + for(i=0; isize;i++) { - TableAppendColumn(p, formats->strings[i]); + TableAppendColumn(genotypeTable, formats->strings[i]); if(strcmp("GT",formats->strings[i])==0) gt_col=(int)i; + if(strcmp("FT",formats->strings[i])==0) ft_col=(int)i; } for(i=9;i< tokens->size;i++) { - kstring_t gtype_name = KS_INITIALIZE; + kstring_t gtype_name = KS_INITIALIZE; int count_allele_0=0; int count_allele_1=0; int count_allele_missing=0; @@ -1123,13 +1282,23 @@ bcf1_t *process(bcf1_t *v) { if(print_it && !args.hide_GT_table) { - row = TableNewRow(p); + row = TableNewRow(genotypeTable); CellSetText(RowAt(row,0),args.header->samples[i-9]); CellSetText(RowAt(row,1),gtype_name.s); RowAt(row,1)->color = color; for(j=0; j< values->size;j++) { - CellSetText(RowAt(row,j+2), values->strings[j]); + CellSetText(RowAt(row,j+2), StringListAt(values,j)); + // color genotype if FORMAT/FT + if(ft_col==j) { + if(strcmp(StringListAt(values,j),"PASS")==0 ||strcmp( StringListAt(values,j),".")==0) { + RowAt(row,j+2)->color = COLOR_GREEN; + } + else + { + RowAt(row,j+2)->color = COLOR_RED; + } + } } } StringListFree(values); @@ -1152,17 +1321,15 @@ bcf1_t *process(bcf1_t *v) { if(TableNRows(args.gtypeTable)>0) { fprintf(args.out, "# GENOTYPE TYPES\n"); TablePrint(args.gtypeTable,&args); - fputc('\n',args.out); } } #undef ADD_GT - if(!args.hide_GT_table && TableNRows(p)>0) { + if(!args.hide_GT_table && TableNRows(genotypeTable)>0) { fprintf(args.out, "# GENOTYPES\n"); - 
TablePrint(p,&args); - fputc('\n',args.out); + TablePrint(genotypeTable,&args); } - TableFree(p); + TableFree(genotypeTable); StringListFree(formats); } @@ -1177,6 +1344,7 @@ bcf1_t *process(bcf1_t *v) { /** final cleanup */ ks_free(&vcf_line); StringListFree(tokens); + StringListFree(alt_alleles); TableFree(bcsqTable); TableFree(vepTable); TableClear(args.annTable); @@ -1185,6 +1353,7 @@ bcf1_t *process(bcf1_t *v) { TableClear(args.infoTable); TableClear(args.vcTable); TableClear(args.gtypeTable); + TableClear(args.hyperlinksTable); return NULL;/* suppress bcf output */ } @@ -1197,6 +1366,7 @@ void destroy(void) { TableFree(args.infoTable); TableFree(args.vcTable); TableFree(args.gtypeTable); + TableFree(args.hyperlinksTable); } From 24c1cafa34b0ecd8e360f4a38942d9e879d4b47f Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum Date: Fri, 17 Jan 2025 22:30:35 +0100 Subject: [PATCH 7/9] before format --- plugins/vcf2table.c | 51 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/plugins/vcf2table.c b/plugins/vcf2table.c index a55c8b15..66fa530b 100644 --- a/plugins/vcf2table.c +++ b/plugins/vcf2table.c @@ -667,8 +667,55 @@ const char *about(void) { "Options:\n" " --hide (string) comma separated of features to hide:\n" " HOM_REF or RR : genotypes with REF allele only\n" - " HET or AR : heterozygous genotypes\n" - ; + " HET or AR : heterozygous genotypes\n" + " NO_CALL or MISSING : missing genotypes\n" + " CSQ or VEP : VEP table\n" + " SPLICEAI : SPLICEAI table\n" + " ANN or SNPEFF : SNPEFF table\n" + " LOF: SNPEFF LOF table\n" + " VC: general table\n" + " INFO: INFO table\n" + " GT: Genotype table\n" + " GTTYPES: Genotype count table\n" + " URL: hyperlink table\n" + "Example:\n" + "$ wget -O - 'http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz' |\\\n\tbcftools +vcf2table -i 
'AC<10' -- --hide 'HOM_REF,INFO,NO_CALL' \n" + "(...)\n" + "<<< 22:10714247:C (n. 446)\n" + "\n" + "# Variant\n" + "+--------+----------+\n" + "| KEY | VALUE |\n" + "+--------+----------+\n" + "| CHROM | 22 |\n" + "| POS | 10714247 |\n" + "| ID | . |\n" + "| REF | C |\n" + "| ALT | G |\n" + "| QUAL | . |\n" + "| FILTER | PASS |\n" + "+--------+----------+\n" + "\n" + "# GENOTYPE TYPES\n" + "+-----------+-------+----------+\n" + "| Type | Count | % |\n" + "+-----------+-------+----------+\n" + "| REF only | 2545 | 99.8823 |\n" + "| HET | 3 | 0.117739 |\n" + "+-----------+-------+----------+\n" + "\n" + "# GENOTYPES\n" + "+---------+-------+-----+\n" + "| SAMPLE | GTYPE | GT |\n" + "+---------+-------+-----+\n" + "| HG03136 | HET | 1|0 |\n" + "| HG03171 | HET | 0|1 |\n" + "| HG03270 | HET | 0|1 |\n" + "+---------+-------+-----+\n" + "\n" + ">>> 22:10714247:C (n. 446)\n" + + ; } /* From 6094a7d9a896d6493080d41c82b71b6d032dbba7 Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum Date: Fri, 17 Jan 2025 22:33:13 +0100 Subject: [PATCH 8/9] reformat --- plugins/vcf2table.c | 2427 +++++++++++++++++++++---------------------- 1 file changed, 1208 insertions(+), 1219 deletions(-) diff --git a/plugins/vcf2table.c b/plugins/vcf2table.c index 66fa530b..6ec60a7a 100644 --- a/plugins/vcf2table.c +++ b/plugins/vcf2table.c @@ -23,1397 +23,1386 @@ THE SOFTWARE. 
*/ -#include -#include -#include #include -#include -#include // for isatty -#include "hts_internal.h" -#include -#include #include -#include +#include +#include #include +#include #include -#include +#include +#include +#include +#include +#include +#include // for isatty + #include "../bcftools.h" +#include "hts_internal.h" -#define ASSERT_NOT_NULL(a) do {if(a==NULL) {fprintf(stderr,"[%s:%d]NULL Ptr exception\n",__FILE__,__LINE__);abort();}} while(0) -#define WHERE fprintf(stderr,"[%s:%s:%d]\n",__FUNCTION__,__FILE__,__LINE__) +#define ASSERT_NOT_NULL(a) \ + do { \ + if (a == NULL) { \ + fprintf(stderr, "[%s:%d]NULL Ptr exception\n", __FILE__, __LINE__); \ + abort(); \ + } \ + } while (0) +#define WHERE fprintf(stderr, "[%s:%s:%d]\n", __FUNCTION__, __FILE__, __LINE__) -#define DEFINE_ANSI_IOMANIP(NAME,OPCODE) const char* COLOR_##NAME="\033[" #OPCODE "m"; +#define DEFINE_ANSI_IOMANIP(NAME, OPCODE) \ + const char* COLOR_##NAME = "\033[" #OPCODE "m"; /** colors for ANSI output */ -DEFINE_ANSI_IOMANIP(RESET,0) -DEFINE_ANSI_IOMANIP(BLACK,30) -DEFINE_ANSI_IOMANIP(RED,31) -DEFINE_ANSI_IOMANIP(GREEN,32) -DEFINE_ANSI_IOMANIP(YELLOW,33) -DEFINE_ANSI_IOMANIP(BLUE,34) -DEFINE_ANSI_IOMANIP(MAGENTA,35) -DEFINE_ANSI_IOMANIP(CYAN,36) -DEFINE_ANSI_IOMANIP(WHITE,37) +DEFINE_ANSI_IOMANIP(RESET, 0) +DEFINE_ANSI_IOMANIP(BLACK, 30) +DEFINE_ANSI_IOMANIP(RED, 31) +DEFINE_ANSI_IOMANIP(GREEN, 32) +DEFINE_ANSI_IOMANIP(YELLOW, 33) +DEFINE_ANSI_IOMANIP(BLUE, 34) +DEFINE_ANSI_IOMANIP(MAGENTA, 35) +DEFINE_ANSI_IOMANIP(CYAN, 36) +DEFINE_ANSI_IOMANIP(WHITE, 37) /** A Cell in a Row in a table */ typedef struct Cell { - kstring_t text; // text content - //kstring_t url; for future use - const char* color; -} Cell,*CellPtr; + kstring_t text; // text content + // kstring_t url; for future use + const char* color; +} Cell, *CellPtr; /** a Row in a table */ typedef struct Row { - // number of cells - unsigned int size; - // malloc-ed cells - CellPtr* cells; -} Row,*RowPtr; + // number of cells + 
unsigned int size; + // malloc-ed cells + CellPtr* cells; +} Row, *RowPtr; /** a table of data */ typedef struct Table { - // special row containing the table header = number of column - RowPtr header; - // number of rows - unsigned int size; - // malloc-ed rows - RowPtr* rows; -} Table,*TablePtr; - + // special row containing the table header = number of column + RowPtr header; + // number of rows + unsigned int size; + // malloc-ed rows + RowPtr* rows; +} Table, *TablePtr; /** a list of C-strings */ typedef struct StringList_t { - // number of strings - unsigned int size; - // malloc-ed strings - char** strings; -} StringList,*StringListPtr; - - + // number of strings + unsigned int size; + // malloc-ed strings + char** strings; +} StringList, *StringListPtr; /** 'common' build */ enum build_t { - undefined, - human_hg19, - human_hg38, - rotavirus_rf // for tests... - }; + undefined, + human_hg19, + human_hg38, + rotavirus_rf // for tests... +}; /** global arguments */ -typedef struct { - /** vcf header in */ - bcf_hdr_t* header; - /** output stream (stdout) */ - FILE* out; - /** force ascii only */ - int ascii; - /** columns for VEP predictions */ - StringListPtr vepTokens; - /** columns for bcftools csq predictions */ - StringListPtr bcsqTokens; - /** table for SNPEFF ANN predictions */ - TablePtr annTable; - /** table for SNPEFF LOF predictions */ - TablePtr lofTable; - /** general info about the variant */ - TablePtr vcTable; - /** table for hyperlinks */ - TablePtr hyperlinksTable; - /** table for spliceai */ - TablePtr spliceaiTable; - /** table for INFO col */ - TablePtr infoTable; - /** table for genotype types */ - TablePtr gtypeTable; - /** number of variant seen so far */ - unsigned long n_variants; - /** genome build */ - enum build_t build; - - /* show/hide flags */ - int hide_HOM_REF; - int hide_NO_CALL; - int hide_HET; - int hide_HOM_VAR; - int hide_OTHER; - int hide_VC_table; - int hide_INFO_table; - int hide_GT_table; - int hide_GTTYPE_table; - 
int hide_VEP_table; - int hide_BCSQ_table; - int hide_ANN_table; - int hide_LOF_table; - int hide_SPLICEAI_table; - int hide_colors; - int hide_links; - } args_t; +typedef struct { + /** vcf header in */ + bcf_hdr_t* header; + /** output stream (stdout) */ + FILE* out; + /** force ascii only */ + int ascii; + /** columns for VEP predictions */ + StringListPtr vepTokens; + /** columns for bcftools csq predictions */ + StringListPtr bcsqTokens; + /** table for SNPEFF ANN predictions */ + TablePtr annTable; + /** table for SNPEFF LOF predictions */ + TablePtr lofTable; + /** general info about the variant */ + TablePtr vcTable; + /** table for hyperlinks */ + TablePtr hyperlinksTable; + /** table for spliceai */ + TablePtr spliceaiTable; + /** table for INFO col */ + TablePtr infoTable; + /** table for genotype types */ + TablePtr gtypeTable; + /** number of variant seen so far */ + unsigned long n_variants; + /** genome build */ + enum build_t build; + + /* show/hide flags */ + int hide_HOM_REF; + int hide_NO_CALL; + int hide_HET; + int hide_HOM_VAR; + int hide_OTHER; + int hide_VC_table; + int hide_INFO_table; + int hide_GT_table; + int hide_GTTYPE_table; + int hide_VEP_table; + int hide_BCSQ_table; + int hide_ANN_table; + int hide_LOF_table; + int hide_SPLICEAI_table; + int hide_colors; + int hide_links; +} args_t; /** global arguments for this plugin */ static args_t args; /** build a new Cell */ static CellPtr CellNew() { - CellPtr ptr = (CellPtr)calloc(1UL,sizeof(Cell)); - ASSERT_NOT_NULL(ptr); - ptr->color = COLOR_BLACK; - ks_initialize(&(ptr->text)); - //ks_initialize(&(ptr->url)); future use - return ptr; - } + CellPtr ptr = (CellPtr)calloc(1UL, sizeof(Cell)); + ASSERT_NOT_NULL(ptr); + ptr->color = COLOR_BLACK; + ks_initialize(&(ptr->text)); + // ks_initialize(&(ptr->url)); future use + return ptr; +} /** clear the content of a cell */ static CellPtr CellClear(CellPtr ptr) { - ASSERT_NOT_NULL(ptr); - ks_clear(&(ptr->text)); - return ptr; - } + 
ASSERT_NOT_NULL(ptr); + ks_clear(&(ptr->text)); + return ptr; +} /** append the content of a cell */ static CellPtr CellAppendText(CellPtr ptr, const char* s) { - ASSERT_NOT_NULL(ptr); - if(s!=NULL) kputs(s,&(ptr->text)); - return ptr; - } + ASSERT_NOT_NULL(ptr); + if (s != NULL) kputs(s, &(ptr->text)); + return ptr; +} /** append n bytes to the content of a cell */ -static CellPtr CellAppendTextN(CellPtr ptr, const char* s,unsigned int n) { - ASSERT_NOT_NULL(ptr); - if(s!=NULL) kputsn(s,n,&(ptr->text)); - return ptr; - } - +static CellPtr CellAppendTextN(CellPtr ptr, const char* s, unsigned int n) { + ASSERT_NOT_NULL(ptr); + if (s != NULL) kputsn(s, n, &(ptr->text)); + return ptr; +} + /** set content from a C string */ static CellPtr CellSetText(CellPtr ptr, const char* s) { - CellClear(ptr); - CellAppendText(ptr,s); - return ptr; - } + CellClear(ptr); + CellAppendText(ptr, s); + return ptr; +} /** set content from an integer */ static CellPtr CellSetLL(CellPtr ptr, long long v) { - CellClear(ptr); - kputll(v,&(ptr->text)); - return ptr; - } + CellClear(ptr); + kputll(v, &(ptr->text)); + return ptr; +} /** set content from an floating number */ static CellPtr CellSetD(CellPtr ptr, double v) { - CellClear(ptr); - kputd(v,&(ptr->text)); - return ptr; - } + CellClear(ptr); + kputd(v, &(ptr->text)); + return ptr; +} /** build a new Cell with string content */ static CellPtr CellNewStr(const char* s) { - CellPtr ptr = CellNew(); - ASSERT_NOT_NULL(ptr); - return CellSetText(ptr,s); - } + CellPtr ptr = CellNew(); + ASSERT_NOT_NULL(ptr); + return CellSetText(ptr, s); +} /** return the length of the content for this cell */ static unsigned int CellWidth(CellPtr ptr) { - ASSERT_NOT_NULL(ptr); - return ks_len(&(ptr->text)); - } + ASSERT_NOT_NULL(ptr); + return ks_len(&(ptr->text)); +} /** print the content of a cell */ -static void CellPrint(CellPtr ptr,args_t* args) { - int color_flag=0; - ASSERT_NOT_NULL(ptr); - //begin color - if(args->ascii==0 && ptr->color!=NULL && 
ptr->color!=COLOR_BLACK) { - color_flag = 1; - fputs(ptr->color,args->out); - } - fwrite((void*)ks_c_str(&(ptr->text)), sizeof(char), ks_len(&(ptr->text)), args->out); - - //end color - if(color_flag) { - fputs(COLOR_RESET,args->out); - } - } +static void CellPrint(CellPtr ptr, args_t* args) { + int color_flag = 0; + ASSERT_NOT_NULL(ptr); + // begin color + if (args->ascii == 0 && ptr->color != NULL && ptr->color != COLOR_BLACK) { + color_flag = 1; + fputs(ptr->color, args->out); + } + fwrite((void*)ks_c_str(&(ptr->text)), sizeof(char), ks_len(&(ptr->text)), + args->out); + + // end color + if (color_flag) { + fputs(COLOR_RESET, args->out); + } +} /** return the content of a Cell as a C-string */ static const char* CellCStr(CellPtr ptr) { - ASSERT_NOT_NULL(ptr); - return ks_c_str(&(ptr->text)); - } + ASSERT_NOT_NULL(ptr); + return ks_c_str(&(ptr->text)); +} /** destroy a cell */ static void CellFree(CellPtr ptr) { - if(ptr==NULL) return; - ks_free(&(ptr->text)); - //ks_free(&(ptr->url)); - free(ptr); - } + if (ptr == NULL) return; + ks_free(&(ptr->text)); + // ks_free(&(ptr->url)); + free(ptr); +} -/** return the number of cell in a row */ -static unsigned int RowSize(RowPtr row) { - return row->size; - } +/** return the number of cell in a row */ +static unsigned int RowSize(RowPtr row) { return row->size; } /** destroy a RowPtr */ static void RowFree(RowPtr ptr) { - if(ptr==NULL) return; - if(ptr->cells!=NULL) { - unsigned int i; - for(i=0;i< ptr->size;++i) { - CellFree(ptr->cells[i]); - } - free(ptr->cells); - } - free(ptr); + if (ptr == NULL) return; + if (ptr->cells != NULL) { + unsigned int i; + for (i = 0; i < ptr->size; ++i) { + CellFree(ptr->cells[i]); } + free(ptr->cells); + } + free(ptr); +} /** create a RowPtr with 'size' empty cells */ static RowPtr RowNew(unsigned int size) { - unsigned int i; - RowPtr ptr = (RowPtr)calloc(1UL,sizeof(Row)); - ASSERT_NOT_NULL(ptr); - ptr->cells = (CellPtr*)calloc(size,sizeof(CellPtr)); - ASSERT_NOT_NULL(ptr->cells); - 
ptr->size = size; - for(i=0;i< size;++i) { - ptr->cells[i] = CellNew(); - ASSERT_NOT_NULL(ptr->cells[i]); - } - return ptr; - } + unsigned int i; + RowPtr ptr = (RowPtr)calloc(1UL, sizeof(Row)); + ASSERT_NOT_NULL(ptr); + ptr->cells = (CellPtr*)calloc(size, sizeof(CellPtr)); + ASSERT_NOT_NULL(ptr->cells); + ptr->size = size; + for (i = 0; i < size; ++i) { + ptr->cells[i] = CellNew(); + ASSERT_NOT_NULL(ptr->cells[i]); + } + return ptr; +} /** append the cell to this Row */ -static RowPtr RowAppend(RowPtr ptr,CellPtr cell) { - ASSERT_NOT_NULL(ptr); - ASSERT_NOT_NULL(cell); - ptr->cells = (CellPtr*)realloc(ptr->cells,(ptr->size+1)*sizeof(CellPtr)); - ASSERT_NOT_NULL(ptr->cells); - ptr->cells[ptr->size] = cell; - ptr->size++; - return ptr; - } +static RowPtr RowAppend(RowPtr ptr, CellPtr cell) { + ASSERT_NOT_NULL(ptr); + ASSERT_NOT_NULL(cell); + ptr->cells = (CellPtr*)realloc(ptr->cells, (ptr->size + 1) * sizeof(CellPtr)); + ASSERT_NOT_NULL(ptr->cells); + ptr->cells[ptr->size] = cell; + ptr->size++; + return ptr; +} /** remove the idx-th cell of a row */ -static RowPtr RowRemoveAt(RowPtr ptr,unsigned int idx) { - ASSERT_NOT_NULL(ptr); - assert(idx < ptr->size); - CellFree(ptr->cells[idx]); - memmove((void*)&ptr->cells[idx], (void*)&ptr->cells[idx+1], sizeof(CellPtr)*((ptr->size-1)-idx)); - ptr->size--; - return ptr; - } +static RowPtr RowRemoveAt(RowPtr ptr, unsigned int idx) { + ASSERT_NOT_NULL(ptr); + assert(idx < ptr->size); + CellFree(ptr->cells[idx]); + memmove((void*)&ptr->cells[idx], (void*)&ptr->cells[idx + 1], + sizeof(CellPtr) * ((ptr->size - 1) - idx)); + ptr->size--; + return ptr; +} /** append a new Cell with the content 's' */ -static RowPtr RowAppendStr(RowPtr row,const char* s) { - return RowAppend(row,CellNewStr(s)); - } +static RowPtr RowAppendStr(RowPtr row, const char* s) { + return RowAppend(row, CellNewStr(s)); +} /** return the idx-th row */ -static CellPtr RowAt(RowPtr row,unsigned int idx) { - assert(idx < RowSize(row)); - return 
row->cells[idx]; - } +static CellPtr RowAt(RowPtr row, unsigned int idx) { + assert(idx < RowSize(row)); + return row->cells[idx]; +} /** set content of idx-th column */ -static CellPtr RowSetText(RowPtr row,unsigned int idx,const char* value) { - return CellSetText(RowAt(row,idx),value); - } +static CellPtr RowSetText(RowPtr row, unsigned int idx, const char* value) { + return CellSetText(RowAt(row, idx), value); +} /** return the number of columns in a table */ -static unsigned int TableNCols(TablePtr t) { - return RowSize(t->header); - } +static unsigned int TableNCols(TablePtr t) { return RowSize(t->header); } /** return the number of rows in a table */ static unsigned int TableNRows(TablePtr t) { - ASSERT_NOT_NULL(t); - return t->size; - } + ASSERT_NOT_NULL(t); + return t->size; +} /** remove all lines of a TablePtr */ static TablePtr TableClear(TablePtr ptr) { - unsigned int i; - ASSERT_NOT_NULL(ptr); - for(i=0;i< ptr->size;++i) { - RowFree(ptr->rows[i]); - ptr->rows[i]= NULL; - } - ptr->size=0UL; - return ptr; - } + unsigned int i; + ASSERT_NOT_NULL(ptr); + for (i = 0; i < ptr->size; ++i) { + RowFree(ptr->rows[i]); + ptr->rows[i] = NULL; + } + ptr->size = 0UL; + return ptr; +} /** dispose a TablePtr */ static void TableFree(TablePtr ptr) { - if(ptr==NULL) return; - TableClear(ptr); - free(ptr->rows); - RowFree(ptr->header); - free(ptr); - } + if (ptr == NULL) return; + TableClear(ptr); + free(ptr->rows); + RowFree(ptr->header); + free(ptr); +} /** create a new table with 'ncrols' columns */ static TablePtr TableNew(unsigned int ncols) { - TablePtr ptr = (TablePtr)(calloc(1UL,sizeof(Table))); - ASSERT_NOT_NULL(ptr); - ptr->size = 0UL; - ptr->rows = NULL; - ptr->header= RowNew(ncols); - ASSERT_NOT_NULL(ptr->header); - return ptr; - } - - /** return the y-th row in the table */ -static RowPtr TableRowAt(TablePtr ptr,unsigned int y) { - ASSERT_NOT_NULL(ptr); - assert(y < TableNRows(ptr)); - ASSERT_NOT_NULL(ptr->rows); - ASSERT_NOT_NULL(ptr->rows[y]); - return 
ptr->rows[y]; - } + TablePtr ptr = (TablePtr)(calloc(1UL, sizeof(Table))); + ASSERT_NOT_NULL(ptr); + ptr->size = 0UL; + ptr->rows = NULL; + ptr->header = RowNew(ncols); + ASSERT_NOT_NULL(ptr->header); + return ptr; +} + +/** return the y-th row in the table */ +static RowPtr TableRowAt(TablePtr ptr, unsigned int y) { + ASSERT_NOT_NULL(ptr); + assert(y < TableNRows(ptr)); + ASSERT_NOT_NULL(ptr->rows); + ASSERT_NOT_NULL(ptr->rows[y]); + return ptr->rows[y]; +} /** append a new column named 'title' in the table */ -static TablePtr TableAppendColumn(TablePtr ptr,const char* title) { - unsigned int y; - ASSERT_NOT_NULL(ptr); - RowAppendStr(ptr->header,title); - for(y=0;y< TableNRows(ptr);++y) { - RowAppendStr(TableRowAt(ptr,y),""); - } - return ptr; - } - +static TablePtr TableAppendColumn(TablePtr ptr, const char* title) { + unsigned int y; + ASSERT_NOT_NULL(ptr); + RowAppendStr(ptr->header, title); + for (y = 0; y < TableNRows(ptr); ++y) { + RowAppendStr(TableRowAt(ptr, y), ""); + } + return ptr; +} + /** create a new empty table with the header 'str' until NULL */ -static TablePtr TableNewStr(const char* str,...) { - va_list arg; - TablePtr ptr = TableNew(0UL); - ASSERT_NOT_NULL(ptr); - va_start(arg, str); - while (str) { - TableAppendColumn(ptr,str); - str = va_arg(arg, const char *); - } - va_end(arg); - return ptr; - } +static TablePtr TableNewStr(const char* str, ...) 
{ + va_list arg; + TablePtr ptr = TableNew(0UL); + ASSERT_NOT_NULL(ptr); + va_start(arg, str); + while (str) { + TableAppendColumn(ptr, str); + str = va_arg(arg, const char*); + } + va_end(arg); + return ptr; +} -/** return the x-th Cell in the y-th row */ -static CellPtr TableAt(TablePtr ptr,unsigned int x,unsigned int y) { - return RowAt(TableRowAt(ptr,y),x); - } +/** return the x-th Cell in the y-th row */ +static CellPtr TableAt(TablePtr ptr, unsigned int x, unsigned int y) { + return RowAt(TableRowAt(ptr, y), x); +} /** test wether the content of a column is empty */ -static int TableIsColumnEmpty(TablePtr ptr,unsigned int x) { - unsigned int y; - ASSERT_NOT_NULL(ptr); - assert(x < TableNCols(ptr)); - for(y=0;y < TableNRows(ptr);++y) { - CellPtr cell = TableAt(ptr,x,y); - if(CellWidth(cell)!=0) return 0; - } - return 1; - } +static int TableIsColumnEmpty(TablePtr ptr, unsigned int x) { + unsigned int y; + ASSERT_NOT_NULL(ptr); + assert(x < TableNCols(ptr)); + for (y = 0; y < TableNRows(ptr); ++y) { + CellPtr cell = TableAt(ptr, x, y); + if (CellWidth(cell) != 0) return 0; + } + return 1; +} /** remove the x-th column in the table */ -static TablePtr TableRemoveColumn(TablePtr ptr,unsigned int x) { - RowRemoveAt(ptr->header,x); - for(int i=0;i< ptr->size;i++) { - RowRemoveAt(ptr->rows[i],x); - } - return ptr; - } +static TablePtr TableRemoveColumn(TablePtr ptr, unsigned int x) { + RowRemoveAt(ptr->header, x); + for (int i = 0; i < ptr->size; i++) { + RowRemoveAt(ptr->rows[i], x); + } + return ptr; +} /** remove any empty column in the table */ static TablePtr TableRemoveEmptyColumns(TablePtr ptr) { - unsigned int x=0; - ASSERT_NOT_NULL(ptr); - while(x rows = (RowPtr*)realloc(ptr->rows,(ptr->size+1)*sizeof(RowPtr)); - ASSERT_NOT_NULL(ptr->rows); - ptr->rows[ptr->size] = row; - ptr->size++; - return row; - } + RowPtr row = RowNew(TableNCols(ptr)); + ptr->rows = (RowPtr*)realloc(ptr->rows, (ptr->size + 1) * sizeof(RowPtr)); + ASSERT_NOT_NULL(ptr->rows); + 
ptr->rows[ptr->size] = row; + ptr->size++; + return row; +} /** * Create a new empty StringList */ static StringListPtr StringListNew() { - StringListPtr ptr = (StringListPtr)calloc(1UL,sizeof(StringList)); - ASSERT_NOT_NULL(ptr); - return ptr; - } + StringListPtr ptr = (StringListPtr)calloc(1UL, sizeof(StringList)); + ASSERT_NOT_NULL(ptr); + return ptr; +} /** * Create a new StringList by splitting 'str' with 'delim' */ -static StringListPtr StringListMake(const char* str,char delim) { - StringListPtr ptr = StringListNew(); - char* prev=(char*)str; - char* p =(char*)str; - for(;;) { - if(*p==delim || *p==0) { - ptr->strings=(char**)realloc(ptr->strings,sizeof(char*)*(ptr->size+1)); - ASSERT_NOT_NULL(ptr->strings); - ptr->strings[ptr->size] = strndup(prev,p-prev); - ASSERT_NOT_NULL( ptr->strings[ptr->size]); - ptr->size++; - if(*p==0) break; - prev=p+1; - } - p++; - } - return ptr; - } +static StringListPtr StringListMake(const char* str, char delim) { + StringListPtr ptr = StringListNew(); + char* prev = (char*)str; + char* p = (char*)str; + for (;;) { + if (*p == delim || *p == 0) { + ptr->strings = + (char**)realloc(ptr->strings, sizeof(char*) * (ptr->size + 1)); + ASSERT_NOT_NULL(ptr->strings); + ptr->strings[ptr->size] = strndup(prev, p - prev); + ASSERT_NOT_NULL(ptr->strings[ptr->size]); + ptr->size++; + if (*p == 0) break; + prev = p + 1; + } + p++; + } + return ptr; +} /** Dispose list of String */ void StringListFree(StringList* ptr) { - unsigned int i; - if(ptr==NULL) return; - for(i=0;i< ptr->size;++i) { - free(ptr->strings[i]); - } - free(ptr); - } + unsigned int i; + if (ptr == NULL) return; + for (i = 0; i < ptr->size; ++i) { + free(ptr->strings[i]); + } + free(ptr); +} /** return content of idx-th item as a const char* */ -const char* StringListAt(StringList* ptr,unsigned int idx) { - ASSERT_NOT_NULL(ptr); - assert(idx < ptr->size); - return ptr->strings[idx]; - } - - +const char* StringListAt(StringList* ptr, unsigned int idx) { + 
ASSERT_NOT_NULL(ptr); + assert(idx < ptr->size); + return ptr->strings[idx]; +} /** print symbol used by TablePrint to print multiple unicode/plain characters */ -static void printSymbol(args_t* args,unsigned int repeat, const char* wc, char c) { -unsigned int i; -if(args->ascii==1) { - for(i=0;i< repeat;i++) { - fputc(c,args->out); - } - } -else - { - for(i=0;i< repeat;i++) { - fputs(wc,args->out); - } +static void printSymbol(args_t* args, unsigned int repeat, const char* wc, + char c) { + unsigned int i; + if (args->ascii == 1) { + for (i = 0; i < repeat; i++) { + fputc(c, args->out); + } + } else { + for (i = 0; i < repeat; i++) { + fputs(wc, args->out); } + } } /** print the content of a table */ -static void TablePrint(TablePtr ptr,args_t* args) { - unsigned int y,x; - unsigned int* widths = calloc(TableNCols(ptr),sizeof(unsigned int)); - ASSERT_NOT_NULL(ptr); - - for(x=0; xheader,x)); - if(width>widths[x]) widths[x] = width; - } - - for(y=0;y< TableNRows(ptr);++y) { - for(x=0; xwidths[x]) widths[x] = width; - } - } - - //print header - - // line 1 of header - for(x=0;xout); - - //line 2 of header - for(int x=0;xout); - CellPrint(RowAt(ptr->header,x),args); - printSymbol(args,widths[x]-CellWidth(RowAt(ptr->header,x))," ",' '); - fputc(' ',args->out); - } - printSymbol(args,1,"\u2502",'|'); - fputc('\n',args->out); - - //line 3 of header - for(int x=0;xout); - - //print body - for(y=0;y< TableNRows(ptr);++y) { - RowPtr row = TableRowAt(ptr,y); - //line of data - for(x=0;xout); - CellPrint(cell,args); - printSymbol(args,widths[x]-CellWidth(cell)," ",' '); - fputc(' ',args->out); - } - printSymbol(args,1,"\u2502",'|'); - fputc('\n',args->out); - } - //last line - if(TableNRows(ptr)>0) - { - for(x=0;xout); - } - fputc('\n',args->out); - free(widths); - } +static void TablePrint(TablePtr ptr, args_t* args) { + unsigned int y, x; + unsigned int* widths = calloc(TableNCols(ptr), sizeof(unsigned int)); + ASSERT_NOT_NULL(ptr); + + for (x = 0; x < TableNCols(ptr); ++x) 
{ + unsigned int width = CellWidth(RowAt(ptr->header, x)); + if (width > widths[x]) widths[x] = width; + } + + for (y = 0; y < TableNRows(ptr); ++y) { + for (x = 0; x < TableNCols(ptr); ++x) { + ASSERT_NOT_NULL(TableAt(ptr, x, y)); + unsigned int width = CellWidth(TableAt(ptr, x, y)); + if (width > widths[x]) widths[x] = width; + } + } + + // print header + + // line 1 of header + for (x = 0; x < TableNCols(ptr); ++x) { + printSymbol(args, 1, (x == 0 ? "\u250C" : "\u252C"), '+'); + printSymbol(args, 2 + widths[x], "\u2500", '-'); + } + printSymbol(args, 1, "\u2510", '+'); + fputc('\n', args->out); + + // line 2 of header + for (int x = 0; x < TableNCols(ptr); ++x) { + printSymbol(args, 1, "\u2502", '|'); + fputc(' ', args->out); + CellPrint(RowAt(ptr->header, x), args); + printSymbol(args, widths[x] - CellWidth(RowAt(ptr->header, x)), " ", ' '); + fputc(' ', args->out); + } + printSymbol(args, 1, "\u2502", '|'); + fputc('\n', args->out); + + // line 3 of header + for (int x = 0; x < TableNCols(ptr); ++x) { + if (x == 0 && TableNRows(ptr) == 0) { + printSymbol(args, 1, "\u2514", '+'); + } else if (x == 0) { + printSymbol(args, 1, "\u251C", '+'); + } else { + printSymbol(args, 1, (TableNRows(ptr) == 0 ? "\u2534" : "\u253C"), '+'); + } + printSymbol(args, 2 + widths[x], "\u2500", '-'); + } + + printSymbol(args, 1, (TableNRows(ptr) == 0 ? "\u2518" : "\u2524"), '+'); + fputc('\n', args->out); + + // print body + for (y = 0; y < TableNRows(ptr); ++y) { + RowPtr row = TableRowAt(ptr, y); + // line of data + for (x = 0; x < TableNCols(ptr); ++x) { + CellPtr cell = RowAt(row, x); + printSymbol(args, 1, "\u2502", '|'); + fputc(' ', args->out); + CellPrint(cell, args); + printSymbol(args, widths[x] - CellWidth(cell), " ", ' '); + fputc(' ', args->out); + } + printSymbol(args, 1, "\u2502", '|'); + fputc('\n', args->out); + } + // last line + if (TableNRows(ptr) > 0) { + for (x = 0; x < TableNCols(ptr); ++x) { + printSymbol(args, 1, (x == 0 ? 
"\u2514" : "\u2534"), '+'); + printSymbol(args, 2 + widths[x], "\u2500", '-'); + } + printSymbol(args, 1, "\u2518", '+'); + fputc('\n', args->out); + } + fputc('\n', args->out); + free(widths); +} /** - * This method is used to find if a dictionary contains two known contig (name/length) in - * order to identify the build : hg19, hg38, etc... + * This method is used to find if a dictionary contains two known contig + * (name/length) in order to identify the build : hg19, hg38, etc... */ -static int findContigs(bcf_hdr_t *hdr_in, const char* ctg1a, uint64_t len1, const char* ctg2a, uint64_t len2) { - char ctg1b[10]; - char ctg2b[10]; - // try to add a 'chr' prefix to the chromosome name - sprintf(ctg1b, "chr%s", ctg1a); - sprintf(ctg2b, "chr%s", ctg2a); - int found=0; - int i,n_contigs= hdr_in->n[BCF_DT_CTG]; - for(i=0;i< n_contigs && found<2 ;i++) { - uint64_t len; - bcf_idpair_t c = hdr_in->id[BCF_DT_CTG][i]; - if(c.val==NULL) continue; - if(c.val->info==NULL) continue; - len = c.val->info[0]; - const char* contig_name = c.key; - if(len == len1 && (strcmp(ctg1a,contig_name)==0 || strcmp(ctg1b,contig_name)==0)) { - found++; - } - else if(len == len2 && (strcmp(ctg2a,contig_name)==0 || strcmp(ctg2b,contig_name)==0)) { - found++; - } - } - return found==2; +static int findContigs(bcf_hdr_t* hdr_in, const char* ctg1a, uint64_t len1, + const char* ctg2a, uint64_t len2) { + char ctg1b[10]; + char ctg2b[10]; + // try to add a 'chr' prefix to the chromosome name + sprintf(ctg1b, "chr%s", ctg1a); + sprintf(ctg2b, "chr%s", ctg2a); + int found = 0; + int i, n_contigs = hdr_in->n[BCF_DT_CTG]; + for (i = 0; i < n_contigs && found < 2; i++) { + uint64_t len; + bcf_idpair_t c = hdr_in->id[BCF_DT_CTG][i]; + if (c.val == NULL) continue; + if (c.val->info == NULL) continue; + len = c.val->info[0]; + const char* contig_name = c.key; + if (len == len1 && + (strcmp(ctg1a, contig_name) == 0 || strcmp(ctg1b, contig_name) == 0)) { + found++; + } else if (len == len2 && (strcmp(ctg2a, 
contig_name) == 0 || + strcmp(ctg2b, contig_name) == 0)) { + found++; } + } + return found == 2; +} /** return true if allele is of ATGC */ static int is_ATGC(const char* s) { - char* p=(char*)s; - if(*p==0) return 0; - while(*p!=0) { - if(strchr("ATGCatgc",*p)==NULL) return 0; - p++; - } - - return 1; - } + char* p = (char*)s; + if (*p == 0) return 0; + while (*p != 0) { + if (strchr("ATGCatgc", *p) == NULL) return 0; + p++; + } + + return 1; +} /** insert a new hyperlink in the url table, ignore duplicates */ -static void InsertHyperLink(const char* database, const char* label, const char* url) { - unsigned int i; - for(i=0;i< TableNRows(args.hyperlinksTable);++i) { - CellPtr cell = TableAt(args.hyperlinksTable,2,i); - if(strcmp(CellCStr(cell),url)==0) return; - } - RowPtr row = TableNewRow(args.hyperlinksTable); - RowSetText(row,0,database); - RowSetText(row,1,label); - RowSetText(row,2,url); - } - -const char *about(void) { - return "Convert VCF to tables in the terminal.\n" - "Author Pierre Lindenbaum PhD. Institut-du-Thorax. U1087. Nantes/France\n" - "Options:\n" - " --hide (string) comma separated of features to hide:\n" - " HOM_REF or RR : genotypes with REF allele only\n" - " HET or AR : heterozygous genotypes\n" - " NO_CALL or MISSING : missing genotypes\n" - " CSQ or VEP : VEP table\n" - " SPLICEAI : SPLICEAI table\n" - " ANN or SNPEFF : SNPEFF table\n" - " LOF: SNPEFF LOF table\n" - " VC: general table\n" - " INFO: INFO table\n" - " GT: Genotype table\n" - " GTTYPES: Genotype count table\n" - " URL: hyperlink table\n" - "Example:\n" - "$ wget -O - 'http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz' |\\\n\tbcftools +vcf2table -i 'AC<10' -- --hide 'HOM_REF,INFO,NO_CALL' \n" - "(...)\n" - "<<< 22:10714247:C (n. 
446)\n" - "\n" - "# Variant\n" - "+--------+----------+\n" - "| KEY | VALUE |\n" - "+--------+----------+\n" - "| CHROM | 22 |\n" - "| POS | 10714247 |\n" - "| ID | . |\n" - "| REF | C |\n" - "| ALT | G |\n" - "| QUAL | . |\n" - "| FILTER | PASS |\n" - "+--------+----------+\n" - "\n" - "# GENOTYPE TYPES\n" - "+-----------+-------+----------+\n" - "| Type | Count | % |\n" - "+-----------+-------+----------+\n" - "| REF only | 2545 | 99.8823 |\n" - "| HET | 3 | 0.117739 |\n" - "+-----------+-------+----------+\n" - "\n" - "# GENOTYPES\n" - "+---------+-------+-----+\n" - "| SAMPLE | GTYPE | GT |\n" - "+---------+-------+-----+\n" - "| HG03136 | HET | 1|0 |\n" - "| HG03171 | HET | 0|1 |\n" - "| HG03270 | HET | 0|1 |\n" - "+---------+-------+-----+\n" - "\n" - ">>> 22:10714247:C (n. 446)\n" - - ; - } +static void InsertHyperLink(const char* database, const char* label, + const char* url) { + unsigned int i; + for (i = 0; i < TableNRows(args.hyperlinksTable); ++i) { + CellPtr cell = TableAt(args.hyperlinksTable, 2, i); + if (strcmp(CellCStr(cell), url) == 0) return; + } + RowPtr row = TableNewRow(args.hyperlinksTable); + RowSetText(row, 0, database); + RowSetText(row, 1, label); + RowSetText(row, 2, url); +} + +const char* about(void) { + return "Convert VCF to tables in the terminal.\n" + "Author Pierre Lindenbaum PhD. Institut-du-Thorax. U1087. 
" + "Nantes/France\n" + "Options:\n" + " --hide (string) comma separated of features to hide:\n" + " HOM_REF or RR : genotypes with REF allele only\n" + " HET or AR : heterozygous genotypes\n" + " NO_CALL or MISSING : missing genotypes\n" + " CSQ or VEP : VEP table\n" + " SPLICEAI : SPLICEAI table\n" + " ANN or SNPEFF : SNPEFF table\n" + " LOF: SNPEFF LOF table\n" + " VC: general table\n" + " INFO: INFO table\n" + " GT: Genotype table\n" + " GTTYPES: Genotype count table\n" + " URL: hyperlink table\n" + "Example:\n" + "$ wget -O - " + "'http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/" + "1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/" + "ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased." + "vcf.gz' |\\\n\tbcftools +vcf2table -i 'AC<10' -- --hide " + "'HOM_REF,INFO,NO_CALL' \n" + "(...)\n" + "<<< 22:10714247:C (n. 446)\n" + "\n" + "# Variant\n" + "+--------+----------+\n" + "| KEY | VALUE |\n" + "+--------+----------+\n" + "| CHROM | 22 |\n" + "| POS | 10714247 |\n" + "| ID | . |\n" + "| REF | C |\n" + "| ALT | G |\n" + "| QUAL | . |\n" + "| FILTER | PASS |\n" + "+--------+----------+\n" + "\n" + "# GENOTYPE TYPES\n" + "+-----------+-------+----------+\n" + "| Type | Count | % |\n" + "+-----------+-------+----------+\n" + "| REF only | 2545 | 99.8823 |\n" + "| HET | 3 | 0.117739 |\n" + "+-----------+-------+----------+\n" + "\n" + "# GENOTYPES\n" + "+---------+-------+-----+\n" + "| SAMPLE | GTYPE | GT |\n" + "+---------+-------+-----+\n" + "| HG03136 | HET | 1|0 |\n" + "| HG03171 | HET | 0|1 |\n" + "| HG03270 | HET | 0|1 |\n" + "+---------+-------+-----+\n" + "\n" + ">>> 22:10714247:C (n. 446)\n" + + ; +} /* Called once at startup, it initializes local variables. Return 1 to suppress VCF/BCF header from printing, 0 otherwise. 
*/ -int init(int argc, char **argv, bcf_hdr_t *hdr_in, bcf_hdr_t *out) - { - int c; - memset((void*)&args,sizeof(args_t),1); - args.header = hdr_in; - args.out = stdout; - // initialize table that will not change - args.annTable = TableNewStr( - "Allele","Annotation","Annotation_Impact","Gene_Name","Gene_ID", - "Feature_Type","Feature_ID","Transcript_BioType","Rank", - "HGVS.c","HGVS.p","cDNA.pos/length","CDS.pos/length","AA.pos/length", - "Distance,Message",NULL - ); - args.spliceaiTable = TableNewStr("ALLELE","SYMBOL","DS_AG","DS_AL","DS_DG","DS_DL","DP_AG","DP_AL","DP_DG","DP_DL",NULL); - args.infoTable = TableNewStr("KEY","IDX","VALUE",NULL); - args.vcTable = TableNewStr("KEY","VALUE",NULL); - args.lofTable = TableNewStr("Gene_Name","Gene_ID","Number_of_transcripts_in_gene","Percent_of_transcripts_affected",NULL); - args.gtypeTable = TableNewStr("Type","Count","%",NULL); - args.hyperlinksTable = TableNewStr("DB",""/* empty/misc */,"URL",NULL); - - static struct option loptions[] = - { - {"hide",required_argument,NULL,'x'}, - {0,0,0,0} - }; - - while ((c = getopt_long(argc, argv, "hx:",loptions,NULL)) >= 0) - { - switch (c) - { - case 'x': - { - int i; - StringListPtr hide = StringListMake(optarg,','); - for(i=0; i< hide->size;++i) { - const char* hidden = StringListAt(hide,i); - if(strcasecmp(hidden, "HOM_REF")==0 || strcasecmp(hidden, "RR")==0) { - args.hide_HOM_REF = 1; - } - else if(strcasecmp(hidden, "NO_CALL")==0 || strcasecmp(hidden, "MISSING")==0) { - args.hide_NO_CALL = 1; - } - else if(strcasecmp(hidden, "HOM_VAR")==0 || strcasecmp(hidden, "AA")==0) { - args.hide_HOM_VAR = 1; - } - else if(strcasecmp(hidden, "HET")==0 || strcasecmp(hidden, "AR")==0) { - args.hide_HET = 1; - } - else if(strcasecmp(hidden, "OTHER")==0 ) { - args.hide_OTHER = 1; - } - else if(strcasecmp(hidden, "ANN")==0 || strcasecmp(hidden, "SNPEFF")==0) { - args.hide_ANN_table = 1; - } - else if(strcasecmp(hidden, "CSQ")==0 || strcasecmp(hidden, "VEP")==0) { - args.hide_VEP_table = 1; 
- } - else if(strcasecmp(hidden, "BCSQ")==0 || strcasecmp(hidden, "BCFTOOLS")==0) { - args.hide_BCSQ_table = 1; - } - else if(strcasecmp(hidden, "SPLICEAI")==0) { - args.hide_SPLICEAI_table = 1; - } - else if(strcasecmp(hidden, "INFO")==0) { - args.hide_INFO_table = 1; - } - else if(strcasecmp(hidden, "VC")==0) { - args.hide_VC_table = 1; - } - else if(strcasecmp(hidden, "LOF")==0) { - args.hide_LOF_table = 1; - } - else if(strcasecmp(hidden, "GT")==0 || strcasecmp(hidden, "GENOTYPES")==0) { - args.hide_GT_table = 1; - } - else if(strcasecmp(hidden, "GTTYPES")==0) { - args.hide_GTTYPE_table = 1; - } - else if(strcasecmp(hidden, "URL")==0 || strcasecmp(hidden, "URLS")==0) { - args.hide_links = 1; - } - } - StringListFree(hide); - break; - } - case 'h': - case '?': - default: error("wrong arguments"); break; +int init(int argc, char** argv, bcf_hdr_t* hdr_in, bcf_hdr_t* out) { + int c; + memset((void*)&args, sizeof(args_t), 1); + args.header = hdr_in; + args.out = stdout; + // initialize table that will not change + args.annTable = + TableNewStr("Allele", "Annotation", "Annotation_Impact", "Gene_Name", + "Gene_ID", "Feature_Type", "Feature_ID", "Transcript_BioType", + "Rank", "HGVS.c", "HGVS.p", "cDNA.pos/length", + "CDS.pos/length", "AA.pos/length", "Distance,Message", NULL); + args.spliceaiTable = + TableNewStr("ALLELE", "SYMBOL", "DS_AG", "DS_AL", "DS_DG", "DS_DL", + "DP_AG", "DP_AL", "DP_DG", "DP_DL", NULL); + args.infoTable = TableNewStr("KEY", "IDX", "VALUE", NULL); + args.vcTable = TableNewStr("KEY", "VALUE", NULL); + args.lofTable = + TableNewStr("Gene_Name", "Gene_ID", "Number_of_transcripts_in_gene", + "Percent_of_transcripts_affected", NULL); + args.gtypeTable = TableNewStr("Type", "Count", "%", NULL); + args.hyperlinksTable = TableNewStr("DB", "" /* empty/misc */, "URL", NULL); + + static struct option loptions[] = {{"hide", required_argument, NULL, 'x'}, + {0, 0, 0, 0}}; + + while ((c = getopt_long(argc, argv, "hx:", loptions, NULL)) >= 0) { + switch (c) 
{ + case 'x': { + int i; + StringListPtr hide = StringListMake(optarg, ','); + for (i = 0; i < hide->size; ++i) { + const char* hidden = StringListAt(hide, i); + if (strcasecmp(hidden, "HOM_REF") == 0 || + strcasecmp(hidden, "RR") == 0) { + args.hide_HOM_REF = 1; + } else if (strcasecmp(hidden, "NO_CALL") == 0 || + strcasecmp(hidden, "MISSING") == 0) { + args.hide_NO_CALL = 1; + } else if (strcasecmp(hidden, "HOM_VAR") == 0 || + strcasecmp(hidden, "AA") == 0) { + args.hide_HOM_VAR = 1; + } else if (strcasecmp(hidden, "HET") == 0 || + strcasecmp(hidden, "AR") == 0) { + args.hide_HET = 1; + } else if (strcasecmp(hidden, "OTHER") == 0) { + args.hide_OTHER = 1; + } else if (strcasecmp(hidden, "ANN") == 0 || + strcasecmp(hidden, "SNPEFF") == 0) { + args.hide_ANN_table = 1; + } else if (strcasecmp(hidden, "CSQ") == 0 || + strcasecmp(hidden, "VEP") == 0) { + args.hide_VEP_table = 1; + } else if (strcasecmp(hidden, "BCSQ") == 0 || + strcasecmp(hidden, "BCFTOOLS") == 0) { + args.hide_BCSQ_table = 1; + } else if (strcasecmp(hidden, "SPLICEAI") == 0) { + args.hide_SPLICEAI_table = 1; + } else if (strcasecmp(hidden, "INFO") == 0) { + args.hide_INFO_table = 1; + } else if (strcasecmp(hidden, "VC") == 0) { + args.hide_VC_table = 1; + } else if (strcasecmp(hidden, "LOF") == 0) { + args.hide_LOF_table = 1; + } else if (strcasecmp(hidden, "GT") == 0 || + strcasecmp(hidden, "GENOTYPES") == 0) { + args.hide_GT_table = 1; + } else if (strcasecmp(hidden, "GTTYPES") == 0) { + args.hide_GTTYPE_table = 1; + } else if (strcasecmp(hidden, "URL") == 0 || + strcasecmp(hidden, "URLS") == 0) { + args.hide_links = 1; + } } + StringListFree(hide); + break; + } + case 'h': + case '?': + default: + error("wrong arguments"); + break; } + } - if ( !isatty(fileno((FILE *)stdout)) ) { - args.ascii=1; - } - - if(args.ascii==0) { - if (setlocale(LC_CTYPE, "") == NULL) - { - fprintf(stderr,"setlocale failed. 
Switching to ascii\n"); - args.ascii=1; - } - } - /* guess the build by finding signature of chromosome/length ?*/ - if( findContigs(hdr_in,"1",249250621,"2",243199373)) { - args.build = human_hg19; - } - else if( findContigs(hdr_in,"1",248956422,"2",242193529)) { - args.build = human_hg38; - } - else if( findContigs(hdr_in,"RF01",3302,"RF02",2687)) { - args.build = rotavirus_rf; - } - else { - args.build = undefined; - } - - /** find INFO/CSQ and decode it */ - bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr_in, BCF_HL_INFO, NULL, "CSQ", NULL); - if(hrec!=NULL) { - int ret = bcf_hrec_find_key(hrec, "Description"); - char *format = ret < 0 ? NULL: strstr(hrec->vals[ret], "Format: "); - if(format!=NULL) { - format += 8; - char* vep_format = strdup(format); - //remove trailing quote - if(vep_format[strlen(vep_format)-1]=='"') { - vep_format[strlen(vep_format)-1] = 0; - } - args.vepTokens = StringListMake(vep_format,'|'); - free(vep_format); - } - } - /** find INFO/BCSQ */ - hrec = bcf_hdr_get_hrec(hdr_in, BCF_HL_INFO, NULL, "BCSQ", NULL); - if(hrec!=NULL) { - int ret = bcf_hrec_find_key(hrec, "Description"); - char *format = ret < 0 ? NULL: strstr(hrec->vals[ret], "Format: "); - if(format!=NULL) { - format += 8; - args.bcsqTokens = StringListMake(format,'|'); - } - } - - - return 1;//suppress VCF/BCF header -} + if (!isatty(fileno((FILE*)stdout))) { + args.ascii = 1; + } -#define PRINT_HEADER \ - switch(args.build) {\ - case human_hg19 : fputs(" GRCh37 : ",args.out); break;\ - case human_hg38 : fputs(" GRCh38 : ",args.out); break;\ - case rotavirus_rf : fputs(" Rotavirus : ",args.out); break;\ - default:break;\ - }\ - fprintf(args.out," %s:%s:%s (n. %ld)\n",tokens->strings[0],tokens->strings[1],tokens->strings[3], args.n_variants) + if (args.ascii == 0) { + if (setlocale(LC_CTYPE, "") == NULL) { + fprintf(stderr, "setlocale failed. 
Switching to ascii\n"); + args.ascii = 1; + } + } + /* guess the build by finding signature of chromosome/length ?*/ + if (findContigs(hdr_in, "1", 249250621, "2", 243199373)) { + args.build = human_hg19; + } else if (findContigs(hdr_in, "1", 248956422, "2", 242193529)) { + args.build = human_hg38; + } else if (findContigs(hdr_in, "RF01", 3302, "RF02", 2687)) { + args.build = rotavirus_rf; + } else { + args.build = undefined; + } + + /** find INFO/CSQ and decode it */ + bcf_hrec_t* hrec = bcf_hdr_get_hrec(hdr_in, BCF_HL_INFO, NULL, "CSQ", NULL); + if (hrec != NULL) { + int ret = bcf_hrec_find_key(hrec, "Description"); + char* format = ret < 0 ? NULL : strstr(hrec->vals[ret], "Format: "); + if (format != NULL) { + format += 8; + char* vep_format = strdup(format); + // remove trailing quote + if (vep_format[strlen(vep_format) - 1] == '"') { + vep_format[strlen(vep_format) - 1] = 0; + } + args.vepTokens = StringListMake(vep_format, '|'); + free(vep_format); + } + } + /** find INFO/BCSQ */ + hrec = bcf_hdr_get_hrec(hdr_in, BCF_HL_INFO, NULL, "BCSQ", NULL); + if (hrec != NULL) { + int ret = bcf_hrec_find_key(hrec, "Description"); + char* format = ret < 0 ? NULL : strstr(hrec->vals[ret], "Format: "); + if (format != NULL) { + format += 8; + args.bcsqTokens = StringListMake(format, '|'); + } + } + + return 1; // suppress VCF/BCF header +} +#define PRINT_HEADER \ + switch (args.build) { \ + case human_hg19: \ + fputs(" GRCh37 : ", args.out); \ + break; \ + case human_hg38: \ + fputs(" GRCh38 : ", args.out); \ + break; \ + case rotavirus_rf: \ + fputs(" Rotavirus : ", args.out); \ + break; \ + default: \ + break; \ + } \ + fprintf(args.out, " %s:%s:%s (n. %ld)\n", tokens->strings[0], \ + tokens->strings[1], tokens->strings[3], args.n_variants) /* Called for each VCF record. Return rec to output the line or NULL to suppress output. 
*/ -bcf1_t *process(bcf1_t *v) { - TablePtr vepTable = NULL; - TablePtr bcsqTable = NULL; - hts_pos_t variant_end = v->pos + v->rlen; - - unsigned int i; - args.n_variants++; - /* instead of re-inventing the wheel: the conversion of v to text, let's use htslib/vcf_format to convert the whole line to string */ - kstring_t vcf_line = KS_INITIALIZE; - vcf_format(args.header, v, &vcf_line); - //remove last CR/LF - if(vcf_line.s[vcf_line.l-1]=='\n') { - vcf_line.s[vcf_line.l-1]=0; - vcf_line.l--; - } - - /* split the VCF line into a list of string */ - StringListPtr tokens = StringListMake(vcf_line.s,'\t'); - /* split the ALT alleles */ - StringListPtr alt_alleles = StringListMake(StringListAt(tokens,4),','); - - - fputs("<<<",args.out); - PRINT_HEADER; - fputc('\n',args.out); - - RowPtr row = TableNewRow(args.vcTable); - CellSetText(RowAt(row,0), "CHROM"); - CellSetText(RowAt(row,1),StringListAt(tokens,0)); - - row = TableNewRow(args.vcTable); - CellSetText(RowAt(row,0), "POS"); - CellSetText(RowAt(row,1),StringListAt(tokens,1)); - - if(v->pos +1 != variant_end) { - row = TableNewRow(args.vcTable); - CellSetText(RowAt(row,0), "end"); - CellSetLL(RowAt(row,1),variant_end); - - row = TableNewRow(args.vcTable); - CellSetText(RowAt(row,0), "length"); - CellSetLL(RowAt(row,1),variant_end-v->pos); - } - - row = TableNewRow(args.vcTable); - CellSetText(RowAt(row,0), "ID"); - CellSetText(RowAt(row,1),StringListAt(tokens,2)); - - +bcf1_t* process(bcf1_t* v) { + TablePtr vepTable = NULL; + TablePtr bcsqTable = NULL; + hts_pos_t variant_end = v->pos + v->rlen; + + unsigned int i; + args.n_variants++; + /* instead of re-inventing the wheel: the conversion of v to text, let's use + * htslib/vcf_format to convert the whole line to string */ + kstring_t vcf_line = KS_INITIALIZE; + vcf_format(args.header, v, &vcf_line); + // remove last CR/LF + if (vcf_line.s[vcf_line.l - 1] == '\n') { + vcf_line.s[vcf_line.l - 1] = 0; + vcf_line.l--; + } + + /* split the VCF line into a list of 
string */ + StringListPtr tokens = StringListMake(vcf_line.s, '\t'); + /* split the ALT alleles */ + StringListPtr alt_alleles = StringListMake(StringListAt(tokens, 4), ','); + + fputs("<<<", args.out); + PRINT_HEADER; + fputc('\n', args.out); + + RowPtr row = TableNewRow(args.vcTable); + CellSetText(RowAt(row, 0), "CHROM"); + CellSetText(RowAt(row, 1), StringListAt(tokens, 0)); + + row = TableNewRow(args.vcTable); + CellSetText(RowAt(row, 0), "POS"); + CellSetText(RowAt(row, 1), StringListAt(tokens, 1)); + + if (v->pos + 1 != variant_end) { row = TableNewRow(args.vcTable); - CellSetText(RowAt(row,0), "REF"); - CellSetText(RowAt(row,1),StringListAt(tokens,3)); + CellSetText(RowAt(row, 0), "end"); + CellSetLL(RowAt(row, 1), variant_end); row = TableNewRow(args.vcTable); - CellSetText(RowAt(row,0), "ALT"); - CellSetText(RowAt(row,1),StringListAt(tokens,4)); + CellSetText(RowAt(row, 0), "length"); + CellSetLL(RowAt(row, 1), variant_end - v->pos); + } + + row = TableNewRow(args.vcTable); + CellSetText(RowAt(row, 0), "ID"); + CellSetText(RowAt(row, 1), StringListAt(tokens, 2)); + + row = TableNewRow(args.vcTable); + CellSetText(RowAt(row, 0), "REF"); + CellSetText(RowAt(row, 1), StringListAt(tokens, 3)); + + row = TableNewRow(args.vcTable); + CellSetText(RowAt(row, 0), "ALT"); + CellSetText(RowAt(row, 1), StringListAt(tokens, 4)); + + row = TableNewRow(args.vcTable); + CellSetText(RowAt(row, 0), "QUAL"); + CellSetText(RowAt(row, 1), tokens->strings[5]); + + row = TableNewRow(args.vcTable); + CellSetText(RowAt(row, 0), "FILTER"); + CellSetText(RowAt(row, 1), StringListAt(tokens, 6)); + if (strcmp(StringListAt(tokens, 6), ".") == 0 || + strcmp(StringListAt(tokens, 6), "PASS") == 0) { + RowAt(row, 1)->color = COLOR_GREEN; + } else { + RowAt(row, 1)->color = COLOR_RED; + } + + if (!args.hide_VC_table) { + fprintf(args.out, "# Variant\n"); + TablePrint(args.vcTable, &args); + } + + /** fill URL */ + if (!args.hide_links) { + hts_pos_t pos1 = v->pos + 1; + kstring_t url = 
KS_INITIALIZE; + + if ((args.build == human_hg19 || args.build == human_hg38)) { + ks_clear(&url); + kputs("https://genome.ucsc.edu/cgi-bin/hgTracks?db=", &url); + kputs((args.build == human_hg38 ? "hg38" : "hg19"), &url); + kputs("&position=", &url); + kputs(StringListAt(tokens, 0), &url); + kputs("%3A", &url); + kputll(pos1, &url); + kputc('-', &url); + kputll(variant_end, &url); + InsertHyperLink("UCSC", "", ks_str(&url)); + } - row = TableNewRow(args.vcTable); - CellSetText(RowAt(row,0), "QUAL"); - CellSetText(RowAt(row,1),tokens->strings[5]); + for (i = 0; i < alt_alleles->size; ++i) { + if ((args.build == human_hg19 || args.build == human_hg38) && + is_ATGC(StringListAt(tokens, 3)) && + is_ATGC(StringListAt(alt_alleles, i))) { + ks_clear(&url); + kputs("https://gnomad.broadinstitute.org/variant/", &url); + kputs(StringListAt(tokens, 0), &url); + kputc('-', &url); + kputll(pos1, &url); + kputc('-', &url); + kputs(StringListAt(tokens, 3), &url); + kputc('-', &url); + kputs(StringListAt(alt_alleles, i), &url); + kputs((args.build == human_hg38 ? "?dataset=gnomad_r4" + : "?dataset=gnomad_r2_1"), + &url); + InsertHyperLink("Gnomad", StringListAt(alt_alleles, i), ks_str(&url)); + + ks_clear(&url); + kputs("https://spliceailookup.broadinstitute.org/#variant=", &url); + kputs(StringListAt(tokens, 0), &url); + kputc('-', &url); + kputll(pos1, &url); + kputc('-', &url); + kputs(StringListAt(tokens, 3), &url); + kputc('-', &url); + kputs(StringListAt(alt_alleles, i), &url); + kputs((args.build == human_hg38 ? 
"?hg=38" : "?hg=19"), &url); + InsertHyperLink("SpliceAI", StringListAt(alt_alleles, i), ks_str(&url)); + } + if (args.build == human_hg38 && is_ATGC(StringListAt(tokens, 3)) && + is_ATGC(StringListAt(alt_alleles, i))) { + ks_clear(&url); + kputs("https://genetics.opentargets.org/variant/", &url); + kputs(StringListAt(tokens, 0), &url); + kputc('_', &url); + kputll(pos1, &url); + kputc('_', &url); + kputs(StringListAt(tokens, 3), &url); + kputc('_', &url); + kputs(StringListAt(alt_alleles, i), &url); + InsertHyperLink("OpenTargets", StringListAt(alt_alleles, i), + ks_str(&url)); + + ks_clear(&url); + kputs("https://afb.ukbiobank.ac.uk/variant/", &url); + kputs(StringListAt(tokens, 0), &url); + kputc('_', &url); + kputll(pos1, &url); + kputc('_', &url); + kputs(StringListAt(tokens, 3), &url); + kputc('_', &url); + kputs(StringListAt(alt_alleles, i), &url); + InsertHyperLink("AF.ukbiobank", StringListAt(alt_alleles, i), + ks_str(&url)); + + ks_clear(&url); + kputs("https://genebe.net/variant/hg38/", &url); + kputs(StringListAt(tokens, 0), &url); + kputc('-', &url); + kputll(pos1, &url); + kputc('-', &url); + kputs(StringListAt(tokens, 3), &url); + kputc('-', &url); + kputs(StringListAt(alt_alleles, i), &url); + InsertHyperLink("Genebe", StringListAt(alt_alleles, i), ks_str(&url)); + } + } - row = TableNewRow(args.vcTable); - CellSetText(RowAt(row,0), "FILTER"); - CellSetText(RowAt(row,1),StringListAt(tokens,6)); - if(strcmp(StringListAt(tokens,6),".")==0 || strcmp(StringListAt(tokens,6),"PASS")==0) { - RowAt(row,1)->color = COLOR_GREEN; - } - else - { - RowAt(row,1)->color = COLOR_RED; - } - - if(!args.hide_VC_table) { - fprintf(args.out, "# Variant\n"); - TablePrint(args.vcTable,&args); - } - - /** fill URL */ - if(!args.hide_links) { - hts_pos_t pos1= v->pos+1; - kstring_t url = KS_INITIALIZE; - - if((args.build==human_hg19 || args.build==human_hg38)) { - ks_clear(&url); - kputs("https://genome.ucsc.edu/cgi-bin/hgTracks?db=",&url); - 
kputs((args.build==human_hg38?"hg38":"hg19"),&url); - kputs("&position=",&url); - kputs(StringListAt(tokens,0),&url); - kputs("%3A",&url); - kputll(pos1,&url); - kputc('-',&url); - kputll(variant_end,&url); - InsertHyperLink("UCSC","",ks_str(&url)); - } - - for(i=0; i < alt_alleles->size; ++i) { - if((args.build==human_hg19 || args.build==human_hg38) && is_ATGC(StringListAt(tokens,3)) && is_ATGC(StringListAt(alt_alleles,i))) { - ks_clear(&url); - kputs("https://gnomad.broadinstitute.org/variant/",&url); - kputs(StringListAt(tokens,0),&url); - kputc('-',&url); - kputll(pos1,&url); - kputc('-',&url); - kputs(StringListAt(tokens,3),&url); - kputc('-',&url); - kputs(StringListAt(alt_alleles,i),&url); - kputs((args.build==human_hg38?"?dataset=gnomad_r4":"?dataset=gnomad_r2_1"),&url); - InsertHyperLink("Gnomad",StringListAt(alt_alleles,i),ks_str(&url)); - - ks_clear(&url); - kputs("https://spliceailookup.broadinstitute.org/#variant=",&url); - kputs(StringListAt(tokens,0),&url); - kputc('-',&url); - kputll(pos1,&url); - kputc('-',&url); - kputs(StringListAt(tokens,3),&url); - kputc('-',&url); - kputs(StringListAt(alt_alleles,i),&url); - kputs((args.build==human_hg38?"?hg=38":"?hg=19"),&url); - InsertHyperLink("SpliceAI",StringListAt(alt_alleles,i),ks_str(&url)); - } - if(args.build==human_hg38 && is_ATGC(StringListAt(tokens,3)) && is_ATGC(StringListAt(alt_alleles,i))) { - ks_clear(&url); - kputs("https://genetics.opentargets.org/variant/",&url); - kputs(StringListAt(tokens,0),&url); - kputc('_',&url); - kputll(pos1,&url); - kputc('_',&url); - kputs(StringListAt(tokens,3),&url); - kputc('_',&url); - kputs(StringListAt(alt_alleles,i),&url); - InsertHyperLink("OpenTargets",StringListAt(alt_alleles,i),ks_str(&url)); - - - ks_clear(&url); - kputs("https://afb.ukbiobank.ac.uk/variant/",&url); - kputs(StringListAt(tokens,0),&url); - kputc('_',&url); - kputll(pos1,&url); - kputc('_',&url); - kputs(StringListAt(tokens,3),&url); - kputc('_',&url); - 
kputs(StringListAt(alt_alleles,i),&url); - InsertHyperLink("AF.ukbiobank",StringListAt(alt_alleles,i),ks_str(&url)); - - ks_clear(&url); - kputs("https://genebe.net/variant/hg38/",&url); - kputs(StringListAt(tokens,0),&url); - kputc('-',&url); - kputll(pos1,&url); - kputc('-',&url); - kputs(StringListAt(tokens,3),&url); - kputc('-',&url); - kputs(StringListAt(alt_alleles,i),&url); - InsertHyperLink("Genebe",StringListAt(alt_alleles,i),ks_str(&url)); - } - } - - ks_free(&url); - } - - - /* parse values in the INFO column */ - if(tokens->size>7 && strcmp(StringListAt(tokens,7),".")!=0) { - /* split INFO by semicolon */ - StringListPtr infos = StringListMake(StringListAt(tokens,7),';'); - for(i=0;i< infos->size;i++) { - unsigned int j; - const char* info = StringListAt(infos,i); - char* eq = strchr(info,'='); - if(eq==NULL || eq==info) continue; - /* split multiple values for this info using commas */ - StringListPtr values = StringListMake(eq+1,','); - for(j=0;j< values->size;j++) { - //skip CSQ - if(args.vepTokens!=NULL && strncmp(info,"CSQ=",4)==0) { - if(args.hide_VEP_table) continue; - unsigned int k; - //build VEP table if needed - if(vepTable==NULL) { - vepTable = TableNew(0); - for(k=0;k< args.vepTokens->size;++k) { - TableAppendColumn(vepTable,StringListAt( args.vepTokens,k)); - } - } - // fill VEP table - row = TableNewRow(vepTable); - StringListPtr veps = StringListMake( StringListAt(values,j),'|'); - for(k=0;k< args.vepTokens->size && k < veps->size;++k) { - CellSetText(RowAt(row,k),StringListAt( veps,k)); - } - StringListFree(veps); - continue; - } - - //skip BCSQ - if(args.bcsqTokens!=NULL && strncmp(info,"BCSQ=",5)==0) { - if(args.hide_BCSQ_table) continue; - unsigned int k; - //build BCSQ table if needed - if(bcsqTable==NULL) { - bcsqTable = TableNew(0); - for(k=0;k< args.bcsqTokens->size;++k) { - TableAppendColumn(bcsqTable,StringListAt( args.bcsqTokens,k)); - } - } - - // fill BCSQ table - row = TableNewRow(bcsqTable); - StringListPtr bcsq = 
StringListMake( StringListAt(values,j),'|'); - for(k=0;k< args.bcsqTokens->size && k < bcsq->size;++k) { - CellSetText(RowAt(row,k),StringListAt( bcsq,k)); - } - StringListFree(bcsq); - continue; - } - - //skip SNPEFF/ANN - if(strncmp(info,"ANN=",4)==0) { - if(args.hide_ANN_table) continue; - unsigned int k; - // fill ANN table - row = TableNewRow(args.annTable); - StringListPtr ann = StringListMake( StringListAt(values,j),'|'); - for(k=0;k< TableNCols(args.annTable) && k < ann->size;++k) { - RowSetText(row,k,StringListAt(ann,k)); - } - StringListFree(ann); - continue; - } - - //skip SNPEFF/LOF - if(strncmp(info,"LOF=",4)==0) { - if(args.hide_LOF_table) continue; - unsigned int k; - char * copy= strdup(StringListAt(values,j)); - ASSERT_NOT_NULL(copy); - //remove first & last char - if(copy[0]=='(') memmove((void*)©[0],©[1], strlen(copy)); - if(copy[strlen(copy)-1]==')') copy[strlen(copy)-1]=0; - - // fill ANN table - row = TableNewRow(args.lofTable); - StringListPtr ann = StringListMake(copy,'|'); - for(k=0;k< TableNCols(args.lofTable) && k < ann->size;++k) { - RowSetText(row,k,StringListAt(ann,k)); - } - StringListFree(ann); - free(copy); - continue; - } - - //skip SpliceAI - if(strncmp(info,"SpliceAI=",4)==0) { - if(args.hide_SPLICEAI_table) continue; - unsigned int k; - // fill ANN table - row = TableNewRow(args.spliceaiTable); - StringListPtr spliceai = StringListMake( StringListAt(values,j),'|'); - for(k=0;k< TableNCols(args.spliceaiTable) && k < spliceai->size;++k) { - RowSetText(row,k,StringListAt(spliceai,k)); - } - StringListFree(spliceai); - continue; - } - - - row = TableNewRow(args.infoTable); - CellAppendTextN(RowAt(row,0),info,eq-info); - if(values->size>1) CellSetD(RowAt(row,1),(int)(j+1)); - RowSetText(row,2,values->strings[j]); - } - StringListFree(values); + ks_free(&url); + } + + /* parse values in the INFO column */ + if (tokens->size > 7 && strcmp(StringListAt(tokens, 7), ".") != 0) { + /* split INFO by semicolon */ + StringListPtr infos = 
StringListMake(StringListAt(tokens, 7), ';'); + for (i = 0; i < infos->size; i++) { + unsigned int j; + const char* info = StringListAt(infos, i); + char* eq = strchr(info, '='); + if (eq == NULL || eq == info) continue; + /* split multiple values for this info using commas */ + StringListPtr values = StringListMake(eq + 1, ','); + for (j = 0; j < values->size; j++) { + // skip CSQ + if (args.vepTokens != NULL && strncmp(info, "CSQ=", 4) == 0) { + if (args.hide_VEP_table) continue; + unsigned int k; + // build VEP table if needed + if (vepTable == NULL) { + vepTable = TableNew(0); + for (k = 0; k < args.vepTokens->size; ++k) { + TableAppendColumn(vepTable, StringListAt(args.vepTokens, k)); } - if(!args.hide_INFO_table && TableNRows(args.infoTable)>0) { - fprintf(args.out, "# INFO\n"); - TablePrint(args.infoTable,&args); - } - StringListFree(infos); - } - - if(TableNRows(args.hyperlinksTable)>0) { - fprintf(args.out, "# HYPERLINKS\n"); - TablePrint(args.hyperlinksTable,&args); - } - - if(vepTable!=NULL && TableNRows(vepTable)>0) { - fprintf(args.out, "# VEP/CSQ\n"); - TableRemoveEmptyColumns(vepTable); - TablePrint(vepTable,&args); - } - - if(bcsqTable!=NULL && TableNRows(bcsqTable)>0) { - fprintf(args.out, "# BCSQ\n"); - TableRemoveEmptyColumns(bcsqTable); - TablePrint(bcsqTable,&args); - } - - if(TableNRows(args.annTable)>0) { - fprintf(args.out, "# ANN/SNPEFF\n"); - //no keep it inummutable TableRemoveEmptyColumns(args.annTable); - TablePrint(args.annTable,&args); - } - - if(TableNRows(args.lofTable)>0) { - fprintf(args.out, "# LOF\n"); - TablePrint(args.lofTable,&args); - } - - if(TableNRows(args.spliceaiTable)>0) { - fprintf(args.out, "# SpliceAI\n"); - //no keep it inummutable TableRemoveEmptyColumns(args.annTable); - TablePrint(args.spliceaiTable,&args); - } - - // is there any genotype here ? 
- if(tokens->size>9) { - int count_hom_ref = 0; - int count_het = 0; - int count_hom_var = 0; - int count_missing = 0; - int count_other = 0; - // column for genotype - int gt_col = -1; - // column for filter FT - int ft_col = -1; - StringListPtr formats = StringListMake(tokens->strings[8],':'); - TablePtr genotypeTable = TableNewStr("SAMPLE",NULL); - TableAppendColumn(genotypeTable, "GTYPE"); - - for(i=0; isize;i++) { - TableAppendColumn(genotypeTable, formats->strings[i]); - if(strcmp("GT",formats->strings[i])==0) gt_col=(int)i; - if(strcmp("FT",formats->strings[i])==0) ft_col=(int)i; } - - for(i=9;i< tokens->size;i++) { - kstring_t gtype_name = KS_INITIALIZE; - int count_allele_0=0; - int count_allele_1=0; - int count_allele_missing=0; - int count_allele_other=0; - int print_it = 1; - // split Genotype components - StringListPtr values = StringListMake(tokens->strings[i],':'); - const char* color = COLOR_BLACK; - unsigned int j; - if(gt_col!=-1 && gt_col < values->size) { - // clone the GT value - char* gt = strdup(StringListAt(values,gt_col)); - // remove phasing - for(j=0;gt[j]!=0;j++) { - if(gt[j]=='|') gt[j]='/'; - } - //split the alleles in the GT - StringListPtr alleles = StringListMake(gt,'/'); - for(j=0;j< alleles->size;++j) { - char* allele = alleles->strings[j]; - if(strcmp(allele,"0")==0) count_allele_0++; - else if(strcmp(allele,"1")==0) count_allele_1++; - else if(strcmp(allele,".")==0) count_allele_missing++; - else count_allele_other++; - } - - if(alleles->size==2) { - if(count_allele_0==0 && count_allele_1==0 && count_allele_other==0) { - kputs("NO_CALL",>ype_name); - if(args.hide_NO_CALL) print_it=0; - count_missing++; - } - else if(count_allele_0==2) { - kputs("HOM_REF",>ype_name); - color=COLOR_GREEN; - if(args.hide_HOM_REF) print_it=0; - count_hom_ref++; - } - else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))==0) { - kputs("HOM_VAR",>ype_name); - color=COLOR_RED; - if(args.hide_HOM_VAR) print_it=0; - 
count_hom_var++; - } - else if(count_allele_missing==0 && strcmp(StringListAt(alleles,0),StringListAt(alleles,1))!=0) { - kputs("HET",>ype_name); - color=COLOR_CYAN; - count_het++; - if(args.hide_HET) print_it=0; - } - else { - if(args.hide_OTHER) print_it=0; - count_other++; - } - } - else if(alleles->size==1) { - if(count_allele_0==1) { - kputs("REF",>ype_name); - color=COLOR_GREEN; - if(args.hide_HOM_REF) print_it=0; - count_hom_ref++; - } - else if(count_allele_1==1) { - kputs("ALT",>ype_name); - color=COLOR_RED; - count_hom_var++; - } - else if(count_allele_missing==1) { - kputs("NO_CALL",>ype_name); - if(args.hide_NO_CALL) print_it=0; - count_missing++; - } - else { - if(args.hide_OTHER) print_it=0; - count_other++; - } - } - else - { - if(count_allele_0==alleles->size) { - kputs("HOM_REF",>ype_name); - color=COLOR_GREEN; - if(args.hide_HOM_REF) print_it=0; - count_hom_ref++; - } - else if(count_allele_1==alleles->size) { - kputs("HOM_VAR",>ype_name); - color=COLOR_RED; - if(args.hide_HOM_VAR) print_it=0; - count_hom_ref++; - } - else if(count_allele_missing==alleles->size) { - kputs("NO_CALL",>ype_name); - if(args.hide_NO_CALL) print_it=0; - count_missing++; - } - else { - if(args.hide_OTHER) print_it=0; - count_other++; - } - } - StringListFree(alleles); - free(gt); - } - - - - if(print_it && !args.hide_GT_table) { - row = TableNewRow(genotypeTable); - CellSetText(RowAt(row,0),args.header->samples[i-9]); - CellSetText(RowAt(row,1),gtype_name.s); - RowAt(row,1)->color = color; - - for(j=0; j< values->size;j++) { - CellSetText(RowAt(row,j+2), StringListAt(values,j)); - // color genotype if FORMAT/FT - if(ft_col==j) { - if(strcmp(StringListAt(values,j),"PASS")==0 ||strcmp( StringListAt(values,j),".")==0) { - RowAt(row,j+2)->color = COLOR_GREEN; - } - else - { - RowAt(row,j+2)->color = COLOR_RED; - } - } - } - } - StringListFree(values); - ks_free(>ype_name); - } - #define ADD_GT(LABEL,COUNT) if(COUNT>0 && total>0) {\ - row = TableNewRow(args.gtypeTable);\ - 
RowSetText(row,0,LABEL);\ - CellSetLL(RowAt(row,1),COUNT);\ - CellSetD(RowAt(row,2),100.0*(COUNT/((float)total)));\ - } - if(!args.hide_GTTYPE_table) { - int total = count_hom_ref + count_het + count_hom_var + count_missing + count_other; - ADD_GT("REF only ",count_hom_ref) - ADD_GT("HET",count_het) - ADD_GT("ALT only",count_hom_var) - ADD_GT("MISSING",count_missing) - ADD_GT("OTHER",count_other) - - if(TableNRows(args.gtypeTable)>0) { - fprintf(args.out, "# GENOTYPE TYPES\n"); - TablePrint(args.gtypeTable,&args); - } - } - #undef ADD_GT - - if(!args.hide_GT_table && TableNRows(genotypeTable)>0) { - fprintf(args.out, "# GENOTYPES\n"); - TablePrint(genotypeTable,&args); - } - TableFree(genotypeTable); - StringListFree(formats); + // fill VEP table + row = TableNewRow(vepTable); + StringListPtr veps = StringListMake(StringListAt(values, j), '|'); + for (k = 0; k < args.vepTokens->size && k < veps->size; ++k) { + CellSetText(RowAt(row, k), StringListAt(veps, k)); + } + StringListFree(veps); + continue; } + // skip BCSQ + if (args.bcsqTokens != NULL && strncmp(info, "BCSQ=", 5) == 0) { + if (args.hide_BCSQ_table) continue; + unsigned int k; + // build BCSQ table if needed + if (bcsqTable == NULL) { + bcsqTable = TableNew(0); + for (k = 0; k < args.bcsqTokens->size; ++k) { + TableAppendColumn(bcsqTable, StringListAt(args.bcsqTokens, k)); + } + } + // fill BCSQ table + row = TableNewRow(bcsqTable); + StringListPtr bcsq = StringListMake(StringListAt(values, j), '|'); + for (k = 0; k < args.bcsqTokens->size && k < bcsq->size; ++k) { + CellSetText(RowAt(row, k), StringListAt(bcsq, k)); + } + StringListFree(bcsq); + continue; + } + // skip SNPEFF/ANN + if (strncmp(info, "ANN=", 4) == 0) { + if (args.hide_ANN_table) continue; + unsigned int k; + // fill ANN table + row = TableNewRow(args.annTable); + StringListPtr ann = StringListMake(StringListAt(values, j), '|'); + for (k = 0; k < TableNCols(args.annTable) && k < ann->size; ++k) { + RowSetText(row, k, StringListAt(ann, k)); 
+ } + StringListFree(ann); + continue; + } - fputs(">>>",args.out); - PRINT_HEADER; + // skip SNPEFF/LOF + if (strncmp(info, "LOF=", 4) == 0) { + if (args.hide_LOF_table) continue; + unsigned int k; + char* copy = strdup(StringListAt(values, j)); + ASSERT_NOT_NULL(copy); + // remove first & last char + if (copy[0] == '(') memmove((void*)©[0], ©[1], strlen(copy)); + if (copy[strlen(copy) - 1] == ')') copy[strlen(copy) - 1] = 0; + + // fill ANN table + row = TableNewRow(args.lofTable); + StringListPtr ann = StringListMake(copy, '|'); + for (k = 0; k < TableNCols(args.lofTable) && k < ann->size; ++k) { + RowSetText(row, k, StringListAt(ann, k)); + } + StringListFree(ann); + free(copy); + continue; + } - fputc('\n',args.out); + // skip SpliceAI + if (strncmp(info, "SpliceAI=", 4) == 0) { + if (args.hide_SPLICEAI_table) continue; + unsigned int k; + // fill ANN table + row = TableNewRow(args.spliceaiTable); + StringListPtr spliceai = StringListMake(StringListAt(values, j), '|'); + for (k = 0; k < TableNCols(args.spliceaiTable) && k < spliceai->size; + ++k) { + RowSetText(row, k, StringListAt(spliceai, k)); + } + StringListFree(spliceai); + continue; + } - /** final cleanup */ - ks_free(&vcf_line); - StringListFree(tokens); - StringListFree(alt_alleles); - TableFree(bcsqTable); - TableFree(vepTable); - TableClear(args.annTable); - TableClear(args.lofTable); - TableClear(args.spliceaiTable); - TableClear(args.infoTable); - TableClear(args.vcTable); - TableClear(args.gtypeTable); - TableClear(args.hyperlinksTable); - return NULL;/* suppress bcf output */ + row = TableNewRow(args.infoTable); + CellAppendTextN(RowAt(row, 0), info, eq - info); + if (values->size > 1) CellSetD(RowAt(row, 1), (int)(j + 1)); + RowSetText(row, 2, values->strings[j]); + } + StringListFree(values); + } + if (!args.hide_INFO_table && TableNRows(args.infoTable) > 0) { + fprintf(args.out, "# INFO\n"); + TablePrint(args.infoTable, &args); + } + StringListFree(infos); + } + + if 
(TableNRows(args.hyperlinksTable) > 0) { + fprintf(args.out, "# HYPERLINKS\n"); + TablePrint(args.hyperlinksTable, &args); + } + + if (vepTable != NULL && TableNRows(vepTable) > 0) { + fprintf(args.out, "# VEP/CSQ\n"); + TableRemoveEmptyColumns(vepTable); + TablePrint(vepTable, &args); + } + + if (bcsqTable != NULL && TableNRows(bcsqTable) > 0) { + fprintf(args.out, "# BCSQ\n"); + TableRemoveEmptyColumns(bcsqTable); + TablePrint(bcsqTable, &args); + } + + if (TableNRows(args.annTable) > 0) { + fprintf(args.out, "# ANN/SNPEFF\n"); + // no keep it inummutable TableRemoveEmptyColumns(args.annTable); + TablePrint(args.annTable, &args); + } + + if (TableNRows(args.lofTable) > 0) { + fprintf(args.out, "# LOF\n"); + TablePrint(args.lofTable, &args); + } + + if (TableNRows(args.spliceaiTable) > 0) { + fprintf(args.out, "# SpliceAI\n"); + // no keep it inummutable TableRemoveEmptyColumns(args.annTable); + TablePrint(args.spliceaiTable, &args); + } + + // is there any genotype here ? + if (tokens->size > 9) { + int count_hom_ref = 0; + int count_het = 0; + int count_hom_var = 0; + int count_missing = 0; + int count_other = 0; + // column for genotype + int gt_col = -1; + // column for filter FT + int ft_col = -1; + StringListPtr formats = StringListMake(tokens->strings[8], ':'); + TablePtr genotypeTable = TableNewStr("SAMPLE", NULL); + TableAppendColumn(genotypeTable, "GTYPE"); + + for (i = 0; i < formats->size; i++) { + TableAppendColumn(genotypeTable, formats->strings[i]); + if (strcmp("GT", formats->strings[i]) == 0) gt_col = (int)i; + if (strcmp("FT", formats->strings[i]) == 0) ft_col = (int)i; } -void destroy(void) { - StringListFree(args.vepTokens); - StringListFree(args.bcsqTokens); - TableFree(args.spliceaiTable); - TableFree(args.annTable); - TableFree(args.lofTable); - TableFree(args.infoTable); - TableFree(args.vcTable); - TableFree(args.gtypeTable); - TableFree(args.hyperlinksTable); + for (i = 9; i < tokens->size; i++) { + kstring_t gtype_name = KS_INITIALIZE; + 
int count_allele_0 = 0; + int count_allele_1 = 0; + int count_allele_missing = 0; + int count_allele_other = 0; + int print_it = 1; + // split Genotype components + StringListPtr values = StringListMake(tokens->strings[i], ':'); + const char* color = COLOR_BLACK; + unsigned int j; + if (gt_col != -1 && gt_col < values->size) { + // clone the GT value + char* gt = strdup(StringListAt(values, gt_col)); + // remove phasing + for (j = 0; gt[j] != 0; j++) { + if (gt[j] == '|') gt[j] = '/'; + } + // split the alleles in the GT + StringListPtr alleles = StringListMake(gt, '/'); + for (j = 0; j < alleles->size; ++j) { + char* allele = alleles->strings[j]; + if (strcmp(allele, "0") == 0) + count_allele_0++; + else if (strcmp(allele, "1") == 0) + count_allele_1++; + else if (strcmp(allele, ".") == 0) + count_allele_missing++; + else + count_allele_other++; + } + + if (alleles->size == 2) { + if (count_allele_0 == 0 && count_allele_1 == 0 && + count_allele_other == 0) { + kputs("NO_CALL", >ype_name); + if (args.hide_NO_CALL) print_it = 0; + count_missing++; + } else if (count_allele_0 == 2) { + kputs("HOM_REF", >ype_name); + color = COLOR_GREEN; + if (args.hide_HOM_REF) print_it = 0; + count_hom_ref++; + } else if (count_allele_missing == 0 && + strcmp(StringListAt(alleles, 0), + StringListAt(alleles, 1)) == 0) { + kputs("HOM_VAR", >ype_name); + color = COLOR_RED; + if (args.hide_HOM_VAR) print_it = 0; + count_hom_var++; + } else if (count_allele_missing == 0 && + strcmp(StringListAt(alleles, 0), + StringListAt(alleles, 1)) != 0) { + kputs("HET", >ype_name); + color = COLOR_CYAN; + count_het++; + if (args.hide_HET) print_it = 0; + } else { + if (args.hide_OTHER) print_it = 0; + count_other++; + } + } else if (alleles->size == 1) { + if (count_allele_0 == 1) { + kputs("REF", >ype_name); + color = COLOR_GREEN; + if (args.hide_HOM_REF) print_it = 0; + count_hom_ref++; + } else if (count_allele_1 == 1) { + kputs("ALT", >ype_name); + color = COLOR_RED; + count_hom_var++; + } else 
if (count_allele_missing == 1) { + kputs("NO_CALL", >ype_name); + if (args.hide_NO_CALL) print_it = 0; + count_missing++; + } else { + if (args.hide_OTHER) print_it = 0; + count_other++; + } + } else { + if (count_allele_0 == alleles->size) { + kputs("HOM_REF", >ype_name); + color = COLOR_GREEN; + if (args.hide_HOM_REF) print_it = 0; + count_hom_ref++; + } else if (count_allele_1 == alleles->size) { + kputs("HOM_VAR", >ype_name); + color = COLOR_RED; + if (args.hide_HOM_VAR) print_it = 0; + count_hom_ref++; + } else if (count_allele_missing == alleles->size) { + kputs("NO_CALL", >ype_name); + if (args.hide_NO_CALL) print_it = 0; + count_missing++; + } else { + if (args.hide_OTHER) print_it = 0; + count_other++; + } + } + StringListFree(alleles); + free(gt); + } + + if (print_it && !args.hide_GT_table) { + row = TableNewRow(genotypeTable); + CellSetText(RowAt(row, 0), args.header->samples[i - 9]); + CellSetText(RowAt(row, 1), gtype_name.s); + RowAt(row, 1)->color = color; + + for (j = 0; j < values->size; j++) { + CellSetText(RowAt(row, j + 2), StringListAt(values, j)); + // color genotype if FORMAT/FT + if (ft_col == j) { + if (strcmp(StringListAt(values, j), "PASS") == 0 || + strcmp(StringListAt(values, j), ".") == 0) { + RowAt(row, j + 2)->color = COLOR_GREEN; + } else { + RowAt(row, j + 2)->color = COLOR_RED; + } + } + } + } + StringListFree(values); + ks_free(>ype_name); + } +#define ADD_GT(LABEL, COUNT) \ + if (COUNT > 0 && total > 0) { \ + row = TableNewRow(args.gtypeTable); \ + RowSetText(row, 0, LABEL); \ + CellSetLL(RowAt(row, 1), COUNT); \ + CellSetD(RowAt(row, 2), 100.0 * (COUNT / ((float)total))); \ + } + if (!args.hide_GTTYPE_table) { + int total = count_hom_ref + count_het + count_hom_var + count_missing + + count_other; + ADD_GT("REF only ", count_hom_ref) + ADD_GT("HET", count_het) + ADD_GT("ALT only", count_hom_var) + ADD_GT("MISSING", count_missing) + ADD_GT("OTHER", count_other) + + if (TableNRows(args.gtypeTable) > 0) { + fprintf(args.out, "# 
GENOTYPE TYPES\n"); + TablePrint(args.gtypeTable, &args); + } } +#undef ADD_GT + if (!args.hide_GT_table && TableNRows(genotypeTable) > 0) { + fprintf(args.out, "# GENOTYPES\n"); + TablePrint(genotypeTable, &args); + } + TableFree(genotypeTable); + StringListFree(formats); + } + + fputs(">>>", args.out); + PRINT_HEADER; + + fputc('\n', args.out); + + /** final cleanup */ + ks_free(&vcf_line); + StringListFree(tokens); + StringListFree(alt_alleles); + TableFree(bcsqTable); + TableFree(vepTable); + TableClear(args.annTable); + TableClear(args.lofTable); + TableClear(args.spliceaiTable); + TableClear(args.infoTable); + TableClear(args.vcTable); + TableClear(args.gtypeTable); + TableClear(args.hyperlinksTable); + return NULL; /* suppress bcf output */ +} +/** Final teardown of the state kept in the global args: hands the two token lists (args.vepTokens, args.bcsqTokens) to StringListFree() and the seven tables (spliceai, ann, lof, info, vc, gtype, hyperlinks) to TableFree(). These are the same seven tables that the preceding function only empties with TableClear() before it returns, so this is the one place in view where they are passed to TableFree(). NOTE(review): presumably the bcftools plugin destroy() hook, run once after the last record - confirm against the plugin API. */ void destroy(void) { + StringListFree(args.vepTokens); + StringListFree(args.bcsqTokens); + TableFree(args.spliceaiTable); + TableFree(args.annTable); + TableFree(args.lofTable); + TableFree(args.infoTable); + TableFree(args.vcTable); + TableFree(args.gtypeTable); + TableFree(args.hyperlinksTable); +} From 376bfb1e40ae34e85ec0056afa0e2ffb1aad147e Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum Date: Fri, 17 Jan 2025 22:44:14 +0100 Subject: [PATCH 9/9] updatedoc --- AUTHORS | 1 + doc/bcftools.html | 6 +++++- doc/bcftools.txt | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 3d902d4e..a8128b3f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -9,6 +9,7 @@ Alphabetical list of people who have made contributions: Javier Herrero Warren Kretzschmar Heng Li + Pierre Lindenbaum Shane McCarthy John Marshall Joel Martin diff --git a/doc/bcftools.html b/doc/bcftools.html index eb8b1cbe..061ccbfa 100644 --- a/doc/bcftools.html +++ b/doc/bcftools.html @@ -3977,6 +3977,10 @@

List of plugins coming wi

generate unsorted VariantKey-RSid index files in hexadecimal format

+
vcf2table
+
+

print the variants as a set of tables

+
@@ -5721,4 +5725,4 @@

COPYING

- \ No newline at end of file + diff --git a/doc/bcftools.txt b/doc/bcftools.txt index 932258ba..dfaeff69 100644 --- a/doc/bcftools.txt +++ b/doc/bcftools.txt @@ -2991,6 +2991,8 @@ By default, appropriate system directories are searched for installed plugins. *variantkey-hex*:: generate unsorted VariantKey-RSid index files in hexadecimal format +*vcf2table*:: + print the variants as a set of tables ==== Examples: