-
Notifications
You must be signed in to change notification settings - Fork 442
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
362 additions
and
141 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
/* The MIT License | ||
Copyright (C) 2011 by Attractive Chaos <[email protected]> | ||
Copyright (C) 2013-2014, 2016, 2018-2020, 2022, 2024 Genome Research Ltd. | ||
Copyright (C) 2013-2014, 2016, 2018-2020, 2022, 2024-2025 Genome Research Ltd. | ||
Permission is hereby granted, free of charge, to any person obtaining | ||
a copy of this software and associated documentation files (the | ||
|
@@ -449,6 +449,57 @@ static inline int *ksplit(kstring_t *s, int delimiter, int *n) | |
return offsets; | ||
} | ||
|
||
/** | ||
* kinsert_char - inserts a char to kstring | ||
* @param c - char to insert | ||
* @param pos - position at which to insert, starting from 0 | ||
* @param s - pointer to output string | ||
* Returns 0 on success and -1 on failure | ||
* 0 for pos inserts at start and length of current string as pos appends at | ||
* the end. | ||
*/ | ||
static inline int kinsert_char(char c, size_t pos, kstring_t *s) | ||
{ | ||
if (!s || pos > s->l) { | ||
return EOF; | ||
} | ||
if (ks_resize(s, s->l + 2) < 0) { | ||
return EOF; | ||
} | ||
memmove(s->s + pos + 1, s->s + pos, s->l - pos); | ||
s->s[pos] = c; | ||
s->s[++s->l] = 0; | ||
return 0; | ||
} | ||
|
||
/** | ||
* kinsert_str - inserts a null terminated string to kstring | ||
* @param str - string to insert | ||
* @param pos - position at which to insert, starting from 0 | ||
* @param s - pointer to output string | ||
* Returns 0 on success and -1 on failure | ||
* 0 for pos inserts at start and length of current string as pos appends at | ||
* the end. empty string makes no update. | ||
*/ | ||
static inline int kinsert_str(const char *str, size_t pos, kstring_t *s) | ||
{ | ||
size_t len = 0; | ||
if (!s || pos > s->l || !str) { | ||
return EOF; | ||
} | ||
if (!(len = strlen(str))) { | ||
return 0; | ||
} | ||
if (ks_resize(s, s->l + len + 1) < 0) { | ||
return EOF; | ||
} | ||
memmove(s->s + pos + len, s->s + pos, s->l - pos); | ||
memcpy(s->s + pos, str, len); | ||
s->l += len; | ||
s->s[s->l] = '\0'; | ||
return 0; | ||
} | ||
|
||
#ifdef HTSLIB_SSIZE_T | ||
#undef HTSLIB_SSIZE_T | ||
#undef ssize_t | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
/// High-level VCF/BCF variant calling file operations. | ||
/* | ||
Copyright (C) 2012, 2013 Broad Institute. | ||
Copyright (C) 2012-2020, 2022-2023 Genome Research Ltd. | ||
Copyright (C) 2012-2020, 2022-2025 Genome Research Ltd. | ||
Author: Heng Li <[email protected]> | ||
|
@@ -1501,141 +1501,25 @@ static inline int bcf_float_is_vector_end(float f) | |
return u.i==bcf_float_vector_end ? 1 : 0; | ||
} | ||
|
||
typedef enum bcf_version {v41 = 1, v42, v43, v44} bcf_version; | ||
/** | ||
* bcf_get_version - get the version as bcf_version enumeration | ||
* @param hdr - bcf header, to get version | ||
* @param ipver - pointer to return version | ||
* Returns 0 on success and -1 on failure | ||
*/ | ||
static inline int bcf_get_version(const bcf_hdr_t *hdr, bcf_version *ver) | ||
{ | ||
const char *version = NULL; | ||
|
||
if (!hdr || !ver) { | ||
return -1; | ||
} | ||
|
||
version = bcf_hdr_get_version(hdr); | ||
if (!strcmp("VCFv4.1", version)) { | ||
*ver = v41; | ||
} else if (!strcmp("VCFv4.2", version)) { | ||
*ver = v42; | ||
} else if (!strcmp("VCFv4.3", version)) { | ||
*ver = v43; | ||
} else { | ||
*ver = v44; | ||
} | ||
return 0; | ||
} | ||
|
||
/**
 *  bcf_format_gt - append the textual genotype of one sample to a string
 *  @param fmt     - pointer to bcf format data
 *  @param isample - index of the sample within fmt
 *  @param str     - pointer to output string
 *  Returns 0 on success, -1 if writing to str failed and -2 if fmt->type
 *  is not one of the handled types.
 *  Values are read until fmt->n entries are consumed or the vector-end
 *  marker is met. Bit 0 of each value selects the separator written before
 *  it ('/' or '|'; none before the first value) and the remaining bits
 *  hold the allele index plus one, with zero printed as '.'.
 */
static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
{
    uint32_t err = 0;

    #define EMIT_GT(type_t, decode, end_marker) do {                      \
        uint8_t *cur = fmt->p + isample*fmt->size;                        \
        int seen = 0;                                                     \
        while (seen < fmt->n) {                                           \
            type_t allele = decode(cur);                                  \
            if (allele == end_marker) break;                              \
            if (seen > 0)                                                 \
                err |= kputc("/|"[allele & 1], str) < 0;                  \
            err |= (allele >> 1) ? (kputw((allele >> 1) - 1, str) < 0)    \
                                 : (kputc('.', str) < 0);                 \
            cur += sizeof(type_t);                                        \
            seen++;                                                       \
        }                                                                 \
        /* no values at all: emit a single missing marker */              \
        if (seen == 0) err |= kputc('.', str) < 0;                        \
    } while (0)

    switch (fmt->type) {
        case BCF_BT_INT8:
            EMIT_GT(int8_t, le_to_i8, bcf_int8_vector_end);
            break;
        case BCF_BT_INT16:
            EMIT_GT(int16_t, le_to_i16, bcf_int16_vector_end);
            break;
        case BCF_BT_INT32:
            EMIT_GT(int32_t, le_to_i32, bcf_int32_vector_end);
            break;
        case BCF_BT_NULL:
            err |= kputc('.', str) < 0;
            break;
        default:
            hts_log_error("Unexpected type %d", fmt->type);
            return -2;
    }
    #undef EMIT_GT

    return err ? -1 : 0;
}
|
||
/** | ||
* bcf_format_gt1 - formats GT information on a string | ||
* bcf_format_gt_v2 - formats GT information on a string | ||
* @param hdr - bcf header, to get version | ||
* @param fmt - pointer to bcf format data | ||
* @param isample - position of interested sample in data | ||
* @param str - pointer to output string | ||
* Returns 0 on success and -1 on failure | ||
* This method is extended from bcf_format_gt to output phasing information | ||
* in accordance with v4.4 format, which supports explicit / prefixed phasing | ||
* for 1st allele. | ||
* Explicit / prefixed phasing for 1st allele is used only when it is a must to | ||
* correctly express phasing. | ||
* This method is preferred over bcf_format_gt as this supports vcf4.4 and | ||
* prefixed phasing. Explicit / prefixed phasing for 1st allele is used only | ||
* when it is a must to correctly express phasing. | ||
*/ | ||
static inline int bcf_format_gt1(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isample, kstring_t *str) | ||
HTSLIB_EXPORT | ||
int bcf_format_gt_v2(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isample, | ||
kstring_t *str) HTS_RESULT_USED; | ||
|
||
static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str) | ||
{ | ||
uint32_t e = 0; | ||
bcf_version ver = v42; | ||
int ploidy = 1, anyunphased = 0; | ||
int32_t val0 = 0; | ||
kstring_t tmp1 = KS_INITIALIZE, tmp2 = KS_INITIALIZE; | ||
|
||
if (bcf_get_version(hdr, &ver)) { | ||
hts_log_error("Failed to get version information"); | ||
return -1; | ||
} | ||
#define BRANCH(type_t, convert, missing, vector_end) { \ | ||
uint8_t *ptr = fmt->p + isample*fmt->size; \ | ||
int i; \ | ||
for (i=0; i<fmt->n; i++, ptr += sizeof(type_t)) \ | ||
{ \ | ||
type_t val = convert(ptr); \ | ||
if ( val == vector_end ) break; \ | ||
if (!i) { val0 = val; } \ | ||
if (i) { \ | ||
e |= kputc("/|"[val & 1], &tmp1) < 0; \ | ||
anyunphased |= !(val & 1); \ | ||
} \ | ||
if (!(val >> 1)) e |= kputc('.', &tmp1) < 0; \ | ||
else e |= kputw((val >> 1) - 1, &tmp1) < 0; \ | ||
} \ | ||
if (i == 0) e |= kputc('.', &tmp1) < 0; \ | ||
ploidy = i; \ | ||
} | ||
switch (fmt->type) { | ||
case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break; | ||
case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break; | ||
case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break; | ||
case BCF_BT_NULL: e |= kputc('.', &tmp1) < 0; break; | ||
default: hts_log_error("Unexpected type %d", fmt->type); return -2; | ||
} | ||
#undef BRANCH | ||
|
||
if (ver >= v44) { //output which supports prefixed phasing | ||
/* update 1st allele's phasing if required and append rest to it. | ||
use prefixed phasing only when it is a must. i.e. without which the | ||
inferred value will be incorrect */ | ||
if (val0 & 1) { | ||
/* 1st one is phased, if ploidy is > 1 and an unphased allele exists | ||
need to specify explicitly */ | ||
e |= (ploidy > 1 && anyunphased) ? | ||
(kputc('|', &tmp2) < 0) : | ||
(ploidy <= 1 && !((val0 >> 1)) ? //|. needs explicit o/p | ||
(kputc('|', &tmp2) < 0) : | ||
0); | ||
} else { | ||
/* 1st allele is unphased, if ploidy is = 1 or allele is '.' or | ||
ploidy > 1 and no other unphased allele exist, need to specify | ||
explicitly */ | ||
e |= ((ploidy <= 1 && val0 != 0) || (ploidy > 1 && !anyunphased)) ? | ||
(kputc('/', &tmp2) < 0) : | ||
0; | ||
} | ||
e |= kputsn(tmp1.s, tmp1.l, &tmp2) < 0; //append rest with updated one | ||
ks_free(&tmp1); | ||
tmp1 = tmp2; | ||
} | ||
//updated v44 string or <v44 without any update | ||
e |= kputsn(tmp1.s, tmp1.l, str) < 0; | ||
ks_free(&tmp1); | ||
return e == 0 ? 0 : -1; | ||
return bcf_format_gt_v2(NULL, fmt, isample, str); | ||
} | ||
|
||
static inline int bcf_enc_size(kstring_t *s, int size, int type) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/* test_kstring.c -- kstring unit tests | ||
Copyright (C) 2018, 2020, 2024 Genome Research Ltd. | ||
Copyright (C) 2018, 2020, 2024-2025 Genome Research Ltd. | ||
Author: Rob Davies <[email protected]> | ||
|
@@ -451,6 +451,134 @@ static int test_kgetline2(void) { | |
return EXIT_SUCCESS; | ||
} | ||
|
||
static int test_kinsertchar(void) { | ||
kstring_t t = KS_INITIALIZE, res = KS_INITIALIZE; | ||
int i = 0; | ||
struct data { | ||
int pos; | ||
const char *val; | ||
}; | ||
|
||
struct data tdata[] = { { -1, ""}, {0, "X0123"}, {1, "0X123"}, {2, "01X23"}, | ||
{3, "012X3"}, {4, "0123X"}, {5, ""} }; | ||
|
||
for (i = -1; i < 6; ++i) { | ||
kstring_t s = KS_INITIALIZE; | ||
kputs("0123", &s); | ||
if (kinsert_char('X', i, &s) < 0) { | ||
if ( i < 0 || i > 4) { ks_free(&s); continue; } //expected failures | ||
fprintf(stderr, "kinsert_char failed\n"); | ||
ks_free(&s); | ||
return -1; | ||
} | ||
if (s.s[s.l] != '\0') { | ||
fprintf(stderr, "No NUL termination on string from kinsert_char\n"); | ||
ks_free(&s); | ||
return -1; | ||
} | ||
if (memcmp(s.s, tdata[i + 1].val, s.l + 1)) { | ||
fprintf(stderr, "kinsert_char comparison failed\n"); | ||
ks_free(&s); | ||
return -1; | ||
} | ||
ks_free(&s); | ||
} | ||
//realloc checks | ||
for (i = 0; i < 7; ++i) { | ||
kputc('A' + i, &res); | ||
if (kinsert_char('A' + i, t.l, &t) < 0) { | ||
fprintf(stderr, "kinsert_char failed in realloc\n"); | ||
ks_free(&res); ks_free(&t); | ||
return -1; | ||
} | ||
if (t.s[t.l] != '\0') { | ||
fprintf(stderr, "No NUL termination on string from kinsert_char in realloc\n"); | ||
ks_free(&res); ks_free(&t); | ||
return -1; | ||
} | ||
if (memcmp(t.s, res.s, res.l+1)) { | ||
fprintf(stderr, "kinsert_char realloc comparison failed in realloc\n"); | ||
ks_free(&res); ks_free(&t); | ||
return -1; | ||
} | ||
} | ||
ks_free(&t); | ||
ks_free(&res); | ||
return 0; | ||
} | ||
|
||
static int test_kinsertstr(void) { | ||
kstring_t t = KS_INITIALIZE, res = KS_INITIALIZE; | ||
int i = 0; | ||
struct data { | ||
int pos; | ||
const char *val; | ||
}; | ||
|
||
struct data tdata[] = { { -1, ""}, {0, "XYZ0123"}, {1, "0XYZ123"}, | ||
{2, "01XYZ23"}, {3, "012XYZ3"}, {4, "0123XYZ"}, {5, ""} }; | ||
|
||
for (i = -1; i < 6; ++i) { | ||
kstring_t s = KS_INITIALIZE; | ||
kputs("0123", &s); | ||
if (kinsert_str("XYZ", i, &s) < 0) { | ||
if ( i < 0 || i > 4) { ks_free(&s); continue; } //expected failures | ||
fprintf(stderr, "kinsert_str failed\n"); | ||
return -1; | ||
} | ||
if (s.s[s.l] != '\0') { | ||
fprintf(stderr, "No NUL termination on string from kinsert_str\n"); | ||
return -1; | ||
} | ||
if (memcmp(s.s, tdata[i + 1].val, s.l + 1)) { | ||
fprintf(stderr, "kinsert_str comparison failed\n"); | ||
return -1; | ||
} | ||
ks_free(&s); | ||
} | ||
//realloc checks | ||
for (i = 0; i < 15; ++i) { | ||
kstring_t val = KS_INITIALIZE; | ||
ksprintf(&val, "%c", 'A' + i); | ||
kputs(val.s, &res); | ||
if (kinsert_str(val.s, t.l, &t) < 0) { | ||
ks_free(&val); | ||
fprintf(stderr, "kinsert_str failed in realloc\n"); | ||
return -1; | ||
} | ||
if (t.s[t.l] != '\0') { | ||
ks_free(&val); ks_free(&res); | ||
fprintf(stderr, "No NUL termination on string from kinsert_str in realloc\n"); | ||
return -1; | ||
} | ||
if (memcmp(t.s, res.s, res.l+1)) { | ||
ks_free(&val); ks_free(&res); | ||
fprintf(stderr, "kinsert_str realloc comparison failed in realloc\n"); | ||
return -1; | ||
} | ||
ks_free(&val); | ||
} | ||
//empty strings | ||
ks_free(&t); | ||
if (kinsert_str("", 1, &t)) { //expected | ||
if (kinsert_str("", 0, &t) || t.l != 0) { | ||
fprintf(stderr, "kinsert_str empty insertion failed\n"); | ||
return -1; | ||
} | ||
} else { | ||
fprintf(stderr, "kinsert_str empty ins to invalid pos succeeded\n"); | ||
return -1; | ||
} | ||
i = res.l; | ||
if (kinsert_str("", 1, &res) || i != res.l) { | ||
fprintf(stderr, "kinsert_str empty ins to valid pos failed\n"); | ||
ks_free(&res); | ||
return -1; | ||
} | ||
ks_free(&res); | ||
return 0; | ||
} | ||
|
||
int main(int argc, char **argv) { | ||
int opt, res = EXIT_SUCCESS; | ||
int64_t start = 0; | ||
|
@@ -500,5 +628,11 @@ int main(int argc, char **argv) { | |
if (!test || strcmp(test, "kgetline2") == 0) | ||
if (test_kgetline2() != 0) res = EXIT_FAILURE; | ||
|
||
if (!test || strcmp(test, "kinsertchar") == 0) | ||
if (test_kinsertchar() != 0) res = EXIT_FAILURE; | ||
|
||
if (!test || strcmp(test, "kinsertstr") == 0) | ||
if (test_kinsertstr() != 0) res = EXIT_FAILURE; | ||
|
||
return res; | ||
} |
Oops, something went wrong.