diff --git a/htslib/vcf.h b/htslib/vcf.h index 6f049e611..0e5607a32 100644 --- a/htslib/vcf.h +++ b/htslib/vcf.h @@ -1617,12 +1617,15 @@ static inline int bcf_format_gt1(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isamp need to specify explicitly */ e |= (ploidy > 1 && anyunphased) ? (kputc('|', &tmp2) < 0) : + (ploidy <= 1 && !((val0 >> 1)) ? //|. needs explicit o/p + (kputc('|', &tmp2) < 0) : + 0); 0; } else { /* 1st allele is unphased, if ploidy is = 1 or allele is '.' or ploidy > 1 and no other unphased allele exist, need to specify explicitly */ - e |= ((ploidy <= 1) || (ploidy > 1 && !anyunphased)) ? + e |= ((ploidy <= 1 && val0 != 0) || (ploidy > 1 && !anyunphased)) ? (kputc('/', &tmp2) < 0) : 0; } diff --git a/test/vcf44_1.expected b/test/vcf44_1.expected index c696f09b5..b35b98412 100644 --- a/test/vcf44_1.expected +++ b/test/vcf44_1.expected @@ -1,6 +1,7 @@ ##fileformat=VCFv4.4 ##FILTER= ##contig= +##reference=file://test ##FORMAT= ##failue="test file on explicit and implicit phasing markers in 4.4" #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 @@ -22,6 +23,13 @@ 1 61494 rs56992752 T A 100 PASS . GT /0|0 1/0 1 61495 rs56992752 T A 100 PASS . GT 0|0 |1/0 1 61496 rs56992752 T A 100 PASS . GT . . -1 61497 rs56992752 T A 100 PASS . GT ./1 .|1 -1 61498 rs56992752 T A 100 PASS . GT 1/. 1|. -1 61499 rs56992752 T A 100 PASS . GT ./. .|. +1 61497 rs56992752 T A 100 PASS . GT . |. +1 61498 rs56992752 T A 100 PASS . GT ./1 .|1 +1 61499 rs56992752 T A 100 PASS . GT ./1 .|1 +1 61500 rs56992752 T A 100 PASS . GT |./1 /.|1 +1 61501 rs56992752 T A 100 PASS . GT 1/. 1|. +1 61502 rs56992752 T A 100 PASS . GT 1/. /1|. +1 61503 rs56992752 T A 100 PASS . GT |1/. 1|. +1 61504 rs56992752 T A 100 PASS . GT ./. .|. +1 61505 rs56992752 T A 100 PASS . GT ./. .|. +1 61506 rs56992752 T A 100 PASS . GT |./. /.|. diff --git a/test/vcf44_1.vcf b/test/vcf44_1.vcf index ed726e7f5..b4a1cbb34 100644 --- a/test/vcf44_1.vcf +++ b/test/vcf44_1.vcf @@ -1,5 +1,6 @@ ##fileformat=VCFv4.4 ##contig= +##reference=file://test ##FORMAT= ##failue="test file on explicit and implicit phasing markers in 4.4" #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 @@ -21,6 +22,13 @@ 1 61494 rs56992752 T A 100 PASS . GT /0|0 1/0 1 61495 rs56992752 T A 100 PASS . GT 0|0 |1/0 1 61496 rs56992752 T A 100 PASS . GT . . -1 61497 rs56992752 T A 100 PASS . GT ./1 .|1 -1 61498 rs56992752 T A 100 PASS . GT 1/. 1|. -1 61499 rs56992752 T A 100 PASS . GT ./. .|. +1 61497 rs56992752 T A 100 PASS . GT /. |. +1 61498 rs56992752 T A 100 PASS . GT ./1 .|1 +1 61499 rs56992752 T A 100 PASS . GT /./1 |.|1 +1 61500 rs56992752 T A 100 PASS . GT |./1 /.|1 +1 61501 rs56992752 T A 100 PASS . GT 1/. 1|. +1 61502 rs56992752 T A 100 PASS . GT /1/. /1|. +1 61503 rs56992752 T A 100 PASS . GT |1/. |1|. +1 61504 rs56992752 T A 100 PASS . GT ./. .|. +1 61505 rs56992752 T A 100 PASS . GT /./. |.|. +1 61506 rs56992752 T A 100 PASS . GT |./. /.|. \ No newline at end of file diff --git a/vcf.c b/vcf.c index 1c0e4b922..fa0438fae 100644 --- a/vcf.c +++ b/vcf.c @@ -3111,7 +3111,7 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, uint32_t unreadable = 0; uint32_t max = 0; int overflow = 0, ploidy = 0, anyunphased = 0, \ - phasingprfx = 0; + phasingprfx = 0, unknown1 = 0; /* with prefixed phasing, it is explicitly given for 1st one with non-prefixed, set based on ploidy and phasing of other @@ -3126,6 +3126,9 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, ploidy++; if (*t == '.') { ++t, x[l++] = is_phased; + if (l==1) { //for 1st allele only + unknown1 = 1; + } } else { const char *tt = t; uint32_t val; @@ -3151,9 +3154,11 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, /* no explicit phasing for 1st allele, set based on other alleles and ploidy */ if (ploidy == 1) { //implicitly phased - x[0]|= 1; + if (!unknown1) { + x[0] |= 1; + } } else { //set by other unphased alleles - x[0] |= anyunphased ? 0 : 1; + x[0] |= (anyunphased)? 0 : 1; } } // Possibly check max against v->n_allele instead?