From 7b9dc85cebdb51c1f209d12ed2f3e66ca36c82ae Mon Sep 17 00:00:00 2001 From: Jeremy Myslinski Date: Mon, 24 Jun 2024 17:47:06 -0700 Subject: [PATCH] fix: A sample column value of 0|0 is not being parsed correctly --- tests/test_call.py | 13 +++++++++++++ tests/test_header.py | 22 ++++++++++++++++++++++ tests/test_parser_record.py | 16 ++++++++++++++++ vcfpy/header.py | 10 +++++----- 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/tests/test_call.py b/tests/test_call.py index 6eafd64..beae695 100644 --- a/tests/test_call.py +++ b/tests/test_call.py @@ -27,6 +27,19 @@ def build_rec(calls=None, format_extras=None): ) +# Call.__init__() ------------------------------------------------------------ + + +def test_call_init_with_gt(): + call = record.Call("sample", vcfpy.OrderedDict([("GT", "0|1")])) + expected_length = 2 + expected_allele_0 = 0 + expected_allele_1 = 1 + assert len(call.gt_alleles) == expected_length + assert call.gt_alleles[0] == expected_allele_0 + assert call.gt_alleles[1] == expected_allele_1 + + # Call.is_phased() ------------------------------------------------------------ diff --git a/tests/test_header.py b/tests/test_header.py index 6c844c8..d026f59 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -348,3 +348,25 @@ def test_header_has_header_line_positive_no_samples(): assert not hdr.has_header_line("INFO", "AD") assert not hdr.has_header_line("FILTER", "PASS") assert not hdr.has_header_line("contig", "1") + + +def test_header_get_format_field_info(): + lines = [] + samples = header.SamplesInfos(["one", "two", "three"]) + hdr = header.Header(lines, samples) + gt_field_info = hdr.get_format_field_info("GT") + + expected = header.RESERVED_FORMAT["GT"] + + assert gt_field_info is expected + + +def test_header_get_info_format_field_info(): + lines = [] + samples = header.SamplesInfos(["one", "two", "three"]) + hdr = header.Header(lines, samples) + gt_field_info = hdr.get_info_field_info("AA") + + expected = header.RESERVED_INFO["AA"] + + assert gt_field_info is expected diff --git a/tests/test_parser_record.py b/tests/test_parser_record.py index b8d5f02..af50cf5 100644 --- a/tests/test_parser_record.py +++ b/tests/test_parser_record.py @@ -10,6 +10,12 @@ __author__ = "Manuel Holtgrewe " +SMALL_HEADER = """ +##fileformat=VCFv4.3 +#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003 +""".lstrip() + + MEDIUM_HEADER = """ ##fileformat=VCFv4.3 ##fileDate=20090805 @@ -178,3 +184,13 @@ def test_missing_pass(recwarn): RESULT = p.parse_next_record() assert str(RESULT) == EXPECTED assert list(recwarn) == [] + + +def test_parse_record_with_gt_data(): + LINES = "20\t1\t.\tC\tG\t.\t.\tAA=G\tGT\t0|1\t1/1\t.\n" + p = parser.Parser(io.StringIO(SMALL_HEADER + LINES)) + p.parse_header() + record = p.parse_next_record() + assert record.calls[0].data["GT"] == "0|1" + assert record.calls[1].data["GT"] == "1/1" + assert record.calls[2].data["GT"] is None diff --git a/vcfpy/header.py b/vcfpy/header.py index 1e3960e..d15bc87 100644 --- a/vcfpy/header.py +++ b/vcfpy/header.py @@ -396,18 +396,18 @@ def add_line(self, header_line): def get_info_field_info(self, key): """Return :py:class:`FieldInfo` for the given INFO field""" - return self._get_field_info("INFO", key) + return self._get_field_info("INFO", key, RESERVED_INFO) def get_format_field_info(self, key): """Return :py:class:`FieldInfo` for the given INFO field""" - return self._get_field_info("FORMAT", key) + return self._get_field_info("FORMAT", key, RESERVED_FORMAT) - def _get_field_info(self, type_, key): + def _get_field_info(self, type_, key, reserved): result = self._indices[type_].get(key) if result: return result - if key in RESERVED_INFO: - res = FieldInfo(RESERVED_INFO[key].type, RESERVED_INFO[key].number) + if key in reserved: + res = reserved[key] else: res = FieldInfo("String", HEADER_NUMBER_UNBOUNDED) warnings.warn(