From 7aeb97ebfa9219762114beceba7be10481da2f69 Mon Sep 17 00:00:00 2001
From: Johannes Elferich
Date: Thu, 17 Oct 2024 20:36:36 -0400
Subject: [PATCH] Fix NaN parsing

---
Review notes (this section is ignored by `git am`):

* Line breaks and indentation were reconstructed from a copy of this patch
  whose whitespace had been collapsed. Context-line indentation and the
  position of the blank context line in the test hunk are best guesses;
  if the patch does not apply cleanly, check the context by hand or try
  `git apply --ignore-whitespace`.
* test_parse_na was revised in review: it uses pytest's `tmp_path` fixture
  instead of reopening an open NamedTemporaryFile by name, sets the NaN
  through `.loc` instead of mutating `.values` in place, uses deterministic
  data, and asserts that the NaN itself is read back.
* The blob ids on the tests/test_parsing.py `index` line predate that
  revision.

 src/starfile/parser.py |  1 +
 tests/test_parsing.py  | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/src/starfile/parser.py b/src/starfile/parser.py
index 0febed0..92afd60 100644
--- a/src/starfile/parser.py
+++ b/src/starfile/parser.py
@@ -128,6 +128,7 @@ def _parse_loop_block(self) -> pd.DataFrame:
                 comment='#',
                 dtype={column_name_to_index[k]: str for k in self.parse_as_string if k in loop_column_names},
                 keep_default_na=False,
+                na_values=['nan','NaN',''],
                 engine='c',
             )
             df.columns = loop_column_names
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index a94bf9e..fc12479 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -287,3 +287,23 @@ def test_parse_as_string():
     df = parser.data_blocks['fsc']
     assert df['rlnResolution'].dtype == 'object'

+
+def test_parse_na(tmp_path):
+    """A NaN written to a loop block is read back as a float NaN."""
+    import starfile
+
+    particles = pd.DataFrame({
+        "property1": np.arange(10),
+        "property2": np.linspace(0.0, 1.0, 10),
+    })
+    # Assign through .loc; writing into `.values` may hit a copy or a
+    # read-only array under pandas copy-on-write.
+    particles.loc[particles.index[-1], "property2"] = np.nan
+
+    star_path = tmp_path / "particles.star"
+    starfile.write({"particles": particles}, star_path)
+    df = starfile.read(star_path)
+
+    assert df["property2"].dtype == "float64"
+    assert np.isnan(df["property2"].iloc[-1])
+    assert df["property2"].notna().sum() == 9