From 930b65ffefbb8c7ef4bee109a64abae5432b3b3f Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Wed, 7 Aug 2024 12:26:50 -0400 Subject: [PATCH] test: Use NaN for missing matches --- tests/test_api.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 6d1be181..3e2c6458 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,5 +1,6 @@ import bdikit as bdi import pandas as pd +import numpy as np from bdikit.mapping_algorithms.value_mapping.value_mappers import ( FunctionValueMapper, IdentityValueMapper, @@ -205,7 +206,7 @@ def test_end_to_end_api_integration(): assert len(column_mappings.index) == 1 # when: pass output of match_schema() directly to materialize_mapping(), - # the column must be ranamed to the target column without any value mapping + # the column must be renamed to the target column without any value mapping df_mapped = bdi.materialize_mapping(df_source, column_mappings) # then assert "tgt_column" in df_mapped.columns @@ -237,17 +238,18 @@ def test_end_to_end_api_integration(): # matching values found during the value matching step assert isinstance(df_mapped, pd.DataFrame) assert "tgt_column" in df_mapped.columns - assert df_mapped["tgt_column"].tolist() == ["apple", "banana", "orange", None] + assert df_mapped["tgt_column"].tolist() == ["apple", "banana", "orange", np.nan] # when: pass output of match_values() to merge_mappings() and then to # materialize_mapping() + harmonization_spec = bdi.merge_mappings(value_mappings, []) df_mapped = bdi.materialize_mapping(df_source, harmonization_spec) - # then: the column must be ranamed and values must be mapped + # then: the column must be renamed and values must be mapped assert isinstance(df_mapped, pd.DataFrame) assert "tgt_column" in df_mapped.columns - assert df_mapped["tgt_column"].tolist() == ["apple", "banana", "orange", None] + assert df_mapped["tgt_column"].tolist() == ["apple", "banana", "orange", np.nan] # when: user mappings are specified in merge_mappings() user_mappings = [ @@ -264,10 +266,10 @@ def test_end_to_end_api_integration(): harmonization_spec = bdi.merge_mappings(value_mappings, user_mappings) df_mapped = bdi.materialize_mapping(df_source, harmonization_spec) - # then: user mappings take precedence, so the column must be ranamed and + # then: user mappings take precedence, so the column must be renamed and # values must be mapped according the provide user_mappings assert "tgt_column" in df_mapped.columns - assert df_mapped["tgt_column"].tolist() == ["APPLE", "BANANA", "ORANGE", None] + assert df_mapped["tgt_column"].tolist() == ["APPLE", "BANANA", "ORANGE", np.nan] def test_top_matches_and_match_values_integration():