Skip to content

Commit

Permalink
test: Use NaN for missing matches
Browse files (browse the repository at this point in the history)
  • Loading branch information
roquelopez committed Aug 7, 2024
1 parent 3fa37fb commit 930b65f
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import bdikit as bdi
import pandas as pd
import numpy as np
from bdikit.mapping_algorithms.value_mapping.value_mappers import (
FunctionValueMapper,
IdentityValueMapper,
Expand Down Expand Up @@ -205,7 +206,7 @@ def test_end_to_end_api_integration():
assert len(column_mappings.index) == 1

# when: pass output of match_schema() directly to materialize_mapping(),
# the column must be ranamed to the target column without any value mapping
# the column must be renamed to the target column without any value mapping
df_mapped = bdi.materialize_mapping(df_source, column_mappings)
# then
assert "tgt_column" in df_mapped.columns
Expand Down Expand Up @@ -237,17 +238,18 @@ def test_end_to_end_api_integration():
# matching values found during the value matching step
assert isinstance(df_mapped, pd.DataFrame)
assert "tgt_column" in df_mapped.columns
assert df_mapped["tgt_column"].tolist() == ["apple", "banana", "orange", None]
assert df_mapped["tgt_column"].tolist() == ["apple", "banana", "orange", np.nan]

# when: pass output of match_values() to merge_mappings() and then to
# materialize_mapping()

harmonization_spec = bdi.merge_mappings(value_mappings, [])
df_mapped = bdi.materialize_mapping(df_source, harmonization_spec)

# then: the column must be ranamed and values must be mapped
# then: the column must be renamed and values must be mapped
assert isinstance(df_mapped, pd.DataFrame)
assert "tgt_column" in df_mapped.columns
assert df_mapped["tgt_column"].tolist() == ["apple", "banana", "orange", None]
assert df_mapped["tgt_column"].tolist() == ["apple", "banana", "orange", np.nan]

# when: user mappings are specified in merge_mappings()
user_mappings = [
Expand All @@ -264,10 +266,10 @@ def test_end_to_end_api_integration():
harmonization_spec = bdi.merge_mappings(value_mappings, user_mappings)
df_mapped = bdi.materialize_mapping(df_source, harmonization_spec)

# then: user mappings take precedence, so the column must be ranamed and
# then: user mappings take precedence, so the column must be renamed and
# values must be mapped according to the provided user_mappings
assert "tgt_column" in df_mapped.columns
assert df_mapped["tgt_column"].tolist() == ["APPLE", "BANANA", "ORANGE", None]
assert df_mapped["tgt_column"].tolist() == ["APPLE", "BANANA", "ORANGE", np.nan]


def test_top_matches_and_match_values_integration():
Expand Down

0 comments on commit 930b65f

Please sign in to comment.