From 08f30334ff2a126b7ebbf22167a9dc73d024409e Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Mon, 25 Nov 2024 12:26:22 -0500 Subject: [PATCH 1/2] fix: Fix preview_domain() when there are no sample values --- bdikit/api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bdikit/api.py b/bdikit/api.py index 92f0d9f..0987085 100644 --- a/bdikit/api.py +++ b/bdikit/api.py @@ -651,7 +651,10 @@ def preview_domain( value_names = value_names[:limit] value_descriptions = value_descriptions[:limit] - domain = {"value_name": value_names} + domain = {} + + if len(value_names) > 0: + domain["value_name"] = value_names if len(value_descriptions) > 0: domain["value_description"] = value_descriptions From a131caabd6a66a4f62d344ddf8384332111b6cd4 Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Mon, 25 Nov 2024 12:27:07 -0500 Subject: [PATCH 2/2] tests: Add tests to preview_domain() --- tests/test_api.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/test_api.py b/tests/test_api.py index f4ad733..52c3445 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -385,3 +385,39 @@ def test_top_value_matches(): assert "source" in df_match.columns assert "target" in df_match.columns assert "similarity" in df_match.columns + +def test_preview_domain(): + # given + source = pd.DataFrame( + { + "name": ["John Doe", "Jane Doe", "Alice Smith", "Bob Smith"], + "age": [30, 25, 45, 35], + } + ) + + # when + preview = bdi.preview_domain(source, "age") + + # then + # preview must contain only the column "value_name" and the unique + # values of the column "age" + assert preview is not None + assert isinstance(preview, pd.DataFrame) + assert "value_name" in preview.columns + assert "column_description" not in preview.columns + assert "value_description" not in preview.columns + assert source["age"].eq(preview["value_name"]).all() + + # when + preview = bdi.preview_domain("gdc", "age_at_diagnosis") + + # then + # preview must 
contain only the column "column_description" since there + # are no sample values in the GDC dictionary + assert preview is not None + assert isinstance(preview, pd.DataFrame) + assert "value_name" not in preview.columns + assert "value_description" not in preview.columns + assert "column_description" in preview.columns + +