From adbd640ebff63dad582a61def07e7c300dd913c5 Mon Sep 17 00:00:00 2001
From: Nate Parsons
Date: Wed, 25 Oct 2023 09:52:19 -0500
Subject: [PATCH] fix bug in PercentTrue primitive

---
Reviewer notes (commentary only, placed after the three-dash marker so it
is not part of the commit message):

* percent_true.py: PercentTrue.default_value changes from 0 to pd.NA, and
  pandas is imported as pd to provide that value.
* test_percent_true.py (new file): builds an EntitySet with customers 1 and 2
  and a single transaction that belongs to customer 1, runs ft.dfs with the
  "percent_true" aggregation primitive, and asserts that the
  PERCENT_TRUE(transactions_df.is_foo) value for customer 2 is NA.
* NOTE(review): presumably default_value is what DFS reports for a parent
  row that has no child rows; confirm against AggregationPrimitive.

 .../standard/aggregation/percent_true.py      |  3 +-
 .../test_percent_true.py                      | 40 +++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py

diff --git a/featuretools/primitives/standard/aggregation/percent_true.py b/featuretools/primitives/standard/aggregation/percent_true.py
index 32ce39a183..a24acc3f28 100644
--- a/featuretools/primitives/standard/aggregation/percent_true.py
+++ b/featuretools/primitives/standard/aggregation/percent_true.py
@@ -1,3 +1,4 @@
+import pandas as pd
 from woodwork.column_schema import ColumnSchema
 from woodwork.logical_types import Boolean, BooleanNullable, Double
 
@@ -30,7 +31,7 @@ class PercentTrue(AggregationPrimitive):
     return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
     stack_on = []
     stack_on_exclude = []
-    default_value = 0
+    default_value = pd.NA
     compatibility = [Library.PANDAS, Library.DASK]
     description_template = "the percentage of true values in {}"
 
diff --git a/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py b/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py
new file mode 100644
index 0000000000..86ff728bd3
--- /dev/null
+++ b/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py
@@ -0,0 +1,40 @@
+import pandas as pd
+from woodwork.logical_types import BooleanNullable
+
+import featuretools as ft
+
+
+def test_percent_true_default_value_with_dfs():
+    es = ft.EntitySet(id="customer_data")
+
+    customers_df = pd.DataFrame(data={"customer_id": [1, 2]})
+    transactions_df = pd.DataFrame(
+        data={"tx_id": [1], "customer_id": [1], "is_foo": [True]},
+    )
+
+    es.add_dataframe(
+        dataframe_name="customers_df",
+        dataframe=customers_df,
+        index="customer_id",
+    )
+    es.add_dataframe(
+        dataframe_name="transactions_df",
+        dataframe=transactions_df,
+        index="tx_id",
+        logical_types={"is_foo": BooleanNullable},
+    )
+
+    es = es.add_relationship(
+        "customers_df",
+        "customer_id",
+        "transactions_df",
+        "customer_id",
+    )
+
+    feature_matrix, _ = ft.dfs(
+        entityset=es,
+        target_dataframe_name="customers_df",
+        agg_primitives=["percent_true"],
+    )
+
+    assert pd.isna(feature_matrix["PERCENT_TRUE(transactions_df.is_foo)"][2])