diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 92229543b0..79aa960eb7 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -7,6 +7,7 @@ Future Release
 ==============
     * Enhancements
     * Fixes
+        * Fix bug with default value in ``PercentTrue`` primitive (:pr:`2626`)
     * Changes
         * Refactor ``featuretools/tests/primitive_tests/utils.py`` to leverage list comprehensions for improved Pythonic quality (:pr:`2607`)
         * Refactor ``can_stack_primitive_on_inputs`` (:pr:`2522`)
diff --git a/featuretools/primitives/standard/aggregation/percent_true.py b/featuretools/primitives/standard/aggregation/percent_true.py
index 32ce39a183..a24acc3f28 100644
--- a/featuretools/primitives/standard/aggregation/percent_true.py
+++ b/featuretools/primitives/standard/aggregation/percent_true.py
@@ -1,3 +1,4 @@
+import pandas as pd
 from woodwork.column_schema import ColumnSchema
 from woodwork.logical_types import Boolean, BooleanNullable, Double
 
@@ -30,7 +31,7 @@ class PercentTrue(AggregationPrimitive):
     return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
     stack_on = []
     stack_on_exclude = []
-    default_value = 0
+    default_value = pd.NA
     compatibility = [Library.PANDAS, Library.DASK]
     description_template = "the percentage of true values in {}"
 
diff --git a/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py b/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py
new file mode 100644
index 0000000000..86ff728bd3
--- /dev/null
+++ b/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py
@@ -0,0 +1,48 @@
+import pandas as pd
+from woodwork.logical_types import BooleanNullable
+
+import featuretools as ft
+
+
+def test_percent_true_default_value_with_dfs():
+    """Check that PercentTrue is missing for a parent with no child rows.
+
+    Customer 2 has no transactions, so its ``PERCENT_TRUE`` feature is expected
+    to fall back to the primitive's ``default_value`` (missing, not ``0``).
+    """
+    es = ft.EntitySet(id="customer_data")
+
+    customers_df = pd.DataFrame(data={"customer_id": [1, 2]})
+    transactions_df = pd.DataFrame(
+        data={"tx_id": [1], "customer_id": [1], "is_foo": [True]},
+    )
+
+    es.add_dataframe(
+        dataframe_name="customers_df",
+        dataframe=customers_df,
+        index="customer_id",
+    )
+    es.add_dataframe(
+        dataframe_name="transactions_df",
+        dataframe=transactions_df,
+        index="tx_id",
+        logical_types={"is_foo": BooleanNullable},
+    )
+
+    es = es.add_relationship(
+        "customers_df",
+        "customer_id",
+        "transactions_df",
+        "customer_id",
+    )
+
+    feature_matrix, _ = ft.dfs(
+        entityset=es,
+        target_dataframe_name="customers_df",
+        agg_primitives=["percent_true"],
+    )
+
+    # Row labels are expected to be customer_id values; .loc keeps the lookup
+    # explicitly label-based instead of relying on chained Series ``[]`` access.
+    feature_name = "PERCENT_TRUE(transactions_df.is_foo)"
+    assert pd.isna(feature_matrix.loc[2, feature_name])