From adbd640ebff63dad582a61def07e7c300dd913c5 Mon Sep 17 00:00:00 2001 From: Nate Parsons Date: Wed, 25 Oct 2023 09:52:19 -0500 Subject: [PATCH 1/4] fix bug in PercentTrue primitive --- .../standard/aggregation/percent_true.py | 3 +- .../test_percent_true.py | 40 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py diff --git a/featuretools/primitives/standard/aggregation/percent_true.py b/featuretools/primitives/standard/aggregation/percent_true.py index 32ce39a183..a24acc3f28 100644 --- a/featuretools/primitives/standard/aggregation/percent_true.py +++ b/featuretools/primitives/standard/aggregation/percent_true.py @@ -1,3 +1,4 @@ +import pandas as pd from woodwork.column_schema import ColumnSchema from woodwork.logical_types import Boolean, BooleanNullable, Double @@ -30,7 +31,7 @@ class PercentTrue(AggregationPrimitive): return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"}) stack_on = [] stack_on_exclude = [] - default_value = 0 + default_value = pd.NA compatibility = [Library.PANDAS, Library.DASK] description_template = "the percentage of true values in {}" diff --git a/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py b/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py new file mode 100644 index 0000000000..86ff728bd3 --- /dev/null +++ b/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py @@ -0,0 +1,40 @@ +import pandas as pd +from woodwork.logical_types import BooleanNullable + +import featuretools as ft + + +def test_percent_true_default_value_with_dfs(): + es = ft.EntitySet(id="customer_data") + + customers_df = pd.DataFrame(data={"customer_id": [1, 2]}) + transactions_df = pd.DataFrame( + data={"tx_id": [1], "customer_id": [1], "is_foo": [True]}, + ) + + es.add_dataframe( + dataframe_name="customers_df", + 
dataframe=customers_df, + index="customer_id", + ) + es.add_dataframe( + dataframe_name="transactions_df", + dataframe=transactions_df, + index="tx_id", + logical_types={"is_foo": BooleanNullable}, + ) + + es = es.add_relationship( + "customers_df", + "customer_id", + "transactions_df", + "customer_id", + ) + + feature_matrix, _ = ft.dfs( + entityset=es, + target_dataframe_name="customers_df", + agg_primitives=["percent_true"], + ) + + assert pd.isna(feature_matrix["PERCENT_TRUE(transactions_df.is_foo)"][2]) From d11ee4564316f8628f45d00bd6c3dd8fd7f0572a Mon Sep 17 00:00:00 2001 From: Nate Parsons Date: Wed, 25 Oct 2023 10:08:03 -0500 Subject: [PATCH 2/4] Revert "fix bug in PercentTrue primitive" This reverts commit adbd640ebff63dad582a61def07e7c300dd913c5. --- .../standard/aggregation/percent_true.py | 3 +- .../test_percent_true.py | 40 ------------------- 2 files changed, 1 insertion(+), 42 deletions(-) delete mode 100644 featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py diff --git a/featuretools/primitives/standard/aggregation/percent_true.py b/featuretools/primitives/standard/aggregation/percent_true.py index a24acc3f28..32ce39a183 100644 --- a/featuretools/primitives/standard/aggregation/percent_true.py +++ b/featuretools/primitives/standard/aggregation/percent_true.py @@ -1,4 +1,3 @@ -import pandas as pd from woodwork.column_schema import ColumnSchema from woodwork.logical_types import Boolean, BooleanNullable, Double @@ -31,7 +30,7 @@ class PercentTrue(AggregationPrimitive): return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"}) stack_on = [] stack_on_exclude = [] - default_value = pd.NA + default_value = 0 compatibility = [Library.PANDAS, Library.DASK] description_template = "the percentage of true values in {}" diff --git a/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py b/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py deleted file 
mode 100644 index 86ff728bd3..0000000000 --- a/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py +++ /dev/null @@ -1,40 +0,0 @@ -import pandas as pd -from woodwork.logical_types import BooleanNullable - -import featuretools as ft - - -def test_percent_true_default_value_with_dfs(): - es = ft.EntitySet(id="customer_data") - - customers_df = pd.DataFrame(data={"customer_id": [1, 2]}) - transactions_df = pd.DataFrame( - data={"tx_id": [1], "customer_id": [1], "is_foo": [True]}, - ) - - es.add_dataframe( - dataframe_name="customers_df", - dataframe=customers_df, - index="customer_id", - ) - es.add_dataframe( - dataframe_name="transactions_df", - dataframe=transactions_df, - index="tx_id", - logical_types={"is_foo": BooleanNullable}, - ) - - es = es.add_relationship( - "customers_df", - "customer_id", - "transactions_df", - "customer_id", - ) - - feature_matrix, _ = ft.dfs( - entityset=es, - target_dataframe_name="customers_df", - agg_primitives=["percent_true"], - ) - - assert pd.isna(feature_matrix["PERCENT_TRUE(transactions_df.is_foo)"][2]) From 8ca1595658ab527dd412e11a91ffc9eb69f296f4 Mon Sep 17 00:00:00 2001 From: Nate Parsons Date: Wed, 25 Oct 2023 10:13:48 -0500 Subject: [PATCH 3/4] fix bug in PercentTrue primitive --- .../standard/aggregation/percent_true.py | 3 +- .../test_percent_true.py | 40 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py diff --git a/featuretools/primitives/standard/aggregation/percent_true.py b/featuretools/primitives/standard/aggregation/percent_true.py index 32ce39a183..a24acc3f28 100644 --- a/featuretools/primitives/standard/aggregation/percent_true.py +++ b/featuretools/primitives/standard/aggregation/percent_true.py @@ -1,3 +1,4 @@ +import pandas as pd from woodwork.column_schema import ColumnSchema from woodwork.logical_types import Boolean, BooleanNullable, Double @@ -30,7 +31,7 
@@ class PercentTrue(AggregationPrimitive): return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"}) stack_on = [] stack_on_exclude = [] - default_value = 0 + default_value = pd.NA compatibility = [Library.PANDAS, Library.DASK] description_template = "the percentage of true values in {}" diff --git a/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py b/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py new file mode 100644 index 0000000000..86ff728bd3 --- /dev/null +++ b/featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py @@ -0,0 +1,40 @@ +import pandas as pd +from woodwork.logical_types import BooleanNullable + +import featuretools as ft + + +def test_percent_true_default_value_with_dfs(): + es = ft.EntitySet(id="customer_data") + + customers_df = pd.DataFrame(data={"customer_id": [1, 2]}) + transactions_df = pd.DataFrame( + data={"tx_id": [1], "customer_id": [1], "is_foo": [True]}, + ) + + es.add_dataframe( + dataframe_name="customers_df", + dataframe=customers_df, + index="customer_id", + ) + es.add_dataframe( + dataframe_name="transactions_df", + dataframe=transactions_df, + index="tx_id", + logical_types={"is_foo": BooleanNullable}, + ) + + es = es.add_relationship( + "customers_df", + "customer_id", + "transactions_df", + "customer_id", + ) + + feature_matrix, _ = ft.dfs( + entityset=es, + target_dataframe_name="customers_df", + agg_primitives=["percent_true"], + ) + + assert pd.isna(feature_matrix["PERCENT_TRUE(transactions_df.is_foo)"][2]) From 3553073363cddcb6cdba3e20b56b6c73eb974413 Mon Sep 17 00:00:00 2001 From: Nate Parsons Date: Wed, 25 Oct 2023 10:17:35 -0500 Subject: [PATCH 4/4] update release notes --- docs/source/release_notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 92229543b0..79aa960eb7 100644 --- a/docs/source/release_notes.rst +++ 
b/docs/source/release_notes.rst @@ -7,6 +7,7 @@ Future Release ============== * Enhancements * Fixes + * Fix bug in ``PercentTrue`` primitive by changing its default value from ``0`` to ``pd.NA`` (:pr:`2626`) * Changes * Refactor ``featuretools/tests/primitive_tests/utils.py`` to leverage list comprehensions for improved Pythonic quality (:pr:`2607`) * Refactor ``can_stack_primitive_on_inputs`` (:pr:`2522`)