From 9222a4bc635f5876e7c8ccbf92d9d6330ec5be75 Mon Sep 17 00:00:00 2001 From: Gaurav Sheni Date: Mon, 4 Dec 2023 11:21:38 -0500 Subject: [PATCH] update holidays --- .pre-commit-config.yaml | 17 +++++------ .readthedocs.yaml | 2 +- Makefile | 8 +++--- docs/source/getting_started/primitives.ipynb | 7 +++-- .../guides/advanced_custom_primitives.ipynb | 10 ++++--- docs/source/guides/feature_selection.ipynb | 5 ++-- docs/source/guides/time_series.ipynb | 5 ++-- .../source/guides/using_dask_entitysets.ipynb | 5 ++-- .../guides/using_spark_entitysets.ipynb | 3 +- .../frequently_asked_questions.ipynb | 13 +++++---- .../resources/transition_to_ft_v1.0.ipynb | 4 +-- .../feature_base/feature_descriptions.py | 3 +- .../transform/datetime/distance_to_holiday.py | 5 +++- .../synthesis/deep_feature_synthesis.py | 7 +++-- .../test_feature_serialization.py | 4 +-- .../test_rolling_primitive_utils.py | 14 ++++++---- .../test_distancetoholiday_primitive.py | 28 ++++++------------- featuretools/utils/recommend_primitives.py | 22 +++++++-------- pyproject.toml | 11 ++------ 19 files changed, 82 insertions(+), 91 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5948fbad6f..b89b67125b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,18 +25,15 @@ repos: hooks: - id: add-trailing-comma name: Add trailing comma - - repo: https://github.com/python/black - rev: 22.12.0 - hooks: - - id: black - args: - - --config=./pyproject.toml - additional_dependencies: [".[jupyter]"] - types_or: [python, jupyter] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.0.239' + rev: 'v0.1.6' hooks: - id: ruff + types_or: [ python, pyi, jupyter ] args: - - --config=./pyproject.toml - --fix + - --config=./pyproject.toml + - id: ruff-format + types_or: [ python, pyi, jupyter ] + args: + - --config=./pyproject.toml diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b7658c191d..9e1d98e92b 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -20,7 +20,7 @@ build: - graphviz - openjdk-11-jre-headless jobs: - post_build: + post_build: - export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64" python: diff --git a/Makefile b/Makefile index 0635674e97..323ef82282 100644 --- a/Makefile +++ b/Makefile @@ -9,14 +9,14 @@ clean: .PHONY: lint lint: python docs/notebook_version_standardizer.py check-execution - black . --config=./pyproject.toml --check - ruff . --config=./pyproject.toml + ruff check . --config=./pyproject.toml + ruff format . --check --config=./pyproject.toml .PHONY: lint-fix lint-fix: python docs/notebook_version_standardizer.py standardize - black . --config=./pyproject.toml - ruff . --fix --config=./pyproject.toml + ruff check . --fix --config=./pyproject.toml + ruff format . --config=./pyproject.toml .PHONY: test test: diff --git a/docs/source/getting_started/primitives.ipynb b/docs/source/getting_started/primitives.ipynb index 5c35c3c5fe..50fcfd851e 100644 --- a/docs/source/getting_started/primitives.ipynb +++ b/docs/source/getting_started/primitives.ipynb @@ -186,11 +186,12 @@ "metadata": {}, "outputs": [], "source": [ - "from featuretools.primitives import AggregationPrimitive, TransformPrimitive\n", - "from featuretools.tests.testing_utils import make_ecommerce_entityset\n", + "import pandas as pd\n", "from woodwork.column_schema import ColumnSchema\n", "from woodwork.logical_types import Datetime, NaturalLanguage\n", - "import pandas as pd" + "\n", + "from featuretools.primitives import AggregationPrimitive, TransformPrimitive\n", + "from featuretools.tests.testing_utils import make_ecommerce_entityset" ] }, { diff --git a/docs/source/guides/advanced_custom_primitives.ipynb b/docs/source/guides/advanced_custom_primitives.ipynb index f57ea18e65..b4418c784a 100644 --- a/docs/source/guides/advanced_custom_primitives.ipynb +++ b/docs/source/guides/advanced_custom_primitives.ipynb @@ -13,13 +13,15 @@ "metadata": {}, "outputs": [], "source": [ - "from featuretools.primitives import TransformPrimitive\n", - "from featuretools.tests.testing_utils import make_ecommerce_entityset\n", + "import re\n", + "\n", + "import numpy as np\n", "from woodwork.column_schema import ColumnSchema\n", "from woodwork.logical_types import Datetime, NaturalLanguage\n", + "\n", "import featuretools as ft\n", - "import numpy as np\n", - "import re" + "from featuretools.primitives import TransformPrimitive\n", + "from featuretools.tests.testing_utils import make_ecommerce_entityset" ] }, { diff --git a/docs/source/guides/feature_selection.ipynb b/docs/source/guides/feature_selection.ipynb index 0df96b4efc..5ce4667e72 100644 --- a/docs/source/guides/feature_selection.ipynb +++ b/docs/source/guides/feature_selection.ipynb @@ -24,16 +24,15 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import featuretools as ft\n", "\n", + "import featuretools as ft\n", + "from featuretools.demo.flight import load_flight\n", "from featuretools.selection import (\n", " remove_highly_correlated_features,\n", " remove_highly_null_features,\n", " remove_single_value_features,\n", ")\n", "\n", - "from featuretools.demo.flight import load_flight\n", - "\n", "es = load_flight(nrows=50)\n", "es" ] diff --git a/docs/source/guides/time_series.ipynb b/docs/source/guides/time_series.ipynb index a434f31fca..0c104849a9 100644 --- a/docs/source/guides/time_series.ipynb +++ b/docs/source/guides/time_series.ipynb @@ -12,10 +12,11 @@ "import warnings\n", "\n", "warnings.filterwarnings(\"ignore\")\n", + "import pandas as pd\n", + "\n", "import featuretools as ft\n", - "from featuretools.primitives import RollingMean, Lag, RollingMin\n", "from featuretools.demo.weather import load_weather\n", - "import pandas as pd" + "from featuretools.primitives import Lag, RollingMean, RollingMin" ] }, { diff --git a/docs/source/guides/using_dask_entitysets.ipynb b/docs/source/guides/using_dask_entitysets.ipynb index 2c4fe3526b..73e2929d3c 100644 --- a/docs/source/guides/using_dask_entitysets.ipynb +++ b/docs/source/guides/using_dask_entitysets.ipynb @@ -42,9 +42,10 @@ "metadata": {}, "outputs": [], "source": [ - "import featuretools as ft\n", - "import pandas as pd\n", "import dask.dataframe as dd\n", + "import pandas as pd\n", + "\n", + "import featuretools as ft\n", "\n", "id = [0, 1, 2, 3, 4]\n", "values = [12, -35, 14, 103, -51]\n", diff --git a/docs/source/guides/using_spark_entitysets.ipynb b/docs/source/guides/using_spark_entitysets.ipynb index 500b895edc..e863b7aaee 100644 --- a/docs/source/guides/using_spark_entitysets.ipynb +++ b/docs/source/guides/using_spark_entitysets.ipynb @@ -64,9 +64,10 @@ "metadata": {}, "outputs": [], "source": [ - "import featuretools as ft\n", "import pyspark.pandas as ps\n", "\n", + "import featuretools as ft\n", + "\n", "ps.set_option(\"compute.default_index_type\", \"distributed\")\n", "\n", "id = [0, 1, 2, 3, 4]\n", diff --git a/docs/source/resources/frequently_asked_questions.ipynb b/docs/source/resources/frequently_asked_questions.ipynb index ea50e0765e..de024d5b33 100644 --- a/docs/source/resources/frequently_asked_questions.ipynb +++ b/docs/source/resources/frequently_asked_questions.ipynb @@ -15,10 +15,10 @@ "metadata": {}, "outputs": [], "source": [ - "import featuretools as ft\n", "import pandas as pd\n", - "import numpy as np\n", - "import woodwork as ww" + "import woodwork as ww\n", + "\n", + "import featuretools as ft" ] }, { @@ -720,7 +720,7 @@ " },\n", " # For mode, ignore the \"sessions\" DataFrame and only include \"brands\" in the\n", " # \"products\" dataframe and \"product_id\" in the \"transactions\" DataFrame\n", - " (\"count\", \"mean\"): {\"include_dataframes\": [\"sessions\", \"transactions\"]}\n", + " (\"count\", \"mean\"): {\"include_dataframes\": [\"sessions\", \"transactions\"]},\n", " # For count and mean, only include the dataframes \"sessions\" and \"transactions\"\n", " },\n", ")" @@ -1472,6 +1472,7 @@ "outputs": [], "source": [ "import pandas as pd\n", + "\n", "import featuretools as ft\n", "\n", "df = pd.DataFrame(\n", @@ -1689,9 +1690,9 @@ "source": [ "from featuretools.primitives import (\n", " TimeSince,\n", - " TimeSincePrevious,\n", - " TimeSinceLast,\n", " TimeSinceFirst,\n", + " TimeSinceLast,\n", + " TimeSincePrevious,\n", ")\n", "\n", "time_since = TimeSince(unit=\"minutes\")\n", diff --git a/docs/source/resources/transition_to_ft_v1.0.ipynb b/docs/source/resources/transition_to_ft_v1.0.ipynb index 6be14130d4..f10b39faf6 100644 --- a/docs/source/resources/transition_to_ft_v1.0.ipynb +++ b/docs/source/resources/transition_to_ft_v1.0.ipynb @@ -98,9 +98,9 @@ "metadata": {}, "outputs": [], "source": [ - "import featuretools as ft\n", "import pandas as pd\n", - "import woodwork as ww" + "\n", + "import featuretools as ft" ] }, { diff --git a/featuretools/feature_base/feature_descriptions.py b/featuretools/feature_base/feature_descriptions.py index cd7199f59c..b3be510436 100644 --- a/featuretools/feature_base/feature_descriptions.py +++ b/featuretools/feature_base/feature_descriptions.py @@ -148,8 +148,7 @@ def generate_description(feature, feature_descriptions, primitive_templates): def get_direct_description(feature): direct_description = ( - ' the instance of "{}" associated with this ' - 'instance of "{}"'.format( + ' the instance of "{}" associated with this ' 'instance of "{}"'.format( feature.relationship_path[-1][1].parent_dataframe.ww.name, feature.dataframe_name, ) diff --git a/featuretools/primitives/standard/transform/datetime/distance_to_holiday.py b/featuretools/primitives/standard/transform/datetime/distance_to_holiday.py index 1f4c9bc600..47a6068515 100644 --- a/featuretools/primitives/standard/transform/datetime/distance_to_holiday.py +++ b/featuretools/primitives/standard/transform/datetime/distance_to_holiday.py @@ -59,7 +59,10 @@ def __init__(self, holiday="New Year's Day", country="US"): available_holidays = list(set(self.holidayUtil.federal_holidays.values())) if self.holiday not in available_holidays: - error = "must be one of the available holidays:\n%s" % available_holidays + error = "{} must be one of the available holidays:\n{}".format( + self.holiday, + available_holidays, + ) raise ValueError(error) def get_function(self): diff --git a/featuretools/synthesis/deep_feature_synthesis.py b/featuretools/synthesis/deep_feature_synthesis.py index 2c29cc2460..36c1944dde 100644 --- a/featuretools/synthesis/deep_feature_synthesis.py +++ b/featuretools/synthesis/deep_feature_synthesis.py @@ -1290,8 +1290,11 @@ def check_primitive( primitive = handle_primitive(primitive) if not isinstance(primitive, supertype): raise ValueError( - "Primitive {} in {} is not {} " - "primitive".format(type(primitive), arg_name, s), + "Primitive {} in {} is not {} " "primitive".format( + type(primitive), + arg_name, + s, + ), ) return primitive diff --git a/featuretools/tests/primitive_tests/test_feature_serialization.py b/featuretools/tests/primitive_tests/test_feature_serialization.py index 3c50ad3e42..8595ab504c 100644 --- a/featuretools/tests/primitive_tests/test_feature_serialization.py +++ b/featuretools/tests/primitive_tests/test_feature_serialization.py @@ -462,7 +462,7 @@ def test_deserializer_uses_common_primitive_instances_with_args(es, tmp_path): # Test primitive with multiple args - pandas only due to primitive compatibility if es.dataframe_type == Library.PANDAS: distance_to_holiday = DistanceToHoliday( - holiday="Victoria Day", + holiday="Canada Day", country="Canada", ) features = dfs( @@ -491,7 +491,7 @@ def test_deserializer_uses_common_primitive_instances_with_args(es, tmp_path): assert all( [f.primitive is new_distance_primitive for f in new_distance_features], ) - assert new_distance_primitive.holiday == "Victoria Day" + assert new_distance_primitive.holiday == "Canada Day" assert new_distance_primitive.country == "Canada" # Test primitive with list arg diff --git a/featuretools/tests/primitive_tests/test_rolling_primitive_utils.py b/featuretools/tests/primitive_tests/test_rolling_primitive_utils.py index b3e277f7c7..1b8c94b0c5 100644 --- a/featuretools/tests/primitive_tests/test_rolling_primitive_utils.py +++ b/featuretools/tests/primitive_tests/test_rolling_primitive_utils.py @@ -542,12 +542,14 @@ def count_wrapper(sub_s): def test_roll_series_with_gap_incorrect_types(window_series_pd): error = "Window length must be either an offset string or an integer." with pytest.raises(TypeError, match=error): - roll_series_with_gap( - window_series_pd, - window_length=4.2, - gap=4, - min_periods=1, - ), + ( + roll_series_with_gap( + window_series_pd, + window_length=4.2, + gap=4, + min_periods=1, + ), + ) error = "Gap must be either an offset string or an integer." with pytest.raises(TypeError, match=error): diff --git a/featuretools/tests/primitive_tests/transform_primitive_tests/test_distancetoholiday_primitive.py b/featuretools/tests/primitive_tests/transform_primitive_tests/test_distancetoholiday_primitive.py index ba237e589c..418f73ee86 100644 --- a/featuretools/tests/primitive_tests/transform_primitive_tests/test_distancetoholiday_primitive.py +++ b/featuretools/tests/primitive_tests/transform_primitive_tests/test_distancetoholiday_primitive.py @@ -1,10 +1,8 @@ from datetime import datetime -import holidays import numpy as np import pandas as pd import pytest -from packaging.version import parse from featuretools.primitives import DistanceToHoliday @@ -26,7 +24,7 @@ def test_distanceholiday(): def test_holiday_out_of_range(): - date_to_holiday = DistanceToHoliday("Boxing Day", country="Canada") + date_to_holiday = DistanceToHoliday("Canada Day", country="Canada") array = pd.Series( [ @@ -36,22 +34,12 @@ def test_holiday_out_of_range(): datetime(2020, 12, 31), ], ) - days_to_boxing_day = -157 if parse(holidays.__version__) >= parse("0.15.0") else 209 - edge_case_first_day_of_year = ( - -6 if parse(holidays.__version__) >= parse("0.17.0") else np.nan + answer = [181, 31, -30, 182] + pd.testing.assert_series_equal( + date_to_holiday(array), + pd.Series(answer), + check_names=False, ) - edge_case_last_day_of_year = ( - -5 if parse(holidays.__version__) >= parse("0.17.0") else np.nan - ) - answer = pd.Series( - [ - edge_case_first_day_of_year, - days_to_boxing_day, - 148, - edge_case_last_day_of_year, - ], - ) - pd.testing.assert_series_equal(date_to_holiday(array), answer, check_names=False) def test_unknown_country_error(): @@ -82,7 +70,7 @@ def test_nat(): def test_valid_country(): - distance_to_holiday = DistanceToHoliday("Victoria Day", country="Canada") + distance_to_holiday = DistanceToHoliday("Canada Day", country="Canada") case = pd.Series( [ "2010-01-01", @@ -91,7 +79,7 @@ def test_valid_country(): "2020-12-31", ], ).astype("datetime64[ns]") - answer = [143, -10, -70, 144] + answer = [181, 31, -30, 182] given_answer = distance_to_holiday(case).astype("float") np.testing.assert_array_equal(given_answer, answer) diff --git a/featuretools/utils/recommend_primitives.py b/featuretools/utils/recommend_primitives.py index 6859d34e92..a3a172264b 100644 --- a/featuretools/utils/recommend_primitives.py +++ b/featuretools/utils/recommend_primitives.py @@ -6,18 +6,16 @@ from featuretools.primitives.utils import get_transform_primitives from featuretools.synthesis import dfs, get_valid_primitives -ORDERED_PRIMITIVES = ( - [ # non-numeric primitives that require specific ordering or a time index to be set - "cum_count", - "cumulative_time_since_last_false", - "cumulative_time_since_last_true", - "diff", - "diff_datetime", - "is_first_occurrence", - "is_last_occurrence", - "time_since_previous", - ] -) +ORDERED_PRIMITIVES = [ # non-numeric primitives that require specific ordering or a time index to be set + "cum_count", + "cumulative_time_since_last_false", + "cumulative_time_since_last_true", + "diff", + "diff_datetime", + "is_first_occurrence", + "is_last_occurrence", + "time_since_previous", +] DEPRECATED_PRIMITIVES = [ diff --git a/pyproject.toml b/pyproject.toml index 31ddb355d9..a4d931aee3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ license = {text = "BSD 3-clause"} requires-python = ">=3.8,<4" dependencies = [ "cloudpickle >= 1.5.0", - "holidays >= 0.13, < 0.33", + "holidays >= 0.13", "numpy >= 1.21.0", "packaging >= 20.0", "pandas >= 1.5.0", @@ -113,8 +113,7 @@ docs = [ "featuretools[sklearn,dask,spark,test]", ] dev = [ - "ruff >= 0.0.239", - "black[jupyter] >= 23.1.0", + "ruff >= 0.1.6", "pre-commit >= 2.20.0", "featuretools[docs,dask,spark,test]", ] @@ -152,7 +151,6 @@ namespaces = true [tool.setuptools.dynamic] version = {attr = "featuretools.version.__version__"} - [tool.pytest.ini_options] addopts = "--doctest-modules --ignore=featuretools/tests/entry_point_tests/add-ons" testpaths = [ @@ -163,12 +161,9 @@ filterwarnings = [ "ignore::PendingDeprecationWarning" ] -[tool.black] -line-length = 88 -target-version = ["py311"] - [tool.ruff] line-length = 88 +target-version = "py311" ignore = ["E501"] select = [ # Pyflakes