Skip to content

Commit

Permalink
update holidays
Browse files Browse the repository at this point in the history
  • Loading branch information
Gaurav Sheni committed Dec 4, 2023
1 parent 1498c9a commit 9222a4b
Show file tree
Hide file tree
Showing 19 changed files with 82 additions and 91 deletions.
17 changes: 7 additions & 10 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,15 @@ repos:
hooks:
- id: add-trailing-comma
name: Add trailing comma
- repo: https://github.com/python/black
rev: 22.12.0
hooks:
- id: black
args:
- --config=./pyproject.toml
additional_dependencies: [".[jupyter]"]
types_or: [python, jupyter]
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: 'v0.0.239'
rev: 'v0.1.6'
hooks:
- id: ruff
types_or: [ python, pyi, jupyter ]
args:
- --config=./pyproject.toml
- --fix
- --config=./pyproject.toml
- id: ruff-format
types_or: [ python, pyi, jupyter ]
args:
- --config=./pyproject.toml
2 changes: 1 addition & 1 deletion .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ build:
- graphviz
- openjdk-11-jre-headless
jobs:
post_build:
post_build:
- export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"

python:
Expand Down
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ clean:
.PHONY: lint
lint:
python docs/notebook_version_standardizer.py check-execution
black . --config=./pyproject.toml --check
ruff . --config=./pyproject.toml
ruff check . --config=./pyproject.toml
ruff format . --check --config=./pyproject.toml

.PHONY: lint-fix
lint-fix:
python docs/notebook_version_standardizer.py standardize
black . --config=./pyproject.toml
ruff . --fix --config=./pyproject.toml
ruff check . --fix --config=./pyproject.toml
ruff format . --config=./pyproject.toml

.PHONY: test
test:
Expand Down
7 changes: 4 additions & 3 deletions docs/source/getting_started/primitives.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,12 @@
"metadata": {},
"outputs": [],
"source": [
"from featuretools.primitives import AggregationPrimitive, TransformPrimitive\n",
"from featuretools.tests.testing_utils import make_ecommerce_entityset\n",
"import pandas as pd\n",
"from woodwork.column_schema import ColumnSchema\n",
"from woodwork.logical_types import Datetime, NaturalLanguage\n",
"import pandas as pd"
"\n",
"from featuretools.primitives import AggregationPrimitive, TransformPrimitive\n",
"from featuretools.tests.testing_utils import make_ecommerce_entityset"
]
},
{
Expand Down
10 changes: 6 additions & 4 deletions docs/source/guides/advanced_custom_primitives.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
"metadata": {},
"outputs": [],
"source": [
"from featuretools.primitives import TransformPrimitive\n",
"from featuretools.tests.testing_utils import make_ecommerce_entityset\n",
"import re\n",
"\n",
"import numpy as np\n",
"from woodwork.column_schema import ColumnSchema\n",
"from woodwork.logical_types import Datetime, NaturalLanguage\n",
"\n",
"import featuretools as ft\n",
"import numpy as np\n",
"import re"
"from featuretools.primitives import TransformPrimitive\n",
"from featuretools.tests.testing_utils import make_ecommerce_entityset"
]
},
{
Expand Down
5 changes: 2 additions & 3 deletions docs/source/guides/feature_selection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,15 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"import featuretools as ft\n",
"\n",
"import featuretools as ft\n",
"from featuretools.demo.flight import load_flight\n",
"from featuretools.selection import (\n",
" remove_highly_correlated_features,\n",
" remove_highly_null_features,\n",
" remove_single_value_features,\n",
")\n",
"\n",
"from featuretools.demo.flight import load_flight\n",
"\n",
"es = load_flight(nrows=50)\n",
"es"
]
Expand Down
5 changes: 3 additions & 2 deletions docs/source/guides/time_series.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"import pandas as pd\n",
"\n",
"import featuretools as ft\n",
"from featuretools.primitives import RollingMean, Lag, RollingMin\n",
"from featuretools.demo.weather import load_weather\n",
"import pandas as pd"
"from featuretools.primitives import Lag, RollingMean, RollingMin"
]
},
{
Expand Down
5 changes: 3 additions & 2 deletions docs/source/guides/using_dask_entitysets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,10 @@
"metadata": {},
"outputs": [],
"source": [
"import featuretools as ft\n",
"import pandas as pd\n",
"import dask.dataframe as dd\n",
"import pandas as pd\n",
"\n",
"import featuretools as ft\n",
"\n",
"id = [0, 1, 2, 3, 4]\n",
"values = [12, -35, 14, 103, -51]\n",
Expand Down
3 changes: 2 additions & 1 deletion docs/source/guides/using_spark_entitysets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@
"metadata": {},
"outputs": [],
"source": [
"import featuretools as ft\n",
"import pyspark.pandas as ps\n",
"\n",
"import featuretools as ft\n",
"\n",
"ps.set_option(\"compute.default_index_type\", \"distributed\")\n",
"\n",
"id = [0, 1, 2, 3, 4]\n",
Expand Down
13 changes: 7 additions & 6 deletions docs/source/resources/frequently_asked_questions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
"metadata": {},
"outputs": [],
"source": [
"import featuretools as ft\n",
"import pandas as pd\n",
"import numpy as np\n",
"import woodwork as ww"
"import woodwork as ww\n",
"\n",
"import featuretools as ft"
]
},
{
Expand Down Expand Up @@ -720,7 +720,7 @@
" },\n",
" # For mode, ignore the \"sessions\" DataFrame and only include \"brands\" in the\n",
" # \"products\" dataframe and \"product_id\" in the \"transactions\" DataFrame\n",
" (\"count\", \"mean\"): {\"include_dataframes\": [\"sessions\", \"transactions\"]}\n",
" (\"count\", \"mean\"): {\"include_dataframes\": [\"sessions\", \"transactions\"]},\n",
" # For count and mean, only include the dataframes \"sessions\" and \"transactions\"\n",
" },\n",
")"
Expand Down Expand Up @@ -1472,6 +1472,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"import featuretools as ft\n",
"\n",
"df = pd.DataFrame(\n",
Expand Down Expand Up @@ -1689,9 +1690,9 @@
"source": [
"from featuretools.primitives import (\n",
" TimeSince,\n",
" TimeSincePrevious,\n",
" TimeSinceLast,\n",
" TimeSinceFirst,\n",
" TimeSinceLast,\n",
" TimeSincePrevious,\n",
")\n",
"\n",
"time_since = TimeSince(unit=\"minutes\")\n",
Expand Down
4 changes: 2 additions & 2 deletions docs/source/resources/transition_to_ft_v1.0.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@
"metadata": {},
"outputs": [],
"source": [
"import featuretools as ft\n",
"import pandas as pd\n",
"import woodwork as ww"
"\n",
"import featuretools as ft"
]
},
{
Expand Down
3 changes: 1 addition & 2 deletions featuretools/feature_base/feature_descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,7 @@ def generate_description(feature, feature_descriptions, primitive_templates):

def get_direct_description(feature):
direct_description = (
' the instance of "{}" associated with this '
'instance of "{}"'.format(
' the instance of "{}" associated with this ' 'instance of "{}"'.format(
feature.relationship_path[-1][1].parent_dataframe.ww.name,
feature.dataframe_name,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ def __init__(self, holiday="New Year's Day", country="US"):

available_holidays = list(set(self.holidayUtil.federal_holidays.values()))
if self.holiday not in available_holidays:
error = "must be one of the available holidays:\n%s" % available_holidays
error = "{} must be one of the available holidays:\n{}".format(
self.holiday,
available_holidays,
)
raise ValueError(error)

def get_function(self):
Expand Down
7 changes: 5 additions & 2 deletions featuretools/synthesis/deep_feature_synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1290,8 +1290,11 @@ def check_primitive(
primitive = handle_primitive(primitive)
if not isinstance(primitive, supertype):
raise ValueError(
"Primitive {} in {} is not {} "
"primitive".format(type(primitive), arg_name, s),
"Primitive {} in {} is not {} " "primitive".format(
type(primitive),
arg_name,
s,
),
)
return primitive

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def test_deserializer_uses_common_primitive_instances_with_args(es, tmp_path):
# Test primitive with multiple args - pandas only due to primitive compatibility
if es.dataframe_type == Library.PANDAS:
distance_to_holiday = DistanceToHoliday(
holiday="Victoria Day",
holiday="Canada Day",
country="Canada",
)
features = dfs(
Expand Down Expand Up @@ -491,7 +491,7 @@ def test_deserializer_uses_common_primitive_instances_with_args(es, tmp_path):
assert all(
[f.primitive is new_distance_primitive for f in new_distance_features],
)
assert new_distance_primitive.holiday == "Victoria Day"
assert new_distance_primitive.holiday == "Canada Day"
assert new_distance_primitive.country == "Canada"

# Test primitive with list arg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -542,12 +542,14 @@ def count_wrapper(sub_s):
def test_roll_series_with_gap_incorrect_types(window_series_pd):
error = "Window length must be either an offset string or an integer."
with pytest.raises(TypeError, match=error):
roll_series_with_gap(
window_series_pd,
window_length=4.2,
gap=4,
min_periods=1,
),
(
roll_series_with_gap(
window_series_pd,
window_length=4.2,
gap=4,
min_periods=1,
),
)

error = "Gap must be either an offset string or an integer."
with pytest.raises(TypeError, match=error):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from datetime import datetime

import holidays
import numpy as np
import pandas as pd
import pytest
from packaging.version import parse

from featuretools.primitives import DistanceToHoliday

Expand All @@ -26,7 +24,7 @@ def test_distanceholiday():


def test_holiday_out_of_range():
date_to_holiday = DistanceToHoliday("Boxing Day", country="Canada")
date_to_holiday = DistanceToHoliday("Canada Day", country="Canada")

array = pd.Series(
[
Expand All @@ -36,22 +34,12 @@ def test_holiday_out_of_range():
datetime(2020, 12, 31),
],
)
days_to_boxing_day = -157 if parse(holidays.__version__) >= parse("0.15.0") else 209
edge_case_first_day_of_year = (
-6 if parse(holidays.__version__) >= parse("0.17.0") else np.nan
answer = [181, 31, -30, 182]
pd.testing.assert_series_equal(
date_to_holiday(array),
pd.Series(answer),
check_names=False,
)
edge_case_last_day_of_year = (
-5 if parse(holidays.__version__) >= parse("0.17.0") else np.nan
)
answer = pd.Series(
[
edge_case_first_day_of_year,
days_to_boxing_day,
148,
edge_case_last_day_of_year,
],
)
pd.testing.assert_series_equal(date_to_holiday(array), answer, check_names=False)


def test_unknown_country_error():
Expand Down Expand Up @@ -82,7 +70,7 @@ def test_nat():


def test_valid_country():
distance_to_holiday = DistanceToHoliday("Victoria Day", country="Canada")
distance_to_holiday = DistanceToHoliday("Canada Day", country="Canada")
case = pd.Series(
[
"2010-01-01",
Expand All @@ -91,7 +79,7 @@ def test_valid_country():
"2020-12-31",
],
).astype("datetime64[ns]")
answer = [143, -10, -70, 144]
answer = [181, 31, -30, 182]
given_answer = distance_to_holiday(case).astype("float")
np.testing.assert_array_equal(given_answer, answer)

Expand Down
22 changes: 10 additions & 12 deletions featuretools/utils/recommend_primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,16 @@
from featuretools.primitives.utils import get_transform_primitives
from featuretools.synthesis import dfs, get_valid_primitives

ORDERED_PRIMITIVES = (
[ # non-numeric primitives that require specific ordering or a time index to be set
"cum_count",
"cumulative_time_since_last_false",
"cumulative_time_since_last_true",
"diff",
"diff_datetime",
"is_first_occurrence",
"is_last_occurrence",
"time_since_previous",
]
)
ORDERED_PRIMITIVES = [ # non-numeric primitives that require specific ordering or a time index to be set
"cum_count",
"cumulative_time_since_last_false",
"cumulative_time_since_last_true",
"diff",
"diff_datetime",
"is_first_occurrence",
"is_last_occurrence",
"time_since_previous",
]


DEPRECATED_PRIMITIVES = [
Expand Down
Loading

0 comments on commit 9222a4b

Please sign in to comment.