diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e2bffbfefde..b928c39353c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -63,3 +63,9 @@ repos:
     rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d
     hooks:
       - id: validate-cff
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.3.0
+    hooks:
+      - id: codespell
+        additional_dependencies:
+          - tomli
diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py
index 8fc32e75cbd..91cb9d7cc52 100644
--- a/properties/test_pandas_roundtrip.py
+++ b/properties/test_pandas_roundtrip.py
@@ -95,12 +95,12 @@ def test_roundtrip_dataset(dataset) -> None:
 
 
 @given(numeric_series, st.text())
-def test_roundtrip_pandas_series(ser, ix_name) -> None:
+def test_roundtrip_pandas_series(ser, ix_name) -> None:  # codespell:ignore ser
     # Need to name the index, otherwise Xarray calls it 'dim_0'.
-    ser.index.name = ix_name
-    arr = xr.DataArray(ser)
+    ser.index.name = ix_name  # codespell:ignore ser
+    arr = xr.DataArray(ser)  # codespell:ignore ser
     roundtripped = arr.to_pandas()
-    pd.testing.assert_series_equal(ser, roundtripped)
+    pd.testing.assert_series_equal(ser, roundtripped)  # codespell:ignore ser
     xr.testing.assert_identical(arr, roundtripped.to_xarray())
 
 
diff --git a/pyproject.toml b/pyproject.toml
index dab280f0eba..bc3e6d194ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,8 +50,8 @@ dev = [
   "sphinx_autosummary_accessors",
   "xarray[complete]",
 ]
-io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
 etc = ["sparse"]
+io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
 parallel = ["dask[complete]"]
 viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"]
 
@@ -234,40 +234,39 @@ extend-exclude = [
 ]
 
 [tool.ruff.lint]
+extend-safe-fixes = [
+  "TID252", # absolute imports
+]
 extend-select = [
-  "F", # Pyflakes
-  "E", # pycodestyle errors
-  "W", # pycodestyle warnings
-  "I", # isort
-  "UP", # pyupgrade
-  "B", # flake8-bugbear
-  "C4", # flake8-comprehensions
-  "PIE", # flake8-pie
-  "TID", # flake8-tidy-imports (absolute imports)
-  "PGH", # pygrep-hooks
+  "F",    # Pyflakes
+  "E",    # pycodestyle errors
+  "W",    # pycodestyle warnings
+  "I",    # isort
+  "UP",   # pyupgrade
+  "B",    # flake8-bugbear
+  "C4",   # flake8-comprehensions
+  "PIE",  # flake8-pie
+  "TID",  # flake8-tidy-imports (absolute imports)
+  "PGH",  # pygrep-hooks
   "PERF", # Perflint
   "RUF",
 ]
 
-extend-safe-fixes = [
-  "TID252", # absolute imports
-]
 ignore = [
-  "E402", # module level import not at top of file
-  "E501", # line too long - let the formatter worry about that
-  "E731", # do not assign a lambda expression, use a def
-  "UP007", # use X | Y for type annotations
-  "UP027", # deprecated
-  "C40", # unnecessary generator, comprehension, or literal
-  "PIE790", # unnecessary pass statement
+  "E402",    # module level import not at top of file
+  "E501",    # line too long - let the formatter worry about that
+  "E731",    # do not assign a lambda expression, use a def
+  "UP007",   # use X | Y for type annotations
+  "UP027",   # deprecated
+  "C40",     # unnecessary generator, comprehension, or literal
+  "PIE790",  # unnecessary pass statement
   "PERF203", # try-except within a loop incurs performance overhead
-  "RUF001", # string contains ambiguous unicode character
-  "RUF002", # docstring contains ambiguous acute accent unicode character
-  "RUF003", # comment contains ambiguous no-break space unicode character
-  "RUF005", # consider upacking operator instead of concatenation
-  "RUF012", # mutable class attributes
+  "RUF001",  # string contains ambiguous unicode character
+  "RUF002",  # docstring contains ambiguous acute accent unicode character
+  "RUF003",  # comment contains ambiguous no-break space unicode character
+  "RUF005",  # consider unpacking operator instead of concatenation
+  "RUF012",  # mutable class attributes
 ]
-
 [tool.ruff.lint.per-file-ignores]
 # don't enforce absolute imports
 "asv_bench/**" = ["TID252"]
@@ -349,3 +348,8 @@ test = "pytest"
 ignore = [
   "PP308", # This option creates a large amount of log lines.
 ]
+
+[tool.codespell]
+enable-color = true
+ignore-words-list = "nd,coo,COO,nin,Marge,Commun,Claus,Soler,Tung,Celles,slowy"
+quiet-level = 3
diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py
index e44ef75a88b..2528e567c4c 100644
--- a/xarray/core/accessor_str.py
+++ b/xarray/core/accessor_str.py
@@ -2560,7 +2560,7 @@ def split(
 
         Returns
         -------
-        splitted : same type as values or object array
+        split : same type as values or object array
 
         Examples
         --------
@@ -2576,8 +2576,8 @@ def split(
 
         Split once and put the results in a new dimension
 
-        >>> values.str.split(dim="splitted", maxsplit=1)
-        <xarray.DataArray (X: 2, Y: 3, splitted: 2)> Size: 864B
+        >>> values.str.split(dim="split", maxsplit=1)
+        <xarray.DataArray (X: 2, Y: 3, split: 2)> Size: 864B
         array([[['abc', 'def'],
                 ['spam', 'eggs\tswallow'],
                 ['red_blue', '']],
@@ -2585,12 +2585,12 @@ def split(
                [['test0', 'test1\ntest2\n\ntest3'],
                 ['', ''],
                 ['abra', 'ka\nda\tbra']]], dtype='<U18')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
 
         Split as many times as needed and put the results in a new dimension
 
-        >>> values.str.split(dim="splitted")
-        <xarray.DataArray (X: 2, Y: 3, splitted: 4)> Size: 768B
+        >>> values.str.split(dim="split")
+        <xarray.DataArray (X: 2, Y: 3, split: 4)> Size: 768B
         array([[['abc', 'def', '', ''],
                 ['spam', 'eggs', 'swallow', ''],
                 ['red_blue', '', '', '']],
@@ -2598,7 +2598,7 @@ def split(
               [['test0', 'test1', 'test2', 'test3'],
                ['', '', '', ''],
               ['abra', 'ka', 'da', 'bra']]], dtype='<U8')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
@@ ... @@ def split(
-        >>> values.str.split(dim="splitted", sep=" ")
-        <xarray.DataArray (X: 2, Y: 3, splitted: 3)> Size: 2kB
+        >>> values.str.split(dim="split", sep=" ")
+        <xarray.DataArray (X: 2, Y: 3, split: 3)> Size: 2kB
         array([[['abc', 'def', ''],
                 ['spam\t\teggs\tswallow', '', ''],
                 ['red_blue', '', '']],
@@ -2631,7 +2631,7 @@ def split(
               [['test0\ntest1\ntest2\n\ntest3', '', ''],
               ['', '', ''],
               ['abra', '', 'ka\nda\tbra']]], dtype='<U24')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
@@ ... @@ def rsplit(
-        >>> values.str.rsplit(dim="splitted", maxsplit=1)
-        <xarray.DataArray (X: 2, Y: 3, splitted: 2)> Size: 816B
+        >>> values.str.rsplit(dim="split", maxsplit=1)
+        <xarray.DataArray (X: 2, Y: 3, split: 2)> Size: 816B
         array([[['abc', 'def'],
                 ['spam\t\teggs', 'swallow'],
                 ['', 'red_blue']],
@@ -2703,12 +2703,12 @@ def rsplit(
               [['test0\ntest1\ntest2', 'test3'],
                ['', ''],
               ['abra  ka\nda', 'bra']]], dtype='<U17')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
 
         Split as many times as needed and put the results in a new dimension
 
-        >>> values.str.rsplit(dim="splitted")
-        <xarray.DataArray (X: 2, Y: 3, splitted: 4)> Size: 768B
+        >>> values.str.rsplit(dim="split")
+        <xarray.DataArray (X: 2, Y: 3, split: 4)> Size: 768B
         array([[['', '', 'abc', 'def'],
                 ['', 'spam', 'eggs', 'swallow'],
                 ['', '', '', 'red_blue']],
@@ -2716,7 +2716,7 @@ def rsplit(
               [['test0', 'test1', 'test2', 'test3'],
               ['', '', '', ''],
               ['abra', 'ka', 'da', 'bra']]], dtype='<U8')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
@@ ... @@ def rsplit(
-        >>> values.str.rsplit(dim="splitted", sep=" ")
-        <xarray.DataArray (X: 2, Y: 3, splitted: 3)> Size: 2kB
+        >>> values.str.rsplit(dim="split", sep=" ")
+        <xarray.DataArray (X: 2, Y: 3, split: 3)> Size: 2kB
         array([[['', 'abc', 'def'],
                 ['', '', 'spam\t\teggs\tswallow'],
                 ['', '', 'red_blue']],
@@ -2749,7 +2749,7 @@ def rsplit(
              [['', '', 'test0\ntest1\ntest2\n\ntest3'],
              ['', '', ''],
              ['abra', '', 'ka\nda\tbra']]], dtype='<U24')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py
--- a/xarray/tests/test_accessor_str.py
+++ b/xarray/tests/test_accessor_str.py
@@ ... @@ def test_contains(dtype) -> None:
     assert_equal(result, expected)
 
     # case sensitive without regex
-    result = values.str.contains("fO", regex=False, case=True)
+    result = values.str.contains("fO", regex=False, case=True)  # codespell:ignore
     expected = xr.DataArray([False, False, True, False])
     assert result.dtype == expected.dtype
     assert_equal(result, expected)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index fd866cae5ee..2292ca09e2d 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1250,12 +1250,12 @@ def test_roundtrip_endian(self) -> None:
             pass
 
     def test_invalid_dataarray_names_raise(self) -> None:
-        te = (TypeError, "string or None")
-        ve = (ValueError, "string must be length 1 or")
+        terr = (TypeError, "string or None")
+        verr = (ValueError, "string must be length 1 or")
         data = np.random.random((2, 2))
         da = xr.DataArray(data)
         for name, (error, msg) in zip(
-            [0, (4, 5), True, ""], [te, te, te, ve], strict=True
+            [0, (4, 5), True, ""], [terr, terr, terr, verr], strict=True
         ):
             ds = Dataset({name: da})
             with pytest.raises(error) as excinfo:
diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py
index f6f97108c1d..504437ce572 100644
--- a/xarray/tests/test_cftime_offsets.py
+++ b/xarray/tests/test_cftime_offsets.py
@@ -1548,7 +1548,7 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non
     if function == cftime_range and not has_cftime:
         pytest.skip("requires cftime")
 
-    with pytest.raises(ValueError, match="nclusive"):
+    with pytest.raises(ValueError, match="nclusi"):
         function("2000", periods=3, inclusive="foo")
 
 
diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py
index 116487e2bcf..9c92c0e19f1 100644
--- a/xarray/tests/test_cftimeindex.py
+++ b/xarray/tests/test_cftimeindex.py
@@ -1102,8 +1102,8 @@ def test_cftimeindex_repr_formatting_width(periods, display_width):
     len_intro_str = len("CFTimeIndex(")
     with xr.set_options(display_width=display_width):
         repr_str = index.__repr__()
-        splitted = repr_str.split("\n")
-        for i, s in enumerate(splitted):
+        split = repr_str.split("\n")
+        for i, s in enumerate(split):
             # check that lines not longer than OPTIONS['display_width']
             assert len(s) <= display_width, f"{len(s)} {s} {display_width}"
             if i > 0:
diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py
index ab04a7b3cde..5280bdf0019 100644
--- a/xarray/tests/test_coarsen.py
+++ b/xarray/tests/test_coarsen.py
@@ -262,7 +262,7 @@ class TestCoarsenConstruct:
     def test_coarsen_construct(self, dask: bool) -> None:
         ds = Dataset(
             {
-                "vart": ("time", np.arange(48), {"a": "b"}),
+                "vart": ("time", np.arange(48), {"a": "b"}),  # codespell:ignore vart
                 "varx": ("x", np.arange(10), {"a": "b"}),
                 "vartx": (("x", "time"), np.arange(480).reshape(10, 48), {"a": "b"}),
                 "vary": ("y", np.arange(12)),
@@ -275,9 +275,9 @@ def test_coarsen_construct(self, dask: bool) -> None:
             ds = ds.chunk({"x": 4, "time": 10})
 
         expected = xr.Dataset(attrs={"foo": "bar"})
-        expected["vart"] = (
+        expected["vart"] = (  # codespell:ignore vart
             ("year", "month"),
-            duck_array_ops.reshape(ds.vart.data, (-1, 12)),
+            duck_array_ops.reshape(ds.vart.data, (-1, 12)),  # codespell:ignore vart
             {"a": "b"},
         )
         expected["varx"] = (
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 67d38aac0fe..1baad286c91 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -4930,8 +4930,8 @@ def test_from_dataframe_categorical_index_string_categories(self) -> None:
                 categories=pd.Index(["foo", "bar", "baz"], dtype="string"),
             )
         )
-        ser = pd.Series(1, index=cat)
-        ds = ser.to_xarray()
+        series = pd.Series(1, index=cat)
+        ds = series.to_xarray()
         assert ds.coords.dtypes["index"] == np.dtype("O")
 
     @requires_sparse
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 9c6f50037d3..11a7ec34b18 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -871,7 +871,7 @@ def test_getitem_error(self):
 
         v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5))
         ind = Variable(["x"], [0, 1])
-        with pytest.raises(IndexError, match=r"Dimensions of indexers mis"):
+        with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
            v[:, ind]
 
     @pytest.mark.parametrize(