diff --git a/pyproject.toml b/pyproject.toml index 82067cee..fe9a72a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ test = [ "pytest-mpl >=0.12", "dask[dataframe] >=2022; python_version>='3.8'", "dask_histogram >=2023.1; python_version>='3.8'", + "numba_stats", ] dev = [ "pytest >=6", @@ -83,6 +84,7 @@ dev = [ "ipykernel", "dask[dataframe] >=2022; python_version>='3.8'", "dask_histogram >=2023.1; python_version>='3.8'", + "numba_stats", ] docs = [ "pytest >=6", @@ -176,6 +178,7 @@ messages_control.disable = [ "disallowed-name", "cyclic-import", "redefined-builtin", + "too-many-public-methods", ] [tool.ruff] diff --git a/src/hist/basehist.py b/src/hist/basehist.py index cc008b7a..e44faa71 100644 --- a/src/hist/basehist.py +++ b/src/hist/basehist.py @@ -630,3 +630,43 @@ def sum(self, flow: bool = False) -> float | bh.accumulators.Accumulator: raise AssertionError(f"Unsupported storage type {storage}") return super().sum(flow=flow) + + def chisquare_1samp( + self, + distribution: str | Callable[..., Any], + flow: bool = False, + args: Any = (), + kwds: Any = None, + ) -> Any: + from hist import stats + + return stats.chisquare_1samp(self, distribution, flow, args, kwds) + + def chisquare_2samp(self, other: hist.BaseHist, flow: bool = False) -> Any: + from hist import stats + + return stats.chisquare_2samp(self, other, flow) + + def ks_1samp( + self, + distribution: str | Callable[..., Any], + args: Any = (), + kwds: Any = None, + alternative: str = "two-sided", + mode: str = "auto", + ) -> Any: + from hist import stats + + return stats.ks_1samp(self, distribution, args, kwds, alternative, mode) + + def ks_2samp( + self, + other: hist.BaseHist, + equal_bins: bool = True, + flow: bool = False, + alternative: str = "two-sided", + mode: str = "auto", + ) -> Any: + from hist import stats + + return stats.ks_2samp(self, other, equal_bins, flow, alternative, mode) diff --git a/src/hist/stats.py b/src/hist/stats.py new file mode 100644 index 00000000..6eab17ac --- /dev/null +++ b/src/hist/stats.py @@ -0,0 +1,600 @@ +from __future__ import annotations + +import warnings +from typing import Any, Callable + +import numpy as np +from scipy import special +from scipy import stats as spstats +from scipy.stats import rv_continuous, rv_discrete + +import hist + + +def _get_cdf_if_valid(obj: Any) -> Any: + if isinstance(obj, (rv_continuous, rv_discrete)): + return obj.cdf + if isinstance(obj, str) and hasattr(spstats, obj): + dist = getattr(spstats, obj) + if isinstance(dist, (rv_continuous, rv_discrete)): + return dist.cdf + if hasattr(obj, "cdf") and callable(obj.cdf): + return obj.cdf + if callable(obj): + return obj + raise TypeError( + f"Unknown distribution type {obj}, try one of the scipy distributions, an object with a cdf method, or a callable cdf implementation" + ) + + +def _compute_outer_prob_inside_method(m: int, n: int, g: int, h: int) -> Any: + """ + Count the proportion of paths that do not stay strictly inside two + diagonal lines. + + Parameters + ---------- + m : integer + m > 0 + n : integer + n > 0 + g : integer + g is greatest common divisor of m and n + h : integer + 0 <= h <= lcm(m,n) + + Returns + ------- + p : float + The proportion of paths that do not stay inside the two lines. + + The classical algorithm counts the integer lattice paths from (0, 0) + to (m, n) which satisfy |x/m - y/n| < h / lcm(m, n). + The paths make steps of size +1 in either positive x or positive y + directions. 
+    We are, however, interested in 1 - proportion to compute p-values,
+    so we change the recursion to compute 1 - p directly while staying
+    within the "inside method" as described by Hodges.
+
+    We generally follow Hodges' treatment of Drion/Gnedenko/Korolyuk.
+    Hodges, J.L. Jr.,
+    "The Significance Probability of the Smirnov Two-Sample Test,"
+    Arkiv för Matematik, 3, No. 43 (1958), 469-86.
+
+    For the recursion for 1-p see
+    Viehmann, T.: "Numerically more stable computation of the p-values
+    for the two-sample Kolmogorov-Smirnov test," arXiv: 2102.08037
+
+    """
+    # Probability is symmetrical in m, n. Computation below uses m >= n.
+    if m < n:
+        m, n = n, m
+    mg = m // g
+    ng = n // g
+
+    # Count the integer lattice paths from (0, 0) to (m, n) which satisfy
+    # |nx/g - my/g| < h.
+    # Compute matrix A such that:
+    #  A(x, 0) = A(0, y) = 1
+    #  A(x, y) = A(x, y-1) + A(x-1, y), for x,y>=1, except that
+    #  A(x, y) = 0 if |x/m - y/n| >= h
+    # Probability is A(m, n)/binom(m+n, n)
+    # Optimizations exist for m==n, m==n*p.
+    # Only need to preserve a single column of A, and only a
+    # sliding window of it.
+    # minj keeps track of the slide.
+    minj, maxj = 0, min(int(np.ceil(h / mg)), n + 1)
+    curlen = maxj - minj
+    # Make a vector long enough to hold maximum window needed.
+    lenA = min(2 * maxj + 2, n + 1)
+    # Because the recursion computes 1 - p directly, the entries of A are
+    # probabilities and stay within [0, 1], so float64 suffices and no
+    # rescaling by large binomial coefficients is needed.
+    dtype = np.float64
+    A = np.ones(lenA, dtype=dtype)
+    # Initialize the first column
+    A[minj:maxj] = 0.0
+    for i in range(1, m + 1):
+        # Generate the next column.
+        # First calculate the sliding window
+        lastminj, lastlen = minj, curlen
+        minj = max(int(np.floor((ng * i - h) / mg)) + 1, 0)
+        minj = min(minj, n)
+        maxj = min(int(np.ceil((ng * i + h) / mg)), n + 1)
+        if maxj <= minj:
+            return 1.0
+        # Now fill in the values. We cannot use cumsum, unfortunately.
+        val = 0.0 if minj == 0 else 1.0
+        for jj in range(maxj - minj):
+            j = jj + minj
+            val = (A[jj + minj - lastminj] * i + val * j) / (i + j)
+            A[jj] = val
+        curlen = maxj - minj
+        if lastlen > curlen:
+            # Set some carried-over elements to 1
+            A[maxj - minj : maxj - minj + (lastlen - curlen)] = 1
+
+    return A[maxj - minj - 1]
+
+
+def _compute_prob_outside_square(n: int, h: int) -> Any:
+    """
+    Compute the proportion of paths that pass outside the two diagonal lines.
+
+    Parameters
+    ----------
+    n : integer
+        n > 0
+    h : integer
+        0 <= h <= n
+
+    Returns
+    -------
+    p : float
+        The proportion of paths that pass outside the lines x-y = +/-h.
+
+    """
+    # Compute Pr(D_{n,n} >= h/n)
+    # Prob = 2 * ( binom(2n, n-h) - binom(2n, n-2h) + binom(2n, n-3h) - ... )
+    # / binom(2n, n)
+    # This formulation exhibits subtractive cancellation.
+    # Instead divide each term by binom(2n, n), then factor common terms
+    # and use a Horner-like algorithm
+    # P = 2 * A0 * (1 - A1*(1 - A2*(1 - A3*(1 - A4*(...)))))
+
+    P = 0.0
+    k = int(np.floor(n / h))
+    while k >= 0:
+        p1 = 1.0
+        # Each of the Ai terms has numerator and denominator with
+        # h simple terms.
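+        # Concretely, each ratio term is
+        #   A_k = prod_{j=0}^{h-1} (n - k*h - j) / (n + k*h + j + 1),
+        # which the loop below accumulates one factor at a time, so no
+        # large binomial coefficient is ever formed explicitly.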
+        for j in range(h):
+            p1 = (n - k * h - j) * p1 / (n + k * h + j + 1)
+        P = p1 * (1.0 - P)
+        k -= 1
+    return 2 * P
+
+
+def _count_paths_outside_method(m: int, n: int, g: int, h: int) -> Any:
+    """Count the number of paths that pass outside the specified diagonal.
+
+    Parameters
+    ----------
+    m : integer
+        m > 0
+    n : integer
+        n > 0
+    g : integer
+        g is greatest common divisor of m and n
+    h : integer
+        0 <= h <= lcm(m,n)
+
+    Returns
+    -------
+    p : float
+        The number of paths that go low.
+        The calculation may overflow - check for a finite answer.
+
+    Count the integer lattice paths from (0, 0) to (m, n), which at some
+    point (x, y) along the path, satisfy:
+      m*y <= n*x - h*g
+    The paths make steps of size +1 in either positive x or positive y
+    directions.
+
+    We generally follow Hodges' treatment of Drion/Gnedenko/Korolyuk.
+    Hodges, J.L. Jr.,
+    "The Significance Probability of the Smirnov Two-Sample Test,"
+    Arkiv för Matematik, 3, No. 43 (1958), 469-86.
+
+    """
+    # Compute #paths which stay lower than x/m-y/n = h/lcm(m,n)
+    # B(x, y) = #{paths from (0,0) to (x,y) without
+    #            previously crossing the boundary}
+    #         = binom(x, y) - #{paths which already reached the boundary}
+    # Multiply by the number of path extensions going from (x, y) to (m, n)
+    # Sum.
+
+    # Probability is symmetrical in m, n. Computation below assumes m >= n.
+    if m < n:
+        m, n = n, m
+    mg = m // g
+    ng = n // g
+
+    # Not every x needs to be considered.
+    # xj holds the list of x values to be checked.
+    # Wherever n*x/m + ng*h crosses an integer
+    lxj = n + (mg - h) // mg
+    xj = [(h + mg * j + ng - 1) // ng for j in range(lxj)]
+    # B is an array just holding a few values of B(x,y), the ones needed.
+    # B[j] == B(x_j, j)
+    if lxj == 0:
+        return special.binom(m + n, n)
+    B = np.zeros(lxj)
+    B[0] = 1
+    # Compute the B(x, y) terms
+    for j in range(1, lxj):
+        Bj = special.binom(xj[j] + j, j)
+        for i in range(j):
+            bin = special.binom(xj[j] - xj[i] + j - i, j - i)
+            Bj -= bin * B[i]
+        B[j] = Bj
+    # Compute the number of path extensions...
+    num_paths = 0
+    for j in range(lxj):
+        bin = special.binom((m - xj[j]) + (n - j), n - j)
+        term = B[j] * bin
+        num_paths += term
+    return num_paths
+
+
+def _attempt_exact_2kssamp(n1: int, n2: int, g: int, d: float, alternative: str) -> Any:
+    """Attempts to compute the exact two-sample probability.
+
+    Parameters
+    ----------
+    n1 : integer
+        sample size of sample 1
+    n2 : integer
+        sample size of sample 2
+    g : integer
+        greatest common divisor of n1 and n2
+    d : float
+        max difference in ECDFs
+    alternative : string
+        alternative hypothesis, either 'two-sided', 'less' or 'greater'
+
+    Returns
+    -------
+    success : bool
+        True if the calculation was successful
+    d : float
+        max difference in ECDFs
+    prob : float
+        The probability of the test statistic under the null hypothesis.
+    """
+    lcm = (n1 // g) * n2
+    h = int(np.round(d * lcm))
+    d = h * 1.0 / lcm
+    if h == 0:
+        return True, d, 1.0
+    saw_fp_error, prob = False, np.nan
+    try:
+        with np.errstate(invalid="raise", over="raise"):
+            if alternative == "two-sided":
+                if n1 == n2:
+                    prob = _compute_prob_outside_square(n1, h)
+                else:
+                    prob = _compute_outer_prob_inside_method(n1, n2, g, h)
+            else:
+                if n1 == n2:
+                    # prob = binom(2n, n-h) / binom(2n, n)
+                    # Evaluating in that form incurs roundoff errors
+                    # from special.binom.
Instead calculate directly + jrange = np.arange(h) + prob = float(np.prod((n1 - jrange) / (n1 + jrange + 1.0))) + else: + with np.errstate(over="raise"): + num_paths = _count_paths_outside_method(n1, n2, g, h) + bin = special.binom(n1 + n2, n1) + if num_paths > bin or np.isinf(bin): + saw_fp_error = True + else: + prob = num_paths / bin + + except (FloatingPointError, OverflowError): + saw_fp_error = True + + if saw_fp_error: + return False, d, np.nan + if not 0 <= prob <= 1: + return False, d, prob + return True, d, prob + + +def chisquare_1samp( + self: hist.BaseHist, + distribution: str | Callable[..., Any], + flow: bool = False, + args: Any = (), + kwds: Any = None, +) -> Any: + kwds = {} if kwds is None else kwds + + if self.ndim != 1: + raise NotImplementedError( + f"Only 1D-histogram supports chisquare_1samp, try projecting {self.__class__.__name__} to 1D" + ) + cdf = _get_cdf_if_valid(distribution) + + variances = self.variances(flow=flow) + if variances is None: + raise RuntimeError( + "Cannot compute from a variance-less histogram, try a Weight storage" + ) + + observed = self.values(flow=flow) + totalentries = self.values(flow=True).sum(dtype=int) + cdfvals = cdf(self.axes[0].edges, *args, **kwds) + if flow: + cdfvals = np.concatenate([[0], cdfvals, [1]]) + expected = np.diff(cdfvals) * totalentries + # TODO: check if variances or expected should go in the denominator + # variances fails if bins have low statistics + where = variances != 0 + squares = (expected - observed) ** 2 + ndof = np.sum(where) - 1 + chisq = np.sum(squares[where] / variances[where]) + pvalue = spstats.chi2.sf(chisq, ndof) + + return chisq, ndof, pvalue + + +def chisquare_2samp( + self: hist.BaseHist, other: hist.BaseHist, flow: bool = False +) -> Any: + if self.ndim != 1: + raise NotImplementedError( + f"Only 1D-histogram supports chisquare_2samp, try projecting {self.__class__.__name__} to 1D" + ) + if isinstance(other, hist.hist.Hist) and other.ndim != 1: + raise NotImplementedError( + f"Only 1D-histogram supports chisquare_2samp, try projecting other={other.__class__.__name__} to 1D" + ) + if not isinstance(other, hist.hist.Hist): + raise TypeError( + f"Unknown type {other.__class__.__name__}, other must be a hist.Hist object" + ) + # TODO: add support for compatible rebinning + if self.size != other.size: + raise NotImplementedError( + "Cannot compute chi2 from histograms with different binning, try rebinning" + ) + if not np.allclose(self.axes[0].edges, other.axes[0].edges): + raise NotImplementedError( + "Cannot compute chi2 from histograms with different binning, try rebinning" + ) + + variances1 = self.variances(flow=flow) + variances2 = other.variances(flow=flow) + if variances1 is None or variances2 is None: + raise RuntimeError( + "Cannot compute from variance-less histograms, try a Weight storage" + ) + + counts1 = self.values(flow=flow) + counts2 = other.values(flow=flow) + totalentries1 = self.values(flow=True).sum(dtype=int) + totalentries2 = other.values(flow=True).sum(dtype=int) + squares = ( + counts1 * np.sqrt(totalentries2 / totalentries1) + - counts2 * np.sqrt(totalentries1 / totalentries2) + ) ** 2 + variances = variances1 + variances2 + where = variances != 0 + chisq = np.sum(squares[where] / variances[where]) + k = np.sum(where) + # TODO: check if ndof = k if totalentries1 == totalentries2 else k - 1 is correct + ndof = k - 1 + pvalue = spstats.chi2.sf(chisq, ndof) + + return chisq, ndof, pvalue + + +def ks_1samp( + self: hist.BaseHist, + distribution: str | Callable[..., Any], + 
args: Any = (),
+    kwds: Any = None,
+    alternative: str = "two-sided",
+    mode: str = "auto",
+) -> Any:
+    kwds = {} if kwds is None else kwds
+
+    if self.ndim != 1:
+        raise NotImplementedError(
+            f"Only 1D-histogram supports ks_1samp, try projecting {self.__class__.__name__} to 1D"
+        )
+
+    if mode not in ["auto", "exact", "approx", "asymp"]:
+        raise ValueError(f"Invalid value for mode: {mode}")
+    alternative = {"t": "two-sided", "g": "greater", "l": "less"}.get(
+        alternative.lower()[0], alternative
+    )
+    if alternative not in ["two-sided", "less", "greater"]:
+        raise ValueError(f"Invalid value for alternative: {alternative}")
+
+    cdf = _get_cdf_if_valid(distribution)
+
+    cdflocs = self.axes[0].centers
+    cdfvals = cdf(cdflocs, *args, **kwds)
+    observed = self.values(flow=True)
+    totalentries = observed.sum(dtype=int)
+    ecdf = np.cumsum(observed) / totalentries
+    # ECDF just above and just below each bin center (all entries of a bin
+    # are treated as sitting at its center).
+    ecdfplus = ecdf[1:-1]
+    ecdfminus = ecdf[0:-2]
+    dplus_index = (ecdfplus - cdfvals).argmax()
+    dminus_index = (cdfvals - ecdfminus).argmax()
+    Dplus = (ecdfplus - cdfvals)[dplus_index]
+    Dminus = (cdfvals - ecdfminus)[dminus_index]
+    dplus_location = cdflocs[dplus_index]
+    dminus_location = cdflocs[dminus_index]
+
+    if Dplus > Dminus:
+        D = Dplus
+        d_location = dplus_location
+        d_sign = 1
+    else:
+        D = Dminus
+        d_location = dminus_location
+        d_sign = -1
+
+    if alternative == "greater":
+        pvalue = spstats.ksone.sf(Dplus, totalentries)
+        return Dplus, dplus_location, 1, pvalue
+    if alternative == "less":
+        pvalue = spstats.ksone.sf(Dminus, totalentries)
+        return Dminus, dminus_location, -1, pvalue
+
+    if mode == "auto":  # Always select exact
+        mode = "exact"
+    if mode == "exact":
+        pvalue = spstats.kstwo.sf(D, totalentries)
+    elif mode == "asymp":
+        pvalue = spstats.kstwobign.sf(D * np.sqrt(totalentries))
+    elif mode == "approx":
+        pvalue = 2 * spstats.ksone.sf(D, totalentries)
+
+    pvalue = np.clip(pvalue, 0, 1)
+
+    return D, Dplus, Dminus, dplus_location, dminus_location, d_location, d_sign, pvalue
+
+
+def ks_2samp(
+    self: hist.BaseHist,
+    other: hist.BaseHist,
+    equal_bins: bool = True,
+    flow: bool = False,
+    alternative: str = "two-sided",
+    mode: str = "auto",
+) -> Any:
+    if self.ndim != 1:
+        raise NotImplementedError(
+            f"Only 1D-histogram supports ks_2samp, try projecting {self.__class__.__name__} to 1D"
+        )
+    if isinstance(other, hist.hist.Hist) and other.ndim != 1:
+        raise NotImplementedError(
+            f"Only 1D-histogram supports ks_2samp, try projecting other={other.__class__.__name__} to 1D"
+        )
+    if not isinstance(other, hist.hist.Hist):
+        raise TypeError(
+            f"Unknown type {other.__class__.__name__}, other must be a hist.Hist object"
+        )
+
+    if equal_bins:
+        if self.size != other.size:
+            raise ValueError(
+                "Cannot compute KS from histograms with different binning, use equal_bins=False"
+            )
+        if not np.allclose(self.axes[0].edges, other.axes[0].edges):
+            raise ValueError(
+                "Cannot compute KS from histograms with different binning, use equal_bins=False"
+            )
+
+    if mode not in ["auto", "exact", "asymp"]:
+        raise ValueError(f"Invalid value for mode: {mode}")
+    alternative = {"t": "two-sided", "g": "greater", "l": "less"}.get(
+        alternative.lower()[0], alternative
+    )
+    if alternative not in ["two-sided", "less", "greater"]:
+        raise ValueError(f"Invalid value for alternative: {alternative}")
+
+    data1 = self.values(flow=True)
+    data2 = other.values(flow=True)
+    n1 = data1.sum(dtype=int)
+    n2 = data2.sum(dtype=int)
+    # NB: despite the names, these hold the bin centers; as in ks_1samp,
+    # the ECDFs are evaluated at the centers.
+    edges1 = self.axes[0].centers
+    edges2 = other.axes[0].centers
+    cdf1 = np.cumsum(data1) / n1
+    cdf2 = np.cumsum(data2) / n2
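+    # The cumulative sums above run over the under/overflow bins as well,
+    # so both ECDFs are normalized to the total number of entries,
+    # including flow.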
+    if flow:
+        edges1 = np.concatenate([[-np.inf], edges1, [np.inf]])
+        edges2 = np.concatenate([[-np.inf], edges2, [np.inf]])
+
+    if equal_bins:
+        cdflocs = edges1
+
+        if not flow:
+            cdf1 = cdf1[1:-1]
+            cdf2 = cdf2[1:-1]
+
+        cddiffs = cdf1 - cdf2
+
+    elif not equal_bins:
+        edges_all = np.sort(np.concatenate([edges1, edges2]))
+        cdflocs = edges_all
+
+        if flow:
+            cdf1 = np.concatenate([[0], cdf1])
+            cdf2 = np.concatenate([[0], cdf2])
+
+        # Use np.searchsorted to get the CDF values at all evaluation points
+        cdf1_all = cdf1[np.searchsorted(edges1, edges_all, side="right")]
+        cdf2_all = cdf2[np.searchsorted(edges2, edges_all, side="right")]
+        cddiffs = cdf1_all - cdf2_all
+
+    # Identify the location of the statistic
+    argminS = np.argmin(cddiffs)
+    argmaxS = np.argmax(cddiffs)
+    loc_minS = cdflocs[argminS]
+    loc_maxS = cdflocs[argmaxS]
+
+    # Ensure sign of minS is not negative.
+    minS = np.clip(-cddiffs[argminS], 0, 1)
+    maxS = cddiffs[argmaxS]
+
+    if alternative == "less" or (alternative == "two-sided" and minS > maxS):
+        d = minS
+        d_location = loc_minS
+        d_sign = -1
+    else:
+        d = maxS
+        d_location = loc_maxS
+        d_sign = 1
+
+    g = np.gcd(int(n1), int(n2))
+    n1g = n1 // g
+    n2g = n2 // g
+    pvalue = -np.inf
+
+    if mode == "auto":  # Always select exact
+        mode = "exact"
+    if mode == "exact" and n1g >= np.iinfo(np.int32).max / n2g:
+        # If lcm(n1, n2) is too big, switch from exact to asymp
+        mode = "asymp"
+        warnings.warn(
+            f"Exact ks_2samp calculation not possible with sample sizes "
+            f"{n1} and {n2}. Switching to 'asymp'.",
+            RuntimeWarning,
+            stacklevel=3,
+        )
+
+    if mode == "exact":
+        success, d, pvalue = _attempt_exact_2kssamp(n1, n2, g, d, alternative)
+        if not success:
+            mode = "asymp"
+            warnings.warn(
+                f"ks_2samp: Exact calculation unsuccessful. "
+                f"Switching to method={mode}.",
+                RuntimeWarning,
+                stacklevel=3,
+            )
+
+    if mode == "asymp":
+        # The product n1*n2 is large. Use Smirnov's asymptotic formula.
+        # Ensure float to avoid overflow in multiplication
+        # sorted because the one-sided formula is not symmetric in n1, n2
+        m, n = sorted([float(n1), float(n2)], reverse=True)
+        en = m * n / (m + n)
+        if alternative == "two-sided":
+            pvalue = spstats.kstwo.sf(d, np.round(en))
+        else:
+            z = np.sqrt(en) * d
+            # Use Hodges' suggested approximation Eqn 5.3
+            # Requires m to be the larger of (n1, n2)
+            expt = -2 * z**2 - 2 * z * (m + 2 * n) / np.sqrt(m * n * (m + n)) / 3.0
+            pvalue = np.exp(expt)
+
+    pvalue = np.clip(pvalue, 0, 1)
+    D = d
+    Dplus = maxS
+    Dminus = minS
+    dplus_location = loc_maxS
+    dminus_location = loc_minS
+
+    return D, Dplus, Dminus, dplus_location, dminus_location, d_location, d_sign, pvalue
diff --git a/tests/test_plot.py b/tests/test_plot.py
index cededea7..d0259ff3 100644
--- a/tests/test_plot.py
+++ b/tests/test_plot.py
@@ -7,6 +7,7 @@
 
 plt = pytest.importorskip("matplotlib.pyplot")
 pytest.importorskip("scipy")
+pytest.importorskip("mplhep")
 
 
 def test_general_plot1d():
diff --git a/tests/test_stacks.py b/tests/test_stacks.py
index e3ed7be7..d8607131 100644
--- a/tests/test_stacks.py
+++ b/tests/test_stacks.py
@@ -8,6 +8,8 @@
 
 from hist import Hist, NamedHist, Stack, axis
 
+pytest.importorskip("mplhep")
+
 np.random.seed(42)
 
 # different histograms here!
diff --git a/tests/test_stats.py b/tests/test_stats.py new file mode 100644 index 00000000..07845300 --- /dev/null +++ b/tests/test_stats.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import functools + +import numpy as np +import pytest +from numba_stats import norm + +import hist +from hist import Hist + +cdf = functools.partial(norm.cdf, loc=0, scale=1) + +pytest.importorskip("scipy") + + +@pytest.mark.parametrize("arg", ["norm", cdf]) +def test_chisquare_1samp(arg): + np.random.seed(42) + + h = Hist(hist.axis.Regular(20, -5, 5)) + h.fill(np.random.normal(size=1000)) + + chisq, ndof, pvalue = h.chisquare_1samp(arg) + assert chisq == pytest.approx(9.47425856230132) + assert ndof == 14 + assert pvalue == pytest.approx(0.7995235818496339) + + +def test_chisquare_1samp_2d(): + np.random.seed(42) + + h = Hist(hist.axis.Regular(20, -5, 5), hist.axis.Regular(20, -5, 5)) + h.fill(np.random.normal(size=1000), np.random.normal(size=1000)) + with pytest.raises(NotImplementedError): + h.chisquare_1samp("norm") + + +def test_chisquare_1samp_weight(): + np.random.seed(42) + h = Hist(hist.axis.Regular(20, -5, 5), storage=hist.storage.Weight()) + h.fill(np.random.normal(size=1000), weight=np.random.randint(0, 10, size=1000)) + h.chisquare_1samp("norm") + + +def test_chisquare_1samp_weight_raises(): + np.random.seed(42) + h = Hist(hist.axis.Regular(20, -5, 5)) + h.fill(np.random.normal(size=1000), weight=np.random.randint(0, 10, size=1000)) + with pytest.raises(RuntimeError): + h.chisquare_1samp("norm") + + +def test_chisquare_1samp_invalid_args(): + h = Hist(hist.axis.Regular(20, -5, 5)) + h.fill(np.random.normal(size=1000)) + with pytest.raises(TypeError): + h.chisquare_1samp("not_a_distribution") + + with pytest.raises(TypeError): + h.chisquare_1samp(1) + + +def test_chisquare_2samp(): + np.random.seed(42) + + h1 = Hist(hist.axis.Regular(20, -5, 5, name="norm")) + h2 = Hist(hist.axis.Regular(20, -5, 5, name="norm")) + h1.fill(np.random.normal(size=1000)) + h2.fill(np.random.normal(size=500)) + + chisq, ndof, pvalue = h1.chisquare_2samp(h2) + assert chisq == pytest.approx(12.901853991544478) + assert ndof == 14 + assert pvalue == pytest.approx(0.5342659447391381) + + np.random.seed(42) + + h1 = Hist(hist.axis.Regular(20, -5, 5, name="norm")) + h2 = Hist(hist.axis.Regular(20, -5, 5, name="norm")) + h1.fill(np.random.normal(size=1000)) + h2.fill(np.random.normal(size=1000)) + + chisq, ndof, pvalue = h1.chisquare_2samp(h2) + assert chisq == pytest.approx(13.577308400334086) + assert ndof == 14 + assert pvalue == pytest.approx(0.4816525905214355) + + h = Hist(hist.axis.Regular(20, -5, 5), hist.axis.Regular(20, -5, 5)) + h.fill(np.random.normal(size=1000), np.random.normal(size=1000)) + with pytest.raises(NotImplementedError): + h.chisquare_2samp(h2) + + h = Hist(hist.axis.Regular(20, -5, 5)) + h.fill(np.random.normal(size=1000), weight=np.random.randint(0, 10, size=1000)) + with pytest.raises(RuntimeError): + h.chisquare_2samp(h2) + + with pytest.raises(TypeError): + h.chisquare_2samp(1) + + +def test_ks_1samp(): + pass + + +def test_ks_2samp(): + pass
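+
+
+# The two stubs above still await pinned reference values. Until then, the
+# smoke tests below are a minimal sketch: they assert only properties
+# guaranteed by the implementation (the shape of the returned tuple, the
+# sign convention, and the clipped p-value range), not specific statistics.
+def test_ks_1samp_smoke():
+    np.random.seed(42)
+
+    h = Hist(hist.axis.Regular(20, -5, 5))
+    h.fill(np.random.normal(size=1000))
+
+    D, Dplus, Dminus, _, _, _, d_sign, pvalue = h.ks_1samp("norm")
+    assert D == max(Dplus, Dminus)
+    assert d_sign in (-1, 1)
+    assert 0 <= pvalue <= 1
+
+
+def test_ks_2samp_smoke():
+    np.random.seed(42)
+
+    h1 = Hist(hist.axis.Regular(20, -5, 5))
+    h2 = Hist(hist.axis.Regular(20, -5, 5))
+    h1.fill(np.random.normal(size=1000))
+    h2.fill(np.random.normal(size=500))
+
+    # The exact-mode computation may round D onto the n1/n2 lattice, so
+    # compare approximately.
+    D, Dplus, Dminus, _, _, _, d_sign, pvalue = h1.ks_2samp(h2)
+    assert D == pytest.approx(max(Dplus, Dminus))
+    assert d_sign in (-1, 1)
+    assert 0 <= pvalue <= 1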