Skip to content

Commit

Permalink
Added named_arrays.histogram() function which can compute the n-dim…
Browse files Browse the repository at this point in the history
…ensional histogram for scalars, vectors, and functions. (#100)
  • Loading branch information
byrdie authored Nov 18, 2024
1 parent d12489a commit 44a6ac1
Show file tree
Hide file tree
Showing 11 changed files with 604 additions and 4 deletions.
28 changes: 27 additions & 1 deletion named_arrays/_functions/function_named_array_functions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Callable, Literal
from typing import Callable, Literal, Sequence
import numpy as np
import matplotlib
import astropy.units as u
Expand Down Expand Up @@ -102,6 +102,32 @@ def unit_normalized(
)


@_implements(na.histogram)
def histogram(
a: na.AbstractFunctionArray,
bins: dict[str, int] | na.AbstractScalarArray,
axis: None | str | Sequence[str] = None,
min: None | na.AbstractScalarArray = None,
max: None | na.AbstractScalarArray = None,
density: bool = False,
weights: None = None,
) -> na.FunctionArray[na.AbstractScalarArray, na.ScalarArray]:
if weights is not None: # pragma: nocover
raise ValueError(
"`weights` must be `None` for `AbstractFunctionArray`"
f"inputs, got {type(weights)}."
)
return na.histogram(
a=a.inputs,
bins=bins,
axis=axis,
min=min,
max=max,
density=density,
weights=a.outputs,
)


@_implements(na.plt.pcolormesh)
def pcolormesh(
*XY: na.AbstractArray,
Expand Down
36 changes: 35 additions & 1 deletion named_arrays/_functions/tests/test_functions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Sequence, Callable
from typing import Sequence, Callable, Literal
import pytest
import numpy as np
import astropy.units as u
Expand Down Expand Up @@ -650,6 +650,40 @@ class TestInterp(
):
pass

@pytest.mark.parametrize(
argnames="bins",
argvalues=[
"dict",
],
)
class TestHistogram(
named_arrays.tests.test_core.AbstractTestAbstractArray.TestNamedArrayFunctions.TestHistogram,
):
def test_histogram(
self,
array: na.AbstractFunctionArray,
bins: Literal["dict"],
axis: None | str | Sequence[str],
min: None | na.AbstractScalarArray | na.AbstractVectorArray,
max: None | na.AbstractScalarArray | na.AbstractVectorArray,
weights: None | na.AbstractScalarArray,
):
if bins == "dict":
if isinstance(array.inputs, na.AbstractVectorArray):
bins = {f"axis_{c}": 11 for c in array.inputs.cartesian_nd.components}
else:
bins = dict(axis_x=11)
if isinstance(array.outputs, na.AbstractVectorArray):
return
super().test_histogram(
array=array,
bins=bins,
axis=axis,
min=min,
max=max,
weights=weights,
)

@pytest.mark.xfail
class TestPltPlotLikeFunctions(
named_arrays.tests.test_core.AbstractTestAbstractArray.TestNamedArrayFunctions.TestPltPlotLikeFunctions
Expand Down
6 changes: 6 additions & 0 deletions named_arrays/_matrices/tests/test_matrices.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ class TestNamedArrayFunctions(
named_arrays._vectors.tests.test_vectors.AbstractTestAbstractVectorArray.TestNamedArrayFunctions
):

@pytest.mark.skip
class TestHistogram(
named_arrays.tests.test_core.AbstractTestAbstractArray.TestNamedArrayFunctions.TestHistogram,
):
pass

@pytest.mark.skip
class TestPltPlotLikeFunctions(
named_arrays._vectors.tests.test_vectors.AbstractTestAbstractVectorArray.TestNamedArrayFunctions.TestPltPlotLikeFunctions
Expand Down
131 changes: 131 additions & 0 deletions named_arrays/_named_array_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
'concatenate',
'add_axes',
"interp",
"histogram",
"histogram2d",
"histogramdd",
'jacobian',
'despike',
]
Expand Down Expand Up @@ -718,6 +720,80 @@ def interp(
)


def histogram(
a: na.AbstractArray,
bins: dict[str, int] | na.AbstractArray,
axis: None | str | Sequence[str] = None,
min: None | float | na.AbstractArray = None,
max: None | float | na.AbstractArray = None,
density: bool = False,
weights: None | na.AbstractArray = None,
) -> na.FunctionArray[na.AbstractArray, na.ScalarArray]:
"""
A thin wrapper around :func:`numpy.histogram` which adds an `axis` argument.
Parameters
----------
a
The input data over which to compute the histogram.
bins
The bin specification of the histogram:
* If `bins` is a dictionary, the keys are interpreted as the axis names
and the values are the number of bins along each axis.
This dictionary must have only one key per coordinate.
* If `bins` is an array, it represents the bin edges.
axis
The logical axes along which to histogram the data points.
If :obj:`None` (the default), the histogram will be computed along
all the axes of `a`.
min
The lower boundary of the histogram.
If :obj:`None` (the default), the minimum of `a` is used.
max
The upper boundary of the histogram.
If :obj:`None` (the default), the maximum of `a` is used.
density
If :obj:`False` (the default), returns the number of samples in each bin.
If :obj:`True`, returns the probability density in each bin.
weights
An optional array weighting each sample.
Examples
--------
Construct a 2D histogram with constant bin width.
.. jupyter-execute::
import numpy as np
import matplotlib.pyplot as plt
import named_arrays as na
# Define the bin edges
bins = dict(x=6)
# Define random points to collect into a histogram
a = na.random.normal(0, 2, shape_random=dict(h=101))
# Compute the histogram
hist = na.histogram(a, bins=bins)
# Plot the resulting histogram
fig, ax = plt.subplots()
na.plt.stairs(hist.inputs, hist.outputs);
"""
return _named_array_function(
func=histogram,
a=a,
axis=axis,
bins=bins,
min=min,
max=max,
density=density,
weights=weights,
)


def histogram2d(
x: na.AbstractScalarArray,
y: na.AbstractScalarArray,
Expand Down Expand Up @@ -822,6 +898,61 @@ def histogram2d(
)


def histogramdd(
*sample: na.AbstractScalar,
bins: dict[str, int] | na.AbstractScalar| Sequence[na.AbstractScalar],
axis: None | str | Sequence[str] = None,
min: None | na.AbstractScalar | Sequence[na.AbstractScalar] = None,
max: None | na.AbstractScalar | Sequence[na.AbstractScalar] = None,
density: bool = False,
weights: None | na.AbstractScalar = None,
) -> tuple[na.AbstractScalar, tuple[na.AbstractScalar, ...]]:
"""
A thin wrapper around :func:`numpy.histogramdd` which adds an `axis` argument.
Parameters
----------
sample
The data to be histrogrammed.
Note the difference in signature compared to :func:`numpy.histogramdd`,
each component must be a separate argument,
instead of a single argument containing a sequence of arrays.
This is done so that multiple dispatch works better for this function.
bins
The bin specification of the histogram:
* If `bins` is a dictionary, the keys are interpreted as the axis names
and the values are the number of bins along each axis.
This dictionary must have the same number of elements as `sample`.
* If `bins` is an array or a sequence of arrays, it describes the
monotonically-increasing bin edges along each dimension
axis
The logical axes along which to histogram the data points.
If :obj:`None` (the default), the histogram will be computed along
all the axes of `sample`.
min
The lower boundary of the histogram along each dimension.
If :obj:`None` (the default), the minimum of each element of `sample` is used.
max
The upper boundary of the histogram along each dimension.
If :obj:`None` (the default), the maximum of each elemennt of `sample` is used.
density
If :obj:`False` (the default), returns the number of samples in each bin.
If :obj:`True`, returns the probability density in each bin.
weights
An optional array weighting each sample.
"""
return _named_array_function(
histogramdd,
*sample,
axis=axis,
bins=bins,
min=min,
max=max,
density=density,
weights=weights,
)


def jacobian(
function: Callable[[InputT], OutputT],
x: InputT,
Expand Down
129 changes: 129 additions & 0 deletions named_arrays/_scalars/scalar_named_array_functions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Callable, Sequence, Any, Literal
import collections
import dataclasses
import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -249,6 +250,37 @@ def interp(
return result


@_implements(na.histogram)
def histogram(
a: na.AbstractScalarArray,
bins: dict[str, int] | na.AbstractScalarArray,
axis: None | str | Sequence[str] = None,
min: None | na.AbstractScalarArray = None,
max: None | na.AbstractScalarArray = None,
density: bool = False,
weights: None | na.AbstractScalarArray = None,
) -> na.FunctionArray[na.AbstractScalarArray, na.ScalarArray]:

if isinstance(bins, na.AbstractArray):
bins = (bins, )

hist, edges = na.histogramdd(
a,
bins=bins,
axis=axis,
min=min,
max=max,
density=density,
weights=weights,
)
edges = edges[0]

return na.FunctionArray(
inputs=edges,
outputs=hist,
)


@_implements(na.histogram2d)
def histogram2d(
x: na.AbstractScalarArray,
Expand Down Expand Up @@ -389,6 +421,103 @@ def histogram2d(
)


@_implements(na.histogramdd)
def histogramdd(
*sample: na.AbstractScalarArray,
bins: dict[str, int] | na.AbstractScalarArray | Sequence[na.AbstractScalarArray],
axis: None | str | Sequence[str] = None,
min: None | na.AbstractScalarArray | Sequence[na.AbstractScalarArray] = None,
max: None | na.AbstractScalarArray | Sequence[na.AbstractScalarArray] = None,
density: bool = False,
weights: None | na.AbstractScalarArray = None,
) -> tuple[na.AbstractScalarArray, tuple[na.AbstractScalarArray, ...]]:

try:
sample = [scalars._normalize(s) for s in sample]
weights = scalars._normalize(weights) if weights is not None else weights
except scalars.ScalarTypeError: # pragma: nocover
return NotImplemented

shape = na.shape_broadcasted(*sample, weights)

sample = [s.broadcast_to(shape) for s in sample]
weights = na.broadcast_to(weights, shape) if weights is not None else weights

if axis is None:
axis = tuple(shape)
elif isinstance(axis, str):
axis = (axis,)

if isinstance(bins, dict):

if min is None:
min = [s.min(axis) for s in sample]
elif not isinstance(min, collections.abc.Sequence):
min = [min] * len(sample)

if max is None:
max = [s.max(axis) for s in sample]
elif not isinstance(max, collections.abc.Sequence):
max = [max] * len(sample)

try:
max = [scalars._normalize(m) for m in max]
min = [scalars._normalize(m) for m in min]
except scalars.ScalarTypeError: # pragma: nocover
return NotImplemented

if len(bins) != len(sample): # pragma: nocover
raise ValueError(
f"if {bins=} is a dictionary, it must have the same number of"
f"elements as {len(sample)=}."
)

bins = [
na.linspace(start, stop, axis=ax, num=bins[ax])
for start, stop, ax in zip(min, max, bins)
]

try:
bins = [scalars._normalize(b) for b in bins]
except scalars.ScalarTypeError: # pragma: nocover
return NotImplemented

shape_orthogonal = {a: shape[a] for a in shape if a not in axis}

bins_broadcasted = [
b.broadcast_to(na.broadcast_shapes(shape_orthogonal, b.shape))
for b in bins
]

shape_bins = na.shape_broadcasted(*bins)
shape_hist = {
ax: shape_bins[ax] - 1
for ax in shape_bins
if ax not in shape_orthogonal
}
shape_hist = na.broadcast_shapes(shape_orthogonal, shape_hist)

hist = na.ScalarArray.empty(shape_hist)

unit_weights = na.unit(weights)
hist = hist if unit_weights is None else hist << unit_weights

for i in na.ndindex(shape_orthogonal):
hist_i, _ = np.histogramdd(
sample=[s[i].ndarray_aligned(axis).reshape(-1) for s in sample],
bins=[b[i].ndarray for b in bins_broadcasted],
density=density,
weights=weights[i].ndarray_aligned(axis).reshape(-1) if weights is not None else weights,
)

hist[i] = na.ScalarArray(
ndarray=hist_i,
axes=sum((b[i].axes for b in bins_broadcasted), ()),
)

return hist, tuple(bins)


def random(
func: Callable,
*args: float | u.Quantity | na.AbstractScalarArray,
Expand Down
Loading

0 comments on commit 44a6ac1

Please sign in to comment.