Skip to content

Commit

Permalink
PyVortex array subclasses (#2194)
Browse files Browse the repository at this point in the history
Downcast a PyArray to an encoding-specific array when we return it.
  • Loading branch information
gatesn authored Feb 2, 2025
1 parent 947a464 commit ded97ef
Show file tree
Hide file tree
Showing 20 changed files with 371 additions and 194 deletions.
51 changes: 47 additions & 4 deletions docs/python/api/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ functions, serialized, and otherwise manipulated as a generic array.

There are two ways of "downcasting" an array for more specific access patterns:

1. Into an encoding-specific array, like `vortex.encoding.BitPackedArray`.vortex.
1. Into an encoding-specific array, like `vortex.BitPackedArray`.
2. Into a type-specific array, like `vortex.array.BoolTypeArray`.

Be careful to note that :class:`vortex.encoding.BoolArray` represents an array that stores physical data
Be careful to note that :class:`vortex.BoolArray` represents an array that stores physical data
as a bit-buffer of booleans, whereas `vortex.array.BoolTypeArray` represents any array that has a logical
type of boolean.

Expand All @@ -20,9 +20,52 @@ Factory Functions
.. autofunction:: vortex.array


Type Classes
------------
Base Class
----------

.. autoclass:: vortex.Array
:members:
:special-members: __len__


Builtin Encodings
-----------------

.. autoclass:: vortex.ChunkedArray
:members:


.. autoclass:: vortex.ConstantArray
:members:


.. autoclass:: vortex.NullArray
:members:


.. autoclass:: vortex.BoolArray
:members:


.. autoclass:: vortex.PrimitiveArray
:members:


.. autoclass:: vortex.VarBinArray
:members:


.. autoclass:: vortex.VarBinViewArray
:members:


.. autoclass:: vortex.StructArray
:members:


.. autoclass:: vortex.ListArray
:members:


.. autoclass:: vortex.ExtensionArray
:members:
4 changes: 4 additions & 0 deletions docs/python/api/compress.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Compression
===========

.. autofunction:: vortex.compress
14 changes: 0 additions & 14 deletions docs/python/api/encoding.rst

This file was deleted.

6 changes: 3 additions & 3 deletions docs/python/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ Python API

arrays
dtypes
encoding
scalars
expr
compress
io
dataset
expr
scalars
25 changes: 24 additions & 1 deletion pyvortex/python/vortex/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
from . import _lib
from ._lib.arrays import (
BoolArray,
ChunkedArray,
ConstantArray,
ExtensionArray,
ListArray,
NullArray,
PrimitiveArray,
StructArray,
VarBinArray,
VarBinViewArray,
)
from ._lib.compress import compress
from ._lib.dtype import (
BinaryDType,
Expand Down Expand Up @@ -37,9 +49,20 @@
assert _lib, "Ensure we eagerly import the Vortex native library"

__all__ = [
"Array",
"array",
"compress",
# Arrays and builtin encodings
"Array",
"ConstantArray",
"ChunkedArray",
"NullArray",
"BoolArray",
"PrimitiveArray",
"VarBinArray",
"VarBinViewArray",
"StructArray",
"ListArray",
"ExtensionArray",
# DTypes
"DType",
"NullDType",
Expand Down
35 changes: 34 additions & 1 deletion pyvortex/python/vortex/_lib/arrays.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any
from typing import Any, final

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -32,3 +32,36 @@ class Array:
def to_polars_dataframe(self) -> pl.DataFrame: ...
def to_polars_series(self) -> pl.Series: ...
def to_pylist(self) -> list: ...

@final
class NullArray(Array): ...

@final
class BoolArray(Array): ...

@final
class PrimitiveArray(Array): ...

@final
class VarBinArray(Array): ...

@final
class VarBinViewArray(Array): ...

@final
class StructArray(Array):
def field(self, name: str) -> Array: ...

@final
class ListArray(Array): ...

@final
class ExtensionArray(Array): ...

@final
class ConstantArray(Array):
def scalar(self) -> vx.Scalar: ...

@final
class ChunkedArray(Array):
def chunks(self) -> list[Array]: ...
8 changes: 1 addition & 7 deletions pyvortex/python/vortex/_lib/encoding.pyi
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
from typing import Any, final
from typing import Any

import pyarrow as pa

import vortex as vx

def _encode(obj: Any) -> pa.Array: ...
@final
class BoolArray(vx.Array):
def __new__(cls, array: vx.Array) -> BoolArray: ...
def true_count(self) -> int: ...
3 changes: 0 additions & 3 deletions pyvortex/python/vortex/encoding.py

This file was deleted.

24 changes: 24 additions & 0 deletions pyvortex/src/arrays/builtins/chunked.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use itertools::Itertools;
use pyo3::{pyclass, pymethods, Bound, PyRef, PyResult};
use vortex::array::ChunkedEncoding;

use crate::arrays::{ArraySubclass, AsArrayRef, PyArray};

/// Concrete Python class for arrays that use the `vortex.chunked` encoding.
#[pyclass(name = "ChunkedArray", module = "vortex", extends = PyArray, frozen)]
pub(crate) struct PyChunkedArray;

// Associates this Python subclass with the Rust-side chunked encoding.
// NOTE(review): presumably this is what lets `as_array_ref` hand back the
// chunked array type -- confirm against `crate::arrays::ArraySubclass`.
impl ArraySubclass for PyChunkedArray {
    type Encoding = ChunkedEncoding;
}

#[pymethods]
impl PyChunkedArray {
    /// Return the chunks of this array as a list of arrays.
    ///
    /// Each chunk is wrapped through `PyArray::init`; the first chunk that
    /// fails to wrap aborts the call and its error is returned.
    pub fn chunks(self_: PyRef<'_, Self>) -> PyResult<Vec<Bound<'_, PyArray>>> {
        let py = self_.py();
        let array = self_.as_array_ref();
        // `try_collect` (from `Itertools`) short-circuits on the first `Err`.
        array
            .chunks()
            .map(|child| PyArray::init(py, child))
            .try_collect()
    }
}
21 changes: 21 additions & 0 deletions pyvortex/src/arrays/builtins/constant.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use pyo3::{pyclass, pymethods, Bound, PyRef, PyResult};
use vortex::array::ConstantEncoding;

use crate::arrays::{ArraySubclass, AsArrayRef, PyArray};
use crate::scalar::PyScalar;

/// Concrete Python class for arrays that use the `vortex.constant` encoding.
#[pyclass(name = "ConstantArray", module = "vortex", extends = PyArray, frozen)]
pub(crate) struct PyConstantArray;

// Associates this Python subclass with the Rust-side constant encoding.
// NOTE(review): presumably this is what lets `as_array_ref` hand back the
// constant array type -- confirm against `crate::arrays::ArraySubclass`.
impl ArraySubclass for PyConstantArray {
    type Encoding = ConstantEncoding;
}

#[pymethods]
impl PyConstantArray {
    /// Return the scalar value held by this constant array.
    pub fn scalar(self_: PyRef<'_, Self>) -> PyResult<Bound<PyScalar>> {
        let py = self_.py();
        let array = self_.as_array_ref();
        PyScalar::init(py, array.scalar())
    }
}
38 changes: 38 additions & 0 deletions pyvortex/src/arrays/builtins/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
mod chunked;
mod constant;
mod struct_;

pub(crate) use chunked::*;
pub(crate) use constant::*;
use pyo3::prelude::*;
pub(crate) use struct_::*;

use crate::arrays::PyArray;

// Field-less type exposed to Python as `vortex.NullArray`; `extends=PyArray`
// makes it a subclass of the `PyArray` base class.
/// Concrete class for arrays with `vortex.null` encoding.
#[pyclass(name = "NullArray", module = "vortex", extends=PyArray, frozen)]
pub(crate) struct PyNullArray;

// Field-less type exposed to Python as `vortex.BoolArray`; `extends=PyArray`
// makes it a subclass of the `PyArray` base class.
/// Concrete class for arrays with `vortex.bool` encoding.
#[pyclass(name = "BoolArray", module = "vortex", extends=PyArray, frozen)]
pub(crate) struct PyBoolArray;

// Field-less type exposed to Python as `vortex.PrimitiveArray`;
// `extends=PyArray` makes it a subclass of the `PyArray` base class.
/// Concrete class for arrays with `vortex.primitive` encoding.
#[pyclass(name = "PrimitiveArray", module = "vortex", extends=PyArray, frozen)]
pub(crate) struct PyPrimitiveArray;

// Field-less type exposed to Python as `vortex.VarBinArray`;
// `extends=PyArray` makes it a subclass of the `PyArray` base class.
/// Concrete class for arrays with `vortex.varbin` encoding.
#[pyclass(name = "VarBinArray", module = "vortex", extends=PyArray, frozen)]
pub(crate) struct PyVarBinArray;

// Field-less type exposed to Python as `vortex.VarBinViewArray`;
// `extends=PyArray` makes it a subclass of the `PyArray` base class.
/// Concrete class for arrays with `vortex.varbinview` encoding.
#[pyclass(name = "VarBinViewArray", module = "vortex", extends=PyArray, frozen)]
pub(crate) struct PyVarBinViewArray;

// Field-less type exposed to Python as `vortex.ListArray`; `extends=PyArray`
// makes it a subclass of the `PyArray` base class.
/// Concrete class for arrays with `vortex.list` encoding.
#[pyclass(name = "ListArray", module = "vortex", extends=PyArray, frozen)]
pub(crate) struct PyListArray;

// Field-less type exposed to Python as `vortex.ExtensionArray`;
// `extends=PyArray` makes it a subclass of the `PyArray` base class.
/// Concrete class for arrays with `vortex.ext` encoding.
#[pyclass(name = "ExtensionArray", module = "vortex", extends=PyArray, frozen)]
pub(crate) struct PyExtensionArray;
26 changes: 26 additions & 0 deletions pyvortex/src/arrays/builtins/struct_.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use pyo3::exceptions::PyKeyError;
use pyo3::{pyclass, pymethods, Bound, PyRef, PyResult};
use vortex::array::StructEncoding;
use vortex::variants::StructArrayTrait;

use crate::arrays::{ArraySubclass, AsArrayRef, PyArray};

/// Concrete Python class for arrays that use the `vortex.struct` encoding.
#[pyclass(name = "StructArray", module = "vortex", extends = PyArray, frozen)]
pub(crate) struct PyStructArray;

// Associates this Python subclass with the Rust-side struct encoding.
// NOTE(review): presumably this is what lets `as_array_ref` hand back the
// struct array type -- confirm against `crate::arrays::ArraySubclass`.
impl ArraySubclass for PyStructArray {
    type Encoding = StructEncoding;
}

#[pymethods]
impl PyStructArray {
    /// Return the field of this struct array with the given name.
    ///
    /// Raises a `KeyError` if the struct array has no field called `name`.
    pub fn field<'py>(self_: PyRef<'py, Self>, name: &str) -> PyResult<Bound<'py, PyArray>> {
        match self_.as_array_ref().maybe_null_field_by_name(name) {
            Some(child) => PyArray::init(self_.py(), child),
            // An unknown field name surfaces in Python as a `KeyError`.
            None => Err(PyKeyError::new_err(format!(
                "Field name not found: {}",
                name
            ))),
        }
    }
}
Loading

0 comments on commit ded97ef

Please sign in to comment.