Skip to content

Commit

Permalink
add integration test for pyarrow reads
Browse files Browse the repository at this point in the history
  • Loading branch information
kaushiksrini committed Feb 21, 2025
1 parent 1515e44 commit e35cf5c
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions tests/integration/test_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
NestedField,
StringType,
TimestampType,
UnknownType,
)
from pyiceberg.utils.concurrent import ExecutorFactory

Expand Down Expand Up @@ -978,3 +979,39 @@ def test_scan_with_datetime(catalog: Catalog) -> None:

df = table.scan(row_filter=LessThan("datetime", yesterday)).to_pandas()
assert len(df) == 0


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
def test_read_unknown_type(catalog: Catalog) -> None:
    """Write an Arrow table with a ``pa.null()`` column and read it back.

    The test creates a fresh table from the Arrow schema, appends two rows,
    and then checks that:

    * the ``unknown`` column is exposed as ``UnknownType`` in the table schema;
    * scanning the table back to Arrow yields ``None`` for every row of that
      column.
    """
    table_name = "default.test_table_read_unknown_type"

    # Declare the schema up front so the null-typed column is explicit rather
    # than inferred from the all-None values.
    source_schema = pa.schema(
        [
            pa.field("int", pa.int32(), nullable=True),
            pa.field("string", pa.string(), nullable=True),
            pa.field("unknown", pa.null(), nullable=True),
        ],
    )
    source_data = pa.Table.from_pydict(
        {
            "int": [1, 2],
            "string": ["a", "b"],
            "unknown": [None, None],
        },
        schema=source_schema,
    )

    # Best-effort cleanup of a table left behind by an earlier run; a missing
    # table is the expected case and is not an error.
    try:
        catalog.drop_table(table_name)
    except NoSuchTableError:
        pass

    created = catalog.create_table(table_name, schema=source_data.schema)
    created.append(source_data)

    # Schema-level check first, then the data actually read back.
    assert created.schema().find_type("unknown") == UnknownType()
    scanned = created.scan().to_arrow()
    assert scanned["unknown"].to_pylist() == [None, None]

0 comments on commit e35cf5c

Please sign in to comment.