From e35cf5cd61dec5f50ff4831e45e5d9165e1831f9 Mon Sep 17 00:00:00 2001
From: Kaushik Srinivasan
Date: Thu, 20 Feb 2025 19:45:27 -0500
Subject: [PATCH] add integration test for pyarrow reads

---
 tests/integration/test_reads.py | 44 +++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index ee5f8a2574..444091d944 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -56,6 +56,7 @@
     NestedField,
     StringType,
     TimestampType,
+    UnknownType,
 )
 from pyiceberg.utils.concurrent import ExecutorFactory
 
@@ -978,3 +979,46 @@ def test_scan_with_datetime(catalog: Catalog) -> None:
 
     df = table.scan(row_filter=LessThan("datetime", yesterday)).to_pandas()
     assert len(df) == 0
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
+def test_read_unknown_type(catalog: Catalog) -> None:
+    """Write a table with an all-null ``pa.null()`` column and read it back.
+
+    Creates the table from the Arrow schema, appends two rows, then checks
+    that the table schema reports ``UnknownType`` for the "unknown" column
+    and that a full scan returns ``[None, None]`` for it.
+    """
+    identifier = "default.test_table_read_unknown_type"
+    arrow_table = pa.Table.from_pydict(
+        {
+            "int": [1, 2],
+            "string": ["a", "b"],
+            "unknown": [None, None],
+        },
+        schema=pa.schema(
+            [
+                pa.field("int", pa.int32(), nullable=True),
+                pa.field("string", pa.string(), nullable=True),
+                pa.field("unknown", pa.null(), nullable=True),
+            ],
+        ),
+    )
+
+    # Drop any existing table with this identifier; a missing table is fine.
+    try:
+        catalog.drop_table(identifier)
+    except NoSuchTableError:
+        pass
+
+    tbl = catalog.create_table(
+        identifier,
+        schema=arrow_table.schema,
+    )
+
+    tbl.append(arrow_table)
+
+    assert tbl.schema().find_type("unknown") == UnknownType()
+    result_table = tbl.scan().to_arrow()
+    assert result_table["unknown"].to_pylist() == [None, None]