microsoft · jameslamb · Feb 5, 2025 · Jan 12, 2025 · Jan 14, 2025 · Jan 14, 2025
@@ -27,6 +27,7 @@
 import scipy.sparse
 
 from .compat import (
+    CFFI_INSTALLED,
     PANDAS_INSTALLED,
     PYARROW_INSTALLED,
     arrow_cffi,
@@ -1706,8 +1707,8 @@ def __pred_for_pyarrow_table(
         predict_type: int,
     ) -> Tuple[np.ndarray, int]:
         """Predict for a PyArrow table."""
-        if not PYARROW_INSTALLED:
-            raise LightGBMError("Cannot predict from Arrow without `pyarrow` installed.")
+        if not (PYARROW_INSTALLED and CFFI_INSTALLED):
+            raise LightGBMError("Cannot predict from Arrow without `pyarrow` and `cffi` installed.")
 
         # Check that the input is valid: we only handle numbers (for now)
         if not all(arrow_is_integer(t) or arrow_is_floating(t) or arrow_is_boolean(t) for t in table.schema.types):
@@ -2459,8 +2460,8 @@ def __init_from_pyarrow_table(
         ref_dataset: Optional[_DatasetHandle],
     ) -> "Dataset":
         """Initialize data from a PyArrow table."""
-        if not PYARROW_INSTALLED:
-            raise LightGBMError("Cannot init dataframe from Arrow without `pyarrow` installed.")
+        if not (PYARROW_INSTALLED and CFFI_INSTALLED):
+            raise LightGBMError("Cannot init dataframe from Arrow without `pyarrow` and `cffi` installed.")
-            raise LightGBMError("Cannot init dataframe from Arrow without `pyarrow` and `cffi` installed.")
+            raise LightGBMError("Cannot init Dataset from Arrow without `pyarrow` and `cffi` installed.")
-            raise LightGBMError("Cannot init dataframe from Arrow without `pyarrow` and `cffi` installed.")
+            raise LightGBMError("Cannot init Dataset from Arrow without `pyarrow` and `cffi` installed.")
 
         # Check that the input is valid: we only handle numbers (for now)
         if not all(arrow_is_integer(t) or arrow_is_floating(t) or arrow_is_boolean(t) for t in table.schema.types):

@@ -289,7 +289,6 @@ def __init__(self, *args: Any, **kwargs: Any):
     from pyarrow import ChunkedArray as pa_ChunkedArray
     from pyarrow import Table as pa_Table
     from pyarrow import chunked_array as pa_chunked_array
-    from pyarrow.cffi import ffi as arrow_cffi
     from pyarrow.types import is_boolean as arrow_is_boolean
     from pyarrow.types import is_floating as arrow_is_floating
     from pyarrow.types import is_integer as arrow_is_integer
@@ -316,17 +315,6 @@ class pa_Table:  # type: ignore
         def __init__(self, *args: Any, **kwargs: Any):
             pass
 
-    class arrow_cffi:  # type: ignore
-        """Dummy class for pyarrow.cffi.ffi."""
-
-        CData = None
-        addressof = None
-        cast = None
-        new = None
-
-        def __init__(self, *args: Any, **kwargs: Any):
-            pass
-
     class pa_compute:  # type: ignore
         """Dummy class for pyarrow.compute."""
 
@@ -338,6 +326,27 @@ class pa_compute:  # type: ignore
     arrow_is_integer = None
     arrow_is_floating = None
 
+
+"""cffi"""
+try:
+    from pyarrow.cffi import ffi as arrow_cffi
+
+    CFFI_INSTALLED = True
+except ImportError:
+    CFFI_INSTALLED = False
+
+    class arrow_cffi:  # type: ignore
+        """Dummy class for pyarrow.cffi.ffi."""
 chunks: arrow_cffi.CData 
 chunks: arrow_cffi.CData 
+
+        CData = None
+        addressof = None
+        cast = None
+        new = None
+
+    def __init__(self, *args: Any, **kwargs: Any):
+        pass
+
+
 """cpu_count()"""
 try:
     from joblib import cpu_count

@@ -1,8 +1,10 @@
 # coding: utf-8
 import filecmp
 import os
+import sys
 from pathlib import Path
 from typing import Any, Dict, Optional
+from unittest import mock
 
 import numpy as np
 import pytest
@@ -454,3 +456,26 @@ def test_arrow_feature_name_manual():
     )
     booster = lgb.train({"num_leaves": 7}, dataset, num_boost_round=5)
     assert booster.feature_name() == ["c", "d"]
+
+
+def test_training_and_predicting_from_pa_table_without_cffi_raises():
+    data = generate_dummy_arrow_table()
+    with mock.patch.dict(sys.modules, {"pyarrow.cffi": None}):
+        assert lgb.compat.PYARROW_INSTALLED is True
+        assert lgb.compat.CFFI_INSTALLED is False
+
+        with pytest.raises(
+            lgb.basic.LightGBMError, match="Cannot init dataframe from Arrow without `pyarrow` and `cffi` installed."
+        ):
+            dataset = lgb.Dataset(
+                data,
+                label=pa.array([0, 1, 0, 0, 1]),
+                params=dummy_dataset_params(),
+                feature_name=["c", "d"],
+                categorical_feature=["c"],
+            )
+
+        with pytest.raises(
+            lgb.basic.LightGBMError, match="Cannot predict from Arrow without `pyarrow` and `cffi` installed."
+        ):
+            _ = lgb.train({"num_leaves": 7}, dataset, num_boost_round=5)