Skip to content

Commit

Permalink
FEAT-#4605: Add native query compiler (#7259)
Browse files Browse the repository at this point in the history
Co-authored-by: Igoshev, Iaroslav <[email protected]>
Signed-off-by: arunjose696 <[email protected]>
  • Loading branch information
arunjose696 and YarShev authored Aug 26, 2024
1 parent 8fc230a commit da01571
Show file tree
Hide file tree
Showing 13 changed files with 1,427 additions and 21 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -246,12 +246,16 @@ jobs:
unidist: ${{ steps.filter.outputs.unidist }}
engines: ${{ steps.engines.outputs.engines }}
experimental: ${{ steps.experimental.outputs.experimental }}
test-native-dataframe-mode: ${{ steps.filter.outputs.test-native-dataframe-mode }}
steps:
- uses: actions/checkout@v4
- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
test-native-dataframe-mode:
- 'modin/core/storage_formats/pandas/native_query_compiler.py'
- 'modin/core/storage_formats/base/query_compiler.py'
shared: &shared
- 'modin/core/execution/dispatching/**'
ray:
Expand Down Expand Up @@ -665,6 +669,37 @@ jobs:
python-version: ${{matrix.python-version}}
- run: python -m pytest modin/tests/experimental/spreadsheet/test_general.py

test-native-dataframe-mode:
  # Runs a subset of the pandas dataframe tests with
  # MODIN_NATIVE_DATAFRAME_MODE set to "Pandas".
  # NOTE(review): this job uploads coverage, but `merge-coverage-artifacts`
  # does not list it in `needs` -- confirm the artifact is still merged.
  needs: [lint-flake8, execution-filter]
  # Run only when the `test-native-dataframe-mode` path filter of the
  # execution-filter job reported a match.
  if: ${{ needs.execution-filter.outputs.test-native-dataframe-mode == 'true' }}
  runs-on: ubuntu-latest
  defaults:
    run:
      shell: bash -l {0}
  strategy:
    matrix:
      python-version: ["3.9"]
  env:
    MODIN_NATIVE_DATAFRAME_MODE: "Pandas"
  # The original name ended in an unbalanced ")"; the opening one is restored.
  name: test-native-dataframe-mode (python ${{matrix.python-version}})
  steps:
    - uses: actions/checkout@v4
    - uses: ./.github/actions/mamba-env
      with:
        environment-file: environment-dev.yml
        python-version: ${{matrix.python-version}}
    # One step per test module so each module is reported separately.
    - run: python -m pytest modin/tests/pandas/dataframe/test_binary.py
    - run: python -m pytest modin/tests/pandas/dataframe/test_default.py
    - run: python -m pytest modin/tests/pandas/dataframe/test_indexing.py
    - run: python -m pytest modin/tests/pandas/dataframe/test_iter.py
    - run: python -m pytest modin/tests/pandas/dataframe/test_join_sort.py
    - run: python -m pytest modin/tests/pandas/dataframe/test_map_metadata.py
    - run: python -m pytest modin/tests/pandas/dataframe/test_pickle.py
    - run: python -m pytest modin/tests/pandas/dataframe/test_reduce.py
    - run: python -m pytest modin/tests/pandas/dataframe/test_udf.py
    - run: python -m pytest modin/tests/pandas/dataframe/test_window.py
    - uses: ./.github/actions/upload-coverage

merge-coverage-artifacts:
needs: [test-internals, test-api-and-no-engine, test-defaults, test-all-unidist, test-all, test-experimental, test-sanity]
if: always() # we need to run it regardless of some job being skipped, like in PR
Expand Down
2 changes: 2 additions & 0 deletions modin/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
MinPartitionSize,
MinRowPartitionSize,
ModinNumpy,
NativeDataframeMode,
NPartitions,
PersistentPickle,
ProgressBar,
Expand Down Expand Up @@ -69,6 +70,7 @@
"CpuCount",
"GpuCount",
"Memory",
"NativeDataframeMode",
# Ray specific
"IsRayCluster",
"RayRedisAddress",
Expand Down
22 changes: 22 additions & 0 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,4 +943,26 @@ def _check_vars() -> None:
)


class NativeDataframeMode(EnvironmentVariable, type=str):
    """
    Select the query compiler Modin uses to process data.

    With ``Default``, ``PandasQueryCompiler`` is used and Modin executes
    dataframes in a distributed fashion. With a native-library value
    (e.g., ``Pandas``), ``NativeQueryCompiler`` is used instead: dataframes
    are handled without distribution, falling back to the functions of the
    native library (e.g., ``pandas``). This could be beneficial for relatively
    small dataframes, since it avoids the additional overhead of communication
    between processes.
    """

    varname = "MODIN_NATIVE_DATAFRAME_MODE"
    choices = ("Default", "Pandas")
    default = "Default"


_check_vars()
5 changes: 5 additions & 0 deletions modin/core/execution/dispatching/factories/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
import pandas
from pandas.util._decorators import doc

from modin.config import NativeDataframeMode
from modin.core.io import BaseIO
from modin.core.storage_formats.pandas.native_query_compiler import NativeQueryCompiler
from modin.utils import get_current_execution

_doc_abstract_factory_class = """
Expand Down Expand Up @@ -168,6 +170,9 @@ def prepare(cls):
method="io.from_pandas",
)
def _from_pandas(cls, df):
    # Any mode other than "Pandas": delegate to the factory's IO class.
    if NativeDataframeMode.get() != "Pandas":
        return cls.io_cls.from_pandas(df)
    # Native mode: wrap a copy of ``df`` in a NativeQueryCompiler.
    return NativeQueryCompiler(df.copy())

@classmethod
Expand Down
Loading

0 comments on commit da01571

Please sign in to comment.