From 17b8d635fc386e55ad1c19e9b10611f2f9046cb5 Mon Sep 17 00:00:00 2001 From: Stanley Chan <149976039+StanChan03@users.noreply.github.com> Date: Tue, 14 Jan 2025 01:22:43 -0800 Subject: [PATCH] add missing operator_cache decorator (#80) missing decorator and doc updates --- docs/configurations.rst | 10 ++++++++-- lotus/sem_ops/sem_dedup.py | 2 ++ lotus/sem_ops/sem_index.py | 2 ++ lotus/sem_ops/sem_partition_by.py | 3 +++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/configurations.rst b/docs/configurations.rst index 16cec150..d0fd9bc4 100644 --- a/docs/configurations.rst +++ b/docs/configurations.rst @@ -21,8 +21,8 @@ Using the Settings module Configurable Parameters -------------------------- -1. enable_message_cache: - * Description: Enables or Disables cahcing mechanisms +1. enable_cache: + * Description: Enables or Disables caching mechanisms * Default: False * Parameters: - cache_type: Type of caching (SQLITE or In_MEMORY) @@ -31,6 +31,12 @@ Configurable Parameters * Note: It is recommended to enable caching .. 
code-block:: python + import pandas as pd + + import lotus + from lotus.models import LM + from lotus.cache import CacheFactory, CacheConfig, CacheType + cache_config = CacheConfig(cache_type=CacheType.SQLITE, max_size=1000) cache = CacheFactory.create_cache(cache_config) diff --git a/lotus/sem_ops/sem_dedup.py b/lotus/sem_ops/sem_dedup.py index 7c900073..3360714d 100644 --- a/lotus/sem_ops/sem_dedup.py +++ b/lotus/sem_ops/sem_dedup.py @@ -4,6 +4,7 @@ import pandas as pd import lotus +from lotus.cache import operator_cache @pd.api.extensions.register_dataframe_accessor("sem_dedup") @@ -19,6 +20,7 @@ def _validate(obj: Any) -> None: if not isinstance(obj, pd.DataFrame): raise AttributeError("Must be a DataFrame") + @operator_cache def __call__( self, col_name: str, diff --git a/lotus/sem_ops/sem_index.py b/lotus/sem_ops/sem_index.py index cb7d8cd4..ae8d7753 100644 --- a/lotus/sem_ops/sem_index.py +++ b/lotus/sem_ops/sem_index.py @@ -3,6 +3,7 @@ import pandas as pd import lotus +from lotus.cache import operator_cache @pd.api.extensions.register_dataframe_accessor("sem_index") @@ -19,6 +20,7 @@ def _validate(obj: Any) -> None: if not isinstance(obj, pd.DataFrame): raise AttributeError("Must be a DataFrame") + @operator_cache def __call__(self, col_name: str, index_dir: str) -> pd.DataFrame: """ Index a column in the DataFrame. diff --git a/lotus/sem_ops/sem_partition_by.py b/lotus/sem_ops/sem_partition_by.py index 298b5e5c..7886594c 100644 --- a/lotus/sem_ops/sem_partition_by.py +++ b/lotus/sem_ops/sem_partition_by.py @@ -2,6 +2,8 @@ import pandas as pd +from lotus.cache import operator_cache + @pd.api.extensions.register_dataframe_accessor("sem_partition_by") class SemPartitionByDataframe: @@ -16,6 +18,7 @@ def _validate(obj: Any) -> None: if not isinstance(obj, pd.DataFrame): raise AttributeError("Must be a DataFrame") + @operator_cache def __call__( self, partition_fn: Callable[[pd.DataFrame], list[int]],