Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added caching support to LM.py #27

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions .github/tests/lm_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

# Set logger level to DEBUG
lotus.logger.setLevel("DEBUG")
Expand All @@ -11,8 +11,8 @@
@pytest.fixture
def setup_models():
# Setup GPT models
gpt_4o_mini = OpenAIModel(model="gpt-4o-mini")
gpt_4o = OpenAIModel(model="gpt-4o")
gpt_4o_mini = LM(model="gpt-4o-mini")
gpt_4o = LM(model="gpt-4o")
return gpt_4o_mini, gpt_4o


Expand All @@ -30,6 +30,34 @@ def test_filter_operation(setup_models):
assert filtered_df.equals(expected_df)


def test_filter_caching(setup_models):
    """Check that repeating an identical ``sem_filter`` call is served from cache.

    The test runs the same filter twice on the same dataframe and compares the
    model's ``api_calls`` counter around each run: the first run is expected to
    reach the API, the second is expected to add no new API calls and to
    return the same rows.

    NOTE(review): this assumes the cache holds no entry for these prompts when
    the test starts. If the cache persists between runs or is shared with
    other tests, the first-call assertion may need a cache reset beforehand --
    confirm against the LM cache implementation.
    """
    gpt_4o_mini, _ = setup_models
    lotus.settings.configure(lm=gpt_4o_mini)

    # Test filter operation on a dataframe
    data = {"Text": ["I am really excited to go to class today!", "I am very sad"]}
    df = pd.DataFrame(data)
    user_instruction = "{Text} is a positive sentiment"

    # First call - should make API calls
    calls_before_first = gpt_4o_mini.api_calls
    filtered_df1 = df.sem_filter(user_instruction)
    calls_after_first = gpt_4o_mini.api_calls
    first_call_api_count = calls_after_first - calls_before_first

    # Second call - should use cache
    filtered_df2 = df.sem_filter(user_instruction)
    second_call_api_count = gpt_4o_mini.api_calls - calls_after_first

    # Verify results are the same
    assert filtered_df1.equals(filtered_df2)

    # Verify first call made API calls. The previous `== 0` comparison
    # contradicted its own message and could only pass when no API call
    # was made on the first run.
    assert first_call_api_count > 0, "First call should make API calls"

    # Verify second call used cache (no new API calls)
    assert second_call_api_count == 0, "Second call should use cache (no new API calls)"


def test_filter_cascade(setup_models):
gpt_4o_mini, gpt_4o = setup_models
lotus.settings.configure(lm=gpt_4o, helper_lm=gpt_4o_mini)
Expand Down Expand Up @@ -57,7 +85,6 @@ def test_filter_cascade(setup_models):
"Everything is going as planned, couldn't be happier.",
"Feeling super motivated and ready to take on challenges!",
"I appreciate all the small things that bring me joy.",

# Negative examples
"I am very sad.",
"Today has been really tough; I feel exhausted.",
Expand Down Expand Up @@ -177,4 +204,4 @@ def test_map_fewshot(setup_models):

pairs = set(zip(df["School"], df["State"]))
expected_pairs = set([("UC Berkeley", "CA"), ("Carnegie Mellon", "PA")])
assert pairs == expected_pairs
assert pairs == expected_pairs
Binary file added .lotus_cache/cache.db
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/requirements-docs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ sphinx-rtd-theme==2.0.0

backoff==2.2.1
faiss-cpu==1.8.0.post1
litellm==1.51.0
numpy==1.26.4
openai==1.35.13
pandas==2.2.2
sentence-transformers==3.0.1
tiktoken==0.7.0
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/agg.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import E5Model, OpenAIModel
from lotus.models import LM, E5Model

lm = OpenAIModel()
lm = LM()
rm = E5Model()

lotus.settings.configure(lm=lm, rm=rm)
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/cluster.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import E5Model, OpenAIModel
from lotus.models import LM, E5Model

lm = OpenAIModel()
lm = LM()
rm = E5Model()

lotus.settings.configure(lm=lm, rm=rm)
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/filter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

lm = OpenAIModel()
lm = LM()

lotus.settings.configure(lm=lm)
data = {
Expand Down
6 changes: 3 additions & 3 deletions examples/op_examples/filter_cascade.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import pandas as pd

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

gpt_35_turbo = OpenAIModel("gpt-3.5-turbo")
gpt_4o = OpenAIModel("gpt-4o")
gpt_35_turbo = LM("gpt-3.5-turbo")
gpt_4o = LM("gpt-4o")

lotus.settings.configure(lm=gpt_4o, helper_lm=gpt_35_turbo)
data = {
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/join.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

lm = OpenAIModel()
lm = LM()

lotus.settings.configure(lm=lm)
data = {
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/map.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

lm = OpenAIModel()
lm = LM()

lotus.settings.configure(lm=lm)
data = {
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/map_fewshot.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

lm = OpenAIModel()
lm = LM()

lotus.settings.configure(lm=lm)
data = {
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/partition.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import E5Model, OpenAIModel
from lotus.models import LM, E5Model

lm = OpenAIModel(max_tokens=2048)
lm = LM(max_tokens=2048)
rm = E5Model()

lotus.settings.configure(lm=lm, rm=rm)
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/search.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import CrossEncoderModel, E5Model, OpenAIModel
from lotus.models import LM, CrossEncoderModel, E5Model

lm = OpenAIModel()
lm = LM()
rm = E5Model()
reranker = CrossEncoderModel()

Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/sim_join.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import E5Model, OpenAIModel
from lotus.models import LM, E5Model

lm = OpenAIModel()
lm = LM()
rm = E5Model()

lotus.settings.configure(lm=lm, rm=rm)
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/top_k.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

lm = OpenAIModel()
lm = LM()

lotus.settings.configure(lm=lm)
data = {
Expand Down
4 changes: 2 additions & 2 deletions examples/provider_examples/oai.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

lm = OpenAIModel()
lm = LM()

lotus.settings.configure(lm=lm)
data = {
Expand Down
4 changes: 2 additions & 2 deletions examples/provider_examples/ollama.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

lm = OpenAIModel(
lm = LM(
api_base="http://localhost:11434/v1",
model="llama3.2",
hf_name="meta-llama/Llama-3.2-3B-Instruct",
Expand Down
4 changes: 2 additions & 2 deletions examples/provider_examples/vllm.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd

import lotus
from lotus.models import OpenAIModel
from lotus.models import LM

lm = OpenAIModel(
lm = LM(
model="meta-llama/Meta-Llama-3.1-70B-Instruct",
api_base="http://localhost:8000/v1",
provider="vllm",
Expand Down
2 changes: 0 additions & 2 deletions lotus/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
from lotus.models.cross_encoder_model import CrossEncoderModel
from lotus.models.e5_model import E5Model
from lotus.models.lm import LM
from lotus.models.openai_model import OpenAIModel
from lotus.models.reranker import Reranker
from lotus.models.rm import RM

__all__ = [
"OpenAIModel",
"E5Model",
"ColBERTv2Model",
"CrossEncoderModel",
Expand Down
Loading
Loading