From d06f68cba61ac3135951a1492654a3987b857bdd Mon Sep 17 00:00:00 2001
From: Sid Jha
Date: Sun, 29 Sep 2024 14:54:53 -0700
Subject: [PATCH] Add ollama support and provider examples

---
 examples/provider_examples/oai.py    | 20 ++++++++++++++++++++
 examples/provider_examples/ollama.py | 25 +++++++++++++++++++++++++
 examples/provider_examples/vllm.py   | 24 ++++++++++++++++++++++++
 lotus/models/openai_model.py         | 19 +++++--------------
 4 files changed, 74 insertions(+), 14 deletions(-)
 create mode 100644 examples/provider_examples/oai.py
 create mode 100644 examples/provider_examples/ollama.py
 create mode 100644 examples/provider_examples/vllm.py

diff --git a/examples/provider_examples/oai.py b/examples/provider_examples/oai.py
new file mode 100644
index 00000000..b89f74f2
--- /dev/null
+++ b/examples/provider_examples/oai.py
@@ -0,0 +1,20 @@
+import pandas as pd
+
+import lotus
+from lotus.models import OpenAIModel
+
+lm = OpenAIModel()
+
+lotus.settings.configure(lm=lm)
+data = {
+    "Course Name": [
+        "Probability and Random Processes",
+        "Optimization Methods in Engineering",
+        "Digital Design and Integrated Circuits",
+        "Computer Security",
+    ]
+}
+df = pd.DataFrame(data)
+user_instruction = "{Course Name} requires a lot of math"
+df = df.sem_filter(user_instruction)
+print(df)
diff --git a/examples/provider_examples/ollama.py b/examples/provider_examples/ollama.py
new file mode 100644
index 00000000..727add7d
--- /dev/null
+++ b/examples/provider_examples/ollama.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+import lotus
+from lotus.models import OpenAIModel
+
+lm = OpenAIModel(
+    api_base="http://localhost:11434/v1",
+    model="llama3.2",
+    hf_name="meta-llama/Llama-3.2-3B-Instruct",
+    provider="ollama",
+)
+
+lotus.settings.configure(lm=lm)
+data = {
+    "Course Name": [
+        "Probability and Random Processes",
+        "Optimization Methods in Engineering",
+        "Digital Design and Integrated Circuits",
+        "Computer Security",
+    ]
+}
+df = pd.DataFrame(data)
+user_instruction = "{Course Name} requires a lot of math"
+df = df.sem_filter(user_instruction)
+print(df)
diff --git a/examples/provider_examples/vllm.py b/examples/provider_examples/vllm.py
new file mode 100644
index 00000000..76a46884
--- /dev/null
+++ b/examples/provider_examples/vllm.py
@@ -0,0 +1,24 @@
+import pandas as pd
+
+import lotus
+from lotus.models import OpenAIModel
+
+lm = OpenAIModel(
+    model="meta-llama/Meta-Llama-3.1-70B-Instruct",
+    api_base="http://localhost:8000/v1",
+    provider="vllm",
+)
+
+lotus.settings.configure(lm=lm)
+data = {
+    "Course Name": [
+        "Probability and Random Processes",
+        "Optimization Methods in Engineering",
+        "Digital Design and Integrated Circuits",
+        "Computer Security",
+    ]
+}
+df = pd.DataFrame(data)
+user_instruction = "{Course Name} requires a lot of math"
+df = df.sem_filter(user_instruction)
+print(df)
diff --git a/lotus/models/openai_model.py b/lotus/models/openai_model.py
index 2fb11360..0d992a9f 100644
--- a/lotus/models/openai_model.py
+++ b/lotus/models/openai_model.py
@@ -11,13 +11,6 @@
 import lotus
 from lotus.models.lm import LM
 
-# Mapping from Databricks model names to their Hugging Face model names for tokenizers
-DBRX_NAME_TO_MODEL = {
-    "databricks-dbrx-instruct": "databricks/dbrx-instruct",
-    "databricks-llama-2-70b-chat": "meta-llama/Llama-2-70b-chat-hf",
-    "databricks-mixtral-8x7b-instruct": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-}
-
 ERRORS = (openai.RateLimitError, openai.APIError)
 
 
@@ -46,16 +39,17 @@ class OpenAIModel(LM):
     def __init__(
         self,
         model: str = "gpt-4o-mini",
+        hf_name: Optional[str] = None,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
         provider: str = "openai",
-        max_batch_size=64,
-        max_ctx_len=4096,
+        max_batch_size: int = 64,
+        max_ctx_len: int = 4096,
         **kwargs: Dict[str, Any],
     ):
         super().__init__()
         self.provider = provider
-        self.use_chat = provider in ["openai", "dbrx"]
+        self.use_chat = provider in ["openai", "dbrx", "ollama"]
         self.max_batch_size = max_batch_size
         self.max_ctx_len = max_ctx_len
 
@@ -70,14 +64,11 @@ def __init__(
 
         self.client = OpenAI(api_key=api_key, base_url=api_base)
 
-        self.kwargs["model"] = model
         # TODO: Refactor this
         if self.provider == "openai":
             self.tokenizer = tiktoken.encoding_for_model(model)
-        elif model in DBRX_NAME_TO_MODEL:
-            self.tokenizer = AutoTokenizer.from_pretrained(DBRX_NAME_TO_MODEL[model])
         else:
-            self.tokenizer = AutoTokenizer.from_pretrained(model)
+            self.tokenizer = AutoTokenizer.from_pretrained(hf_name)
 
     def handle_chat_request(self, messages: List, **kwargs: Dict[str, Any]) -> Union[List, Tuple[List, List]]:
         """Handle single chat request to OpenAI server.