add changes for lora adapter support and /v1/models endpoint
sven-knoblauch committed Oct 9, 2024
1 parent d3ee323 commit 5cd12ba
Showing 1 changed file with 14 additions and 6 deletions.
src/engine.py (14 additions, 6 deletions)
@@ -11,7 +11,8 @@
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest, CompletionRequest, ErrorResponse
-from vllm.entrypoints.openai.serving_engine import BaseModelPath
+from vllm.entrypoints.openai.serving_engine import BaseModelPath, LoRAModulePath
+
 
 from utils import DummyRequest, JobInput, BatchSize, create_error_response
 from constants import DEFAULT_MAX_CONCURRENCY, DEFAULT_BATCH_SIZE, DEFAULT_BATCH_SIZE_GROWTH_FACTOR, DEFAULT_MIN_BATCH_SIZE
@@ -128,21 +129,31 @@ async def _initialize_engines(self):
         self.base_model_paths = [
             BaseModelPath(name=self.engine_args.model, model_path=self.engine_args.model)
         ]
+
+        lora_modules = os.getenv('LORA_MODULES', None)
+        if lora_modules is not None:
+            try:
+                # LORA_MODULES holds a JSON object matching LoRAModulePath's fields
+                lora_modules = [LoRAModulePath(**json.loads(lora_modules))]
+            except (ValueError, TypeError):
+                lora_modules = None
+
+
         self.chat_engine = OpenAIServingChat(
             engine_client=self.llm,
             model_config=self.model_config,
             base_model_paths=self.base_model_paths,
             response_role=self.response_role,
             chat_template=self.tokenizer.tokenizer.chat_template,
-            lora_modules=None,
+            lora_modules=lora_modules,
             prompt_adapters=None,
             request_logger=None
         )
         self.completion_engine = OpenAIServingCompletion(
             engine_client=self.llm,
             model_config=self.model_config,
             base_model_paths=self.base_model_paths,
-            lora_modules=[],
+            lora_modules=lora_modules,
             prompt_adapters=None,
             request_logger=None
         )
@@ -158,9 +169,6 @@ async def generate(self, openai_request: JobInput):
 
     async def _handle_model_request(self):
         models = await self.chat_engine.show_available_models()
-        fixed_model = models.data[0]
-        fixed_model.id = self.served_model_name
-        models.data = [fixed_model]
         return models.model_dump()
 
     async def _handle_chat_or_completion_request(self, openai_request: JobInput):
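
Note on the LORA_MODULES format: the parsing above expects the variable to hold a single JSON object whose keys match LoRAModulePath's constructor. A minimal sketch of setting and parsing it, assuming the dataclass in the targeted vLLM version takes name and path (the adapter name and path below are hypothetical):

import json
import os

from vllm.entrypoints.openai.serving_engine import LoRAModulePath

# Hypothetical adapter; substitute a real adapter name and on-disk path.
os.environ['LORA_MODULES'] = json.dumps(
    {"name": "my-adapter", "path": "/runpod-volume/loras/my-adapter"}
)

# Mirrors the parsing added in _initialize_engines above.
raw = os.getenv('LORA_MODULES', None)
lora_modules = [LoRAModulePath(**json.loads(raw))] if raw is not None else None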

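With the fixed_model override removed, _handle_model_request returns the result of show_available_models() unchanged, so the /v1/models listing now includes any loaded LoRA adapters alongside the base model instead of a single renamed entry. A hedged sketch of the resulting payload, where engine stands in for an instance of the worker class, the ids are placeholders, and the fields follow the OpenAI model-list schema that vLLM emits:

# Inside an async context, with `engine` an instance of the class above.
models = await engine._handle_model_request()
# Expected shape (placeholder ids):
# {
#   "object": "list",
#   "data": [
#     {"id": "<base model name>", "object": "model", ...},
#     {"id": "<lora adapter name>", "object": "model", ...},
#   ],
# }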