From 768124c4b0bd2759ebd708ca0cde89306d9ce0c6 Mon Sep 17 00:00:00 2001 From: Blazzycrafter Date: Mon, 18 Nov 2024 17:12:41 +0100 Subject: [PATCH 1/3] Added model load logic for chat Completions --- extensions/openai/script.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 03d99e8ded..b90af6cde7 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -119,6 +119,18 @@ async def generator(): @app.post('/v1/chat/completions', response_model=ChatCompletionResponse, dependencies=check_key) async def openai_chat_completions(request: Request, request_data: ChatCompletionRequest): + requested_model = request_data.model + payload = OAImodels.get_current_model_info() + current_model = payload["model_name"] + if not current_model == requested_model: + requested_model_dict = {"model_name": requested_model} + try: + OAImodels._load_model(requested_model_dict) + except: + traceback.print_exc() + return HTTPException(status_code=400, detail="Failed to load the model.") + + path = request.url.path is_legacy = "/generate" in path From 6f0f214408869ccf0c9f4557378fdc6b32df498c Mon Sep 17 00:00:00 2001 From: Blazzycrafter Date: Mon, 18 Nov 2024 17:15:29 +0100 Subject: [PATCH 2/3] - added available models into dummy models - changed args and settings to the get method to make it more Robust and easier to use --- extensions/openai/models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/extensions/openai/models.py b/extensions/openai/models.py index a7e67df6f6..7e370f6995 100644 --- a/extensions/openai/models.py +++ b/extensions/openai/models.py @@ -24,9 +24,12 @@ def list_dummy_models(): "data": [] } + models = get_available_models()[1:] # these are expected by so much, so include some here as a dummy for model in ['gpt-3.5-turbo', 'text-embedding-ada-002']: result["data"].append(model_info_dict(model)) + for model in models: + result["data"].append(model_info_dict(model)) return result @@ -42,8 +45,8 @@ def model_info_dict(model_name: str) -> dict: def _load_model(data): model_name = data["model_name"] - args = data["args"] - settings = data["settings"] + args = data.get("args", None) + settings = data.get("settings", None) unload_model() model_settings = get_model_metadata(model_name) From 8d44d78db54cd5b5798f07783435b6bc355fdfb4 Mon Sep 17 00:00:00 2001 From: Blazzycrafter Date: Mon, 18 Nov 2024 17:27:31 +0100 Subject: [PATCH 3/3] corrected too many lines --- extensions/openai/script.py | 1 - 1 file changed, 1 deletion(-) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index b90af6cde7..eb11793c3d 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -130,7 +130,6 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion traceback.print_exc() return HTTPException(status_code=400, detail="Failed to load the model.") - path = request.url.path is_legacy = "/generate" in path