From d3a14ab7b325904247761033173723ccc3b8cdba Mon Sep 17 00:00:00 2001
From: Xinyao Wang
Date: Thu, 7 Nov 2024 10:59:35 +0800
Subject: [PATCH] remove ray_serve

Signed-off-by: Xinyao Wang
---
 comps/llms/text-generation/ray_serve/llm.py   | 82 -------------------
 .../ray_serve/requirements.txt                | 14 ----
 2 files changed, 96 deletions(-)
 delete mode 100644 comps/llms/text-generation/ray_serve/llm.py
 delete mode 100644 comps/llms/text-generation/ray_serve/requirements.txt

diff --git a/comps/llms/text-generation/ray_serve/llm.py b/comps/llms/text-generation/ray_serve/llm.py
deleted file mode 100644
index c09bb45848..0000000000
--- a/comps/llms/text-generation/ray_serve/llm.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from fastapi.responses import StreamingResponse
-from langchain_openai import ChatOpenAI
-
-from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
-
-
-def post_process_text(text: str):
-    if text == " ":
-        return "data: @#$\n\n"
-    if text == "\n":
-        return "data: <br/>\n\n"
-    if text.isspace():
-        return None
-    new_text = text.replace(" ", "@#$")
-    return f"data: {new_text}\n\n"
-
-
-@register_microservice(
-    name="opea_service@llm_ray",
-    service_type=ServiceType.LLM,
-    endpoint="/v1/chat/completions",
-    host="0.0.0.0",
-    port=9000,
-)
-async def llm_generate(input: LLMParamsDoc):
-    llm_endpoint = os.getenv("RAY_Serve_ENDPOINT", "http://localhost:8080")
-    llm_model = os.getenv("LLM_MODEL", "Llama-2-7b-chat-hf")
-    if "/" in llm_model:
-        llm_model = llm_model.split("/")[-1]
-    llm = ChatOpenAI(
-        openai_api_base=llm_endpoint + "/v1",
-        model_name=llm_model,
-        openai_api_key=os.getenv("OPENAI_API_KEY", "not_needed"),
-        max_tokens=input.max_tokens,
-        temperature=input.temperature,
-        streaming=input.streaming,
-        request_timeout=600,
-    )
-
-    if input.streaming:
-
-        async def stream_generator():
-            chat_response = ""
-            async for text in llm.astream(input.query):
-                text = text.content
-                chat_response += text
-                processed_text = post_process_text(text)
-                if text and processed_text:
-                    if "</s>" in text:
-                        res = text.split("</s>")[0]
-                        if res != "":
-                            yield res
-                        break
-                    yield processed_text
-            print(f"[llm - chat_stream] stream response: {chat_response}")
-            yield "data: [DONE]\n\n"
-
-        return StreamingResponse(stream_generator(), media_type="text/event-stream")
-    else:
-        response = await llm.ainvoke(input.query)
-        response = response.content
-        return GeneratedDoc(text=response, prompt=input.query)
-
-
-if __name__ == "__main__":
-    opea_microservices["opea_service@llm_ray"].start()
diff --git a/comps/llms/text-generation/ray_serve/requirements.txt b/comps/llms/text-generation/ray_serve/requirements.txt
deleted file mode 100644
index 87aef99929..0000000000
--- a/comps/llms/text-generation/ray_serve/requirements.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-docarray[full]
-fastapi
-huggingface_hub
-langchain
-langchain_community
-langchain_openai
-openai
-opentelemetry-api
-opentelemetry-exporter-otlp
-opentelemetry-sdk
-prometheus-fastapi-instrumentator
-ray[serve]>=2.10
-shortuuid
-transformers