
Commit

Merge branch 'Combine_compose' of https://github.com/opea-project/GenAIComps into Combine_compose

Signed-off-by: ZePan110 <[email protected]>
ZePan110 committed Nov 7, 2024
2 parents 7d4c693 + 7df1359 commit 28c2a9f
Showing 36 changed files with 490 additions and 114 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/_comps-workflow.yml
@@ -69,7 +69,7 @@ jobs:
docker_compose_path: ${{ steps.get-yaml-path.outputs.docker_compose_path }}
mode: ${{ inputs.mode }}
run: |
build_list=$(bash ${{ github.workspace }}/.github/workflows/scripts/get_cicd_list.sh "${mode}" ${docker_compose_path})
build_list=$(bash ${{ github.workspace }}/.github/workflows/scripts/get_cicd_list.sh "${mode}" ${docker_compose_path})
echo "build_list=${build_list}" >> $GITHUB_OUTPUT
- name: Build Image
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/embeddings-compose-cd.yaml
@@ -22,3 +22,7 @@ services:
build:
dockerfile: comps/embeddings/predictionguard/Dockerfile
image: ${REGISTRY:-opea}/embedding-predictionguard:${TAG:-latest}
embedding-reranking-local:
build:
dockerfile: comps/embeddings/tei/langchain/Dockerfile.dynamic_batching
image: ${REGISTRY:-opea}/embedding-reranking-local:${TAG:-latest}
4 changes: 0 additions & 4 deletions .github/workflows/docker/compose/llms-compose-cd.yaml
@@ -23,10 +23,6 @@ services:
build:
dockerfile: comps/llms/text-generation/vllm/llama_index/Dockerfile
image: ${REGISTRY:-opea}/llm-vllm-llamaindex:${TAG:-latest}
llm-vllm-llamaindex-hpu:
build:
dockerfile: comps/llms/text-generation/vllm/llama_index/dependency/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/llm-vllm-llamaindex-hpu:${TAG:-latest}
llm-predictionguard:
build:
dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
4 changes: 0 additions & 4 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -24,10 +24,6 @@ services:
build:
dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
llm-vllm-hpu:
build:
dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
llm-vllm-ray:
build:
dockerfile: comps/llms/text-generation/vllm/ray/Dockerfile
3 changes: 1 addition & 2 deletions .github/workflows/scripts/freeze_images.sh
@@ -5,8 +5,7 @@

declare -A dict
dict["langchain/langchain"]="docker://docker.io/langchain/langchain"
# dict["vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2"]="docker://vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2"
dict["opea/habanalabs:1.16.1-pytorch-installer-2.2.2"]="docker://docker.io/opea/habanalabs:1.16.1-pytorch-installer-2.2.2"
dict["vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0"]="docker://vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0"

function get_latest_version() {
repo_image=$1
2 changes: 1 addition & 1 deletion comps/animation/wav2lip/dependency/Dockerfile.intel_hpu
@@ -1,6 +1,6 @@
# Use a base image
# FROM python:3.11-slim
FROM vault.habana.ai/gaudi-docker/1.16.2/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0 AS hpu

# Set environment variables
ENV LANG=en_US.UTF-8
3 changes: 1 addition & 2 deletions comps/asr/whisper/dependency/Dockerfile.intel_hpu
@@ -2,8 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# HABANA environment
# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu
FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0 AS hpu

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
69 changes: 66 additions & 3 deletions comps/cores/mega/micro_service.py
@@ -3,14 +3,21 @@

import asyncio
import multiprocessing
import os
from collections import defaultdict, deque
from enum import Enum
from typing import Any, List, Optional, Type

from ..proto.docarray import TextDoc
from .constants import ServiceRoleType, ServiceType
from .logger import CustomLogger
from .utils import check_ports_availability

opea_microservices = {}

logger = CustomLogger("micro_service")
logflag = os.getenv("LOGFLAG", False)


class MicroService:
"""MicroService class to create a microservice."""
@@ -31,6 +38,9 @@ def __init__(
provider: Optional[str] = None,
provider_endpoint: Optional[str] = None,
use_remote_service: Optional[bool] = False,
dynamic_batching: bool = False,
dynamic_batching_timeout: int = 1,
dynamic_batching_max_batch_size: int = 32,
):
"""Init the microservice."""
self.name = f"{name}/{self.__class__.__name__}" if name else self.__class__.__name__
@@ -43,6 +53,9 @@ def __init__(
self.input_datatype = input_datatype
self.output_datatype = output_datatype
self.use_remote_service = use_remote_service
self.dynamic_batching = dynamic_batching
self.dynamic_batching_timeout = dynamic_batching_timeout
self.dynamic_batching_max_batch_size = dynamic_batching_max_batch_size
self.uvicorn_kwargs = {}

if ssl_keyfile:
@@ -58,10 +71,50 @@ def __init__(

self.server = self._get_server()
self.app = self.server.app
# create a batch request processor loop if using dynamic batching
if self.dynamic_batching:
self.buffer_lock = asyncio.Lock()
self.request_buffer = defaultdict(deque)

@self.app.on_event("startup")
async def startup_event():
asyncio.create_task(self._dynamic_batch_processor())

self.event_loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.event_loop)
self.event_loop.run_until_complete(self._async_setup())

async def _dynamic_batch_processor(self):
if logflag:
logger.info("dynamic batch processor looping...")
while True:
await asyncio.sleep(self.dynamic_batching_timeout)
runtime_batch: dict[Enum, list[dict]] = {} # {ServiceType.Embedding: [{"request": xx, "response": yy}, {}]}

async with self.buffer_lock:
# prepare the runtime batch, access to buffer is locked
if self.request_buffer:
for service_type, request_lst in self.request_buffer.items():
batch = []
# grab min(MAX_BATCH_SIZE, REQUEST_SIZE) requests from buffer
for _ in range(min(self.dynamic_batching_max_batch_size, len(request_lst))):
batch.append(request_lst.popleft())

runtime_batch[service_type] = batch

# Run batched inference on the batch and set results
for service_type, batch in runtime_batch.items():
if not batch:
continue
results = await self.dynamic_batching_infer(service_type, batch)

for req, result in zip(batch, results):
req["response"].set_result(result)

async def dynamic_batching_infer(self, service_type: Enum, batch: list[dict]):
"""Need to implement."""
raise NotImplementedError("Unimplemented dynamic batching inference!")

def _validate_env(self):
"""Check whether to use the microservice locally."""
if self.use_remote_service:
@@ -116,10 +169,14 @@ def run(self):
self._validate_env()
self.event_loop.run_until_complete(self._async_run_forever())

def start(self):
def start(self, in_single_process=False):
self._validate_env()
self.process = multiprocessing.Process(target=self.run, daemon=False, name=self.name)
self.process.start()
if in_single_process:
# Resolve HPU segmentation fault and potential tokenizer issues by limiting to same process
self.run()
else:
self.process = multiprocessing.Process(target=self.run, daemon=False, name=self.name)
self.process.start()

async def _async_teardown(self):
"""Shutdown the server."""
@@ -155,6 +212,9 @@ def register_microservice(
provider: Optional[str] = None,
provider_endpoint: Optional[str] = None,
methods: List[str] = ["POST"],
dynamic_batching: bool = False,
dynamic_batching_timeout: int = 1,
dynamic_batching_max_batch_size: int = 32,
):
def decorator(func):
if name not in opea_microservices:
@@ -172,6 +232,9 @@ def decorator(func):
output_datatype=output_datatype,
provider=provider,
provider_endpoint=provider_endpoint,
dynamic_batching=dynamic_batching,
dynamic_batching_timeout=dynamic_batching_timeout,
dynamic_batching_max_batch_size=dynamic_batching_max_batch_size,
)
opea_microservices[name] = micro_service
opea_microservices[name].app.router.add_api_route(endpoint, func, methods=methods)
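The dynamic batching support added above leaves dynamic_batching_infer unimplemented and expects callers to feed request_buffer themselves. Below is a minimal sketch of how a downstream service might wire both sides together. It is not part of this commit: the subclass, route, placeholder model call, port, and the ServiceType.EMBEDDING member name are illustrative assumptions, and the import paths simply mirror the file layout shown in the diff.

import asyncio

from comps.cores.mega.constants import ServiceType
from comps.cores.mega.micro_service import MicroService


def run_batched_model(texts):
    # Placeholder: stand-in for a real batched embedding model call.
    return [[0.0] * 768 for _ in texts]


class BatchedEmbeddingService(MicroService):
    """Hypothetical subclass that supplies the batched inference hook."""

    async def dynamic_batching_infer(self, service_type, batch):
        # `batch` is the list of {"request": ..., "response": asyncio.Future}
        # items drained from request_buffer by _dynamic_batch_processor.
        # Return one result per item, in order, so each future can be resolved.
        texts = [item["request"] for item in batch]
        return run_batched_model(texts)


service = BatchedEmbeddingService(
    name="embedding_batched",            # illustrative values
    host="0.0.0.0",
    port=6000,
    dynamic_batching=True,
    dynamic_batching_timeout=1,          # seconds between batch flushes
    dynamic_batching_max_batch_size=32,
)


@service.app.post("/v1/embeddings")
async def handle(request: dict):
    # Producer side: park the request in the shared buffer together with a
    # Future, then wait for _dynamic_batch_processor to set its result.
    fut = asyncio.get_running_loop().create_future()
    async with service.buffer_lock:
        service.request_buffer[ServiceType.EMBEDDING].append(
            {"request": request, "response": fut}
        )
    return await fut


if __name__ == "__main__":
    # in_single_process=True exercises the new start() flag added above.
    service.start(in_single_process=True)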
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# HABANA environment
FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0 AS hpu
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
28 changes: 28 additions & 0 deletions comps/embeddings/tei/langchain/Dockerfile.dynamic_batching
@@ -0,0 +1,28 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest as hpu

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

# Disable user for now
# USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/embeddings/tei/langchain/requirements.txt && \
pip install git+https://github.com/huggingface/optimum-habana.git

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/embeddings/tei/langchain

ENTRYPOINT ["python", "local_embedding_reranking.py"]
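The image's entrypoint, local_embedding_reranking.py, is not included in this excerpt. As a hedged sketch only, a service like it could opt into the new batching path through the updated register_microservice decorator; the service name, port, service_type argument, and the `from comps import ...` re-exports below follow the common GenAIComps pattern and are assumptions, not code from this commit.

from comps import ServiceType, opea_microservices, register_microservice  # assumed re-exports


@register_microservice(
    name="opea_service@local_embedding_reranking",   # illustrative
    service_type=ServiceType.EMBEDDING,
    endpoint="/v1/embeddings",
    host="0.0.0.0",
    port=6001,
    dynamic_batching=True,                 # new parameters added in this commit
    dynamic_batching_timeout=1,
    dynamic_batching_max_batch_size=32,
)
async def embed(request):
    # With batching enabled, the handler would enqueue the request into the
    # service's request_buffer and await a Future, as in the sketch above.
    ...


if __name__ == "__main__":
    # New start() flag: keep everything in one process (avoids the HPU issue
    # noted in the micro_service.py diff).
    opea_microservices["opea_service@local_embedding_reranking"].start(in_single_process=True)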