
Commit

Merge branch 'Combine_compose' of https://github.com/opea-project/GenAIComps into Combine_compose

Signed-off-by: ZePan110 <[email protected]>
ZePan110 committed Nov 7, 2024
2 parents 7d4c693 + 7df1359 commit 28c2a9f
Showing 36 changed files with 490 additions and 114 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/_comps-workflow.yml
@@ -69,7 +69,7 @@ jobs:
docker_compose_path: ${{ steps.get-yaml-path.outputs.docker_compose_path }}
mode: ${{ inputs.mode }}
run: |
build_list=$(bash ${{ github.workspace }}/.github/workflows/scripts/get_cicd_list.sh "${mode}" ${docker_compose_path})
build_list=$(bash ${{ github.workspace }}/.github/workflows/scripts/get_cicd_list.sh "${mode}" ${docker_compose_path})
echo "build_list=${build_list}" >> $GITHUB_OUTPUT
- name: Build Image
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/embeddings-compose-cd.yaml
@@ -22,3 +22,7 @@ services:
build:
dockerfile: comps/embeddings/predictionguard/Dockerfile
image: ${REGISTRY:-opea}/embedding-predictionguard:${TAG:-latest}
embedding-reranking-local:
build:
dockerfile: comps/embeddings/tei/langchain/Dockerfile.dynamic_batching
image: ${REGISTRY:-opea}/embedding-reranking-local:${TAG:-latest}
4 changes: 0 additions & 4 deletions .github/workflows/docker/compose/llms-compose-cd.yaml
@@ -23,10 +23,6 @@ services:
build:
dockerfile: comps/llms/text-generation/vllm/llama_index/Dockerfile
image: ${REGISTRY:-opea}/llm-vllm-llamaindex:${TAG:-latest}
llm-vllm-llamaindex-hpu:
build:
dockerfile: comps/llms/text-generation/vllm/llama_index/dependency/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/llm-vllm-llamaindex-hpu:${TAG:-latest}
llm-predictionguard:
build:
dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
4 changes: 0 additions & 4 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -24,10 +24,6 @@ services:
build:
dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
llm-vllm-hpu:
build:
dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
llm-vllm-ray:
build:
dockerfile: comps/llms/text-generation/vllm/ray/Dockerfile
3 changes: 1 addition & 2 deletions .github/workflows/scripts/freeze_images.sh
@@ -5,8 +5,7 @@

declare -A dict
dict["langchain/langchain"]="docker://docker.io/langchain/langchain"
# dict["vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2"]="docker://vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2"
dict["opea/habanalabs:1.16.1-pytorch-installer-2.2.2"]="docker://docker.io/opea/habanalabs:1.16.1-pytorch-installer-2.2.2"
dict["vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0"]="docker://vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0"

function get_latest_version() {
repo_image=$1
2 changes: 1 addition & 1 deletion comps/animation/wav2lip/dependency/Dockerfile.intel_hpu
@@ -1,6 +1,6 @@
# Use a base image
# FROM python:3.11-slim
FROM vault.habana.ai/gaudi-docker/1.16.2/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0 AS hpu

# Set environment variables
ENV LANG=en_US.UTF-8
3 changes: 1 addition & 2 deletions comps/asr/whisper/dependency/Dockerfile.intel_hpu
@@ -2,8 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# HABANA environment
# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu
FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0 AS hpu

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
69 changes: 66 additions & 3 deletions comps/cores/mega/micro_service.py
@@ -3,14 +3,21 @@

import asyncio
import multiprocessing
import os
from collections import defaultdict, deque
from enum import Enum
from typing import Any, List, Optional, Type

from ..proto.docarray import TextDoc
from .constants import ServiceRoleType, ServiceType
from .logger import CustomLogger
from .utils import check_ports_availability

opea_microservices = {}

logger = CustomLogger("micro_service")
logflag = os.getenv("LOGFLAG", False)


class MicroService:
"""MicroService class to create a microservice."""
@@ -31,6 +38,9 @@ def __init__(
provider: Optional[str] = None,
provider_endpoint: Optional[str] = None,
use_remote_service: Optional[bool] = False,
dynamic_batching: bool = False,
dynamic_batching_timeout: int = 1,
dynamic_batching_max_batch_size: int = 32,
):
"""Init the microservice."""
self.name = f"{name}/{self.__class__.__name__}" if name else self.__class__.__name__
@@ -43,6 +53,9 @@ def __init__(
self.input_datatype = input_datatype
self.output_datatype = output_datatype
self.use_remote_service = use_remote_service
self.dynamic_batching = dynamic_batching
self.dynamic_batching_timeout = dynamic_batching_timeout
self.dynamic_batching_max_batch_size = dynamic_batching_max_batch_size
self.uvicorn_kwargs = {}

if ssl_keyfile:
@@ -58,10 +71,50 @@ def __init__(

self.server = self._get_server()
self.app = self.server.app
# create a batch request processor loop if using dynamic batching
if self.dynamic_batching:
self.buffer_lock = asyncio.Lock()
self.request_buffer = defaultdict(deque)

@self.app.on_event("startup")
async def startup_event():
asyncio.create_task(self._dynamic_batch_processor())

self.event_loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.event_loop)
self.event_loop.run_until_complete(self._async_setup())

async def _dynamic_batch_processor(self):
if logflag:
logger.info("dynamic batch processor looping...")
while True:
await asyncio.sleep(self.dynamic_batching_timeout)
runtime_batch: dict[Enum, list[dict]] = {} # {ServiceType.Embedding: [{"request": xx, "response": yy}, {}]}

async with self.buffer_lock:
# prepare the runtime batch, access to buffer is locked
if self.request_buffer:
for service_type, request_lst in self.request_buffer.items():
batch = []
# grab min(MAX_BATCH_SIZE, REQUEST_SIZE) requests from buffer
for _ in range(min(self.dynamic_batching_max_batch_size, len(request_lst))):
batch.append(request_lst.popleft())

runtime_batch[service_type] = batch

# Run batched inference on the batch and set results
for service_type, batch in runtime_batch.items():
if not batch:
continue
results = await self.dynamic_batching_infer(service_type, batch)

for req, result in zip(batch, results):
req["response"].set_result(result)

async def dynamic_batching_infer(self, service_type: Enum, batch: list[dict]):
"""Need to implement."""
raise NotImplementedError("Unimplemented dynamic batching inference!")

def _validate_env(self):
"""Check whether to use the microservice locally."""
if self.use_remote_service:
@@ -116,10 +169,14 @@ def run(self):
self._validate_env()
self.event_loop.run_until_complete(self._async_run_forever())

def start(self):
def start(self, in_single_process=False):
self._validate_env()
self.process = multiprocessing.Process(target=self.run, daemon=False, name=self.name)
self.process.start()
if in_single_process:
# Resolve HPU segmentation fault and potential tokenizer issues by limiting to same process
self.run()
else:
self.process = multiprocessing.Process(target=self.run, daemon=False, name=self.name)
self.process.start()

async def _async_teardown(self):
"""Shutdown the server."""
@@ -155,6 +212,9 @@ def register_microservice(
provider: Optional[str] = None,
provider_endpoint: Optional[str] = None,
methods: List[str] = ["POST"],
dynamic_batching: bool = False,
dynamic_batching_timeout: int = 1,
dynamic_batching_max_batch_size: int = 32,
):
def decorator(func):
if name not in opea_microservices:
@@ -172,6 +232,9 @@ def decorator(func):
output_datatype=output_datatype,
provider=provider,
provider_endpoint=provider_endpoint,
dynamic_batching=dynamic_batching,
dynamic_batching_timeout=dynamic_batching_timeout,
dynamic_batching_max_batch_size=dynamic_batching_max_batch_size,
)
opea_microservices[name] = micro_service
opea_microservices[name].app.router.add_api_route(endpoint, func, methods=methods)
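The dynamic batching support added above leaves dynamic_batching_infer unimplemented and expects callers to feed request_buffer themselves. Below is a minimal sketch of how a downstream service might wire both sides together. It is not part of this commit: the subclass, route, placeholder model call, port, and the ServiceType.EMBEDDING member name are illustrative assumptions, and the import paths simply mirror the file layout shown in the diff.

import asyncio

from comps.cores.mega.constants import ServiceType
from comps.cores.mega.micro_service import MicroService


def run_batched_model(texts):
    # Placeholder: stand-in for a real batched embedding model call.
    return [[0.0] * 768 for _ in texts]


class BatchedEmbeddingService(MicroService):
    """Hypothetical subclass that supplies the batched inference hook."""

    async def dynamic_batching_infer(self, service_type, batch):
        # `batch` is the list of {"request": ..., "response": asyncio.Future}
        # items drained from request_buffer by _dynamic_batch_processor.
        # Return one result per item, in order, so each future can be resolved.
        texts = [item["request"] for item in batch]
        return run_batched_model(texts)


service = BatchedEmbeddingService(
    name="embedding_batched",            # illustrative values
    host="0.0.0.0",
    port=6000,
    dynamic_batching=True,
    dynamic_batching_timeout=1,          # seconds between batch flushes
    dynamic_batching_max_batch_size=32,
)


@service.app.post("/v1/embeddings")
async def handle(request: dict):
    # Producer side: park the request in the shared buffer together with a
    # Future, then wait for _dynamic_batch_processor to set its result.
    fut = asyncio.get_running_loop().create_future()
    async with service.buffer_lock:
        service.request_buffer[ServiceType.EMBEDDING].append(
            {"request": request, "response": fut}
        )
    return await fut


if __name__ == "__main__":
    # in_single_process=True exercises the new start() flag added above.
    service.start(in_single_process=True)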
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# HABANA environment
FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0 AS hpu
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
28 changes: 28 additions & 0 deletions comps/embeddings/tei/langchain/Dockerfile.dynamic_batching
@@ -0,0 +1,28 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest as hpu

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

# Disable user for now
# USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/embeddings/tei/langchain/requirements.txt && \
pip install git+https://github.com/huggingface/optimum-habana.git

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/embeddings/tei/langchain

ENTRYPOINT ["python", "local_embedding_reranking.py"]
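The image's entrypoint, local_embedding_reranking.py, is not included in this excerpt. As a hedged sketch only, a service like it could opt into the new batching path through the updated register_microservice decorator; the service name, port, service_type argument, and the `from comps import ...` re-exports below follow the common GenAIComps pattern and are assumptions, not code from this commit.

from comps import ServiceType, opea_microservices, register_microservice  # assumed re-exports


@register_microservice(
    name="opea_service@local_embedding_reranking",   # illustrative
    service_type=ServiceType.EMBEDDING,
    endpoint="/v1/embeddings",
    host="0.0.0.0",
    port=6001,
    dynamic_batching=True,                 # new parameters added in this commit
    dynamic_batching_timeout=1,
    dynamic_batching_max_batch_size=32,
)
async def embed(request):
    # With batching enabled, the handler would enqueue the request into the
    # service's request_buffer and await a Future, as in the sketch above.
    ...


if __name__ == "__main__":
    # New start() flag: keep everything in one process (avoids the HPU issue
    # noted in the micro_service.py diff).
    opea_microservices["opea_service@local_embedding_reranking"].start(in_single_process=True)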