
Merge branch 'main' into pending-requests
eero-t authored Nov 11, 2024
2 parents 6e198dc + 9a50131 commit f00ebff
Showing 49 changed files with 1,084 additions and 1,267 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/check-online-doc-build.yml
@@ -0,0 +1,35 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Check Online Document Building
permissions: {}

on:
pull_request:
paths:
- "**.md"
- "**.rst"
branches: [main]

jobs:
build:
runs-on: ubuntu-latest
steps:

- name: Checkout
uses: actions/checkout@v4
with:
path: GenAIComps

- name: Checkout docs
uses: actions/checkout@v4
with:
repository: opea-project/docs
path: docs

- name: Build Online Document
shell: bash
run: |
echo "build online doc"
cd docs
bash scripts/build.sh
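
The same build can be reproduced locally before opening a pull request; a minimal sketch, assuming the `opea-project/docs` repository and its `scripts/build.sh` entry point used by the workflow above:

```bash
# Clone the docs repository alongside your GenAIComps checkout, mirroring the CI layout
git clone https://github.com/opea-project/docs.git
cd docs
# Run the same build script the workflow invokes
bash scripts/build.sh
```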
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/llms-compose-cd.yaml
@@ -15,6 +15,10 @@ services:
context: vllm-openvino
dockerfile: Dockerfile.openvino
image: ${REGISTRY:-opea}/vllm-openvino:${TAG:-latest}
vllm-arc:
build:
dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu
image: ${REGISTRY:-opea}/vllm-arc:${TAG:-latest}
llm-eval:
build:
dockerfile: comps/llms/utils/lm-eval/Dockerfile
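
The newly added `vllm-arc` entry can be exercised outside CI with a plain `docker build`; a minimal sketch, assuming the GenAIComps repository root as the build context (image name taken from the compose entry above):

```bash
# Build the Arc (Intel GPU) vLLM image referenced by the vllm-arc compose service
docker build \
  -f comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu \
  -t opea/vllm-arc:latest .
```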
8 changes: 0 additions & 8 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -24,11 +24,3 @@ services:
build:
dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
llm-vllm-ray:
build:
dockerfile: comps/llms/text-generation/vllm/ray/Dockerfile
image: ${REGISTRY:-opea}/llm-vllm-ray:${TAG:-latest}
llm-vllm-ray-hpu:
build:
dockerfile: comps/llms/text-generation/vllm/ray/dependency/Dockerfile
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
111 changes: 72 additions & 39 deletions comps/cores/mega/gateway.py
@@ -25,6 +25,41 @@
from .micro_service import MicroService


def read_pdf(file):
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader(file)
docs = loader.load_and_split()
return docs


def read_text_from_file(file, save_file_name):
import docx2txt
from langchain.text_splitter import CharacterTextSplitter

# read text file
if file.headers["content-type"] == "text/plain":
file.file.seek(0)
content = file.file.read().decode("utf-8")
# Split text
text_splitter = CharacterTextSplitter()
texts = text_splitter.split_text(content)
# Create multiple documents
file_content = texts
# read pdf file
elif file.headers["content-type"] == "application/pdf":
documents = read_pdf(save_file_name)
file_content = [doc.page_content for doc in documents]
# read docx file
elif (
file.headers["content-type"] == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
or file.headers["content-type"] == "application/octet-stream"
):
file_content = docx2txt.process(save_file_name)

return file_content


class Gateway:
def __init__(
self,
@@ -72,8 +107,19 @@ async def handle_request(self, request: Request):

def list_service(self):
response = {}
for node in self.all_leaves():
response = {self.services[node].description: self.services[node].endpoint_path}
for node, service in self.megaservice.services.items():
# Check if the service has a 'description' attribute and it is not None
if hasattr(service, "description") and service.description:
response[node] = {"description": service.description}
# Check if the service has an 'endpoint' attribute and it is not None
if hasattr(service, "endpoint") and service.endpoint:
if node in response:
response[node]["endpoint"] = service.endpoint
else:
response[node] = {"endpoint": service.endpoint}
# If neither 'description' nor 'endpoint' is available, add an error message for the node
if node not in response:
response[node] = {"error": f"Service node {node} does not have 'description' or 'endpoint' attribute."}
return response

def list_parameter(self):
@@ -365,39 +411,6 @@ def __init__(self, megaservice, host="0.0.0.0", port=8888):
megaservice, host, port, str(MegaServiceEndpoint.DOC_SUMMARY), ChatCompletionRequest, ChatCompletionResponse
)

def read_pdf(self, file):
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader(file)
docs = loader.load_and_split()
return docs

def read_text_from_file(self, file, save_file_name):
import docx2txt
from langchain.text_splitter import CharacterTextSplitter

# read text file
if file.headers["content-type"] == "text/plain":
file.file.seek(0)
content = file.file.read().decode("utf-8")
# Split text
text_splitter = CharacterTextSplitter()
texts = text_splitter.split_text(content)
# Create multiple documents
file_content = texts
# read pdf file
elif file.headers["content-type"] == "application/pdf":
documents = self.read_pdf(save_file_name)
file_content = [doc.page_content for doc in documents]
# read docx file
elif (
file.headers["content-type"] == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
or file.headers["content-type"] == "application/octet-stream"
):
file_content = docx2txt.process(save_file_name)

return file_content

async def handle_request(self, request: Request, files: List[UploadFile] = File(default=None)):
data = await request.form()
stream_opt = data.get("stream", True)
@@ -411,7 +424,7 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(

async with aiofiles.open(file_path, "wb") as f:
await f.write(await file.read())
docs = self.read_text_from_file(file, file_path)
docs = read_text_from_file(file, file_path)
os.remove(file_path)
if isinstance(docs, list):
file_summaries.extend(docs)
@@ -547,11 +560,31 @@ def __init__(self, megaservice, host="0.0.0.0", port=8888):
megaservice, host, port, str(MegaServiceEndpoint.FAQ_GEN), ChatCompletionRequest, ChatCompletionResponse
)

async def handle_request(self, request: Request):
data = await request.json()
async def handle_request(self, request: Request, files: List[UploadFile] = File(default=None)):
data = await request.form()
stream_opt = data.get("stream", True)
chat_request = ChatCompletionRequest.parse_obj(data)
prompt = self._handle_message(chat_request.messages)
file_summaries = []
if files:
for file in files:
file_path = f"/tmp/{file.filename}"

import aiofiles

async with aiofiles.open(file_path, "wb") as f:
await f.write(await file.read())
docs = read_text_from_file(file, file_path)
os.remove(file_path)
if isinstance(docs, list):
file_summaries.extend(docs)
else:
file_summaries.append(docs)

if file_summaries:
prompt = self._handle_message(chat_request.messages) + "\n".join(file_summaries)
else:
prompt = self._handle_message(chat_request.messages)

parameters = LLMParams(
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
top_k=chat_request.top_k if chat_request.top_k else 10,
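
With this change the FAQ generation gateway accepts the same multipart form as the document summary gateway, so a document can be attached directly to the request. A minimal sketch of such a call, assuming the default port 8888 and that `MegaServiceEndpoint.FAQ_GEN` resolves to `/v1/faqgen` (the endpoint path and form field names are assumptions, not taken from this diff):

```bash
# Hypothetical request against the updated FAQ generation gateway
curl -X POST http://localhost:8888/v1/faqgen \
  -H "Content-Type: multipart/form-data" \
  -F "messages=Generate FAQs for the attached document." \
  -F "files=@./document.pdf"
```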
2 changes: 2 additions & 0 deletions comps/cores/mega/http_service.py
@@ -1,6 +1,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import logging
import re
from typing import Optional

@@ -120,6 +121,7 @@ async def start_server(self, **kwargs):
**self.uvicorn_kwargs,
)
)
logging.getLogger("uvicorn.access").addFilter(lambda record: "/v1/health_check" not in record.getMessage())
self.logger.info(f"Uvicorn server setup on port {self.primary_port}")
await self.server.setup_server()
self.logger.info("HTTP server setup successful")
2 changes: 2 additions & 0 deletions comps/cores/mega/micro_service.py
@@ -38,6 +38,7 @@ def __init__(
provider: Optional[str] = None,
provider_endpoint: Optional[str] = None,
use_remote_service: Optional[bool] = False,
description: Optional[str] = None,
dynamic_batching: bool = False,
dynamic_batching_timeout: int = 1,
dynamic_batching_max_batch_size: int = 32,
@@ -53,6 +54,7 @@ def __init__(
self.input_datatype = input_datatype
self.output_datatype = output_datatype
self.use_remote_service = use_remote_service
self.description = description
self.dynamic_batching = dynamic_batching
self.dynamic_batching_timeout = dynamic_batching_timeout
self.dynamic_batching_max_batch_size = dynamic_batching_max_batch_size
70 changes: 49 additions & 21 deletions comps/dataprep/multimodal/redis/langchain/README.md
@@ -1,6 +1,10 @@
# Dataprep Microservice for Multimodal Data with Redis

This `dataprep` microservice accepts videos (mp4 files) and their transcripts (optional) from the user and ingests them into Redis vectorstore.
This `dataprep` microservice accepts the following from the user and ingests them into a Redis vector store:

- Videos (mp4 files) and their transcripts (optional)
- Images (gif, jpg, jpeg, and png files) and their captions (optional)
- Audio (wav files)

## 🚀1. Start Microservice with Python(Option 1)

@@ -107,18 +111,18 @@ docker container logs -f dataprep-multimodal-redis

## 🚀4. Consume Microservice

Once this dataprep microservice is started, user can use the below commands to invoke the microservice to convert videos and their transcripts (optional) to embeddings and save to the Redis vector store.
Once this dataprep microservice is started, users can use the commands below to invoke the microservice to convert images, videos, and their transcripts (optional) into embeddings and save them to the Redis vector store.

This mircroservice has provided 3 different ways for users to ingest videos into Redis vector store corresponding to the 3 use cases.
This microservice provides 3 different ways for users to ingest files into the Redis vector store, corresponding to 3 use cases.

### 4.1 Consume _videos_with_transcripts_ API
### 4.1 Consume _ingest_with_text_ API

**Use case:** This API is used when a transcript file (under `.vtt` format) is available for each video.
**Use case:** This API is used when videos are accompanied by transcript files (`.vtt` format) or images are accompanied by text caption files (`.txt` format).

**Important notes:**

- Make sure the file paths after `files=@` are correct.
- Every transcript file's name must be identical with its corresponding video file's name (except their extension .vtt and .mp4). For example, `video1.mp4` and `video1.vtt`. Otherwise, if `video1.vtt` is not included correctly in this API call, this microservice will return error `No captions file video1.vtt found for video1.mp4`.
- Every transcript or caption file's name must be identical to its corresponding video or image file's name (except their extension - .vtt goes with .mp4 and .txt goes with .jpg, .jpeg, .png, or .gif). For example, `video1.mp4` and `video1.vtt`. Otherwise, if `video1.vtt` is not included correctly in the API call, the microservice will return an error `No captions file video1.vtt found for video1.mp4`.

#### Single video-transcript pair upload

@@ -127,10 +131,20 @@ curl -X POST \
-H "Content-Type: multipart/form-data" \
-F "files=@./video1.mp4" \
-F "files=@./video1.vtt" \
http://localhost:6007/v1/videos_with_transcripts
http://localhost:6007/v1/ingest_with_text
```

#### Single image-caption pair upload

```bash
curl -X POST \
-H "Content-Type: multipart/form-data" \
-F "files=@./image.jpg" \
-F "files=@./image.txt" \
http://localhost:6007/v1/ingest_with_text
```

#### Multiple video-transcript pair upload
#### Multiple file pair upload

```bash
curl -X POST \
@@ -139,16 +153,20 @@ curl -X POST \
-F "files=@./video1.vtt" \
-F "files=@./video2.mp4" \
-F "files=@./video2.vtt" \
http://localhost:6007/v1/videos_with_transcripts
-F "files=@./image1.png" \
-F "files=@./image1.txt" \
-F "files=@./image2.jpg" \
-F "files=@./image2.txt" \
http://localhost:6007/v1/ingest_with_text
```

### 4.2 Consume _generate_transcripts_ API

**Use case:** This API should be used when a video has meaningful audio or recognizable speech but its transcript file is not available.
**Use case:** This API should be used when a video has meaningful audio or recognizable speech but its transcript file is not available, or for audio files with speech.

In this use case, this microservice will use [`whisper`](https://openai.com/index/whisper/) model to generate the `.vtt` transcript for the video.
In this use case, this microservice will use the [`whisper`](https://openai.com/index/whisper/) model to generate the `.vtt` transcript for the video or audio files.

#### Single video upload
#### Single file upload

```bash
curl -X POST \
@@ -157,21 +175,22 @@ curl -X POST \
http://localhost:6007/v1/generate_transcripts
```

#### Multiple video upload
#### Multiple file upload

```bash
curl -X POST \
-H "Content-Type: multipart/form-data" \
-F "files=@./video1.mp4" \
-F "files=@./video2.mp4" \
-F "files=@./audio1.wav" \
http://localhost:6007/v1/generate_transcripts
```

### 4.3 Consume _generate_captions_ API

**Use case:** This API should be used when a video does not have meaningful audio or does not have audio.
**Use case:** This API should be used when uploading an image, or when uploading a video that does not have meaningful audio or does not have audio.

In this use case, transcript either does not provide any meaningful information or does not exist. Thus, it is preferred to leverage a LVM microservice to summarize the video frames.
In this use case, there is no meaningful language transcription. Thus, it is preferred to leverage an LVM microservice to summarize the frames.

- Single video upload

@@ -192,22 +211,31 @@ curl -X POST \
http://localhost:6007/v1/generate_captions
```

### 4.4 Consume get_videos API
- Single image upload

```bash
curl -X POST \
-H "Content-Type: multipart/form-data" \
-F "files=@./image.jpg" \
http://localhost:6007/v1/generate_captions
```

### 4.4 Consume get_files API

To get names of uploaded videos, use the following command.
To get the names of uploaded files, use the following command.

```bash
curl -X POST \
-H "Content-Type: application/json" \
http://localhost:6007/v1/dataprep/get_videos
http://localhost:6007/v1/dataprep/get_files
```

### 4.5 Consume delete_videos API
### 4.5 Consume delete_files API

To delete uploaded videos and clear the database, use the following command.
To delete uploaded files and clear the database, use the following command.

```bash
curl -X POST \
-H "Content-Type: application/json" \
http://localhost:6007/v1/dataprep/delete_videos
http://localhost:6007/v1/dataprep/delete_files
```
2 changes: 1 addition & 1 deletion comps/dataprep/multimodal/redis/langchain/config.py
@@ -4,7 +4,7 @@
import os

# Models
EMBED_MODEL = os.getenv("EMBED_MODEL", "BridgeTower/bridgetower-large-itm-mlm-itc")
EMBED_MODEL = os.getenv("EMBEDDING_MODEL_ID", "BridgeTower/bridgetower-large-itm-mlm-itc")
WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small")

# Redis Connection Information
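
Since the dataprep service now reads the embedding model from `EMBEDDING_MODEL_ID` instead of `EMBED_MODEL`, deployments that override the model should export the new variable; a minimal sketch (the value shown is simply the default from the diff):

```bash
# Renamed variable: EMBED_MODEL -> EMBEDDING_MODEL_ID
export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc"
```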
