From 17306d14b56afcbe7cc1e2374c46fc70037221c6 Mon Sep 17 00:00:00 2001 From: Jitendra Patil Date: Fri, 31 Jan 2025 16:25:31 -0600 Subject: [PATCH 01/17] Update LEGAL_INFORMATION.md about software subject to non-open source licenses (#1247) Signed-off-by: Patil, Jitendra --- LEGAL_INFORMATION.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/LEGAL_INFORMATION.md b/LEGAL_INFORMATION.md index 223566f3eb..41cb6dc31a 100644 --- a/LEGAL_INFORMATION.md +++ b/LEGAL_INFORMATION.md @@ -13,6 +13,9 @@ Your use of the source code for these components is subject to the terms and con See the accompanying [license](LICENSE) file for full license text and copyright notices. +Please note: component(s) depend on software subject to non-open source licenses.  If you use or redistribute this software, it is your sole responsibility to ensure compliance with such licenses. +e.g langserve + ## Citation If you use Generative AI Components in your research, use the following BibTeX entry. From 30e3dea6ab51a54f978e0e10d6b2dfc5116dafb0 Mon Sep 17 00:00:00 2001 From: rbrugaro Date: Fri, 31 Jan 2025 15:36:56 -0800 Subject: [PATCH 02/17] [bugfix] docsum error by HuggingFaceEndpoint (#1246) * Fix bug iin HuggingFaceEndpoint usage 1. Upgrade langchain hugginface from community to partner (community deprecated) Added task=text-generation argument to fix error with tgi_endpoint Signed-off-by: rbrugaro * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: rbrugaro Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> --- comps/llms/src/doc-summarization/integrations/tgi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/comps/llms/src/doc-summarization/integrations/tgi.py b/comps/llms/src/doc-summarization/integrations/tgi.py index 002f8de19f..902334bc03 100644 --- a/comps/llms/src/doc-summarization/integrations/tgi.py +++ b/comps/llms/src/doc-summarization/integrations/tgi.py @@ -4,7 +4,7 @@ import os import requests -from langchain_community.llms import HuggingFaceEndpoint +from langchain_huggingface import HuggingFaceEndpoint from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.cores.proto.api_protocol import DocSumChatCompletionRequest @@ -71,6 +71,7 @@ async def invoke(self, input: DocSumChatCompletionRequest): repetition_penalty=input.repetition_penalty if input.repetition_penalty else 1.03, streaming=input.stream, server_kwargs=server_kwargs, + task="text-generation", ) result = await self.generate(input, self.client) From 3104454c2364da4cc9032f439953f3d3884584a3 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Fri, 31 Jan 2025 17:49:49 -0800 Subject: [PATCH 03/17] Fix port in the data prep redis README file (#1250) Signed-off-by: dmsuehir Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> --- comps/dataprep/src/README_redis.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/dataprep/src/README_redis.md b/comps/dataprep/src/README_redis.md index c6e4555295..d6ff2a6b76 100644 --- a/comps/dataprep/src/README_redis.md +++ b/comps/dataprep/src/README_redis.md @@ -112,7 +112,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e 
http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest +docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) From 119acf2af564943e5948ed1d79804a808020ffed Mon Sep 17 00:00:00 2001 From: Eero Tamminen Date: Tue, 4 Feb 2025 22:21:04 +0200 Subject: [PATCH 04/17] Add Dockerfile for comps-base image (#1127) Signed-off-by: Eero Tamminen --- Dockerfile | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000..b1db30c720 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Base image for GenAIComps based OPEA Python applications +# Build: docker build -t opea/comps-base -f Dockerfile . + +ARG IMAGE_NAME=python +ARG IMAGE_TAG=3.11-slim + +FROM ${IMAGE_NAME}:${IMAGE_TAG} AS base + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR $HOME + +COPY *.toml *.py *.txt *.md LICENSE ./ + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r requirements.txt + +COPY comps/ comps/ + +ENV PYTHONPATH=$PYTHONPATH:$HOME + +USER user + +ENTRYPOINT ["sh", "-c", "set && ls -la"] From 4ede4055255942dee9490ed457851cde2e237fc9 Mon Sep 17 00:00:00 2001 From: Eero Tamminen Date: Wed, 5 Feb 2025 22:04:50 +0200 Subject: [PATCH 05/17] Create token metrics only when they are available (#1092) * Create token metrics only when they are available This avoids generation of useless token/request histogram metrics for services that use Orchestrator class, but never call its token processing functionality. (Helps in differentiating frontend megaservice metrics from backend megaservice ones, especially when multiple OPEA applications run in the same cluster.) Also change Orchestrator CI test workaround to use unique prefix for each metric instance, instead of metrics being (singleton) class variables. Signed-off-by: Eero Tamminen * Add locking for latency metric creation / method change As that that could be called from multiple request handling threads. 
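In essence the change uses double-checked locking: each metric is created only on first use, and the check is repeated under the lock so that two request-handling threads cannot create it twice. A minimal standalone sketch of that pattern is shown below; the class and metric names are illustrative only, and the actual orchestrator change is in the diff that follows.

```python
# Minimal sketch of on-demand, thread-safe Prometheus metric creation (illustrative names).
import threading
import time

from prometheus_client import Histogram


class LazyTokenMetrics:
    def __init__(self, prefix: str) -> None:
        self._prefix = prefix
        self._lock = threading.Lock()
        self.first_token_latency = None  # created only when tokens are actually handled

    def observe_first_token(self, token_start: float) -> float:
        if self.first_token_latency is None:
            with self._lock:
                # re-check inside the lock in case another request thread got here first
                if self.first_token_latency is None:
                    self.first_token_latency = Histogram(
                        f"{self._prefix}_first_token_latency", "First token latency (histogram)"
                    )
        now = time.time()
        self.first_token_latency.observe(now - token_start)
        return now
```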
Signed-off-by: Eero Tamminen --------- Signed-off-by: Eero Tamminen Co-authored-by: Malini Bhandaru --- comps/cores/mega/orchestrator.py | 61 ++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 4053988566..97ee2a76b3 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -7,6 +7,7 @@ import json import os import re +import threading import time from typing import Dict, List @@ -27,20 +28,58 @@ class OrchestratorMetrics: - # Because: + # Need an instance ID for metric prefix because: + # - Orchestror instances are not named # - CI creates several orchestrator instances - # - Prometheus requires metrics to be singletons - # - Oorchestror instances are not provided their own names - # Metrics are class members with "megaservice" name prefix - first_token_latency = Histogram("megaservice_first_token_latency", "First token latency (histogram)") - inter_token_latency = Histogram("megaservice_inter_token_latency", "Inter-token latency (histogram)") - request_latency = Histogram("megaservice_request_latency", "Whole request/reply latency (histogram)") - request_pending = Gauge("megaservice_request_pending", "Count of currently pending requests (gauge)") + # - Prometheus requires metrics (their names) to be unique + _instance_id = 0 def __init__(self) -> None: - pass + self._instance_id += 1 + if self._instance_id > 1: + self._prefix = f"megaservice{self._instance_id}" + else: + self._prefix = "megaservice" + + self.request_pending = Gauge(f"{self._prefix}_request_pending", "Count of currently pending requests (gauge)") + + # locking for latency metric creation / method change + self._lock = threading.Lock() + + # Metrics related to token processing are created on demand, + # to avoid bogus ones for services that never handle tokens + self.first_token_latency = None + self.inter_token_latency = None + self.request_latency = None + + # initial methods to create the metrics + self.token_update = self._token_update_create + self.request_update = self._request_update_create + + def _token_update_create(self, token_start: float, is_first: bool) -> float: + with self._lock: + # in case another thread already got here + if self.token_update == self._token_update_create: + self.first_token_latency = Histogram( + f"{self._prefix}_first_token_latency", "First token latency (histogram)" + ) + self.inter_token_latency = Histogram( + f"{self._prefix}_inter_token_latency", "Inter-token latency (histogram)" + ) + self.token_update = self._token_update_real + return self.token_update(token_start, is_first) + + def _request_update_create(self, req_start: float) -> None: + with self._lock: + # in case another thread already got here + if self.request_update == self._request_update_create: + self.request_latency = Histogram( + f"{self._prefix}_request_latency", "Whole LLM request/reply latency (histogram)" + ) + self.request_update = self._request_update_real + self.request_update(req_start) - def token_update(self, token_start: float, is_first: bool) -> float: + def _token_update_real(self, token_start: float, is_first: bool) -> float: now = time.time() if is_first: self.first_token_latency.observe(now - token_start) @@ -48,7 +87,7 @@ def token_update(self, token_start: float, is_first: bool) -> float: self.inter_token_latency.observe(now - token_start) return now - def request_update(self, req_start: float) -> None: + def _request_update_real(self, req_start: float) 
-> None: self.request_latency.observe(time.time() - req_start) def pending_update(self, increase: bool) -> None: From fa01f46f1ea922a35ea1d68a5aa0f42335b78f56 Mon Sep 17 00:00:00 2001 From: minmin-intel Date: Wed, 5 Feb 2025 13:57:57 -0800 Subject: [PATCH 06/17] fix tei embedding and tei reranking bug (#1256) Signed-off-by: minmin-intel Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> --- comps/embeddings/src/integrations/tei.py | 1 + comps/rerankings/src/integrations/tei.py | 1 + 2 files changed, 2 insertions(+) diff --git a/comps/embeddings/src/integrations/tei.py b/comps/embeddings/src/integrations/tei.py index dd7dd602e1..8d589fb822 100644 --- a/comps/embeddings/src/integrations/tei.py +++ b/comps/embeddings/src/integrations/tei.py @@ -70,6 +70,7 @@ async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: raise TypeError("Unsupported input type: input must be a string or list of strings.") response = await self.client.post( json={"input": texts, "encoding_format": input.encoding_format, "model": input.model, "user": input.user}, + model=f"{self.base_url}/v1/embeddings", task="text-embedding", ) embeddings = json.loads(response.decode()) diff --git a/comps/rerankings/src/integrations/tei.py b/comps/rerankings/src/integrations/tei.py index 8060555fa8..f1ebd9b6aa 100644 --- a/comps/rerankings/src/integrations/tei.py +++ b/comps/rerankings/src/integrations/tei.py @@ -71,6 +71,7 @@ async def invoke( response = await self.client.post( json={"query": query, "texts": docs}, + model=f"{self.base_url}/rerank", task="text-reranking", ) From 5baada85634582137dc3eeed070896d09dfd7086 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 6 Feb 2025 15:03:00 +0800 Subject: [PATCH 07/17] Fix CD test issue. (#1263) 1.Fix template name in README 2.Fix invalid release name Signed-off-by: ZePan110 --- .github/workflows/_run-helm-chart.yml | 3 ++- comps/guardrails/deployment/kubernetes/README.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_run-helm-chart.yml b/.github/workflows/_run-helm-chart.yml index 08fc71db66..0eca64ac4c 100644 --- a/.github/workflows/_run-helm-chart.yml +++ b/.github/workflows/_run-helm-chart.yml @@ -134,8 +134,9 @@ jobs: if [[ "${service,,}" == *"third_parties"* ]]; then CHART_NAME="$(echo "${service,,}"|cut -d'/' -f2)" # bridgetower else - CHART_NAME="${service_name}" # agent + CHART_NAME="${service_name}" # web_retrievers fi + CHART_NAME=$(echo "$CHART_NAME" | tr -cd 'a-z0-9') echo "service_name=$service_name" >> $GITHUB_ENV echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV echo "RELEASE_NAME=${CHART_NAME}$(date +%d%H%M%S)" >> $GITHUB_ENV diff --git a/comps/guardrails/deployment/kubernetes/README.md b/comps/guardrails/deployment/kubernetes/README.md index b309900a07..2b3d3002c1 100644 --- a/comps/guardrails/deployment/kubernetes/README.md +++ b/comps/guardrails/deployment/kubernetes/README.md @@ -7,5 +7,5 @@ ``` export HFTOKEN="insert-your-huggingface-token-here" -helm install guardrails oci://ghcr.io/opea-project/charts/guardrails --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +helm install guardrails oci://ghcr.io/opea-project/charts/guardrails-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml ``` From ecb7f7bb353a78f2c88b9636f238761e2cf90edc Mon Sep 17 00:00:00 2001 From: Spycsh <39623753+Spycsh@users.noreply.github.com> Date: Fri, 7 Feb 2025 16:58:22 +0800 Subject: [PATCH 08/17] Fix web-retrievers hub client and tei endpoint issue (#1270) * fix web-retrievers 
hub client and tei endpoint issue Signed-off-by: Spycsh --- comps/web_retrievers/deployment/docker_compose/compose.yaml | 1 + comps/web_retrievers/src/README.md | 1 + comps/web_retrievers/src/requirements.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/comps/web_retrievers/deployment/docker_compose/compose.yaml b/comps/web_retrievers/deployment/docker_compose/compose.yaml index d626313eb1..276ca6ed4f 100644 --- a/comps/web_retrievers/deployment/docker_compose/compose.yaml +++ b/comps/web_retrievers/deployment/docker_compose/compose.yaml @@ -15,6 +15,7 @@ services: GOOGLE_API_KEY: ${GOOGLE_API_KEY} GOOGLE_CSE_ID: ${GOOGLE_CSE_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} restart: unless-stopped depends_on: tei-embedding-service: diff --git a/comps/web_retrievers/src/README.md b/comps/web_retrievers/src/README.md index b276a28b3e..cc96a7e4a1 100644 --- a/comps/web_retrievers/src/README.md +++ b/comps/web_retrievers/src/README.md @@ -46,6 +46,7 @@ export GOOGLE_CSE_ID=${GOOGLE_CSE_ID} export TEI_PORT=6060 export no_proxy=$host_ip,$no_proxy export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 +export TEI_EMBEDDING_ENDPOINT=http://${host_ip}:6060 docker compose -f ../deployment/docker_compose/compose.yaml up web-retriever-service tei-embedding-service -d ``` diff --git a/comps/web_retrievers/src/requirements.txt b/comps/web_retrievers/src/requirements.txt index c3b0134961..d893a630de 100644 --- a/comps/web_retrievers/src/requirements.txt +++ b/comps/web_retrievers/src/requirements.txt @@ -4,6 +4,7 @@ docarray[full] fastapi google-api-python-client>=2.100.0 html2text +huggingface-hub==0.27.1 langchain-huggingface langchain_community opentelemetry-api From fb86b5e5a04ce0393912e74b516971c1333ed754 Mon Sep 17 00:00:00 2001 From: Louie Tsai Date: Sat, 8 Feb 2025 00:58:33 -0800 Subject: [PATCH 09/17] Add Deepseek model into validated model table and add required Gaudi cards for LLM microservice (#1267) * Update README.md for Deepseek support and numbers of required gaudi cards Signed-off-by: Tsai, Louie * Update README.md Signed-off-by: Tsai, Louie --------- Signed-off-by: Tsai, Louie --- comps/llms/src/text-generation/README.md | 43 +++++++++++++++++------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/comps/llms/src/text-generation/README.md b/comps/llms/src/text-generation/README.md index 360c459dc1..ba1a31df39 100644 --- a/comps/llms/src/text-generation/README.md +++ b/comps/llms/src/text-generation/README.md @@ -8,14 +8,31 @@ Overall, this microservice offers a streamlined way to integrate large language ## Validated LLM Models -| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | -| --------------------------- | --------- | -------- | ---------- | -| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | -| [Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | -| [Llama-2-70b-chat-hf] | ✓ | - | ✓ | -| [Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | -| [Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | -| [Phi-3] | x | Limit 4K | Limit 4K | +| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | +| ------------------------------------------- | --------- | -------- | ---------- | +| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-70b-chat-hf] | ✓ | - | ✓ | +| [meta-llama/Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | +| [meta-llama/Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | +| [Phi-3] | x | Limit 4K | Limit 4K | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | ✓ | - | ✓ | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | ✓ 
| - | ✓ | + +### System Requirements for LLM Models + +| Model | Minimum number of Gaudi cards | +| ------------------------------------------- | ----------------------------- | +| [Intel/neural-chat-7b-v3-3] | 1 | +| [meta-llama/Llama-2-7b-chat-hf] | 1 | +| [meta-llama/Llama-2-70b-chat-hf] | 2 | +| [meta-llama/Meta-Llama-3-8B-Instruct] | 1 | +| [meta-llama/Meta-Llama-3-70B-Instruct] | 2 | +| [Phi-3] | x | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | 8 | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | 4 | + +> NOTE: Detailed system requirements coming soon. ## Support integrations @@ -166,9 +183,11 @@ curl http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions \ [Intel/neural-chat-7b-v3-3]: https://huggingface.co/Intel/neural-chat-7b-v3-3 -[Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -[Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -[Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct -[Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct +[meta-llama/Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf +[meta-llama/Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf +[meta-llama/Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct +[meta-llama/Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct [Phi-3]: https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3 [HuggingFace]: https://huggingface.co/ +[deepseek-ai/DeepSeek-R1-Distill-Llama-70B]: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B +[deepseek-ai/DeepSeek-R1-Distill-Qwen-32B]: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B From 0df374bc39a4e98bb5790da7985067c7be571487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20De=20Le=C3=B3n?= <111013930+daniel-de-leon-user293@users.noreply.github.com> Date: Sun, 9 Feb 2025 22:01:58 -0800 Subject: [PATCH 10/17] Update docs for LLamaGuard & WildGuard Microservice (#1259) * working README for CLI and compose Signed-off-by: Daniel Deleon * update for direct python execution Signed-off-by: Daniel Deleon * fix formatting Signed-off-by: Daniel Deleon * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * bring back depends_on condition Signed-off-by: Daniel Deleon --------- Signed-off-by: Daniel Deleon Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> --- comps/guardrails/src/guardrails/README.md | 185 ++++++++++------------ comps/third_parties/tgi/README.md | 4 +- 2 files changed, 88 insertions(+), 101 deletions(-) diff --git a/comps/guardrails/src/guardrails/README.md b/comps/guardrails/src/guardrails/README.md index 7794d9490f..15679f706e 100644 --- a/comps/guardrails/src/guardrails/README.md +++ b/comps/guardrails/src/guardrails/README.md @@ -9,9 +9,9 @@ The Guardrails Microservice now offers two primary types of guardrails: - Input Guardrails: These are applied to user inputs. An input guardrail can either reject the input, halting further processing. - Output Guardrails: These are applied to outputs generated by the LLM. An output guardrail can reject the output, preventing it from being returned to the user. 
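For a quick sense of how a caller uses these guardrails, the sketch below posts text to the `/v1/guardrails` endpoint documented in section 3 of this README and treats any "Violated policies" reply as a rejection. The port default and helper name are assumptions for illustration; equivalent curl commands appear later in this document.

```python
# Illustrative client for the guardrail microservice; not part of this repository.
import os

import requests

GUARDRAIL_URL = f"http://localhost:{os.getenv('GUARDRAIL_PORT', '9090')}/v1/guardrails"


def is_safe(text: str) -> bool:
    """Return True when the guardrail service does not flag the text."""
    resp = requests.post(
        GUARDRAIL_URL,
        json={"text": text, "parameters": {"max_new_tokens": 32}},
        timeout=30,
    )
    resp.raise_for_status()
    # Unsafe content is reported as "Violated policies: <category>, please check your input."
    return "Violated policies" not in resp.text


if __name__ == "__main__":
    print(is_safe("How do you buy a tiger in the US?"))  # expected: False
```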
-## LlamaGuard +**This microservice supports Meta's [Llama Guard](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B) and Allen Institute for AI's [WildGuard](https://huggingface.co/allenai/wildguard) models.** -We offer content moderation support utilizing Meta's [Llama Guard](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B) model. +## Llama Guard Any content that is detected in the following categories is determined as unsafe: @@ -22,111 +22,84 @@ Any content that is detected in the following categories is determined as unsafe - Regulated or Controlled Substances - Suicide & Self Harm -### 🚀1. Start Microservice with Python (Option 1) - -To start the Guardrails microservice, you need to install python packages first. +## WildGuard -#### 1.1 Install Requirements +`allenai/wildguard` was fine-tuned from `mistralai/Mistral-7B-v0.3` on their own [`allenai/wildguardmix`](https://huggingface.co/datasets/allenai/wildguardmix) dataset. Any content that is detected in the following categories is determined as unsafe: -```bash -pip install -r requirements.txt -``` +- Privacy +- Misinformation +- Harmful Language +- Malicious Uses -#### 1.2 Start TGI Gaudi Service +## Clone OPEA GenAIComps and set initial environment variables ```bash -export HF_TOKEN=${your_hf_api_token} -volume=$PWD/data -model_id="meta-llama/Meta-Llama-Guard-2-8B" -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 +git clone https://github.com/opea-project/GenAIComps.git +export OPEA_GENAICOMPS_ROOT=$(pwd)/GenAIComps +export GUARDRAIL_PORT=9090 ``` -#### 1.3 Verify the TGI Gaudi Service +## Start up the HuggingFace Text Generation Inference (TGI) Server -```bash -curl 127.0.0.1:8088/generate \ - -X POST \ - -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` +Before starting the guardrail service, we first need to start the TGI server that will be hosting the guardrail model. -#### 1.4 Start Guardrails Service +Choose one of the following before starting your TGI server. -Optional: If you have deployed a Guardrails model with TGI Gaudi Service other than default model (i.e., `meta-llama/Meta-Llama-Guard-2-8B`) [from section 1.2](#12-start-tgi-gaudi-service), you will need to add the eviornment variable `SAFETY_GUARD_MODEL_ID` containing the model id. For example, the following informs the Guardrails Service the deployed model used LlamaGuard2: +**For LlamaGuard:** ```bash export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" +export GUARDRAILS_COMPONENT_NAME=OPEA_LLAMA_GUARD ``` +Or + ```bash -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -python guardrails_tgi.py +export SAFETY_GUARD_MODEL_ID="meta-llama/LlamaGuard-7b" +export GUARDRAILS_COMPONENT_NAME=OPEA_LLAMA_GUARD ``` -### 🚀2. Start Microservice with Docker (Option 2) - -If you start an Guardrails microservice with docker, the `docker_compose_guardrails.yaml` file will automatically start a TGI gaudi service with docker. 
+_Other variations of LlamaGuard are also an option to use but are not guaranteed to work OOB._ -#### 2.1 Setup Environment Variables - -In order to start TGI and LLM services, you need to setup the following environment variables first. +**For Wild Guard:** ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -export LLM_MODEL_ID=${your_hf_llm_model} +export SAFETY_GUARD_MODEL_ID="allenai/wildguard" +export GUARDRAILS_COMPONENT_NAME=OPEA_WILD_GUARD ``` -#### 2.2 Build Docker Image - -```bash -cd ../../../../ -docker build -t opea/guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . -``` +_Note that both of these models are gated and you need to complete their form on their associated model pages first in order to use them with your HF token._ -#### 2.3 Run Docker with CLI +Follow the steps [here](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/tgi) to start the TGI server container where LLM_MODEL_ID is set to your SAFETY_GUARD_MODEL_ID like below: ```bash -docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/guardrails:latest +export LLM_MODEL_ID=$SAFETY_GUARD_MODEL_ID ``` -#### 2.4 Run Docker with Docker Compose +Once the container is starting up and loading the model, set the endpoint that you will use to make requests to the TGI server: ```bash -cd deployment/docker_compose/ -docker compose -f compose_llamaguard.yaml up -d +export SAFETY_GUARD_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" ``` -### 🚀3. Consume Guardrails Service +**Verify that the TGI Server is ready for inference** -#### 3.1 Check Service Status +First check that the TGI server successfully loaded the guardrail model. Loading the model could take up to 5-10 minutes. You can do this by running the following: ```bash -curl http://localhost:9090/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' +docker logs tgi-gaudi-server ``` -#### 3.2 Consume Guardrails Service +If the last line of the log contains something like `INFO text_generation_router::server: router/src/server.rs:2209: Connected` then your TGI server is ready and the following curl should work: ```bash -curl http://localhost:9090/v1/guardrails\ +curl localhost:${LLM_ENDPOINT_PORT}/generate \ -X POST \ - -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ -H 'Content-Type: application/json' ``` -## WildGuard - -We also offer content moderation support utilizing Allen Institute for AI's [WildGuard](https://huggingface.co/allenai/wildguard) model. - -`allenai/wildguard` was fine-tuned from `mistralai/Mistral-7B-v0.3` on their own [`allenai/wildguardmix`](https://huggingface.co/datasets/allenai/wildguardmix) dataset. Any content that is detected in the following categories is determined as unsafe: - -- Privacy -- Misinformation -- Harmful Language -- Malicious Uses +Check the logs again with the `logs` command to confirm that the curl request resulted in `Success`. ### 🚀1. Start Microservice with Python (Option 1) @@ -135,67 +108,76 @@ To start the Guardrails microservice, you need to install python packages first. 
#### 1.1 Install Requirements ```bash +pip install $OPEA_GENAICOMPS_ROOT +cd $OPEA_GENAICOMPS_ROOT/comps/guardrails/src/guardrails pip install -r requirements.txt ``` -#### 1.2 Start TGI Gaudi Service +#### 1.2 Start Guardrails Service ```bash -export HF_TOKEN=${your_hf_api_token} -volume=$PWD/data -model_id="allenai/wildguard" -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 +python opea_guardrails_microservice.py ``` -#### 1.3 Verify the TGI Gaudi Service +### 🚀2. Start Microservice with Docker (Option 2) -```bash -curl 127.0.0.1:8088/generate \ - -X POST \ - -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` +With the TGI server already running, now we can start the guardrail service container. -#### 1.4 Start Guardrails Service +#### 2.1 Build Docker Image ```bash -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -python guardrails_tgi.py +cd $OPEA_GENAICOMPS_ROOT +docker build -t opea/guardrails:latest \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -f comps/guardrails/src/guardrails/Dockerfile . ``` -### 🚀2. Start Microservice with Docker (Option 2) - -If you start an Guardrails microservice with docker, the `compose_wildguard.yaml` file will automatically start a TGI gaudi service with docker. - -#### 2.1 Setup Environment Variables +#### 2.2.a Run with Docker Compose (Option A) -In order to start TGI and LLM services, you need to setup the following environment variables first. +**To run with LLama Guard:** ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -export LLM_MODEL_ID=${your_hf_llm_model} +docker compose -f $OPEA_GENAICOMPS_ROOT/comps/guardrails/deployment/docker_compose/compose.yaml up -d llamaguard-guardrails-server ``` -#### 2.2 Build Docker Image +**To run with WildGuard:** ```bash -cd ../../../../ -docker build -t opea/guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . 
+docker compose -f $OPEA_GENAICOMPS_ROOT/comps/guardrails/deployment/docker_compose/compose.yaml up -d wildguard-guardrails-server ``` -#### 2.3 Run Docker with CLI +#### 2.2.b Run Docker with CLI (Option B) + +**To run with LLama Guard:** ```bash -docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e GUARDRAILS_COMPONENT_NAME="OPEA_WILD_GUARD" opea/guardrails:latest +docker run -d \ + --name="llamaguard-guardrails-server" \ + -p ${GUARDRAIL_PORT}:${GUARDRAIL_PORT} \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT \ + -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN \ + opea/guardrails:latest ``` -#### 2.4 Run Docker with Docker Compose +**To run with WildGuard:** ```bash -cd deployment/docker_compose/ -docker compose -f compose_wildguard.yaml up -d +docker run -d \ + --name="wildguard-guardrails-server" \ + -p ${GUARDRAIL_PORT}:${GUARDRAIL_PORT} \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT \ + -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN \ + -e GUARDRAILS_COMPONENT_NAME="OPEA_WILD_GUARD" \ + opea/guardrails:latest ``` ### 🚀3. Consume Guardrails Service @@ -203,7 +185,7 @@ docker compose -f compose_wildguard.yaml up -d #### 3.1 Check Service Status ```bash -curl http://localhost:9090/v1/health_check \ +curl http://localhost:${GUARDRAIL_PORT}/v1/health_check\ -X GET \ -H 'Content-Type: application/json' ``` @@ -211,8 +193,13 @@ curl http://localhost:9090/v1/health_check \ #### 3.2 Consume Guardrails Service ```bash -curl http://localhost:9090/v1/guardrails \ +curl http://localhost:${GUARDRAIL_PORT}/v1/guardrails\ -X POST \ -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ -H 'Content-Type: application/json' ``` + +This request should return text containing: +`"Violated policies: , please check your input."` + +Where `category` is `Violent Crimes` or `harmful` for `Llama-Guard-2-8B` or `wildguard`, respectively. diff --git a/comps/third_parties/tgi/README.md b/comps/third_parties/tgi/README.md index e12f6d34da..07fb28e8eb 100644 --- a/comps/third_parties/tgi/README.md +++ b/comps/third_parties/tgi/README.md @@ -19,12 +19,12 @@ Run tgi on xeon. ```bash cd deplopyment/docker_compose -docker compose -f compose.yaml tgi-server up -d +docker compose -f compose.yaml up -d tgi-server ``` Run tgi on gaudi. ```bash cd deplopyment/docker_compose -docker compose -f compose.yaml tgi-gaudi-server up -d +docker compose -f compose.yaml up -d tgi-gaudi-server ``` From b777db72c03afade52662d7bfb1d85bfa02ad51f Mon Sep 17 00:00:00 2001 From: Letong Han <106566639+letonghan@users.noreply.github.com> Date: Mon, 10 Feb 2025 16:00:55 +0800 Subject: [PATCH 11/17] Fix Dataprep Ingest Data Issue. (#1271) * Fix Dataprep Ingest Data Issue. Trace: 1. The update of `langchain_huggingface.HuggingFaceEndpointEmbeddings` caused the wrong size of embedding vectors. 2. Wrong size vectors are wrongly saved into Redis database, and the indices are not created correctly. 3. The retriever can not retrieve data from Redis using index due to the reasons above. 4. Then the RAG seems `not work`, for the file uploaded can not be retrieved from database. 
Solution: Replace all of the `langchain_huggingface.HuggingFaceEndpointEmbeddings` to `langchain_community.embeddings.HuggingFaceInferenceAPIEmbeddings`, and modify related READMEs and scirpts. Related issue: - https://github.com/opea-project/GenAIExamples/issues/1473 - https://github.com/opea-project/GenAIExamples/issues/1482 --------- Signed-off-by: letonghan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../deployment/docker_compose/compose.yaml | 5 +-- comps/dataprep/src/README_elasticsearch.md | 2 +- comps/dataprep/src/README_milvus.md | 5 ++- comps/dataprep/src/README_opensearch.md | 6 ++-- comps/dataprep/src/README_pgvector.md | 4 ++- comps/dataprep/src/README_redis.md | 5 ++- comps/dataprep/src/README_vdms.md | 6 ++-- .../src/integrations/elasticsearch.py | 31 ++++++++++++---- comps/dataprep/src/integrations/milvus.py | 36 ++++++++++++++----- .../src/integrations/neo4j_langchain.py | 3 +- comps/dataprep/src/integrations/opensearch.py | 26 +++++++++++--- comps/dataprep/src/integrations/pgvect.py | 26 +++++++++++--- comps/dataprep/src/integrations/pipecone.py | 26 +++++++++++--- comps/dataprep/src/integrations/qdrant.py | 25 ++++++++++--- comps/dataprep/src/integrations/redis.py | 22 ++++++++++-- comps/dataprep/src/integrations/vdms.py | 27 ++++++++++---- tests/dataprep/test_dataprep_milvus.sh | 1 + .../test_dataprep_neo4j_on_intel_hpu.sh | 1 + tests/dataprep/test_dataprep_qdrant.sh | 1 + 19 files changed, 199 insertions(+), 59 deletions(-) diff --git a/comps/dataprep/deployment/docker_compose/compose.yaml b/comps/dataprep/deployment/docker_compose/compose.yaml index ef54a69e0c..8fff22cd82 100644 --- a/comps/dataprep/deployment/docker_compose/compose.yaml +++ b/comps/dataprep/deployment/docker_compose/compose.yaml @@ -28,7 +28,7 @@ services: DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_ELASTICSEARCH" ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped depends_on: @@ -49,6 +49,7 @@ services: MILVUS_HOST: ${MILVUS_HOST} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} restart: unless-stopped depends_on: tei-embedding-serving: @@ -161,7 +162,7 @@ services: QDRANT_HOST: ${QDRANT_HOST} QDRANT_PORT: ${QDRANT_PORT} COLLECTION_NAME: ${COLLECTION_NAME} - TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped diff --git a/comps/dataprep/src/README_elasticsearch.md b/comps/dataprep/src/README_elasticsearch.md index ab4b8547b5..94d8d47ba1 100644 --- a/comps/dataprep/src/README_elasticsearch.md +++ b/comps/dataprep/src/README_elasticsearch.md @@ -50,7 +50,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest +docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e 
HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_milvus.md b/comps/dataprep/src/README_milvus.md index bcf545f72b..3e79ff6976 100644 --- a/comps/dataprep/src/README_milvus.md +++ b/comps/dataprep/src/README_milvus.md @@ -25,6 +25,7 @@ export MILVUS_HOST=${your_milvus_host_ip} export MILVUS_PORT=19530 export COLLECTION_NAME=${your_collection_name} export TEI_EMBEDDING_ENDPOINT=${your_embedding_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_huggingface_api_token} ``` ### 1.4 Start TEI Embedding Service @@ -70,13 +71,15 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ```bash export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export EMBEDDING_MODEL_ID=${your_embedding_model_id} export MILVUS_HOST=${your_host_ip} ``` ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest +docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest ``` ### 2.4 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_opensearch.md b/comps/dataprep/src/README_opensearch.md index b5d14c9a9d..b5f1c2c26c 100644 --- a/comps/dataprep/src/README_opensearch.md +++ b/comps/dataprep/src/README_opensearch.md @@ -51,7 +51,7 @@ curl localhost:$your_port/embed \ After checking that it works, set up environment variables. ```bash -export TEI_ENDPOINT="http://localhost:$your_port" +export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" ``` ### 1.4 Start Document Preparation Microservice for OpenSearch with Python Script @@ -75,7 +75,7 @@ Please refer to this [readme](../../third_parties/opensearch/src/README.md). 
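The environment block below renames `TEI_ENDPOINT` to `TEI_EMBEDDING_ENDPOINT`. Before pointing the dataprep service at it, it can be worth confirming that the TEI server is reachable and returns vectors of the expected size; a small Python version of the curl `/embed` check shown earlier in this README might look like the following sketch (the endpoint default is an assumption).

```python
# Illustrative sanity check of a running TEI embedding endpoint (not part of this patch).
import os

import requests

endpoint = os.environ.get("TEI_EMBEDDING_ENDPOINT", "http://localhost:6006")

# /info reports which model the server is hosting.
info = requests.get(f"{endpoint}/info", timeout=10).json()
print("model_id:", info["model_id"])

# /embed returns one vector per input string; the vector index must match this dimension.
vectors = requests.post(f"{endpoint}/embed", json={"inputs": "What is Deep Learning?"}, timeout=10).json()
print("embedding dimension:", len(vectors[0]))
```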
```bash export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_ENDPOINT="http://${your_ip}:6006" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006" export OPENSEARCH_URL="http://${your_ip}:9200" export INDEX_NAME=${your_index_name} export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} @@ -97,7 +97,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil - option 1: Start single-process version (for processing up to 10 files) ```bash -docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest +docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) diff --git a/comps/dataprep/src/README_pgvector.md b/comps/dataprep/src/README_pgvector.md index 92d0db577a..1fdb678e7f 100644 --- a/comps/dataprep/src/README_pgvector.md +++ b/comps/dataprep/src/README_pgvector.md @@ -38,6 +38,8 @@ Please refer to this [readme](../../third_parties/pgvector/src/README.md). ```bash export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb export INDEX_NAME=${your_index_name} +export TEI_EMBEDDING_ENDPOINT=${your_tei_embedding_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ``` ### 2.3 Build Docker Image @@ -50,7 +52,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest +docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_redis.md b/comps/dataprep/src/README_redis.md index d6ff2a6b76..69f3ae39af 100644 --- a/comps/dataprep/src/README_redis.md +++ b/comps/dataprep/src/README_redis.md @@ -95,8 +95,7 @@ Please refer to this [readme](../../third_parties/redis/src/README.md). 
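The `TEI_EMBEDDING_ENDPOINT` and `HUGGINGFACEHUB_API_TOKEN` exported in the section below feed the embedder selection this patch introduces across the dataprep integrations (see the `redis.py` hunk further down). A condensed, standalone sketch of that pattern, with defaults taken from the patch:

```python
# Condensed sketch of the embedder selection pattern introduced by this patch.
import os

import requests
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings

TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "")
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")

if TEI_EMBEDDING_ENDPOINT and HUGGINGFACEHUB_API_TOKEN:
    # Ask the TEI server which model it serves, then embed through the remote endpoint.
    model_id = requests.get(f"{TEI_EMBEDDING_ENDPOINT}/info", timeout=10).json()["model_id"]
    embedder = HuggingFaceInferenceAPIEmbeddings(
        api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT
    )
else:
    # Fall back to a local embedding model.
    embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)

print(len(embedder.embed_query("hello")))  # vector size the Redis index will be built with
```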
### 2.2 Setup Environment Variables ```bash -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_ENDPOINT="http://${your_ip}:6006" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006" export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} @@ -112,7 +111,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest +docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) diff --git a/comps/dataprep/src/README_vdms.md b/comps/dataprep/src/README_vdms.md index 7571ca80d2..7318282c4e 100644 --- a/comps/dataprep/src/README_vdms.md +++ b/comps/dataprep/src/README_vdms.md @@ -69,7 +69,8 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export VDMS_HOST=${host_ip} export VDMS_PORT=55555 -export TEI_ENDPOINT=${your_tei_endpoint} +export TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export COLLECTION_NAME=${your_collection_name} export SEARCH_ENGINE="FaissFlat" export DISTANCE_STRATEGY="L2" @@ -89,7 +90,8 @@ Start single-process version (for 1-10 files processing) ```bash docker run -d --name="dataprep-vdms-server" -p 6007:6007 --runtime=runc --ipc=host \ --e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_ENDPOINT=$TEI_ENDPOINT \ +-e http_proxy=$http_proxy -e https_proxy=$https_proxy \ +-e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ -e COLLECTION_NAME=$COLLECTION_NAME -e VDMS_HOST=$VDMS_HOST -e VDMS_PORT=$VDMS_PORT \ -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_VDMS" opea/dataprep:latest ``` diff --git a/comps/dataprep/src/integrations/elasticsearch.py b/comps/dataprep/src/integrations/elasticsearch.py index ed07d157ea..83e422741e 100644 --- a/comps/dataprep/src/integrations/elasticsearch.py +++ b/comps/dataprep/src/integrations/elasticsearch.py @@ -9,10 +9,9 @@ from elasticsearch import Elasticsearch from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_core.documents import Document from langchain_elasticsearch import ElasticsearchStore -from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.dataprep.src.utils import ( @@ -37,7 +36,9 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # TEI Embedding endpoints -TEI_ENDPOINT = os.getenv("TEI_ENDPOINT", "") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API 
token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag-elastic") @@ -77,15 +78,31 @@ def create_index(self) -> None: if not self.es_client.indices.exists(index=INDEX_NAME): self.es_client.indices.create(index=INDEX_NAME) - def get_embedder(self) -> Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]: + def get_embedder(self) -> Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings]: """Obtain required Embedder.""" - if TEI_ENDPOINT: - return HuggingFaceEndpointEmbeddings(model=TEI_ENDPOINT) + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` and the `EMBED_MODEL` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." + ) + model_id = response.json()["model_id"] + embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) + return embedder else: return HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) def get_elastic_store( - self, embedder: Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings] + self, embedder: Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings] ) -> ElasticsearchStore: """Get Elasticsearch vector store.""" return ElasticsearchStore(index_name=INDEX_NAME, embedding=embedder, es_connection=self.es_client) diff --git a/comps/dataprep/src/integrations/milvus.py b/comps/dataprep/src/integrations/milvus.py index c3e3e57309..c4aecf86e4 100644 --- a/comps/dataprep/src/integrations/milvus.py +++ b/comps/dataprep/src/integrations/milvus.py @@ -10,7 +10,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings, OpenAIEmbeddings from langchain_core.documents import Document from langchain_milvus.vectorstores import Milvus from langchain_text_splitters import HTMLHeaderTextSplitter @@ -36,8 +36,11 @@ # Local Embedding model LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") # TEI configuration -TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") +EMBED_MODEL = os.environ.get("EMBED_MODEL", "BAAI/bge-base-en-v1.5") TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + # MILVUS configuration MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) @@ -75,7 +78,7 @@ def ingest_chunks_to_milvus(embeddings, file_name: str, chunks: List): except Exception as e: if logflag: logger.info(f"[ ingest chunks ] fail to ingest chunks into Milvus. 
error: {e}") - raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}: {e}") if logflag: logger.info(f"[ ingest chunks ] Docs ingested file {file_name} to Milvus collection {COLLECTION_NAME}.") @@ -189,7 +192,23 @@ def _initialize_embedder(self): # create embeddings using TEI endpoint service if logflag: logger.info(f"[ milvus embedding ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") - embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." + ) + model_id = response.json()["model_id"] + # create embeddings using TEI endpoint service + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model if logflag: @@ -274,7 +293,7 @@ async def ingest_files( search_res = search_by_file(my_milvus.col, encode_file) except Exception as e: raise HTTPException( - status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}." + status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}: {e}" ) if len(search_res) > 0: if logflag: @@ -319,7 +338,7 @@ async def ingest_files( search_res = search_by_file(my_milvus.col, encoded_link + ".txt") except Exception as e: raise HTTPException( - status_code=500, detail=f"Failed when searching in Milvus db for link {link}." + status_code=500, detail=f"Failed when searching in Milvus db for link {link}: {e}" ) if len(search_res) > 0: if logflag: @@ -375,7 +394,7 @@ async def get_files(self): try: all_data = search_all(my_milvus.col) except Exception as e: - raise HTTPException(status_code=500, detail="Failed when searching in Milvus db for all files.") + raise HTTPException(status_code=500, detail=f"Failed when searching in Milvus db for all files: {e}") # return [] if no data in db if len(all_data) == 0: @@ -422,8 +441,7 @@ async def delete_files(self, file_path: str = Body(..., embed=True)): except Exception as e: if logflag: logger.info(f"[ milvus delete ] {e}. 
Fail to delete {upload_folder}.") - raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") - + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}: {e}") if logflag: logger.info("[ milvus delete ] successfully delete all files.") diff --git a/comps/dataprep/src/integrations/neo4j_langchain.py b/comps/dataprep/src/integrations/neo4j_langchain.py index ba03437972..75b23252d8 100644 --- a/comps/dataprep/src/integrations/neo4j_langchain.py +++ b/comps/dataprep/src/integrations/neo4j_langchain.py @@ -34,10 +34,9 @@ NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") -# LLM/Embedding endpoints +# LLM endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") OPENAI_KEY = os.getenv("OPENAI_API_KEY") diff --git a/comps/dataprep/src/integrations/opensearch.py b/comps/dataprep/src/integrations/opensearch.py index 6f4b10bbd1..8df0104004 100644 --- a/comps/dataprep/src/integrations/opensearch.py +++ b/comps/dataprep/src/integrations/opensearch.py @@ -7,9 +7,8 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import OpenSearchVectorSearch -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from opensearchpy import OpenSearch @@ -79,9 +78,26 @@ def __init__(self, name: str, description: str, config: dict = None): self.upload_folder = "./uploaded_files/" super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) # Initialize embeddings - tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") - if tei_embedding_endpoint: - self.embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") + HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + self.embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: self.embeddings = HuggingFaceBgeEmbeddings(model_name=Config.EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/pgvect.py b/comps/dataprep/src/integrations/pgvect.py index 43b38e5d6d..d9eb698782 100644 --- a/comps/dataprep/src/integrations/pgvect.py +++ b/comps/dataprep/src/integrations/pgvect.py @@ -10,7 +10,7 @@ import psycopg2 from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import PGVector from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType @@ -30,6 +30,10 @@ # Embedding model EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +# TEI Embedding endpoints +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") PG_CONNECTION_STRING = os.getenv("PG_CONNECTION_STRING", "localhost") @@ -47,12 +51,26 @@ class OpeaPgvectorDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/pipecone.py b/comps/dataprep/src/integrations/pipecone.py index 33ffeea4b6..ec03174608 100644 --- a/comps/dataprep/src/integrations/pipecone.py +++ b/comps/dataprep/src/integrations/pipecone.py @@ -8,7 +8,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_pinecone import PineconeVectorStore from langchain_text_splitters import HTMLHeaderTextSplitter from pinecone import Pinecone, ServerlessSpec @@ -39,7 +39,9 @@ # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") @OpeaComponentRegistry.register("OPEA_DATAPREP_PINECONE") @@ -48,12 +50,26 @@ class OpeaPineConeDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/qdrant.py b/comps/dataprep/src/integrations/qdrant.py index e54c6c572b..62a9efa21a 100644 --- a/comps/dataprep/src/integrations/qdrant.py +++ b/comps/dataprep/src/integrations/qdrant.py @@ -7,9 +7,8 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import Qdrant -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from qdrant_client import QdrantClient @@ -38,7 +37,9 @@ # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") @OpeaComponentRegistry.register("OPEA_DATAPREP_QDRANT") @@ -47,12 +48,26 @@ class OpeaQdrantDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/redis.py b/comps/dataprep/src/integrations/redis.py index 06cb0d7f27..a181013bcd 100644 --- a/comps/dataprep/src/integrations/redis.py +++ b/comps/dataprep/src/integrations/redis.py @@ -11,9 +11,8 @@ import redis from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import Redis -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from redis.commands.search.field import TextField from redis.commands.search.indexDefinition import IndexDefinition, IndexType @@ -40,6 +39,8 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # TEI Embedding endpoints TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag_redis") @@ -187,8 +188,23 @@ def ingest_chunks_to_redis(file_name: str, chunks: List): logger.info(f"[ redis ingest chunks ] file name: {file_name}") # Create vectorstore if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/vdms.py b/comps/dataprep/src/integrations/vdms.py index 998b23a5c7..e4085b1812 100644 --- a/comps/dataprep/src/integrations/vdms.py +++ b/comps/dataprep/src/integrations/vdms.py @@ -7,7 +7,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores.vdms import VDMS, VDMS_Client from langchain_text_splitters import HTMLHeaderTextSplitter @@ -28,7 +28,6 @@ def getEnv(key, default_value=None): env_value = os.getenv(key, default=default_value) - print(f"{key}: {env_value}") return env_value @@ -45,7 +44,9 @@ def getEnv(key, default_value=None): # LLM/Embedding endpoints TGI_LLM_ENDPOINT = getEnv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = getEnv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = getEnv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # chunk parameters CHUNK_SIZE = getEnv("CHUNK_SIZE", 1500) @@ -58,14 +59,28 @@ class OpeaVdmsDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") self.upload_folder = "./uploaded_files/" create_upload_folder(self.upload_folder) self.client = VDMS_Client(VDMS_HOST, int(VDMS_PORT)) # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/tests/dataprep/test_dataprep_milvus.sh b/tests/dataprep/test_dataprep_milvus.sh index 603fb671cd..498f14f6f1 100644 --- a/tests/dataprep/test_dataprep_milvus.sh +++ b/tests/dataprep/test_dataprep_milvus.sh @@ -36,6 +36,7 @@ function start_service() { export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export MILVUS_HOST=${ip_address} export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export LOGFLAG=true service_name="dataprep-milvus tei-embedding-serving" cd $WORKPATH/comps/dataprep/deployment/docker_compose/ docker compose up ${service_name} -d diff --git a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh index 2b923bb66d..fbafda69e3 100755 --- a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh +++ b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh @@ -38,6 +38,7 @@ function start_service() { export TEI_EMBEDDER_PORT=12006 export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export EMBED_MODEL=${EMBEDDING_MODEL_ID} export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export LLM_ENDPOINT_PORT=10510 export TGI_LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}" diff --git a/tests/dataprep/test_dataprep_qdrant.sh b/tests/dataprep/test_dataprep_qdrant.sh index 818f99da24..9c31e2d7ab 100644 --- a/tests/dataprep/test_dataprep_qdrant.sh +++ b/tests/dataprep/test_dataprep_qdrant.sh @@ -26,6 +26,7 @@ function build_docker_images() { function start_service() { export host_ip=${ip_address} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export EMBED_MODEL=${EMBEDDING_MODEL_ID} export TEI_EMBEDDER_PORT="10224" export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export COLLECTION_NAME="rag-qdrant" From 17b96729433d515d33043b0d09fb812b75f6ac6b Mon Sep 17 00:00:00 2001 From: XinyaoWa Date: Wed, 12 Feb 2025 13:53:31 +0800 Subject: [PATCH 12/17] Fix langchain and huggingface version to avoid bug in FaqGen and DocSum, remove vllm hpu triton version fix (#1275) * Fix langchain and huggingface version to avoid bug Signed-off-by: Xinyao Wang --- .github/workflows/_comps-workflow.yml | 1 - .github/workflows/push-image-build.yml | 1 - comps/llms/src/doc-summarization/requirements.txt | 6 +++--- comps/llms/src/faq-generation/requirements.txt | 6 +++--- comps/third_parties/vllm/src/build_docker_vllm.sh | 1 - tests/agent/test_agent_langchain_on_intel_hpu.sh | 1 - .../test_guardrails_hallucination_detection_on_intel_hpu.sh | 1 - tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh | 1 - tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh | 1 - .../test_llms_text-generation_service_vllm_on_intel_hpu.sh | 1 - 10 files changed, 6 insertions(+), 14 deletions(-) diff --git a/.github/workflows/_comps-workflow.yml b/.github/workflows/_comps-workflow.yml index 964d2d7284..f7e8019dbe 100644 --- a/.github/workflows/_comps-workflow.yml +++ b/.github/workflows/_comps-workflow.yml @@ -71,7 +71,6 @@ jobs: fi if [[ $(grep -c "vllm-gaudi:" ${docker_compose_yml}) != 0 ]]; then git clone --depth 
1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git - sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt fi - name: Get build list id: get-build-list diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml index fda1528065..67389a3cd4 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -96,7 +96,6 @@ jobs: fi if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git - sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt fi - name: Build Image diff --git a/comps/llms/src/doc-summarization/requirements.txt b/comps/llms/src/doc-summarization/requirements.txt index 1694618637..6bc1bb1e55 100644 --- a/comps/llms/src/doc-summarization/requirements.txt +++ b/comps/llms/src/doc-summarization/requirements.txt @@ -1,11 +1,11 @@ docarray[full] fastapi httpx==0.27.2 -huggingface_hub -langchain #==0.1.12 +huggingface_hub==0.27.1 +langchain==0.3.14 langchain-huggingface langchain-openai -langchain_community +langchain_community==0.3.14 langchainhub opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/llms/src/faq-generation/requirements.txt b/comps/llms/src/faq-generation/requirements.txt index 36257d3939..037079294b 100644 --- a/comps/llms/src/faq-generation/requirements.txt +++ b/comps/llms/src/faq-generation/requirements.txt @@ -1,10 +1,10 @@ docarray[full] fastapi -huggingface_hub -langchain +huggingface_hub==0.27.1 +langchain==0.3.14 langchain-huggingface langchain-openai -langchain_community +langchain_community==0.3.14 langchainhub opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/third_parties/vllm/src/build_docker_vllm.sh b/comps/third_parties/vllm/src/build_docker_vllm.sh index bd8df2e708..bec3a0c8f1 100644 --- a/comps/third_parties/vllm/src/build_docker_vllm.sh +++ b/comps/third_parties/vllm/src/build_docker_vllm.sh @@ -38,7 +38,6 @@ if [ "$hw_mode" = "hpu" ]; then git clone https://github.com/HabanaAI/vllm-fork.git cd ./vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy cd .. rm -rf vllm-fork diff --git a/tests/agent/test_agent_langchain_on_intel_hpu.sh b/tests/agent/test_agent_langchain_on_intel_hpu.sh index 090d1ed332..2c12354723 100644 --- a/tests/agent/test_agent_langchain_on_intel_hpu.sh +++ b/tests/agent/test_agent_langchain_on_intel_hpu.sh @@ -57,7 +57,6 @@ function build_vllm_docker_images() { fi cd ./vllm-fork git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy if [ $? 
-ne 0 ]; then echo "opea/vllm-gaudi:comps failed" diff --git a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh index 92b29827fe..d040f954a1 100644 --- a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh +++ b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh @@ -13,7 +13,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh index d9552e9a0d..a6096bd309 100644 --- a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh index 5d489b250d..8607f2c550 100644 --- a/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh index 7c32a8977b..ea8c9ee6ca 100644 --- a/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? 
-ne 0 ]; then echo "opea/vllm-gaudi built fail" From d3906cedcf5573dc5d322ddbcb80d18b941befc6 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Wed, 12 Feb 2025 14:56:55 +0800 Subject: [PATCH 13/17] update default service list (#1276) Signed-off-by: chensuyue --- .github/workflows/manual-comps-test.yml | 2 +- .github/workflows/manual-docker-publish.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/manual-comps-test.yml b/.github/workflows/manual-comps-test.yml index c3f73fb25f..7fa8c7255c 100644 --- a/.github/workflows/manual-comps-test.yml +++ b/.github/workflows/manual-comps-test.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "asr" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,animation,dataprep,embeddings,feedback_management,finetuning,guardrails,image2image,image2video,intent_detection,llms,lvms,prompt_registry,ragas,rerankings,retrievers,text2image,text2sql,third_parties,tts,vectorstores,web_retrievers]" required: true type: string build: diff --git a/.github/workflows/manual-docker-publish.yml b/.github/workflows/manual-docker-publish.yml index b7e770dedb..aae3d3ca84 100644 --- a/.github/workflows/manual-docker-publish.yml +++ b/.github/workflows/manual-docker-publish.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,animation,dataprep,embeddings,feedback_management,finetuning,guardrails,image2image,image2video,intent_detection,llms,lvms,prompt_registry,ragas,rerankings,retrievers,text2image,text2sql,third_parties,tts,vectorstores,web_retrievers]" required: false type: string images: From f8e62164fc498a1b8763892390ed3662b9c69eac Mon Sep 17 00:00:00 2001 From: Spycsh <39623753+Spycsh@users.noreply.github.com> Date: Wed, 12 Feb 2025 15:45:14 +0800 Subject: [PATCH 14/17] fix metric id issue when init multiple Orchestrator instance (#1280) Signed-off-by: Spycsh --- comps/cores/mega/orchestrator.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 97ee2a76b3..2d1957b1b7 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -28,15 +28,13 @@ class OrchestratorMetrics: - # Need an instance ID for metric prefix because: - # - Orchestror instances are not named - # - CI creates several orchestrator instances + # Need an static class-level ID for metric prefix because: # - Prometheus requires metrics (their names) to be unique _instance_id = 0 def __init__(self) -> None: - self._instance_id += 1 - if self._instance_id > 1: + OrchestratorMetrics._instance_id += 1 + if OrchestratorMetrics._instance_id > 1: self._prefix = f"megaservice{self._instance_id}" else: self._prefix = "megaservice" From 23b2be20c642e7e439c2351f748a6b4490ae8908 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 13 Feb 2025 16:07:14 +0800 Subject: [PATCH 15/17] Fix Build latest images on push event workflow (#1282) Signed-off-by: ZePan110 --- .github/workflows/push-image-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml index 67389a3cd4..51a2a88b75 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -62,7 +62,7 @@ jobs: image-build: needs: get-build-matrix - if: ${{ fromJSON(needs.get-build-matrix.outputs.services).length != 0 }} + if: needs.get-build-matrix.outputs.services != '[]' strategy: matrix: service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }} From bef501c7aedcb5fc8998ecdb5f1af25de464c90a Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Thu, 13 Feb 2025 21:18:58 +0800 Subject: [PATCH 16/17] Fix VDMS retrieval issue (#1252) * Fix VDMS retrieval issue Signed-off-by: lvliang-intel --- comps/retrievers/src/Dockerfile | 2 +- comps/retrievers/src/integrations/vdms.py | 2 +- comps/retrievers/src/requirements.txt | 1 + .../pathway/deployment/docker_compose/compose.yaml | 6 ++++-- comps/third_parties/pathway/src/requirements.txt | 4 ++-- comps/third_parties/pathway/src/vectorstore_pathway.py | 9 +++++---- tests/retrievers/test_retrievers_elasticsearch.sh | 2 ++ tests/retrievers/test_retrievers_milvus.sh | 2 ++ tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh | 8 ++++---- tests/retrievers/test_retrievers_opensearch.sh | 2 ++ tests/retrievers/test_retrievers_pathway.sh | 2 ++ tests/retrievers/test_retrievers_pgvector.sh | 2 ++ tests/retrievers/test_retrievers_qdrant.sh | 2 ++ tests/retrievers/test_retrievers_redis.sh | 2 ++ tests/retrievers/test_retrievers_vdms.sh | 2 ++ 15 files changed, 34 insertions(+), 14 deletions(-) diff --git a/comps/retrievers/src/Dockerfile b/comps/retrievers/src/Dockerfile index 3fb6b3650e..53963d884d 100644 --- a/comps/retrievers/src/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -26,7 +26,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi && \ pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \ pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/retrievers/src/requirements.txt && \ - pip install opentelemetry-api==1.27.0 opentelemetry-exporter-otlp==1.27.0 opentelemetry-sdk==1.27.0 + pip install opentelemetry-api==1.29.0 opentelemetry-exporter-otlp==1.29.0 opentelemetry-sdk==1.29.0 ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/retrievers/src/integrations/vdms.py b/comps/retrievers/src/integrations/vdms.py index b6a44fdf14..5e5b1731fa 100644 --- a/comps/retrievers/src/integrations/vdms.py +++ b/comps/retrievers/src/integrations/vdms.py @@ -48,7 +48,7 @@ def _initialize_embedder(self): from comps.third_parties.clip.src.clip_embedding import vCLIP embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64}) - if TEI_EMBEDDING_ENDPOINT: + elif TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service if logflag: logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") diff --git a/comps/retrievers/src/requirements.txt b/comps/retrievers/src/requirements.txt index a04fef1771..511bcc744f 100644 --- a/comps/retrievers/src/requirements.txt +++ b/comps/retrievers/src/requirements.txt @@ -3,6 +3,7 @@ cairosvg docarray[full] docx2txt easyocr +einops fastapi future graspologic diff --git a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml index 35dc90c32e..9c1ead2b94 100644 --- a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml +++ 
b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 include: - - ../../../tei/deployment/docker_compose/compose.yaml + - ../../../tei/deployment/docker_compose/compose.yaml services: pathway-db: @@ -12,13 +12,15 @@ services: - "${PATHWAY_PORT:-6379}:${PATHWAY_PORT:-6379}" volumes: - "${PATHWAY_VOLUME:-../../src/README.md}:/app/data/README.md" - network_mode: host environment: + no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} PATHWAY_HOST: ${PATHWAY_HOST_DB} PATHWAY_PORT: ${PATHWAY_PORT} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "sleep 30 && exit 0"] interval: 1s diff --git a/comps/third_parties/pathway/src/requirements.txt b/comps/third_parties/pathway/src/requirements.txt index ef1bcb44bc..e552e247ff 100644 --- a/comps/third_parties/pathway/src/requirements.txt +++ b/comps/third_parties/pathway/src/requirements.txt @@ -1,7 +1,7 @@ langchain langchain-community -langchain_huggingface -langchain_openai +openai pathway[xpack-llm] sentence-transformers +tiktoken unstructured[all-docs] >= 0.16 diff --git a/comps/third_parties/pathway/src/vectorstore_pathway.py b/comps/third_parties/pathway/src/vectorstore_pathway.py index 22a23a2414..1b9d207edb 100644 --- a/comps/third_parties/pathway/src/vectorstore_pathway.py +++ b/comps/third_parties/pathway/src/vectorstore_pathway.py @@ -7,8 +7,7 @@ import nltk import pathway as pw from langchain import text_splitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_huggingface import HuggingFaceEndpointEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from pathway.xpacks.llm.parsers import ParseUnstructured from pathway.xpacks.llm.vector_store import VectorStoreServer @@ -40,7 +39,7 @@ port = int(os.getenv("PATHWAY_PORT", 8666)) EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") if __name__ == "__main__": @@ -48,7 +47,9 @@ if tei_embedding_endpoint: # create embeddings using TEI endpoint service logging.info(f"Initializing the embedder from tei_embedding_endpoint: {tei_embedding_endpoint}") - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=EMBED_MODEL, api_url=tei_embedding_endpoint + ) else: # create embeddings using local embedding model embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/tests/retrievers/test_retrievers_elasticsearch.sh b/tests/retrievers/test_retrievers_elasticsearch.sh index 60996a44ec..a5fd53fb72 100644 --- a/tests/retrievers/test_retrievers_elasticsearch.sh +++ b/tests/retrievers/test_retrievers_elasticsearch.sh @@ -79,6 +79,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=elasticsearch-vector-db") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_milvus.sh b/tests/retrievers/test_retrievers_milvus.sh index 507f43c5af..05bd69601a 100644 --- a/tests/retrievers/test_retrievers_milvus.sh +++ b/tests/retrievers/test_retrievers_milvus.sh @@ -83,6 +83,8 @@ function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=tei-embedding-serving") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh index f6857f35cb..a819e2e485 100644 --- a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh +++ b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh @@ -46,8 +46,8 @@ function start_service() { export RETRIEVER_PORT=11635 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export DATA_PATH="/data2/cache" - export MAX_INPUT_TOKENS=1024 - export MAX_TOTAL_TOKENS=3000 + export MAX_INPUT_TOKENS=4096 + export MAX_TOTAL_TOKENS=8192 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" @@ -67,7 +67,7 @@ function start_service() { docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \ -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" \ - -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps + -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e MAX_INPUT_LEN=$MAX_INPUT_TOKENS -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps sleep 1m @@ -152,7 +152,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-*") + cid=$(docker ps -aq --filter "name=test-comps-*" --filter "name=neo4j-apoc" --filter "name=tgi-gaudi-server" --filter "name=tei-embedding-serving") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans diff --git a/tests/retrievers/test_retrievers_opensearch.sh b/tests/retrievers/test_retrievers_opensearch.sh index 7a5fc0aeb2..ae49c41a90 100644 --- a/tests/retrievers/test_retrievers_opensearch.sh +++ b/tests/retrievers/test_retrievers_opensearch.sh @@ -75,6 +75,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=opensearch-vector-db" --filter "name=tei-embedding-serving") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pathway.sh b/tests/retrievers/test_retrievers_pathway.sh index 86fadaa812..3dbc2bb301 100644 --- a/tests/retrievers/test_retrievers_pathway.sh +++ b/tests/retrievers/test_retrievers_pathway.sh @@ -69,6 +69,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pathway-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pgvector.sh b/tests/retrievers/test_retrievers_pgvector.sh index 021d81a0c2..2a51a3e91d 100644 --- a/tests/retrievers/test_retrievers_pgvector.sh +++ b/tests/retrievers/test_retrievers_pgvector.sh @@ -64,6 +64,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pgvector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_qdrant.sh b/tests/retrievers/test_retrievers_qdrant.sh index da2d343ffc..e50642ac0b 100644 --- a/tests/retrievers/test_retrievers_qdrant.sh +++ b/tests/retrievers/test_retrievers_qdrant.sh @@ -59,6 +59,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=qdrant-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_redis.sh b/tests/retrievers/test_retrievers_redis.sh index 0964049f98..aa2bbe61fc 100644 --- a/tests/retrievers/test_retrievers_redis.sh +++ b/tests/retrievers/test_retrievers_redis.sh @@ -131,6 +131,8 @@ function validate_mm_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=redis-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_vdms.sh b/tests/retrievers/test_retrievers_vdms.sh index cd2b41b53e..732fd91134 100644 --- a/tests/retrievers/test_retrievers_vdms.sh +++ b/tests/retrievers/test_retrievers_vdms.sh @@ -78,6 +78,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=retriever-vdms*" --filter "name=vdms-vector-db" --filter "name=tei-embedding-serving") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { From d1dfd0ef24da221b5ebfef99104c3efdac6d2793 Mon Sep 17 00:00:00 2001 From: Spycsh <39623753+Spycsh@users.noreply.github.com> Date: Thu, 13 Feb 2025 22:39:47 +0800 Subject: [PATCH 17/17] Align mongo related chathistory/feedbackmanagement/promptregistry image names with examples (#1284) Align mongo related chathistory/feedbackmanagement/promptregistry image names with examples Signed-off-by: Spycsh Co-authored-by: Liang Lv --- .github/workflows/docker/compose/chathistory-compose.yaml | 4 ++-- comps/chathistory/deployment/docker_compose/compose.yaml | 2 +- comps/chathistory/src/README.md | 2 +- tests/chathistory/test_chathistory_mongo.sh | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/docker/compose/chathistory-compose.yaml b/.github/workflows/docker/compose/chathistory-compose.yaml index 3991a99734..d3f35db8e6 100644 --- a/.github/workflows/docker/compose/chathistory-compose.yaml +++ b/.github/workflows/docker/compose/chathistory-compose.yaml @@ -3,7 +3,7 @@ # this file should be run in the root of the repo services: - chathistory-mongo-server: + chathistory-mongo: build: dockerfile: comps/chathistory/src/Dockerfile - image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} diff --git a/comps/chathistory/deployment/docker_compose/compose.yaml b/comps/chathistory/deployment/docker_compose/compose.yaml index db9812e692..0e0a5c9ad2 100644 --- a/comps/chathistory/deployment/docker_compose/compose.yaml +++ b/comps/chathistory/deployment/docker_compose/compose.yaml @@ -15,7 +15,7 @@ services: command: mongod --quiet --logpath /dev/null chathistory-mongo: - image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} container_name: chathistory-mongo-server ports: - "${CHATHISTORY_PORT:-6012}:6012" diff --git a/comps/chathistory/src/README.md b/comps/chathistory/src/README.md index 5d753fdec3..3cdf5bf270 100644 --- a/comps/chathistory/src/README.md +++ b/comps/chathistory/src/README.md @@ -23,7 +23,7 @@ export COLLECTION_NAME=${COLLECTION_NAME} ```bash cd ../../../../ -docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . +docker build -t opea/chathistory-mongo:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . ``` ### Run Docker with CLI diff --git a/tests/chathistory/test_chathistory_mongo.sh b/tests/chathistory/test_chathistory_mongo.sh index 9f32165be7..4bb098d79c 100644 --- a/tests/chathistory/test_chathistory_mongo.sh +++ b/tests/chathistory/test_chathistory_mongo.sh @@ -16,12 +16,12 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/chathistory-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . + docker build --no-cache -t opea/chathistory-mongo:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/chathistory-mongo-server built fail" + echo "opea/chathistory-mongo built fail" exit 1 else - echo "opea/chathistory-mongo-server built successful" + echo "opea/chathistory-mongo built successful" fi }