diff --git a/.github/workflows/_comps-workflow.yml b/.github/workflows/_comps-workflow.yml index 964d2d7284..f7e8019dbe 100644 --- a/.github/workflows/_comps-workflow.yml +++ b/.github/workflows/_comps-workflow.yml @@ -71,7 +71,6 @@ jobs: fi if [[ $(grep -c "vllm-gaudi:" ${docker_compose_yml}) != 0 ]]; then git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git - sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt fi - name: Get build list id: get-build-list diff --git a/.github/workflows/_run-helm-chart.yml b/.github/workflows/_run-helm-chart.yml index 08fc71db66..0eca64ac4c 100644 --- a/.github/workflows/_run-helm-chart.yml +++ b/.github/workflows/_run-helm-chart.yml @@ -134,8 +134,9 @@ jobs: if [[ "${service,,}" == *"third_parties"* ]]; then CHART_NAME="$(echo "${service,,}"|cut -d'/' -f2)" # bridgetower else - CHART_NAME="${service_name}" # agent + CHART_NAME="${service_name}" # web_retrievers fi + CHART_NAME=$(echo "$CHART_NAME" | tr -cd 'a-z0-9') echo "service_name=$service_name" >> $GITHUB_ENV echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV echo "RELEASE_NAME=${CHART_NAME}$(date +%d%H%M%S)" >> $GITHUB_ENV diff --git a/.github/workflows/docker/compose/chathistory-compose.yaml b/.github/workflows/docker/compose/chathistory-compose.yaml index 3991a99734..d3f35db8e6 100644 --- a/.github/workflows/docker/compose/chathistory-compose.yaml +++ b/.github/workflows/docker/compose/chathistory-compose.yaml @@ -3,7 +3,7 @@ # this file should be run in the root of the repo services: - chathistory-mongo-server: + chathistory-mongo: build: dockerfile: comps/chathistory/src/Dockerfile - image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} diff --git a/.github/workflows/manual-comps-test.yml b/.github/workflows/manual-comps-test.yml index c3f73fb25f..7fa8c7255c 100644 --- a/.github/workflows/manual-comps-test.yml +++ b/.github/workflows/manual-comps-test.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "asr" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,animation,dataprep,embeddings,feedback_management,finetuning,guardrails,image2image,image2video,intent_detection,llms,lvms,prompt_registry,ragas,rerankings,retrievers,text2image,text2sql,third_parties,tts,vectorstores,web_retrievers]" required: true type: string build: diff --git a/.github/workflows/manual-docker-publish.yml b/.github/workflows/manual-docker-publish.yml index b7e770dedb..aae3d3ca84 100644 --- a/.github/workflows/manual-docker-publish.yml +++ b/.github/workflows/manual-docker-publish.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,animation,dataprep,embeddings,feedback_management,finetuning,guardrails,image2image,image2video,intent_detection,llms,lvms,prompt_registry,ragas,rerankings,retrievers,text2image,text2sql,third_parties,tts,vectorstores,web_retrievers]" required: false type: string images: diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml 
index fda1528065..51a2a88b75 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -62,7 +62,7 @@ jobs: image-build: needs: get-build-matrix - if: ${{ fromJSON(needs.get-build-matrix.outputs.services).length != 0 }} + if: needs.get-build-matrix.outputs.services != '[]' strategy: matrix: service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }} @@ -96,7 +96,6 @@ jobs: fi if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git - sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt fi - name: Build Image diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000..b1db30c720 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Base image for GenAIComps based OPEA Python applications +# Build: docker build -t opea/comps-base -f Dockerfile . + +ARG IMAGE_NAME=python +ARG IMAGE_TAG=3.11-slim + +FROM ${IMAGE_NAME}:${IMAGE_TAG} AS base + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR $HOME + +COPY *.toml *.py *.txt *.md LICENSE ./ + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r requirements.txt + +COPY comps/ comps/ + +ENV PYTHONPATH=$PYTHONPATH:$HOME + +USER user + +ENTRYPOINT ["sh", "-c", "set && ls -la"] diff --git a/LEGAL_INFORMATION.md b/LEGAL_INFORMATION.md index 223566f3eb..41cb6dc31a 100644 --- a/LEGAL_INFORMATION.md +++ b/LEGAL_INFORMATION.md @@ -13,6 +13,9 @@ Your use of the source code for these components is subject to the terms and con See the accompanying [license](LICENSE) file for full license text and copyright notices. +Please note: some component(s) depend on software subject to non-open source licenses. If you use or redistribute this software, it is your sole responsibility to ensure compliance with such licenses. +e.g. langserve + ## Citation If you use Generative AI Components in your research, use the following BibTeX entry. diff --git a/comps/chathistory/deployment/docker_compose/compose.yaml b/comps/chathistory/deployment/docker_compose/compose.yaml index db9812e692..0e0a5c9ad2 100644 --- a/comps/chathistory/deployment/docker_compose/compose.yaml +++ b/comps/chathistory/deployment/docker_compose/compose.yaml @@ -15,7 +15,7 @@ services: command: mongod --quiet --logpath /dev/null chathistory-mongo: - image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} container_name: chathistory-mongo-server ports: - "${CHATHISTORY_PORT:-6012}:6012" diff --git a/comps/chathistory/src/README.md b/comps/chathistory/src/README.md index 5d753fdec3..3cdf5bf270 100644 --- a/comps/chathistory/src/README.md +++ b/comps/chathistory/src/README.md @@ -23,7 +23,7 @@ export COLLECTION_NAME=${COLLECTION_NAME} ```bash cd ../../../../ -docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . +docker build -t opea/chathistory-mongo:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile .
``` ### Run Docker with CLI diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 4053988566..2d1957b1b7 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -7,6 +7,7 @@ import json import os import re +import threading import time from typing import Dict, List @@ -27,20 +28,56 @@ class OrchestratorMetrics: - # Because: - # - CI creates several orchestrator instances - # - Prometheus requires metrics to be singletons - # - Oorchestror instances are not provided their own names - # Metrics are class members with "megaservice" name prefix - first_token_latency = Histogram("megaservice_first_token_latency", "First token latency (histogram)") - inter_token_latency = Histogram("megaservice_inter_token_latency", "Inter-token latency (histogram)") - request_latency = Histogram("megaservice_request_latency", "Whole request/reply latency (histogram)") - request_pending = Gauge("megaservice_request_pending", "Count of currently pending requests (gauge)") + # Need a static class-level ID for the metric prefix because: + # - Prometheus requires metrics (their names) to be unique + _instance_id = 0 def __init__(self) -> None: - pass + OrchestratorMetrics._instance_id += 1 + if OrchestratorMetrics._instance_id > 1: + self._prefix = f"megaservice{self._instance_id}" + else: + self._prefix = "megaservice" + + self.request_pending = Gauge(f"{self._prefix}_request_pending", "Count of currently pending requests (gauge)") + + # locking for latency metric creation / method change + self._lock = threading.Lock() + + # Metrics related to token processing are created on demand, + # to avoid bogus ones for services that never handle tokens + self.first_token_latency = None + self.inter_token_latency = None + self.request_latency = None + + # initial methods to create the metrics + self.token_update = self._token_update_create + self.request_update = self._request_update_create + + def _token_update_create(self, token_start: float, is_first: bool) -> float: + with self._lock: + # in case another thread already got here + if self.token_update == self._token_update_create: + self.first_token_latency = Histogram( + f"{self._prefix}_first_token_latency", "First token latency (histogram)" + ) + self.inter_token_latency = Histogram( + f"{self._prefix}_inter_token_latency", "Inter-token latency (histogram)" + ) + self.token_update = self._token_update_real + return self.token_update(token_start, is_first) + + def _request_update_create(self, req_start: float) -> None: + with self._lock: + # in case another thread already got here + if self.request_update == self._request_update_create: + self.request_latency = Histogram( + f"{self._prefix}_request_latency", "Whole LLM request/reply latency (histogram)" + ) + self.request_update = self._request_update_real + self.request_update(req_start) - def token_update(self, token_start: float, is_first: bool) -> float: + def _token_update_real(self, token_start: float, is_first: bool) -> float: now = time.time() if is_first: self.first_token_latency.observe(now - token_start) @@ -48,7 +85,7 @@ def token_update(self, token_start: float, is_first: bool) -> float: self.inter_token_latency.observe(now - token_start) return now - def request_update(self, req_start: float) -> None: + def _request_update_real(self, req_start: float) -> None: self.request_latency.observe(time.time() - req_start) def pending_update(self, increase: bool) -> None: diff --git a/comps/dataprep/deployment/docker_compose/compose.yaml
b/comps/dataprep/deployment/docker_compose/compose.yaml index ef54a69e0c..8fff22cd82 100644 --- a/comps/dataprep/deployment/docker_compose/compose.yaml +++ b/comps/dataprep/deployment/docker_compose/compose.yaml @@ -28,7 +28,7 @@ services: DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_ELASTICSEARCH" ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped depends_on: @@ -49,6 +49,7 @@ services: MILVUS_HOST: ${MILVUS_HOST} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} restart: unless-stopped depends_on: tei-embedding-serving: @@ -161,7 +162,7 @@ services: QDRANT_HOST: ${QDRANT_HOST} QDRANT_PORT: ${QDRANT_PORT} COLLECTION_NAME: ${COLLECTION_NAME} - TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped diff --git a/comps/dataprep/src/README_elasticsearch.md b/comps/dataprep/src/README_elasticsearch.md index ab4b8547b5..94d8d47ba1 100644 --- a/comps/dataprep/src/README_elasticsearch.md +++ b/comps/dataprep/src/README_elasticsearch.md @@ -50,7 +50,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest +docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_milvus.md b/comps/dataprep/src/README_milvus.md index bcf545f72b..3e79ff6976 100644 --- a/comps/dataprep/src/README_milvus.md +++ b/comps/dataprep/src/README_milvus.md @@ -25,6 +25,7 @@ export MILVUS_HOST=${your_milvus_host_ip} export MILVUS_PORT=19530 export COLLECTION_NAME=${your_collection_name} export TEI_EMBEDDING_ENDPOINT=${your_embedding_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_huggingface_api_token} ``` ### 1.4 Start TEI Embedding Service @@ -70,13 +71,15 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ```bash export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export EMBEDDING_MODEL_ID=${your_embedding_model_id} export MILVUS_HOST=${your_host_ip} ``` ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest +docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e 
HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest ``` ### 2.4 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_opensearch.md b/comps/dataprep/src/README_opensearch.md index b5d14c9a9d..b5f1c2c26c 100644 --- a/comps/dataprep/src/README_opensearch.md +++ b/comps/dataprep/src/README_opensearch.md @@ -51,7 +51,7 @@ curl localhost:$your_port/embed \ After checking that it works, set up environment variables. ```bash -export TEI_ENDPOINT="http://localhost:$your_port" +export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" ``` ### 1.4 Start Document Preparation Microservice for OpenSearch with Python Script @@ -75,7 +75,7 @@ Please refer to this [readme](../../third_parties/opensearch/src/README.md). ```bash export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_ENDPOINT="http://${your_ip}:6006" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006" export OPENSEARCH_URL="http://${your_ip}:9200" export INDEX_NAME=${your_index_name} export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} @@ -97,7 +97,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil - option 1: Start single-process version (for processing up to 10 files) ```bash -docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest +docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) diff --git a/comps/dataprep/src/README_pgvector.md b/comps/dataprep/src/README_pgvector.md index 92d0db577a..1fdb678e7f 100644 --- a/comps/dataprep/src/README_pgvector.md +++ b/comps/dataprep/src/README_pgvector.md @@ -38,6 +38,8 @@ Please refer to this [readme](../../third_parties/pgvector/src/README.md). 
```bash export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb export INDEX_NAME=${your_index_name} +export TEI_EMBEDDING_ENDPOINT=${your_tei_embedding_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ``` ### 2.3 Build Docker Image @@ -50,7 +52,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest +docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_redis.md b/comps/dataprep/src/README_redis.md index c6e4555295..69f3ae39af 100644 --- a/comps/dataprep/src/README_redis.md +++ b/comps/dataprep/src/README_redis.md @@ -95,8 +95,7 @@ Please refer to this [readme](../../third_parties/redis/src/README.md). ### 2.2 Setup Environment Variables ```bash -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_ENDPOINT="http://${your_ip}:6006" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006" export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} @@ -112,7 +111,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest +docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) diff --git a/comps/dataprep/src/README_vdms.md b/comps/dataprep/src/README_vdms.md index 7571ca80d2..7318282c4e 100644 --- a/comps/dataprep/src/README_vdms.md +++ b/comps/dataprep/src/README_vdms.md @@ -69,7 +69,8 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export VDMS_HOST=${host_ip} export VDMS_PORT=55555 -export TEI_ENDPOINT=${your_tei_endpoint} +export TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export COLLECTION_NAME=${your_collection_name} export SEARCH_ENGINE="FaissFlat" export DISTANCE_STRATEGY="L2" @@ -89,7 +90,8 @@ Start single-process version (for 1-10 files processing) ```bash docker run -d --name="dataprep-vdms-server" -p 6007:6007 --runtime=runc --ipc=host \ --e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_ENDPOINT=$TEI_ENDPOINT \ +-e http_proxy=$http_proxy -e https_proxy=$https_proxy 
\ +-e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ -e COLLECTION_NAME=$COLLECTION_NAME -e VDMS_HOST=$VDMS_HOST -e VDMS_PORT=$VDMS_PORT \ -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_VDMS" opea/dataprep:latest ``` diff --git a/comps/dataprep/src/integrations/elasticsearch.py b/comps/dataprep/src/integrations/elasticsearch.py index ed07d157ea..83e422741e 100644 --- a/comps/dataprep/src/integrations/elasticsearch.py +++ b/comps/dataprep/src/integrations/elasticsearch.py @@ -9,10 +9,9 @@ from elasticsearch import Elasticsearch from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_core.documents import Document from langchain_elasticsearch import ElasticsearchStore -from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.dataprep.src.utils import ( @@ -37,7 +36,9 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # TEI Embedding endpoints -TEI_ENDPOINT = os.getenv("TEI_ENDPOINT", "") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag-elastic") @@ -77,15 +78,31 @@ def create_index(self) -> None: if not self.es_client.indices.exists(index=INDEX_NAME): self.es_client.indices.create(index=INDEX_NAME) - def get_embedder(self) -> Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]: + def get_embedder(self) -> Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings]: """Obtain required Embedder.""" - if TEI_ENDPOINT: - return HuggingFaceEndpointEmbeddings(model=TEI_ENDPOINT) + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` and the `EMBED_MODEL` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) + return embedder else: return HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) def get_elastic_store( - self, embedder: Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings] + self, embedder: Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings] ) -> ElasticsearchStore: """Get Elasticsearch vector store.""" return ElasticsearchStore(index_name=INDEX_NAME, embedding=embedder, es_connection=self.es_client) diff --git a/comps/dataprep/src/integrations/milvus.py b/comps/dataprep/src/integrations/milvus.py index c3e3e57309..c4aecf86e4 100644 --- a/comps/dataprep/src/integrations/milvus.py +++ b/comps/dataprep/src/integrations/milvus.py @@ -10,7 +10,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings, OpenAIEmbeddings from langchain_core.documents import Document from langchain_milvus.vectorstores import Milvus from langchain_text_splitters import HTMLHeaderTextSplitter @@ -36,8 +36,11 @@ # Local Embedding model LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") # TEI configuration -TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") +EMBED_MODEL = os.environ.get("EMBED_MODEL", "BAAI/bge-base-en-v1.5") TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + # MILVUS configuration MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) @@ -75,7 +78,7 @@ def ingest_chunks_to_milvus(embeddings, file_name: str, chunks: List): except Exception as e: if logflag: logger.info(f"[ ingest chunks ] fail to ingest chunks into Milvus. error: {e}") - raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}: {e}") if logflag: logger.info(f"[ ingest chunks ] Docs ingested file {file_name} to Milvus collection {COLLECTION_NAME}.") @@ -189,7 +192,23 @@ def _initialize_embedder(self): # create embeddings using TEI endpoint service if logflag: logger.info(f"[ milvus embedding ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") - embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + # create embeddings using TEI endpoint service + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model if logflag: @@ -274,7 +293,7 @@ async def ingest_files( search_res = search_by_file(my_milvus.col, encode_file) except Exception as e: raise HTTPException( - status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}." + status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}: {e}" ) if len(search_res) > 0: if logflag: @@ -319,7 +338,7 @@ async def ingest_files( search_res = search_by_file(my_milvus.col, encoded_link + ".txt") except Exception as e: raise HTTPException( - status_code=500, detail=f"Failed when searching in Milvus db for link {link}." + status_code=500, detail=f"Failed when searching in Milvus db for link {link}: {e}" ) if len(search_res) > 0: if logflag: @@ -375,7 +394,7 @@ async def get_files(self): try: all_data = search_all(my_milvus.col) except Exception as e: - raise HTTPException(status_code=500, detail="Failed when searching in Milvus db for all files.") + raise HTTPException(status_code=500, detail=f"Failed when searching in Milvus db for all files: {e}") # return [] if no data in db if len(all_data) == 0: @@ -422,8 +441,7 @@ async def delete_files(self, file_path: str = Body(..., embed=True)): except Exception as e: if logflag: logger.info(f"[ milvus delete ] {e}. Fail to delete {upload_folder}.") - raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") - + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}: {e}") if logflag: logger.info("[ milvus delete ] successfully delete all files.") diff --git a/comps/dataprep/src/integrations/neo4j_langchain.py b/comps/dataprep/src/integrations/neo4j_langchain.py index ba03437972..75b23252d8 100644 --- a/comps/dataprep/src/integrations/neo4j_langchain.py +++ b/comps/dataprep/src/integrations/neo4j_langchain.py @@ -34,10 +34,9 @@ NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") -# LLM/Embedding endpoints +# LLM endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") OPENAI_KEY = os.getenv("OPENAI_API_KEY") diff --git a/comps/dataprep/src/integrations/opensearch.py b/comps/dataprep/src/integrations/opensearch.py index 6f4b10bbd1..8df0104004 100644 --- a/comps/dataprep/src/integrations/opensearch.py +++ b/comps/dataprep/src/integrations/opensearch.py @@ -7,9 +7,8 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import OpenSearchVectorSearch -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from opensearchpy import OpenSearch @@ -79,9 +78,26 @@ def __init__(self, name: str, description: str, config: dict = None): self.upload_folder = "./uploaded_files/" super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) # Initialize 
embeddings - tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") - if tei_embedding_endpoint: - self.embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") + HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." + ) + model_id = response.json()["model_id"] + self.embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: self.embeddings = HuggingFaceBgeEmbeddings(model_name=Config.EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/pgvect.py b/comps/dataprep/src/integrations/pgvect.py index 43b38e5d6d..d9eb698782 100644 --- a/comps/dataprep/src/integrations/pgvect.py +++ b/comps/dataprep/src/integrations/pgvect.py @@ -10,7 +10,7 @@ import psycopg2 from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import PGVector from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType @@ -30,6 +30,10 @@ # Embedding model EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +# TEI Embedding endpoints +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") PG_CONNECTION_STRING = os.getenv("PG_CONNECTION_STRING", "localhost") @@ -47,12 +51,26 @@ class OpeaPgvectorDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/pipecone.py b/comps/dataprep/src/integrations/pipecone.py index 33ffeea4b6..ec03174608 100644 --- a/comps/dataprep/src/integrations/pipecone.py +++ b/comps/dataprep/src/integrations/pipecone.py @@ -8,7 +8,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_pinecone import PineconeVectorStore from langchain_text_splitters import HTMLHeaderTextSplitter from pinecone import Pinecone, ServerlessSpec @@ -39,7 +39,9 @@ # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") @OpeaComponentRegistry.register("OPEA_DATAPREP_PINECONE") @@ -48,12 +50,26 @@ class OpeaPineConeDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/qdrant.py b/comps/dataprep/src/integrations/qdrant.py index e54c6c572b..62a9efa21a 100644 --- a/comps/dataprep/src/integrations/qdrant.py +++ b/comps/dataprep/src/integrations/qdrant.py @@ -7,9 +7,8 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import Qdrant -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from qdrant_client import QdrantClient @@ -38,7 +37,9 @@ # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") @OpeaComponentRegistry.register("OPEA_DATAPREP_QDRANT") @@ -47,12 +48,26 @@ class OpeaQdrantDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/redis.py b/comps/dataprep/src/integrations/redis.py index 06cb0d7f27..a181013bcd 100644 --- a/comps/dataprep/src/integrations/redis.py +++ b/comps/dataprep/src/integrations/redis.py @@ -11,9 +11,8 @@ import redis from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import Redis -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from redis.commands.search.field import TextField from redis.commands.search.indexDefinition import IndexDefinition, IndexType @@ -40,6 +39,8 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # TEI Embedding endpoints TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag_redis") @@ -187,8 +188,23 @@ def ingest_chunks_to_redis(file_name: str, chunks: List): logger.info(f"[ redis ingest chunks ] file name: {file_name}") # Create vectorstore if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/vdms.py b/comps/dataprep/src/integrations/vdms.py index 998b23a5c7..e4085b1812 100644 --- a/comps/dataprep/src/integrations/vdms.py +++ b/comps/dataprep/src/integrations/vdms.py @@ -7,7 +7,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores.vdms import VDMS, VDMS_Client from langchain_text_splitters import HTMLHeaderTextSplitter @@ -28,7 +28,6 @@ def getEnv(key, default_value=None): env_value = os.getenv(key, default=default_value) - print(f"{key}: {env_value}") return env_value @@ -45,7 +44,9 @@ def getEnv(key, default_value=None): # LLM/Embedding endpoints TGI_LLM_ENDPOINT = getEnv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = getEnv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = getEnv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # chunk parameters CHUNK_SIZE = getEnv("CHUNK_SIZE", 1500) @@ -58,14 +59,28 @@ class OpeaVdmsDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") self.upload_folder = "./uploaded_files/" create_upload_folder(self.upload_folder) self.client = VDMS_Client(VDMS_HOST, int(VDMS_PORT)) # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/embeddings/src/integrations/tei.py b/comps/embeddings/src/integrations/tei.py index dd7dd602e1..8d589fb822 100644 --- a/comps/embeddings/src/integrations/tei.py +++ b/comps/embeddings/src/integrations/tei.py @@ -70,6 +70,7 @@ async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: raise TypeError("Unsupported input type: input must be a string or list of strings.") response = await self.client.post( json={"input": texts, "encoding_format": input.encoding_format, "model": input.model, "user": input.user}, + model=f"{self.base_url}/v1/embeddings", task="text-embedding", ) embeddings = json.loads(response.decode()) diff --git a/comps/guardrails/deployment/kubernetes/README.md b/comps/guardrails/deployment/kubernetes/README.md index b309900a07..2b3d3002c1 100644 --- a/comps/guardrails/deployment/kubernetes/README.md +++ b/comps/guardrails/deployment/kubernetes/README.md @@ -7,5 +7,5 @@ ``` export HFTOKEN="insert-your-huggingface-token-here" -helm install guardrails oci://ghcr.io/opea-project/charts/guardrails --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +helm install guardrails oci://ghcr.io/opea-project/charts/guardrails-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml ``` diff --git a/comps/guardrails/src/guardrails/README.md b/comps/guardrails/src/guardrails/README.md index 7794d9490f..15679f706e 100644 --- a/comps/guardrails/src/guardrails/README.md +++ b/comps/guardrails/src/guardrails/README.md @@ -9,9 +9,9 @@ The Guardrails Microservice now offers two primary types of guardrails: - Input Guardrails: These are applied to user inputs. An input guardrail can either reject the input, halting further processing. - Output Guardrails: These are applied to outputs generated by the LLM. An output guardrail can reject the output, preventing it from being returned to the user. -## LlamaGuard +**This microservice supports Meta's [Llama Guard](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B) and Allen Institute for AI's [WildGuard](https://huggingface.co/allenai/wildguard) models.** -We offer content moderation support utilizing Meta's [Llama Guard](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B) model. +## Llama Guard Any content that is detected in the following categories is determined as unsafe: @@ -22,111 +22,84 @@ Any content that is detected in the following categories is determined as unsafe - Regulated or Controlled Substances - Suicide & Self Harm -### 🚀1. Start Microservice with Python (Option 1) - -To start the Guardrails microservice, you need to install python packages first. +## WildGuard -#### 1.1 Install Requirements +`allenai/wildguard` was fine-tuned from `mistralai/Mistral-7B-v0.3` on their own [`allenai/wildguardmix`](https://huggingface.co/datasets/allenai/wildguardmix) dataset. 
Any content that is detected in the following categories is determined as unsafe: -```bash -pip install -r requirements.txt -``` +- Privacy +- Misinformation +- Harmful Language +- Malicious Uses -#### 1.2 Start TGI Gaudi Service +## Clone OPEA GenAIComps and set initial environment variables ```bash -export HF_TOKEN=${your_hf_api_token} -volume=$PWD/data -model_id="meta-llama/Meta-Llama-Guard-2-8B" -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 +git clone https://github.com/opea-project/GenAIComps.git +export OPEA_GENAICOMPS_ROOT=$(pwd)/GenAIComps +export GUARDRAIL_PORT=9090 ``` -#### 1.3 Verify the TGI Gaudi Service +## Start up the HuggingFace Text Generation Inference (TGI) Server -```bash -curl 127.0.0.1:8088/generate \ - -X POST \ - -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` +Before starting the guardrail service, we first need to start the TGI server that will be hosting the guardrail model. -#### 1.4 Start Guardrails Service +Choose one of the following before starting your TGI server. -Optional: If you have deployed a Guardrails model with TGI Gaudi Service other than default model (i.e., `meta-llama/Meta-Llama-Guard-2-8B`) [from section 1.2](#12-start-tgi-gaudi-service), you will need to add the eviornment variable `SAFETY_GUARD_MODEL_ID` containing the model id. For example, the following informs the Guardrails Service the deployed model used LlamaGuard2: +**For LlamaGuard:** ```bash export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" +export GUARDRAILS_COMPONENT_NAME=OPEA_LLAMA_GUARD ``` +Or + ```bash -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -python guardrails_tgi.py +export SAFETY_GUARD_MODEL_ID="meta-llama/LlamaGuard-7b" +export GUARDRAILS_COMPONENT_NAME=OPEA_LLAMA_GUARD ``` -### 🚀2. Start Microservice with Docker (Option 2) - -If you start an Guardrails microservice with docker, the `docker_compose_guardrails.yaml` file will automatically start a TGI gaudi service with docker. +_Other variations of LlamaGuard are also an option to use but are not guaranteed to work OOB._ -#### 2.1 Setup Environment Variables - -In order to start TGI and LLM services, you need to setup the following environment variables first. +**For Wild Guard:** ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -export LLM_MODEL_ID=${your_hf_llm_model} +export SAFETY_GUARD_MODEL_ID="allenai/wildguard" +export GUARDRAILS_COMPONENT_NAME=OPEA_WILD_GUARD ``` -#### 2.2 Build Docker Image - -```bash -cd ../../../../ -docker build -t opea/guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . 
-``` +_Note that both of these models are gated and you need to complete their form on their associated model pages first in order to use them with your HF token._ -#### 2.3 Run Docker with CLI +Follow the steps [here](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/tgi) to start the TGI server container where LLM_MODEL_ID is set to your SAFETY_GUARD_MODEL_ID like below: ```bash -docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/guardrails:latest +export LLM_MODEL_ID=$SAFETY_GUARD_MODEL_ID ``` -#### 2.4 Run Docker with Docker Compose +Once the container is starting up and loading the model, set the endpoint that you will use to make requests to the TGI server: ```bash -cd deployment/docker_compose/ -docker compose -f compose_llamaguard.yaml up -d +export SAFETY_GUARD_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" ``` -### 🚀3. Consume Guardrails Service +**Verify that the TGI Server is ready for inference** -#### 3.1 Check Service Status +First check that the TGI server successfully loaded the guardrail model. Loading the model could take up to 5-10 minutes. You can do this by running the following: ```bash -curl http://localhost:9090/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' +docker logs tgi-gaudi-server ``` -#### 3.2 Consume Guardrails Service +If the last line of the log contains something like `INFO text_generation_router::server: router/src/server.rs:2209: Connected` then your TGI server is ready and the following curl should work: ```bash -curl http://localhost:9090/v1/guardrails\ +curl localhost:${LLM_ENDPOINT_PORT}/generate \ -X POST \ - -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ -H 'Content-Type: application/json' ``` -## WildGuard - -We also offer content moderation support utilizing Allen Institute for AI's [WildGuard](https://huggingface.co/allenai/wildguard) model. - -`allenai/wildguard` was fine-tuned from `mistralai/Mistral-7B-v0.3` on their own [`allenai/wildguardmix`](https://huggingface.co/datasets/allenai/wildguardmix) dataset. Any content that is detected in the following categories is determined as unsafe: - -- Privacy -- Misinformation -- Harmful Language -- Malicious Uses +Check the logs again with the `logs` command to confirm that the curl request resulted in `Success`. ### 🚀1. Start Microservice with Python (Option 1) @@ -135,67 +108,76 @@ To start the Guardrails microservice, you need to install python packages first. 
#### 1.1 Install Requirements ```bash +pip install $OPEA_GENAICOMPS_ROOT +cd $OPEA_GENAICOMPS_ROOT/comps/guardrails/src/guardrails pip install -r requirements.txt ``` -#### 1.2 Start TGI Gaudi Service +#### 1.2 Start Guardrails Service ```bash -export HF_TOKEN=${your_hf_api_token} -volume=$PWD/data -model_id="allenai/wildguard" -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 +python opea_guardrails_microservice.py ``` -#### 1.3 Verify the TGI Gaudi Service +### 🚀2. Start Microservice with Docker (Option 2) -```bash -curl 127.0.0.1:8088/generate \ - -X POST \ - -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` +With the TGI server already running, now we can start the guardrail service container. -#### 1.4 Start Guardrails Service +#### 2.1 Build Docker Image ```bash -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -python guardrails_tgi.py +cd $OPEA_GENAICOMPS_ROOT +docker build -t opea/guardrails:latest \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -f comps/guardrails/src/guardrails/Dockerfile . ``` -### 🚀2. Start Microservice with Docker (Option 2) - -If you start an Guardrails microservice with docker, the `compose_wildguard.yaml` file will automatically start a TGI gaudi service with docker. - -#### 2.1 Setup Environment Variables +#### 2.2.a Run with Docker Compose (Option A) -In order to start TGI and LLM services, you need to setup the following environment variables first. +**To run with Llama Guard:** ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -export LLM_MODEL_ID=${your_hf_llm_model} +docker compose -f $OPEA_GENAICOMPS_ROOT/comps/guardrails/deployment/docker_compose/compose.yaml up -d llamaguard-guardrails-server ``` -#### 2.2 Build Docker Image +**To run with WildGuard:** ```bash -cd ../../../../ -docker build -t opea/guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile .
+docker compose -f $OPEA_GENAICOMPS_ROOT/comps/guardrails/deployment/docker_compose/compose.yaml up -d wildguard-guardrails-server ``` -#### 2.3 Run Docker with CLI +#### 2.2.b Run Docker with CLI (Option B) + +**To run with Llama Guard:** ```bash -docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e GUARDRAILS_COMPONENT_NAME="OPEA_WILD_GUARD" opea/guardrails:latest +docker run -d \ + --name="llamaguard-guardrails-server" \ + -p ${GUARDRAIL_PORT}:${GUARDRAIL_PORT} \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT \ + -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN \ + opea/guardrails:latest ``` -#### 2.4 Run Docker with Docker Compose +**To run with WildGuard:** ```bash -cd deployment/docker_compose/ -docker compose -f compose_wildguard.yaml up -d +docker run -d \ + --name="wildguard-guardrails-server" \ + -p ${GUARDRAIL_PORT}:${GUARDRAIL_PORT} \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT \ + -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN \ + -e GUARDRAILS_COMPONENT_NAME="OPEA_WILD_GUARD" \ + opea/guardrails:latest ``` ### 🚀3. Consume Guardrails Service @@ -203,7 +185,7 @@ docker compose -f compose_wildguard.yaml up -d #### 3.1 Check Service Status ```bash -curl http://localhost:9090/v1/health_check \ +curl http://localhost:${GUARDRAIL_PORT}/v1/health_check\ -X GET \ -H 'Content-Type: application/json' ``` @@ -211,8 +193,13 @@ curl http://localhost:9090/v1/health_check \ #### 3.2 Consume Guardrails Service ```bash -curl http://localhost:9090/v1/guardrails \ +curl http://localhost:${GUARDRAIL_PORT}/v1/guardrails\ -X POST \ -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ -H 'Content-Type: application/json' ``` + +This request should return text containing: +`"Violated policies: <category>, please check your input."` + +Where `<category>` is `Violent Crimes` or `harmful` for `Llama-Guard-2-8B` or `wildguard`, respectively.
diff --git a/comps/llms/src/doc-summarization/integrations/tgi.py b/comps/llms/src/doc-summarization/integrations/tgi.py index 002f8de19f..902334bc03 100644 --- a/comps/llms/src/doc-summarization/integrations/tgi.py +++ b/comps/llms/src/doc-summarization/integrations/tgi.py @@ -4,7 +4,7 @@ import os import requests -from langchain_community.llms import HuggingFaceEndpoint +from langchain_huggingface import HuggingFaceEndpoint from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.cores.proto.api_protocol import DocSumChatCompletionRequest @@ -71,6 +71,7 @@ async def invoke(self, input: DocSumChatCompletionRequest): repetition_penalty=input.repetition_penalty if input.repetition_penalty else 1.03, streaming=input.stream, server_kwargs=server_kwargs, + task="text-generation", ) result = await self.generate(input, self.client) diff --git a/comps/llms/src/doc-summarization/requirements.txt b/comps/llms/src/doc-summarization/requirements.txt index 1694618637..6bc1bb1e55 100644 --- a/comps/llms/src/doc-summarization/requirements.txt +++ b/comps/llms/src/doc-summarization/requirements.txt @@ -1,11 +1,11 @@ docarray[full] fastapi httpx==0.27.2 -huggingface_hub -langchain #==0.1.12 +huggingface_hub==0.27.1 +langchain==0.3.14 langchain-huggingface langchain-openai -langchain_community +langchain_community==0.3.14 langchainhub opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/llms/src/faq-generation/requirements.txt b/comps/llms/src/faq-generation/requirements.txt index 36257d3939..037079294b 100644 --- a/comps/llms/src/faq-generation/requirements.txt +++ b/comps/llms/src/faq-generation/requirements.txt @@ -1,10 +1,10 @@ docarray[full] fastapi -huggingface_hub -langchain +huggingface_hub==0.27.1 +langchain==0.3.14 langchain-huggingface langchain-openai -langchain_community +langchain_community==0.3.14 langchainhub opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/llms/src/text-generation/README.md b/comps/llms/src/text-generation/README.md index 360c459dc1..ba1a31df39 100644 --- a/comps/llms/src/text-generation/README.md +++ b/comps/llms/src/text-generation/README.md @@ -8,14 +8,31 @@ Overall, this microservice offers a streamlined way to integrate large language ## Validated LLM Models -| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | -| --------------------------- | --------- | -------- | ---------- | -| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | -| [Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | -| [Llama-2-70b-chat-hf] | ✓ | - | ✓ | -| [Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | -| [Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | -| [Phi-3] | x | Limit 4K | Limit 4K | +| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | +| ------------------------------------------- | --------- | -------- | ---------- | +| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-70b-chat-hf] | ✓ | - | ✓ | +| [meta-llama/Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | +| [meta-llama/Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | +| [Phi-3] | x | Limit 4K | Limit 4K | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | ✓ | - | ✓ | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | ✓ | - | ✓ | + +### System Requirements for LLM Models + +| Model | Minimum number of Gaudi cards | +| ------------------------------------------- | ----------------------------- | +| [Intel/neural-chat-7b-v3-3] | 1 | +| [meta-llama/Llama-2-7b-chat-hf] | 1 | +| [meta-llama/Llama-2-70b-chat-hf] | 2 | +| [meta-llama/Meta-Llama-3-8B-Instruct] | 1 | +| 
[meta-llama/Meta-Llama-3-70B-Instruct] | 2 | +| [Phi-3] | x | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | 8 | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | 4 | + +> NOTE: Detailed system requirements coming soon. ## Support integrations @@ -166,9 +183,11 @@ curl http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions \ [Intel/neural-chat-7b-v3-3]: https://huggingface.co/Intel/neural-chat-7b-v3-3 -[Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -[Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -[Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct -[Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct +[meta-llama/Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf +[meta-llama/Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf +[meta-llama/Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct +[meta-llama/Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct [Phi-3]: https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3 [HuggingFace]: https://huggingface.co/ +[deepseek-ai/DeepSeek-R1-Distill-Llama-70B]: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B +[deepseek-ai/DeepSeek-R1-Distill-Qwen-32B]: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B diff --git a/comps/rerankings/src/integrations/tei.py b/comps/rerankings/src/integrations/tei.py index 8060555fa8..f1ebd9b6aa 100644 --- a/comps/rerankings/src/integrations/tei.py +++ b/comps/rerankings/src/integrations/tei.py @@ -71,6 +71,7 @@ async def invoke( response = await self.client.post( json={"query": query, "texts": docs}, + model=f"{self.base_url}/rerank", task="text-reranking", ) diff --git a/comps/retrievers/src/Dockerfile b/comps/retrievers/src/Dockerfile index 3fb6b3650e..53963d884d 100644 --- a/comps/retrievers/src/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -26,7 +26,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi && \ pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \ pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/retrievers/src/requirements.txt && \ - pip install opentelemetry-api==1.27.0 opentelemetry-exporter-otlp==1.27.0 opentelemetry-sdk==1.27.0 + pip install opentelemetry-api==1.29.0 opentelemetry-exporter-otlp==1.29.0 opentelemetry-sdk==1.29.0 ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/retrievers/src/integrations/vdms.py b/comps/retrievers/src/integrations/vdms.py index b6a44fdf14..5e5b1731fa 100644 --- a/comps/retrievers/src/integrations/vdms.py +++ b/comps/retrievers/src/integrations/vdms.py @@ -48,7 +48,7 @@ def _initialize_embedder(self): from comps.third_parties.clip.src.clip_embedding import vCLIP embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64}) - if TEI_EMBEDDING_ENDPOINT: + elif TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service if logflag: logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") diff --git a/comps/retrievers/src/requirements.txt b/comps/retrievers/src/requirements.txt index a04fef1771..511bcc744f 100644 --- a/comps/retrievers/src/requirements.txt +++ b/comps/retrievers/src/requirements.txt @@ -3,6 +3,7 @@ cairosvg docarray[full] docx2txt easyocr +einops fastapi future graspologic diff --git 
a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml index 35dc90c32e..9c1ead2b94 100644 --- a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml +++ b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 include: - - ../../../tei/deployment/docker_compose/compose.yaml + - ../../../tei/deployment/docker_compose/compose.yaml services: pathway-db: @@ -12,13 +12,15 @@ services: - "${PATHWAY_PORT:-6379}:${PATHWAY_PORT:-6379}" volumes: - "${PATHWAY_VOLUME:-../../src/README.md}:/app/data/README.md" - network_mode: host environment: + no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} PATHWAY_HOST: ${PATHWAY_HOST_DB} PATHWAY_PORT: ${PATHWAY_PORT} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "sleep 30 && exit 0"] interval: 1s diff --git a/comps/third_parties/pathway/src/requirements.txt b/comps/third_parties/pathway/src/requirements.txt index ef1bcb44bc..e552e247ff 100644 --- a/comps/third_parties/pathway/src/requirements.txt +++ b/comps/third_parties/pathway/src/requirements.txt @@ -1,7 +1,7 @@ langchain langchain-community -langchain_huggingface -langchain_openai +openai pathway[xpack-llm] sentence-transformers +tiktoken unstructured[all-docs] >= 0.16 diff --git a/comps/third_parties/pathway/src/vectorstore_pathway.py b/comps/third_parties/pathway/src/vectorstore_pathway.py index 22a23a2414..1b9d207edb 100644 --- a/comps/third_parties/pathway/src/vectorstore_pathway.py +++ b/comps/third_parties/pathway/src/vectorstore_pathway.py @@ -7,8 +7,7 @@ import nltk import pathway as pw from langchain import text_splitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_huggingface import HuggingFaceEndpointEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from pathway.xpacks.llm.parsers import ParseUnstructured from pathway.xpacks.llm.vector_store import VectorStoreServer @@ -40,7 +39,7 @@ port = int(os.getenv("PATHWAY_PORT", 8666)) EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") if __name__ == "__main__": @@ -48,7 +47,9 @@ if tei_embedding_endpoint: # create embeddings using TEI endpoint service logging.info(f"Initializing the embedder from tei_embedding_endpoint: {tei_embedding_endpoint}") - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=EMBED_MODEL, api_url=tei_embedding_endpoint + ) else: # create embeddings using local embedding model embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/third_parties/tgi/README.md b/comps/third_parties/tgi/README.md index e12f6d34da..07fb28e8eb 100644 --- a/comps/third_parties/tgi/README.md +++ b/comps/third_parties/tgi/README.md @@ -19,12 +19,12 @@ Run tgi on xeon. ```bash cd deplopyment/docker_compose -docker compose -f compose.yaml tgi-server up -d +docker compose -f compose.yaml up -d tgi-server ``` Run tgi on gaudi. 
```bash cd deplopyment/docker_compose -docker compose -f compose.yaml tgi-gaudi-server up -d +docker compose -f compose.yaml up -d tgi-gaudi-server ``` diff --git a/comps/third_parties/vllm/src/build_docker_vllm.sh b/comps/third_parties/vllm/src/build_docker_vllm.sh index bd8df2e708..bec3a0c8f1 100644 --- a/comps/third_parties/vllm/src/build_docker_vllm.sh +++ b/comps/third_parties/vllm/src/build_docker_vllm.sh @@ -38,7 +38,6 @@ if [ "$hw_mode" = "hpu" ]; then git clone https://github.com/HabanaAI/vllm-fork.git cd ./vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy cd .. rm -rf vllm-fork diff --git a/comps/web_retrievers/deployment/docker_compose/compose.yaml b/comps/web_retrievers/deployment/docker_compose/compose.yaml index d626313eb1..276ca6ed4f 100644 --- a/comps/web_retrievers/deployment/docker_compose/compose.yaml +++ b/comps/web_retrievers/deployment/docker_compose/compose.yaml @@ -15,6 +15,7 @@ services: GOOGLE_API_KEY: ${GOOGLE_API_KEY} GOOGLE_CSE_ID: ${GOOGLE_CSE_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} restart: unless-stopped depends_on: tei-embedding-service: diff --git a/comps/web_retrievers/src/README.md b/comps/web_retrievers/src/README.md index b276a28b3e..cc96a7e4a1 100644 --- a/comps/web_retrievers/src/README.md +++ b/comps/web_retrievers/src/README.md @@ -46,6 +46,7 @@ export GOOGLE_CSE_ID=${GOOGLE_CSE_ID} export TEI_PORT=6060 export no_proxy=$host_ip,$no_proxy export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 +export TEI_EMBEDDING_ENDPOINT=http://${host_ip}:6060 docker compose -f ../deployment/docker_compose/compose.yaml up web-retriever-service tei-embedding-service -d ``` diff --git a/comps/web_retrievers/src/requirements.txt b/comps/web_retrievers/src/requirements.txt index c3b0134961..d893a630de 100644 --- a/comps/web_retrievers/src/requirements.txt +++ b/comps/web_retrievers/src/requirements.txt @@ -4,6 +4,7 @@ docarray[full] fastapi google-api-python-client>=2.100.0 html2text +huggingface-hub==0.27.1 langchain-huggingface langchain_community opentelemetry-api diff --git a/tests/agent/test_agent_langchain_on_intel_hpu.sh b/tests/agent/test_agent_langchain_on_intel_hpu.sh index c2b7de000a..ae2fd984a7 100644 --- a/tests/agent/test_agent_langchain_on_intel_hpu.sh +++ b/tests/agent/test_agent_langchain_on_intel_hpu.sh @@ -60,7 +60,6 @@ function build_vllm_docker_images() { fi cd ./vllm-fork git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy if [ $? -ne 0 ]; then echo "opea/vllm-gaudi:comps failed" diff --git a/tests/chathistory/test_chathistory_mongo.sh b/tests/chathistory/test_chathistory_mongo.sh index 9f32165be7..4bb098d79c 100644 --- a/tests/chathistory/test_chathistory_mongo.sh +++ b/tests/chathistory/test_chathistory_mongo.sh @@ -16,12 +16,12 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/chathistory-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . 
+ docker build --no-cache -t opea/chathistory-mongo:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/chathistory-mongo-server built fail" + echo "opea/chathistory-mongo built fail" exit 1 else - echo "opea/chathistory-mongo-server built successful" + echo "opea/chathistory-mongo built successful" fi } diff --git a/tests/dataprep/test_dataprep_milvus.sh b/tests/dataprep/test_dataprep_milvus.sh index 603fb671cd..498f14f6f1 100644 --- a/tests/dataprep/test_dataprep_milvus.sh +++ b/tests/dataprep/test_dataprep_milvus.sh @@ -36,6 +36,7 @@ function start_service() { export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export MILVUS_HOST=${ip_address} export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export LOGFLAG=true service_name="dataprep-milvus tei-embedding-serving" cd $WORKPATH/comps/dataprep/deployment/docker_compose/ docker compose up ${service_name} -d diff --git a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh index 2b923bb66d..fbafda69e3 100755 --- a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh +++ b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh @@ -38,6 +38,7 @@ function start_service() { export TEI_EMBEDDER_PORT=12006 export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export EMBED_MODEL=${EMBEDDING_MODEL_ID} export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export LLM_ENDPOINT_PORT=10510 export TGI_LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}" diff --git a/tests/dataprep/test_dataprep_qdrant.sh b/tests/dataprep/test_dataprep_qdrant.sh index 818f99da24..9c31e2d7ab 100644 --- a/tests/dataprep/test_dataprep_qdrant.sh +++ b/tests/dataprep/test_dataprep_qdrant.sh @@ -26,6 +26,7 @@ function build_docker_images() { function start_service() { export host_ip=${ip_address} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export EMBED_MODEL=${EMBEDDING_MODEL_ID} export TEI_EMBEDDER_PORT="10224" export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export COLLECTION_NAME="rag-qdrant" diff --git a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh index 92b29827fe..d040f954a1 100644 --- a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh +++ b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh @@ -13,7 +13,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . if [ $? 
-ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh index d9552e9a0d..a6096bd309 100644 --- a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh index 5d489b250d..8607f2c550 100644 --- a/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh index 7c32a8977b..ea8c9ee6ca 100644 --- a/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/retrievers/test_retrievers_elasticsearch.sh b/tests/retrievers/test_retrievers_elasticsearch.sh index 60996a44ec..a5fd53fb72 100644 --- a/tests/retrievers/test_retrievers_elasticsearch.sh +++ b/tests/retrievers/test_retrievers_elasticsearch.sh @@ -79,6 +79,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=elasticsearch-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_milvus.sh b/tests/retrievers/test_retrievers_milvus.sh index 507f43c5af..05bd69601a 100644 --- a/tests/retrievers/test_retrievers_milvus.sh +++ b/tests/retrievers/test_retrievers_milvus.sh @@ -83,6 +83,8 @@ function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=tei-embedding-serving") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh index f6857f35cb..a819e2e485 100644 --- a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh +++ b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh @@ -46,8 +46,8 @@ function start_service() { export RETRIEVER_PORT=11635 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export DATA_PATH="/data2/cache" - export MAX_INPUT_TOKENS=1024 - export MAX_TOTAL_TOKENS=3000 + export MAX_INPUT_TOKENS=4096 + export MAX_TOTAL_TOKENS=8192 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" @@ -67,7 +67,7 @@ function start_service() { docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \ -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" \ - -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps + -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e MAX_INPUT_LEN=$MAX_INPUT_TOKENS -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps sleep 1m @@ -152,7 +152,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-*") + cid=$(docker ps -aq --filter "name=test-comps-*" --filter "name=neo4j-apoc" --filter "name=tgi-gaudi-server" --filter "name=tei-embedding-serving") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans diff --git a/tests/retrievers/test_retrievers_opensearch.sh b/tests/retrievers/test_retrievers_opensearch.sh index 7a5fc0aeb2..ae49c41a90 100644 --- a/tests/retrievers/test_retrievers_opensearch.sh +++ b/tests/retrievers/test_retrievers_opensearch.sh @@ -75,6 +75,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=opensearch-vector-db" --filter "name=tei-embedding-serving") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pathway.sh b/tests/retrievers/test_retrievers_pathway.sh index 86fadaa812..3dbc2bb301 100644 --- a/tests/retrievers/test_retrievers_pathway.sh +++ b/tests/retrievers/test_retrievers_pathway.sh @@ -69,6 +69,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pathway-db") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pgvector.sh b/tests/retrievers/test_retrievers_pgvector.sh index 021d81a0c2..2a51a3e91d 100644 --- a/tests/retrievers/test_retrievers_pgvector.sh +++ b/tests/retrievers/test_retrievers_pgvector.sh @@ -64,6 +64,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pgvector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_qdrant.sh b/tests/retrievers/test_retrievers_qdrant.sh index da2d343ffc..e50642ac0b 100644 --- a/tests/retrievers/test_retrievers_qdrant.sh +++ b/tests/retrievers/test_retrievers_qdrant.sh @@ -59,6 +59,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=qdrant-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_redis.sh b/tests/retrievers/test_retrievers_redis.sh index 0964049f98..aa2bbe61fc 100644 --- a/tests/retrievers/test_retrievers_redis.sh +++ b/tests/retrievers/test_retrievers_redis.sh @@ -131,6 +131,8 @@ function validate_mm_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=redis-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_vdms.sh b/tests/retrievers/test_retrievers_vdms.sh index cd2b41b53e..732fd91134 100644 --- a/tests/retrievers/test_retrievers_vdms.sh +++ b/tests/retrievers/test_retrievers_vdms.sh @@ -78,6 +78,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=retriever-vdms*" --filter "name=vdms-vector-db" --filter "name=tei-embedding-serving") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() {