diff --git a/.github/workflows/_comps-workflow.yml b/.github/workflows/_comps-workflow.yml index 964d2d7284..f7e8019dbe 100644 --- a/.github/workflows/_comps-workflow.yml +++ b/.github/workflows/_comps-workflow.yml @@ -71,7 +71,6 @@ jobs: fi if [[ $(grep -c "vllm-gaudi:" ${docker_compose_yml}) != 0 ]]; then git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git - sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt fi - name: Get build list id: get-build-list diff --git a/.github/workflows/_run-helm-chart.yml b/.github/workflows/_run-helm-chart.yml index 08fc71db66..0eca64ac4c 100644 --- a/.github/workflows/_run-helm-chart.yml +++ b/.github/workflows/_run-helm-chart.yml @@ -134,8 +134,9 @@ jobs: if [[ "${service,,}" == *"third_parties"* ]]; then CHART_NAME="$(echo "${service,,}"|cut -d'/' -f2)" # bridgetower else - CHART_NAME="${service_name}" # agent + CHART_NAME="${service_name}" # web_retrievers fi + CHART_NAME=$(echo "$CHART_NAME" | tr -cd 'a-z0-9') echo "service_name=$service_name" >> $GITHUB_ENV echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV echo "RELEASE_NAME=${CHART_NAME}$(date +%d%H%M%S)" >> $GITHUB_ENV diff --git a/.github/workflows/docker/compose/chathistory-compose.yaml b/.github/workflows/docker/compose/chathistory-compose.yaml index 3991a99734..d3f35db8e6 100644 --- a/.github/workflows/docker/compose/chathistory-compose.yaml +++ b/.github/workflows/docker/compose/chathistory-compose.yaml @@ -3,7 +3,7 @@ # this file should be run in the root of the repo services: - chathistory-mongo-server: + chathistory-mongo: build: dockerfile: comps/chathistory/src/Dockerfile - image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} diff --git a/.github/workflows/manual-comps-test.yml b/.github/workflows/manual-comps-test.yml index c3f73fb25f..7fa8c7255c 100644 --- a/.github/workflows/manual-comps-test.yml +++ b/.github/workflows/manual-comps-test.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "asr" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,animation,dataprep,embeddings,feedback_management,finetuning,guardrails,image2image,image2video,intent_detection,llms,lvms,prompt_registry,ragas,rerankings,retrievers,text2image,text2sql,third_parties,tts,vectorstores,web_retrievers]" required: true type: string build: diff --git a/.github/workflows/manual-docker-publish.yml b/.github/workflows/manual-docker-publish.yml index b7e770dedb..aae3d3ca84 100644 --- a/.github/workflows/manual-docker-publish.yml +++ b/.github/workflows/manual-docker-publish.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,animation,dataprep,embeddings,feedback_management,finetuning,guardrails,image2image,image2video,intent_detection,llms,lvms,prompt_registry,ragas,rerankings,retrievers,text2image,text2sql,third_parties,tts,vectorstores,web_retrievers]" required: false type: string images: diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml 
index fda1528065..51a2a88b75 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -62,7 +62,7 @@ jobs: image-build: needs: get-build-matrix - if: ${{ fromJSON(needs.get-build-matrix.outputs.services).length != 0 }} + if: needs.get-build-matrix.outputs.services != '[]' strategy: matrix: service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }} @@ -96,7 +96,6 @@ jobs: fi if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git - sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt fi - name: Build Image diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000..b1db30c720 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Base image for GenAIComps based OPEA Python applications +# Build: docker build -t opea/comps-base -f Dockerfile . + +ARG IMAGE_NAME=python +ARG IMAGE_TAG=3.11-slim + +FROM ${IMAGE_NAME}:${IMAGE_TAG} AS base + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR $HOME + +COPY *.toml *.py *.txt *.md LICENSE ./ + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r requirements.txt + +COPY comps/ comps/ + +ENV PYTHONPATH=$PYTHONPATH:$HOME + +USER user + +ENTRYPOINT ["sh", "-c", "set && ls -la"] diff --git a/LEGAL_INFORMATION.md b/LEGAL_INFORMATION.md index 223566f3eb..41cb6dc31a 100644 --- a/LEGAL_INFORMATION.md +++ b/LEGAL_INFORMATION.md @@ -13,6 +13,9 @@ Your use of the source code for these components is subject to the terms and con See the accompanying [license](LICENSE) file for full license text and copyright notices. +Please note: some component(s) depend on software subject to non-open source licenses. If you use or redistribute this software, it is your sole responsibility to ensure compliance with such licenses. +e.g. langserve + ## Citation If you use Generative AI Components in your research, use the following BibTeX entry. diff --git a/comps/chathistory/deployment/docker_compose/compose.yaml b/comps/chathistory/deployment/docker_compose/compose.yaml index db9812e692..0e0a5c9ad2 100644 --- a/comps/chathistory/deployment/docker_compose/compose.yaml +++ b/comps/chathistory/deployment/docker_compose/compose.yaml @@ -15,7 +15,7 @@ services: command: mongod --quiet --logpath /dev/null chathistory-mongo: - image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} container_name: chathistory-mongo-server ports: - "${CHATHISTORY_PORT:-6012}:6012" diff --git a/comps/chathistory/src/README.md b/comps/chathistory/src/README.md index 5d753fdec3..3cdf5bf270 100644 --- a/comps/chathistory/src/README.md +++ b/comps/chathistory/src/README.md @@ -23,7 +23,7 @@ export COLLECTION_NAME=${COLLECTION_NAME} ```bash cd ../../../../ -docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . +docker build -t opea/chathistory-mongo:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile .
``` ### Run Docker with CLI diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 4053988566..2d1957b1b7 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -7,6 +7,7 @@ import json import os import re +import threading import time from typing import Dict, List @@ -27,20 +28,56 @@ class OrchestratorMetrics: - # Because: - # - CI creates several orchestrator instances - # - Prometheus requires metrics to be singletons - # - Oorchestror instances are not provided their own names - # Metrics are class members with "megaservice" name prefix - first_token_latency = Histogram("megaservice_first_token_latency", "First token latency (histogram)") - inter_token_latency = Histogram("megaservice_inter_token_latency", "Inter-token latency (histogram)") - request_latency = Histogram("megaservice_request_latency", "Whole request/reply latency (histogram)") - request_pending = Gauge("megaservice_request_pending", "Count of currently pending requests (gauge)") + # Need a static class-level ID for the metric prefix because: + # - Prometheus requires metrics (their names) to be unique + _instance_id = 0 def __init__(self) -> None: - pass + OrchestratorMetrics._instance_id += 1 + if OrchestratorMetrics._instance_id > 1: + self._prefix = f"megaservice{self._instance_id}" + else: + self._prefix = "megaservice" + + self.request_pending = Gauge(f"{self._prefix}_request_pending", "Count of currently pending requests (gauge)") + + # locking for latency metric creation / method change + self._lock = threading.Lock() + + # Metrics related to token processing are created on demand, + # to avoid bogus ones for services that never handle tokens + self.first_token_latency = None + self.inter_token_latency = None + self.request_latency = None + + # initial methods to create the metrics + self.token_update = self._token_update_create + self.request_update = self._request_update_create + + def _token_update_create(self, token_start: float, is_first: bool) -> float: + with self._lock: + # in case another thread already got here + if self.token_update == self._token_update_create: + self.first_token_latency = Histogram( + f"{self._prefix}_first_token_latency", "First token latency (histogram)" + ) + self.inter_token_latency = Histogram( + f"{self._prefix}_inter_token_latency", "Inter-token latency (histogram)" + ) + self.token_update = self._token_update_real + return self.token_update(token_start, is_first) + + def _request_update_create(self, req_start: float) -> None: + with self._lock: + # in case another thread already got here + if self.request_update == self._request_update_create: + self.request_latency = Histogram( + f"{self._prefix}_request_latency", "Whole LLM request/reply latency (histogram)" + ) + self.request_update = self._request_update_real + self.request_update(req_start) - def token_update(self, token_start: float, is_first: bool) -> float: + def _token_update_real(self, token_start: float, is_first: bool) -> float: now = time.time() if is_first: self.first_token_latency.observe(now - token_start) @@ -48,7 +85,7 @@ def token_update(self, token_start: float, is_first: bool) -> float: self.inter_token_latency.observe(now - token_start) return now - def request_update(self, req_start: float) -> None: + def _request_update_real(self, req_start: float) -> None: self.request_latency.observe(time.time() - req_start) def pending_update(self, increase: bool) -> None: diff --git a/comps/dataprep/deployment/docker_compose/compose.yaml
b/comps/dataprep/deployment/docker_compose/compose.yaml index ef54a69e0c..8fff22cd82 100644 --- a/comps/dataprep/deployment/docker_compose/compose.yaml +++ b/comps/dataprep/deployment/docker_compose/compose.yaml @@ -28,7 +28,7 @@ services: DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_ELASTICSEARCH" ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped depends_on: @@ -49,6 +49,7 @@ services: MILVUS_HOST: ${MILVUS_HOST} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} restart: unless-stopped depends_on: tei-embedding-serving: @@ -161,7 +162,7 @@ services: QDRANT_HOST: ${QDRANT_HOST} QDRANT_PORT: ${QDRANT_PORT} COLLECTION_NAME: ${COLLECTION_NAME} - TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped diff --git a/comps/dataprep/src/README_elasticsearch.md b/comps/dataprep/src/README_elasticsearch.md index ab4b8547b5..94d8d47ba1 100644 --- a/comps/dataprep/src/README_elasticsearch.md +++ b/comps/dataprep/src/README_elasticsearch.md @@ -50,7 +50,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest +docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_milvus.md b/comps/dataprep/src/README_milvus.md index bcf545f72b..3e79ff6976 100644 --- a/comps/dataprep/src/README_milvus.md +++ b/comps/dataprep/src/README_milvus.md @@ -25,6 +25,7 @@ export MILVUS_HOST=${your_milvus_host_ip} export MILVUS_PORT=19530 export COLLECTION_NAME=${your_collection_name} export TEI_EMBEDDING_ENDPOINT=${your_embedding_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_huggingface_api_token} ``` ### 1.4 Start TEI Embedding Service @@ -70,13 +71,15 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ```bash export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export EMBEDDING_MODEL_ID=${your_embedding_model_id} export MILVUS_HOST=${your_host_ip} ``` ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest +docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e 
HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest ``` ### 2.4 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_opensearch.md b/comps/dataprep/src/README_opensearch.md index b5d14c9a9d..b5f1c2c26c 100644 --- a/comps/dataprep/src/README_opensearch.md +++ b/comps/dataprep/src/README_opensearch.md @@ -51,7 +51,7 @@ curl localhost:$your_port/embed \ After checking that it works, set up environment variables. ```bash -export TEI_ENDPOINT="http://localhost:$your_port" +export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" ``` ### 1.4 Start Document Preparation Microservice for OpenSearch with Python Script @@ -75,7 +75,7 @@ Please refer to this [readme](../../third_parties/opensearch/src/README.md). ```bash export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_ENDPOINT="http://${your_ip}:6006" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006" export OPENSEARCH_URL="http://${your_ip}:9200" export INDEX_NAME=${your_index_name} export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} @@ -97,7 +97,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil - option 1: Start single-process version (for processing up to 10 files) ```bash -docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest +docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) diff --git a/comps/dataprep/src/README_pgvector.md b/comps/dataprep/src/README_pgvector.md index 92d0db577a..1fdb678e7f 100644 --- a/comps/dataprep/src/README_pgvector.md +++ b/comps/dataprep/src/README_pgvector.md @@ -38,6 +38,8 @@ Please refer to this [readme](../../third_parties/pgvector/src/README.md). 
```bash export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb export INDEX_NAME=${your_index_name} +export TEI_EMBEDDING_ENDPOINT=${your_tei_embedding_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ``` ### 2.3 Build Docker Image @@ -50,7 +52,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest +docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_redis.md b/comps/dataprep/src/README_redis.md index c6e4555295..69f3ae39af 100644 --- a/comps/dataprep/src/README_redis.md +++ b/comps/dataprep/src/README_redis.md @@ -95,8 +95,7 @@ Please refer to this [readme](../../third_parties/redis/src/README.md). ### 2.2 Setup Environment Variables ```bash -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_ENDPOINT="http://${your_ip}:6006" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006" export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} @@ -112,7 +111,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest +docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) diff --git a/comps/dataprep/src/README_vdms.md b/comps/dataprep/src/README_vdms.md index 7571ca80d2..7318282c4e 100644 --- a/comps/dataprep/src/README_vdms.md +++ b/comps/dataprep/src/README_vdms.md @@ -69,7 +69,8 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export VDMS_HOST=${host_ip} export VDMS_PORT=55555 -export TEI_ENDPOINT=${your_tei_endpoint} +export TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export COLLECTION_NAME=${your_collection_name} export SEARCH_ENGINE="FaissFlat" export DISTANCE_STRATEGY="L2" @@ -89,7 +90,8 @@ Start single-process version (for 1-10 files processing) ```bash docker run -d --name="dataprep-vdms-server" -p 6007:6007 --runtime=runc --ipc=host \ --e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_ENDPOINT=$TEI_ENDPOINT \ +-e http_proxy=$http_proxy -e https_proxy=$https_proxy 
\ +-e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ -e COLLECTION_NAME=$COLLECTION_NAME -e VDMS_HOST=$VDMS_HOST -e VDMS_PORT=$VDMS_PORT \ -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_VDMS" opea/dataprep:latest ``` diff --git a/comps/dataprep/src/integrations/elasticsearch.py b/comps/dataprep/src/integrations/elasticsearch.py index ed07d157ea..83e422741e 100644 --- a/comps/dataprep/src/integrations/elasticsearch.py +++ b/comps/dataprep/src/integrations/elasticsearch.py @@ -9,10 +9,9 @@ from elasticsearch import Elasticsearch from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_core.documents import Document from langchain_elasticsearch import ElasticsearchStore -from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.dataprep.src.utils import ( @@ -37,7 +36,9 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # TEI Embedding endpoints -TEI_ENDPOINT = os.getenv("TEI_ENDPOINT", "") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag-elastic") @@ -77,15 +78,31 @@ def create_index(self) -> None: if not self.es_client.indices.exists(index=INDEX_NAME): self.es_client.indices.create(index=INDEX_NAME) - def get_embedder(self) -> Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]: + def get_embedder(self) -> Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings]: """Obtain required Embedder.""" - if TEI_ENDPOINT: - return HuggingFaceEndpointEmbeddings(model=TEI_ENDPOINT) + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` and the `EMBED_MODEL` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) + return embedder else: return HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) def get_elastic_store( - self, embedder: Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings] + self, embedder: Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings] ) -> ElasticsearchStore: """Get Elasticsearch vector store.""" return ElasticsearchStore(index_name=INDEX_NAME, embedding=embedder, es_connection=self.es_client) diff --git a/comps/dataprep/src/integrations/milvus.py b/comps/dataprep/src/integrations/milvus.py index c3e3e57309..c4aecf86e4 100644 --- a/comps/dataprep/src/integrations/milvus.py +++ b/comps/dataprep/src/integrations/milvus.py @@ -10,7 +10,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings, OpenAIEmbeddings from langchain_core.documents import Document from langchain_milvus.vectorstores import Milvus from langchain_text_splitters import HTMLHeaderTextSplitter @@ -36,8 +36,11 @@ # Local Embedding model LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") # TEI configuration -TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") +EMBED_MODEL = os.environ.get("EMBED_MODEL", "BAAI/bge-base-en-v1.5") TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + # MILVUS configuration MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) @@ -75,7 +78,7 @@ def ingest_chunks_to_milvus(embeddings, file_name: str, chunks: List): except Exception as e: if logflag: logger.info(f"[ ingest chunks ] fail to ingest chunks into Milvus. error: {e}") - raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}: {e}") if logflag: logger.info(f"[ ingest chunks ] Docs ingested file {file_name} to Milvus collection {COLLECTION_NAME}.") @@ -189,7 +192,23 @@ def _initialize_embedder(self): # create embeddings using TEI endpoint service if logflag: logger.info(f"[ milvus embedding ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") - embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + # create embeddings using TEI endpoint service + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model if logflag: @@ -274,7 +293,7 @@ async def ingest_files( search_res = search_by_file(my_milvus.col, encode_file) except Exception as e: raise HTTPException( - status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}." + status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}: {e}" ) if len(search_res) > 0: if logflag: @@ -319,7 +338,7 @@ async def ingest_files( search_res = search_by_file(my_milvus.col, encoded_link + ".txt") except Exception as e: raise HTTPException( - status_code=500, detail=f"Failed when searching in Milvus db for link {link}." + status_code=500, detail=f"Failed when searching in Milvus db for link {link}: {e}" ) if len(search_res) > 0: if logflag: @@ -375,7 +394,7 @@ async def get_files(self): try: all_data = search_all(my_milvus.col) except Exception as e: - raise HTTPException(status_code=500, detail="Failed when searching in Milvus db for all files.") + raise HTTPException(status_code=500, detail=f"Failed when searching in Milvus db for all files: {e}") # return [] if no data in db if len(all_data) == 0: @@ -422,8 +441,7 @@ async def delete_files(self, file_path: str = Body(..., embed=True)): except Exception as e: if logflag: logger.info(f"[ milvus delete ] {e}. Fail to delete {upload_folder}.") - raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") - + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}: {e}") if logflag: logger.info("[ milvus delete ] successfully delete all files.") diff --git a/comps/dataprep/src/integrations/neo4j_langchain.py b/comps/dataprep/src/integrations/neo4j_langchain.py index ba03437972..75b23252d8 100644 --- a/comps/dataprep/src/integrations/neo4j_langchain.py +++ b/comps/dataprep/src/integrations/neo4j_langchain.py @@ -34,10 +34,9 @@ NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") -# LLM/Embedding endpoints +# LLM endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") OPENAI_KEY = os.getenv("OPENAI_API_KEY") diff --git a/comps/dataprep/src/integrations/opensearch.py b/comps/dataprep/src/integrations/opensearch.py index 6f4b10bbd1..8df0104004 100644 --- a/comps/dataprep/src/integrations/opensearch.py +++ b/comps/dataprep/src/integrations/opensearch.py @@ -7,9 +7,8 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import OpenSearchVectorSearch -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from opensearchpy import OpenSearch @@ -79,9 +78,26 @@ def __init__(self, name: str, description: str, config: dict = None): self.upload_folder = "./uploaded_files/" super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) # Initialize 
embeddings - tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") - if tei_embedding_endpoint: - self.embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") + HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." + ) + model_id = response.json()["model_id"] + self.embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: self.embeddings = HuggingFaceBgeEmbeddings(model_name=Config.EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/pgvect.py b/comps/dataprep/src/integrations/pgvect.py index 43b38e5d6d..d9eb698782 100644 --- a/comps/dataprep/src/integrations/pgvect.py +++ b/comps/dataprep/src/integrations/pgvect.py @@ -10,7 +10,7 @@ import psycopg2 from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import PGVector from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType @@ -30,6 +30,10 @@ # Embedding model EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +# TEI Embedding endpoints +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") PG_CONNECTION_STRING = os.getenv("PG_CONNECTION_STRING", "localhost") @@ -47,12 +51,26 @@ class OpeaPgvectorDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/pipecone.py b/comps/dataprep/src/integrations/pipecone.py index 33ffeea4b6..ec03174608 100644 --- a/comps/dataprep/src/integrations/pipecone.py +++ b/comps/dataprep/src/integrations/pipecone.py @@ -8,7 +8,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_pinecone import PineconeVectorStore from langchain_text_splitters import HTMLHeaderTextSplitter from pinecone import Pinecone, ServerlessSpec @@ -39,7 +39,9 @@ # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") @OpeaComponentRegistry.register("OPEA_DATAPREP_PINECONE") @@ -48,12 +50,26 @@ class OpeaPineConeDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/qdrant.py b/comps/dataprep/src/integrations/qdrant.py index e54c6c572b..62a9efa21a 100644 --- a/comps/dataprep/src/integrations/qdrant.py +++ b/comps/dataprep/src/integrations/qdrant.py @@ -7,9 +7,8 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import Qdrant -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from qdrant_client import QdrantClient @@ -38,7 +37,9 @@ # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") @OpeaComponentRegistry.register("OPEA_DATAPREP_QDRANT") @@ -47,12 +48,26 @@ class OpeaQdrantDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/redis.py b/comps/dataprep/src/integrations/redis.py index 06cb0d7f27..a181013bcd 100644 --- a/comps/dataprep/src/integrations/redis.py +++ b/comps/dataprep/src/integrations/redis.py @@ -11,9 +11,8 @@ import redis from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import Redis -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from redis.commands.search.field import TextField from redis.commands.search.indexDefinition import IndexDefinition, IndexType @@ -40,6 +39,8 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # TEI Embedding endpoints TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag_redis") @@ -187,8 +188,23 @@ def ingest_chunks_to_redis(file_name: str, chunks: List): logger.info(f"[ redis ingest chunks ] file name: {file_name}") # Create vectorstore if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/vdms.py b/comps/dataprep/src/integrations/vdms.py index 998b23a5c7..e4085b1812 100644 --- a/comps/dataprep/src/integrations/vdms.py +++ b/comps/dataprep/src/integrations/vdms.py @@ -7,7 +7,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores.vdms import VDMS, VDMS_Client from langchain_text_splitters import HTMLHeaderTextSplitter @@ -28,7 +28,6 @@ def getEnv(key, default_value=None): env_value = os.getenv(key, default=default_value) - print(f"{key}: {env_value}") return env_value @@ -45,7 +44,9 @@ def getEnv(key, default_value=None): # LLM/Embedding endpoints TGI_LLM_ENDPOINT = getEnv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = getEnv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = getEnv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # chunk parameters CHUNK_SIZE = getEnv("CHUNK_SIZE", 1500) @@ -58,14 +59,28 @@ class OpeaVdmsDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") self.upload_folder = "./uploaded_files/" create_upload_folder(self.upload_folder) self.client = VDMS_Client(VDMS_HOST, int(VDMS_PORT)) # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/embeddings/src/integrations/tei.py b/comps/embeddings/src/integrations/tei.py index dd7dd602e1..8d589fb822 100644 --- a/comps/embeddings/src/integrations/tei.py +++ b/comps/embeddings/src/integrations/tei.py @@ -70,6 +70,7 @@ async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: raise TypeError("Unsupported input type: input must be a string or list of strings.") response = await self.client.post( json={"input": texts, "encoding_format": input.encoding_format, "model": input.model, "user": input.user}, + model=f"{self.base_url}/v1/embeddings", task="text-embedding", ) embeddings = json.loads(response.decode()) diff --git a/comps/guardrails/deployment/kubernetes/README.md b/comps/guardrails/deployment/kubernetes/README.md index b309900a07..2b3d3002c1 100644 --- a/comps/guardrails/deployment/kubernetes/README.md +++ b/comps/guardrails/deployment/kubernetes/README.md @@ -7,5 +7,5 @@ ``` export HFTOKEN="insert-your-huggingface-token-here" -helm install guardrails oci://ghcr.io/opea-project/charts/guardrails --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +helm install guardrails oci://ghcr.io/opea-project/charts/guardrails-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml ``` diff --git a/comps/guardrails/src/guardrails/README.md b/comps/guardrails/src/guardrails/README.md index 7794d9490f..15679f706e 100644 --- a/comps/guardrails/src/guardrails/README.md +++ b/comps/guardrails/src/guardrails/README.md @@ -9,9 +9,9 @@ The Guardrails Microservice now offers two primary types of guardrails: - Input Guardrails: These are applied to user inputs. An input guardrail can either reject the input, halting further processing. - Output Guardrails: These are applied to outputs generated by the LLM. An output guardrail can reject the output, preventing it from being returned to the user. -## LlamaGuard +**This microservice supports Meta's [Llama Guard](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B) and Allen Institute for AI's [WildGuard](https://huggingface.co/allenai/wildguard) models.** -We offer content moderation support utilizing Meta's [Llama Guard](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B) model. +## Llama Guard Any content that is detected in the following categories is determined as unsafe: @@ -22,111 +22,84 @@ Any content that is detected in the following categories is determined as unsafe - Regulated or Controlled Substances - Suicide & Self Harm -### 🚀1. Start Microservice with Python (Option 1) - -To start the Guardrails microservice, you need to install python packages first. +## WildGuard -#### 1.1 Install Requirements +`allenai/wildguard` was fine-tuned from `mistralai/Mistral-7B-v0.3` on their own [`allenai/wildguardmix`](https://huggingface.co/datasets/allenai/wildguardmix) dataset. 
Any content that is detected in the following categories is determined as unsafe: -```bash -pip install -r requirements.txt -``` +- Privacy +- Misinformation +- Harmful Language +- Malicious Uses -#### 1.2 Start TGI Gaudi Service +## Clone OPEA GenAIComps and set initial environment variables ```bash -export HF_TOKEN=${your_hf_api_token} -volume=$PWD/data -model_id="meta-llama/Meta-Llama-Guard-2-8B" -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 +git clone https://github.com/opea-project/GenAIComps.git +export OPEA_GENAICOMPS_ROOT=$(pwd)/GenAIComps +export GUARDRAIL_PORT=9090 ``` -#### 1.3 Verify the TGI Gaudi Service +## Start up the HuggingFace Text Generation Inference (TGI) Server -```bash -curl 127.0.0.1:8088/generate \ - -X POST \ - -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` +Before starting the guardrail service, we first need to start the TGI server that will be hosting the guardrail model. -#### 1.4 Start Guardrails Service +Choose one of the following before starting your TGI server. -Optional: If you have deployed a Guardrails model with TGI Gaudi Service other than default model (i.e., `meta-llama/Meta-Llama-Guard-2-8B`) [from section 1.2](#12-start-tgi-gaudi-service), you will need to add the eviornment variable `SAFETY_GUARD_MODEL_ID` containing the model id. For example, the following informs the Guardrails Service the deployed model used LlamaGuard2: +**For LlamaGuard:** ```bash export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" +export GUARDRAILS_COMPONENT_NAME=OPEA_LLAMA_GUARD ``` +Or + ```bash -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -python guardrails_tgi.py +export SAFETY_GUARD_MODEL_ID="meta-llama/LlamaGuard-7b" +export GUARDRAILS_COMPONENT_NAME=OPEA_LLAMA_GUARD ``` -### 🚀2. Start Microservice with Docker (Option 2) - -If you start an Guardrails microservice with docker, the `docker_compose_guardrails.yaml` file will automatically start a TGI gaudi service with docker. +_Other variations of LlamaGuard are also an option to use but are not guaranteed to work OOB._ -#### 2.1 Setup Environment Variables - -In order to start TGI and LLM services, you need to setup the following environment variables first. +**For Wild Guard:** ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -export LLM_MODEL_ID=${your_hf_llm_model} +export SAFETY_GUARD_MODEL_ID="allenai/wildguard" +export GUARDRAILS_COMPONENT_NAME=OPEA_WILD_GUARD ``` -#### 2.2 Build Docker Image - -```bash -cd ../../../../ -docker build -t opea/guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . 
-``` +_Note that both of these models are gated and you need to complete their form on their associated model pages first in order to use them with your HF token._ -#### 2.3 Run Docker with CLI +Follow the steps [here](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/tgi) to start the TGI server container where LLM_MODEL_ID is set to your SAFETY_GUARD_MODEL_ID like below: ```bash -docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/guardrails:latest +export LLM_MODEL_ID=$SAFETY_GUARD_MODEL_ID ``` -#### 2.4 Run Docker with Docker Compose +Once the container is starting up and loading the model, set the endpoint that you will use to make requests to the TGI server: ```bash -cd deployment/docker_compose/ -docker compose -f compose_llamaguard.yaml up -d +export SAFETY_GUARD_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" ``` -### 🚀3. Consume Guardrails Service +**Verify that the TGI Server is ready for inference** -#### 3.1 Check Service Status +First check that the TGI server successfully loaded the guardrail model. Loading the model could take up to 5-10 minutes. You can do this by running the following: ```bash -curl http://localhost:9090/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' +docker logs tgi-gaudi-server ``` -#### 3.2 Consume Guardrails Service +If the last line of the log contains something like `INFO text_generation_router::server: router/src/server.rs:2209: Connected` then your TGI server is ready and the following curl should work: ```bash -curl http://localhost:9090/v1/guardrails\ +curl localhost:${LLM_ENDPOINT_PORT}/generate \ -X POST \ - -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ -H 'Content-Type: application/json' ``` -## WildGuard - -We also offer content moderation support utilizing Allen Institute for AI's [WildGuard](https://huggingface.co/allenai/wildguard) model. - -`allenai/wildguard` was fine-tuned from `mistralai/Mistral-7B-v0.3` on their own [`allenai/wildguardmix`](https://huggingface.co/datasets/allenai/wildguardmix) dataset. Any content that is detected in the following categories is determined as unsafe: - -- Privacy -- Misinformation -- Harmful Language -- Malicious Uses +Check the logs again with the `logs` command to confirm that the curl request resulted in `Success`. ### 🚀1. Start Microservice with Python (Option 1) @@ -135,67 +108,76 @@ To start the Guardrails microservice, you need to install python packages first. 
#### 1.1 Install Requirements ```bash +pip install $OPEA_GENAICOMPS_ROOT +cd $OPEA_GENAICOMPS_ROOT/comps/guardrails/src/guardrails pip install -r requirements.txt ``` -#### 1.2 Start TGI Gaudi Service +#### 1.2 Start Guardrails Service ```bash -export HF_TOKEN=${your_hf_api_token} -volume=$PWD/data -model_id="allenai/wildguard" -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 +python opea_guardrails_microservice.py ``` -#### 1.3 Verify the TGI Gaudi Service +### 🚀2. Start Microservice with Docker (Option 2) -```bash -curl 127.0.0.1:8088/generate \ - -X POST \ - -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` +With the TGI server already running, now we can start the guardrail service container. -#### 1.4 Start Guardrails Service +#### 2.1 Build Docker Image ```bash -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -python guardrails_tgi.py +cd $OPEA_GENAICOMPS_ROOT +docker build -t opea/guardrails:latest \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -f comps/guardrails/src/guardrails/Dockerfile . ``` -### 🚀2. Start Microservice with Docker (Option 2) - -If you start an Guardrails microservice with docker, the `compose_wildguard.yaml` file will automatically start a TGI gaudi service with docker. - -#### 2.1 Setup Environment Variables +#### 2.2.a Run with Docker Compose (Option A) -In order to start TGI and LLM services, you need to setup the following environment variables first. +**To run with Llama Guard:** ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -export LLM_MODEL_ID=${your_hf_llm_model} +docker compose -f $OPEA_GENAICOMPS_ROOT/comps/guardrails/deployment/docker_compose/compose.yaml up -d llamaguard-guardrails-server ``` -#### 2.2 Build Docker Image +**To run with WildGuard:** ```bash -cd ../../../../ -docker build -t opea/guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile .
+docker compose -f $OPEA_GENAICOMPS_ROOT/comps/guardrails/deployment/docker_compose/compose.yaml up -d wildguard-guardrails-server ``` -#### 2.3 Run Docker with CLI +#### 2.2.b Run Docker with CLI (Option B) + +**To run with Llama Guard:** ```bash -docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e GUARDRAILS_COMPONENT_NAME="OPEA_WILD_GUARD" opea/guardrails:latest +docker run -d \ + --name="llamaguard-guardrails-server" \ + -p ${GUARDRAIL_PORT}:${GUARDRAIL_PORT} \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT \ + -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN \ + opea/guardrails:latest ``` -#### 2.4 Run Docker with Docker Compose +**To run with WildGuard:** ```bash -cd deployment/docker_compose/ -docker compose -f compose_wildguard.yaml up -d +docker run -d \ + --name="wildguard-guardrails-server" \ + -p ${GUARDRAIL_PORT}:${GUARDRAIL_PORT} \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT \ + -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN \ + -e GUARDRAILS_COMPONENT_NAME="OPEA_WILD_GUARD" \ + opea/guardrails:latest ``` ### 🚀3. Consume Guardrails Service @@ -203,7 +185,7 @@ docker compose -f compose_wildguard.yaml up -d #### 3.1 Check Service Status ```bash -curl http://localhost:9090/v1/health_check \ +curl http://localhost:${GUARDRAIL_PORT}/v1/health_check\ -X GET \ -H 'Content-Type: application/json' ``` @@ -211,8 +193,13 @@ curl http://localhost:9090/v1/health_check \ #### 3.2 Consume Guardrails Service ```bash -curl http://localhost:9090/v1/guardrails \ +curl http://localhost:${GUARDRAIL_PORT}/v1/guardrails\ -X POST \ -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ -H 'Content-Type: application/json' ``` + +This request should return text containing: +`"Violated policies: <category>, please check your input."` + +Where `<category>` is `Violent Crimes` or `harmful` for `Llama-Guard-2-8B` or `wildguard`, respectively.
diff --git a/comps/llms/src/doc-summarization/integrations/tgi.py b/comps/llms/src/doc-summarization/integrations/tgi.py index 002f8de19f..902334bc03 100644 --- a/comps/llms/src/doc-summarization/integrations/tgi.py +++ b/comps/llms/src/doc-summarization/integrations/tgi.py @@ -4,7 +4,7 @@ import os import requests -from langchain_community.llms import HuggingFaceEndpoint +from langchain_huggingface import HuggingFaceEndpoint from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.cores.proto.api_protocol import DocSumChatCompletionRequest @@ -71,6 +71,7 @@ async def invoke(self, input: DocSumChatCompletionRequest): repetition_penalty=input.repetition_penalty if input.repetition_penalty else 1.03, streaming=input.stream, server_kwargs=server_kwargs, + task="text-generation", ) result = await self.generate(input, self.client) diff --git a/comps/llms/src/doc-summarization/requirements.txt b/comps/llms/src/doc-summarization/requirements.txt index 1694618637..6bc1bb1e55 100644 --- a/comps/llms/src/doc-summarization/requirements.txt +++ b/comps/llms/src/doc-summarization/requirements.txt @@ -1,11 +1,11 @@ docarray[full] fastapi httpx==0.27.2 -huggingface_hub -langchain #==0.1.12 +huggingface_hub==0.27.1 +langchain==0.3.14 langchain-huggingface langchain-openai -langchain_community +langchain_community==0.3.14 langchainhub opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/llms/src/faq-generation/requirements.txt b/comps/llms/src/faq-generation/requirements.txt index 36257d3939..037079294b 100644 --- a/comps/llms/src/faq-generation/requirements.txt +++ b/comps/llms/src/faq-generation/requirements.txt @@ -1,10 +1,10 @@ docarray[full] fastapi -huggingface_hub -langchain +huggingface_hub==0.27.1 +langchain==0.3.14 langchain-huggingface langchain-openai -langchain_community +langchain_community==0.3.14 langchainhub opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/llms/src/text-generation/README.md b/comps/llms/src/text-generation/README.md index 360c459dc1..ba1a31df39 100644 --- a/comps/llms/src/text-generation/README.md +++ b/comps/llms/src/text-generation/README.md @@ -8,14 +8,31 @@ Overall, this microservice offers a streamlined way to integrate large language ## Validated LLM Models -| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | -| --------------------------- | --------- | -------- | ---------- | -| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | -| [Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | -| [Llama-2-70b-chat-hf] | ✓ | - | ✓ | -| [Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | -| [Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | -| [Phi-3] | x | Limit 4K | Limit 4K | +| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | +| ------------------------------------------- | --------- | -------- | ---------- | +| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-70b-chat-hf] | ✓ | - | ✓ | +| [meta-llama/Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | +| [meta-llama/Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | +| [Phi-3] | x | Limit 4K | Limit 4K | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | ✓ | - | ✓ | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | ✓ | - | ✓ | + +### System Requirements for LLM Models + +| Model | Minimum number of Gaudi cards | +| ------------------------------------------- | ----------------------------- | +| [Intel/neural-chat-7b-v3-3] | 1 | +| [meta-llama/Llama-2-7b-chat-hf] | 1 | +| [meta-llama/Llama-2-70b-chat-hf] | 2 | +| [meta-llama/Meta-Llama-3-8B-Instruct] | 1 | +| 
[meta-llama/Meta-Llama-3-70B-Instruct] | 2 | +| [Phi-3] | x | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | 8 | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | 4 | + +> NOTE: Detailed system requirements coming soon. ## Support integrations @@ -166,9 +183,11 @@ curl http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions \ [Intel/neural-chat-7b-v3-3]: https://huggingface.co/Intel/neural-chat-7b-v3-3 -[Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -[Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -[Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct -[Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct +[meta-llama/Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf +[meta-llama/Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf +[meta-llama/Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct +[meta-llama/Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct [Phi-3]: https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3 [HuggingFace]: https://huggingface.co/ +[deepseek-ai/DeepSeek-R1-Distill-Llama-70B]: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B +[deepseek-ai/DeepSeek-R1-Distill-Qwen-32B]: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B diff --git a/comps/rerankings/src/integrations/tei.py b/comps/rerankings/src/integrations/tei.py index 8060555fa8..f1ebd9b6aa 100644 --- a/comps/rerankings/src/integrations/tei.py +++ b/comps/rerankings/src/integrations/tei.py @@ -71,6 +71,7 @@ async def invoke( response = await self.client.post( json={"query": query, "texts": docs}, + model=f"{self.base_url}/rerank", task="text-reranking", ) diff --git a/comps/retrievers/src/Dockerfile b/comps/retrievers/src/Dockerfile index 3fb6b3650e..53963d884d 100644 --- a/comps/retrievers/src/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -26,7 +26,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi && \ pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \ pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/retrievers/src/requirements.txt && \ - pip install opentelemetry-api==1.27.0 opentelemetry-exporter-otlp==1.27.0 opentelemetry-sdk==1.27.0 + pip install opentelemetry-api==1.29.0 opentelemetry-exporter-otlp==1.29.0 opentelemetry-sdk==1.29.0 ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/retrievers/src/integrations/vdms.py b/comps/retrievers/src/integrations/vdms.py index b6a44fdf14..5e5b1731fa 100644 --- a/comps/retrievers/src/integrations/vdms.py +++ b/comps/retrievers/src/integrations/vdms.py @@ -48,7 +48,7 @@ def _initialize_embedder(self): from comps.third_parties.clip.src.clip_embedding import vCLIP embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64}) - if TEI_EMBEDDING_ENDPOINT: + elif TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service if logflag: logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") diff --git a/comps/retrievers/src/requirements.txt b/comps/retrievers/src/requirements.txt index a04fef1771..511bcc744f 100644 --- a/comps/retrievers/src/requirements.txt +++ b/comps/retrievers/src/requirements.txt @@ -3,6 +3,7 @@ cairosvg docarray[full] docx2txt easyocr +einops fastapi future graspologic diff --git 
a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml index 35dc90c32e..9c1ead2b94 100644 --- a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml +++ b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 include: - - ../../../tei/deployment/docker_compose/compose.yaml + - ../../../tei/deployment/docker_compose/compose.yaml services: pathway-db: @@ -12,13 +12,15 @@ services: - "${PATHWAY_PORT:-6379}:${PATHWAY_PORT:-6379}" volumes: - "${PATHWAY_VOLUME:-../../src/README.md}:/app/data/README.md" - network_mode: host environment: + no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} PATHWAY_HOST: ${PATHWAY_HOST_DB} PATHWAY_PORT: ${PATHWAY_PORT} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "sleep 30 && exit 0"] interval: 1s diff --git a/comps/third_parties/pathway/src/requirements.txt b/comps/third_parties/pathway/src/requirements.txt index ef1bcb44bc..e552e247ff 100644 --- a/comps/third_parties/pathway/src/requirements.txt +++ b/comps/third_parties/pathway/src/requirements.txt @@ -1,7 +1,7 @@ langchain langchain-community -langchain_huggingface -langchain_openai +openai pathway[xpack-llm] sentence-transformers +tiktoken unstructured[all-docs] >= 0.16 diff --git a/comps/third_parties/pathway/src/vectorstore_pathway.py b/comps/third_parties/pathway/src/vectorstore_pathway.py index 22a23a2414..1b9d207edb 100644 --- a/comps/third_parties/pathway/src/vectorstore_pathway.py +++ b/comps/third_parties/pathway/src/vectorstore_pathway.py @@ -7,8 +7,7 @@ import nltk import pathway as pw from langchain import text_splitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_huggingface import HuggingFaceEndpointEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from pathway.xpacks.llm.parsers import ParseUnstructured from pathway.xpacks.llm.vector_store import VectorStoreServer @@ -40,7 +39,7 @@ port = int(os.getenv("PATHWAY_PORT", 8666)) EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") if __name__ == "__main__": @@ -48,7 +47,9 @@ if tei_embedding_endpoint: # create embeddings using TEI endpoint service logging.info(f"Initializing the embedder from tei_embedding_endpoint: {tei_embedding_endpoint}") - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=EMBED_MODEL, api_url=tei_embedding_endpoint + ) else: # create embeddings using local embedding model embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/third_parties/tgi/README.md b/comps/third_parties/tgi/README.md index e12f6d34da..07fb28e8eb 100644 --- a/comps/third_parties/tgi/README.md +++ b/comps/third_parties/tgi/README.md @@ -19,12 +19,12 @@ Run tgi on xeon. ```bash cd deplopyment/docker_compose -docker compose -f compose.yaml tgi-server up -d +docker compose -f compose.yaml up -d tgi-server ``` Run tgi on gaudi. 
```bash cd deplopyment/docker_compose -docker compose -f compose.yaml tgi-gaudi-server up -d +docker compose -f compose.yaml up -d tgi-gaudi-server ``` diff --git a/comps/third_parties/vllm/src/build_docker_vllm.sh b/comps/third_parties/vllm/src/build_docker_vllm.sh index bd8df2e708..bec3a0c8f1 100644 --- a/comps/third_parties/vllm/src/build_docker_vllm.sh +++ b/comps/third_parties/vllm/src/build_docker_vllm.sh @@ -38,7 +38,6 @@ if [ "$hw_mode" = "hpu" ]; then git clone https://github.com/HabanaAI/vllm-fork.git cd ./vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy cd .. rm -rf vllm-fork diff --git a/comps/web_retrievers/deployment/docker_compose/compose.yaml b/comps/web_retrievers/deployment/docker_compose/compose.yaml index d626313eb1..276ca6ed4f 100644 --- a/comps/web_retrievers/deployment/docker_compose/compose.yaml +++ b/comps/web_retrievers/deployment/docker_compose/compose.yaml @@ -15,6 +15,7 @@ services: GOOGLE_API_KEY: ${GOOGLE_API_KEY} GOOGLE_CSE_ID: ${GOOGLE_CSE_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} restart: unless-stopped depends_on: tei-embedding-service: diff --git a/comps/web_retrievers/src/README.md b/comps/web_retrievers/src/README.md index b276a28b3e..cc96a7e4a1 100644 --- a/comps/web_retrievers/src/README.md +++ b/comps/web_retrievers/src/README.md @@ -46,6 +46,7 @@ export GOOGLE_CSE_ID=${GOOGLE_CSE_ID} export TEI_PORT=6060 export no_proxy=$host_ip,$no_proxy export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 +export TEI_EMBEDDING_ENDPOINT=http://${host_ip}:6060 docker compose -f ../deployment/docker_compose/compose.yaml up web-retriever-service tei-embedding-service -d ``` diff --git a/comps/web_retrievers/src/requirements.txt b/comps/web_retrievers/src/requirements.txt index c3b0134961..d893a630de 100644 --- a/comps/web_retrievers/src/requirements.txt +++ b/comps/web_retrievers/src/requirements.txt @@ -4,6 +4,7 @@ docarray[full] fastapi google-api-python-client>=2.100.0 html2text +huggingface-hub==0.27.1 langchain-huggingface langchain_community opentelemetry-api diff --git a/tests/agent/test_agent_langchain_on_intel_hpu.sh b/tests/agent/test_agent_langchain_on_intel_hpu.sh index c2b7de000a..ae2fd984a7 100644 --- a/tests/agent/test_agent_langchain_on_intel_hpu.sh +++ b/tests/agent/test_agent_langchain_on_intel_hpu.sh @@ -60,7 +60,6 @@ function build_vllm_docker_images() { fi cd ./vllm-fork git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy if [ $? -ne 0 ]; then echo "opea/vllm-gaudi:comps failed" diff --git a/tests/chathistory/test_chathistory_mongo.sh b/tests/chathistory/test_chathistory_mongo.sh index 9f32165be7..4bb098d79c 100644 --- a/tests/chathistory/test_chathistory_mongo.sh +++ b/tests/chathistory/test_chathistory_mongo.sh @@ -16,12 +16,12 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/chathistory-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . 
+ docker build --no-cache -t opea/chathistory-mongo:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/chathistory-mongo-server built fail" + echo "opea/chathistory-mongo built fail" exit 1 else - echo "opea/chathistory-mongo-server built successful" + echo "opea/chathistory-mongo built successful" fi } diff --git a/tests/dataprep/test_dataprep_milvus.sh b/tests/dataprep/test_dataprep_milvus.sh index 603fb671cd..498f14f6f1 100644 --- a/tests/dataprep/test_dataprep_milvus.sh +++ b/tests/dataprep/test_dataprep_milvus.sh @@ -36,6 +36,7 @@ function start_service() { export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export MILVUS_HOST=${ip_address} export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export LOGFLAG=true service_name="dataprep-milvus tei-embedding-serving" cd $WORKPATH/comps/dataprep/deployment/docker_compose/ docker compose up ${service_name} -d diff --git a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh index 2b923bb66d..fbafda69e3 100755 --- a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh +++ b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh @@ -38,6 +38,7 @@ function start_service() { export TEI_EMBEDDER_PORT=12006 export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export EMBED_MODEL=${EMBEDDING_MODEL_ID} export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export LLM_ENDPOINT_PORT=10510 export TGI_LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}" diff --git a/tests/dataprep/test_dataprep_qdrant.sh b/tests/dataprep/test_dataprep_qdrant.sh index 818f99da24..9c31e2d7ab 100644 --- a/tests/dataprep/test_dataprep_qdrant.sh +++ b/tests/dataprep/test_dataprep_qdrant.sh @@ -26,6 +26,7 @@ function build_docker_images() { function start_service() { export host_ip=${ip_address} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export EMBED_MODEL=${EMBEDDING_MODEL_ID} export TEI_EMBEDDER_PORT="10224" export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export COLLECTION_NAME="rag-qdrant" diff --git a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh index 92b29827fe..d040f954a1 100644 --- a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh +++ b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh @@ -13,7 +13,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . if [ $? 
-ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh index d9552e9a0d..a6096bd309 100644 --- a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh index 5d489b250d..8607f2c550 100644 --- a/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh index 7c32a8977b..ea8c9ee6ca 100644 --- a/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/retrievers/test_retrievers_elasticsearch.sh b/tests/retrievers/test_retrievers_elasticsearch.sh index 60996a44ec..a5fd53fb72 100644 --- a/tests/retrievers/test_retrievers_elasticsearch.sh +++ b/tests/retrievers/test_retrievers_elasticsearch.sh @@ -79,6 +79,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=elasticsearch-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_milvus.sh b/tests/retrievers/test_retrievers_milvus.sh index 507f43c5af..05bd69601a 100644 --- a/tests/retrievers/test_retrievers_milvus.sh +++ b/tests/retrievers/test_retrievers_milvus.sh @@ -83,6 +83,8 @@ function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=tei-embedding-serving") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh index f6857f35cb..a819e2e485 100644 --- a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh +++ b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh @@ -46,8 +46,8 @@ function start_service() { export RETRIEVER_PORT=11635 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export DATA_PATH="/data2/cache" - export MAX_INPUT_TOKENS=1024 - export MAX_TOTAL_TOKENS=3000 + export MAX_INPUT_TOKENS=4096 + export MAX_TOTAL_TOKENS=8192 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" @@ -67,7 +67,7 @@ function start_service() { docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \ -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" \ - -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps + -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e MAX_INPUT_LEN=$MAX_INPUT_TOKENS -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps sleep 1m @@ -152,7 +152,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-*") + cid=$(docker ps -aq --filter "name=test-comps-*" --filter "name=neo4j-apoc" --filter "name=tgi-gaudi-server" --filter "name=tei-embedding-serving") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans diff --git a/tests/retrievers/test_retrievers_opensearch.sh b/tests/retrievers/test_retrievers_opensearch.sh index 7a5fc0aeb2..ae49c41a90 100644 --- a/tests/retrievers/test_retrievers_opensearch.sh +++ b/tests/retrievers/test_retrievers_opensearch.sh @@ -75,6 +75,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=opensearch-vector-db" --filter "name=tei-embedding-serving") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pathway.sh b/tests/retrievers/test_retrievers_pathway.sh index 86fadaa812..3dbc2bb301 100644 --- a/tests/retrievers/test_retrievers_pathway.sh +++ b/tests/retrievers/test_retrievers_pathway.sh @@ -69,6 +69,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pathway-db") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pgvector.sh b/tests/retrievers/test_retrievers_pgvector.sh index 021d81a0c2..2a51a3e91d 100644 --- a/tests/retrievers/test_retrievers_pgvector.sh +++ b/tests/retrievers/test_retrievers_pgvector.sh @@ -64,6 +64,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pgvector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_qdrant.sh b/tests/retrievers/test_retrievers_qdrant.sh index da2d343ffc..e50642ac0b 100644 --- a/tests/retrievers/test_retrievers_qdrant.sh +++ b/tests/retrievers/test_retrievers_qdrant.sh @@ -59,6 +59,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=qdrant-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_redis.sh b/tests/retrievers/test_retrievers_redis.sh index 0964049f98..aa2bbe61fc 100644 --- a/tests/retrievers/test_retrievers_redis.sh +++ b/tests/retrievers/test_retrievers_redis.sh @@ -131,6 +131,8 @@ function validate_mm_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=redis-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_vdms.sh b/tests/retrievers/test_retrievers_vdms.sh index cd2b41b53e..732fd91134 100644 --- a/tests/retrievers/test_retrievers_vdms.sh +++ b/tests/retrievers/test_retrievers_vdms.sh @@ -78,6 +78,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=retriever-vdms*" --filter "name=vdms-vector-db" --filter "name=tei-embedding-serving") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() {