From 17306d14b56afcbe7cc1e2374c46fc70037221c6 Mon Sep 17 00:00:00 2001 From: Jitendra Patil Date: Fri, 31 Jan 2025 16:25:31 -0600 Subject: [PATCH 01/17] Update LEGAL_INFORMATION.md about software subject to non-open source licenses (#1247) Signed-off-by: Patil, Jitendra --- LEGAL_INFORMATION.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/LEGAL_INFORMATION.md b/LEGAL_INFORMATION.md index 223566f3eb..41cb6dc31a 100644 --- a/LEGAL_INFORMATION.md +++ b/LEGAL_INFORMATION.md @@ -13,6 +13,9 @@ Your use of the source code for these components is subject to the terms and con See the accompanying [license](LICENSE) file for full license text and copyright notices. +Please note: component(s) depend on software subject to non-open source licenses.  If you use or redistribute this software, it is your sole responsibility to ensure compliance with such licenses. +e.g langserve + ## Citation If you use Generative AI Components in your research, use the following BibTeX entry. From 30e3dea6ab51a54f978e0e10d6b2dfc5116dafb0 Mon Sep 17 00:00:00 2001 From: rbrugaro Date: Fri, 31 Jan 2025 15:36:56 -0800 Subject: [PATCH 02/17] [bugfix] docsum error by HuggingFaceEndpoint (#1246) * Fix bug iin HuggingFaceEndpoint usage 1. Upgrade langchain hugginface from community to partner (community deprecated) Added task=text-generation argument to fix error with tgi_endpoint Signed-off-by: rbrugaro * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: rbrugaro Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> --- comps/llms/src/doc-summarization/integrations/tgi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/comps/llms/src/doc-summarization/integrations/tgi.py b/comps/llms/src/doc-summarization/integrations/tgi.py index 002f8de19f..902334bc03 100644 --- a/comps/llms/src/doc-summarization/integrations/tgi.py +++ b/comps/llms/src/doc-summarization/integrations/tgi.py @@ -4,7 +4,7 @@ import os import requests -from langchain_community.llms import HuggingFaceEndpoint +from langchain_huggingface import HuggingFaceEndpoint from comps import CustomLogger, GeneratedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.cores.proto.api_protocol import DocSumChatCompletionRequest @@ -71,6 +71,7 @@ async def invoke(self, input: DocSumChatCompletionRequest): repetition_penalty=input.repetition_penalty if input.repetition_penalty else 1.03, streaming=input.stream, server_kwargs=server_kwargs, + task="text-generation", ) result = await self.generate(input, self.client) From 3104454c2364da4cc9032f439953f3d3884584a3 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Fri, 31 Jan 2025 17:49:49 -0800 Subject: [PATCH 03/17] Fix port in the data prep redis README file (#1250) Signed-off-by: dmsuehir Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> --- comps/dataprep/src/README_redis.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/dataprep/src/README_redis.md b/comps/dataprep/src/README_redis.md index c6e4555295..d6ff2a6b76 100644 --- a/comps/dataprep/src/README_redis.md +++ b/comps/dataprep/src/README_redis.md @@ -112,7 +112,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e 
http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest +docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) From 119acf2af564943e5948ed1d79804a808020ffed Mon Sep 17 00:00:00 2001 From: Eero Tamminen Date: Tue, 4 Feb 2025 22:21:04 +0200 Subject: [PATCH 04/17] Add Dockerfile for comps-base image (#1127) Signed-off-by: Eero Tamminen --- Dockerfile | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000..b1db30c720 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Base image for GenAIComps based OPEA Python applications +# Build: docker build -t opea/comps-base -f Dockerfile . + +ARG IMAGE_NAME=python +ARG IMAGE_TAG=3.11-slim + +FROM ${IMAGE_NAME}:${IMAGE_TAG} AS base + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR $HOME + +COPY *.toml *.py *.txt *.md LICENSE ./ + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r requirements.txt + +COPY comps/ comps/ + +ENV PYTHONPATH=$PYTHONPATH:$HOME + +USER user + +ENTRYPOINT ["sh", "-c", "set && ls -la"] From 4ede4055255942dee9490ed457851cde2e237fc9 Mon Sep 17 00:00:00 2001 From: Eero Tamminen Date: Wed, 5 Feb 2025 22:04:50 +0200 Subject: [PATCH 05/17] Create token metrics only when they are available (#1092) * Create token metrics only when they are available This avoids generation of useless token/request histogram metrics for services that use Orchestrator class, but never call its token processing functionality. (Helps in differentiating frontend megaservice metrics from backend megaservice ones, especially when multiple OPEA applications run in the same cluster.) Also change Orchestrator CI test workaround to use unique prefix for each metric instance, instead of metrics being (singleton) class variables. Signed-off-by: Eero Tamminen * Add locking for latency metric creation / method change As that that could be called from multiple request handling threads. 
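In essence the change uses double-checked locking: each metric is created only on first use, and the check is repeated under the lock so that two request-handling threads cannot create it twice. A minimal standalone sketch of that pattern is shown below; the class and metric names are illustrative only, and the actual orchestrator change is in the diff that follows.

```python
# Minimal sketch of on-demand, thread-safe Prometheus metric creation (illustrative names).
import threading
import time

from prometheus_client import Histogram


class LazyTokenMetrics:
    def __init__(self, prefix: str) -> None:
        self._prefix = prefix
        self._lock = threading.Lock()
        self.first_token_latency = None  # created only when tokens are actually handled

    def observe_first_token(self, token_start: float) -> float:
        if self.first_token_latency is None:
            with self._lock:
                # re-check inside the lock in case another request thread got here first
                if self.first_token_latency is None:
                    self.first_token_latency = Histogram(
                        f"{self._prefix}_first_token_latency", "First token latency (histogram)"
                    )
        now = time.time()
        self.first_token_latency.observe(now - token_start)
        return now
```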
Signed-off-by: Eero Tamminen --------- Signed-off-by: Eero Tamminen Co-authored-by: Malini Bhandaru --- comps/cores/mega/orchestrator.py | 61 ++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 4053988566..97ee2a76b3 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -7,6 +7,7 @@ import json import os import re +import threading import time from typing import Dict, List @@ -27,20 +28,58 @@ class OrchestratorMetrics: - # Because: + # Need an instance ID for metric prefix because: + # - Orchestror instances are not named # - CI creates several orchestrator instances - # - Prometheus requires metrics to be singletons - # - Oorchestror instances are not provided their own names - # Metrics are class members with "megaservice" name prefix - first_token_latency = Histogram("megaservice_first_token_latency", "First token latency (histogram)") - inter_token_latency = Histogram("megaservice_inter_token_latency", "Inter-token latency (histogram)") - request_latency = Histogram("megaservice_request_latency", "Whole request/reply latency (histogram)") - request_pending = Gauge("megaservice_request_pending", "Count of currently pending requests (gauge)") + # - Prometheus requires metrics (their names) to be unique + _instance_id = 0 def __init__(self) -> None: - pass + self._instance_id += 1 + if self._instance_id > 1: + self._prefix = f"megaservice{self._instance_id}" + else: + self._prefix = "megaservice" + + self.request_pending = Gauge(f"{self._prefix}_request_pending", "Count of currently pending requests (gauge)") + + # locking for latency metric creation / method change + self._lock = threading.Lock() + + # Metrics related to token processing are created on demand, + # to avoid bogus ones for services that never handle tokens + self.first_token_latency = None + self.inter_token_latency = None + self.request_latency = None + + # initial methods to create the metrics + self.token_update = self._token_update_create + self.request_update = self._request_update_create + + def _token_update_create(self, token_start: float, is_first: bool) -> float: + with self._lock: + # in case another thread already got here + if self.token_update == self._token_update_create: + self.first_token_latency = Histogram( + f"{self._prefix}_first_token_latency", "First token latency (histogram)" + ) + self.inter_token_latency = Histogram( + f"{self._prefix}_inter_token_latency", "Inter-token latency (histogram)" + ) + self.token_update = self._token_update_real + return self.token_update(token_start, is_first) + + def _request_update_create(self, req_start: float) -> None: + with self._lock: + # in case another thread already got here + if self.request_update == self._request_update_create: + self.request_latency = Histogram( + f"{self._prefix}_request_latency", "Whole LLM request/reply latency (histogram)" + ) + self.request_update = self._request_update_real + self.request_update(req_start) - def token_update(self, token_start: float, is_first: bool) -> float: + def _token_update_real(self, token_start: float, is_first: bool) -> float: now = time.time() if is_first: self.first_token_latency.observe(now - token_start) @@ -48,7 +87,7 @@ def token_update(self, token_start: float, is_first: bool) -> float: self.inter_token_latency.observe(now - token_start) return now - def request_update(self, req_start: float) -> None: + def _request_update_real(self, req_start: float) 
-> None: self.request_latency.observe(time.time() - req_start) def pending_update(self, increase: bool) -> None: From fa01f46f1ea922a35ea1d68a5aa0f42335b78f56 Mon Sep 17 00:00:00 2001 From: minmin-intel Date: Wed, 5 Feb 2025 13:57:57 -0800 Subject: [PATCH 06/17] fix tei embedding and tei reranking bug (#1256) Signed-off-by: minmin-intel Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> --- comps/embeddings/src/integrations/tei.py | 1 + comps/rerankings/src/integrations/tei.py | 1 + 2 files changed, 2 insertions(+) diff --git a/comps/embeddings/src/integrations/tei.py b/comps/embeddings/src/integrations/tei.py index dd7dd602e1..8d589fb822 100644 --- a/comps/embeddings/src/integrations/tei.py +++ b/comps/embeddings/src/integrations/tei.py @@ -70,6 +70,7 @@ async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: raise TypeError("Unsupported input type: input must be a string or list of strings.") response = await self.client.post( json={"input": texts, "encoding_format": input.encoding_format, "model": input.model, "user": input.user}, + model=f"{self.base_url}/v1/embeddings", task="text-embedding", ) embeddings = json.loads(response.decode()) diff --git a/comps/rerankings/src/integrations/tei.py b/comps/rerankings/src/integrations/tei.py index 8060555fa8..f1ebd9b6aa 100644 --- a/comps/rerankings/src/integrations/tei.py +++ b/comps/rerankings/src/integrations/tei.py @@ -71,6 +71,7 @@ async def invoke( response = await self.client.post( json={"query": query, "texts": docs}, + model=f"{self.base_url}/rerank", task="text-reranking", ) From 5baada85634582137dc3eeed070896d09dfd7086 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 6 Feb 2025 15:03:00 +0800 Subject: [PATCH 07/17] Fix CD test issue. (#1263) 1.Fix template name in README 2.Fix invalid release name Signed-off-by: ZePan110 --- .github/workflows/_run-helm-chart.yml | 3 ++- comps/guardrails/deployment/kubernetes/README.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_run-helm-chart.yml b/.github/workflows/_run-helm-chart.yml index 08fc71db66..0eca64ac4c 100644 --- a/.github/workflows/_run-helm-chart.yml +++ b/.github/workflows/_run-helm-chart.yml @@ -134,8 +134,9 @@ jobs: if [[ "${service,,}" == *"third_parties"* ]]; then CHART_NAME="$(echo "${service,,}"|cut -d'/' -f2)" # bridgetower else - CHART_NAME="${service_name}" # agent + CHART_NAME="${service_name}" # web_retrievers fi + CHART_NAME=$(echo "$CHART_NAME" | tr -cd 'a-z0-9') echo "service_name=$service_name" >> $GITHUB_ENV echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV echo "RELEASE_NAME=${CHART_NAME}$(date +%d%H%M%S)" >> $GITHUB_ENV diff --git a/comps/guardrails/deployment/kubernetes/README.md b/comps/guardrails/deployment/kubernetes/README.md index b309900a07..2b3d3002c1 100644 --- a/comps/guardrails/deployment/kubernetes/README.md +++ b/comps/guardrails/deployment/kubernetes/README.md @@ -7,5 +7,5 @@ ``` export HFTOKEN="insert-your-huggingface-token-here" -helm install guardrails oci://ghcr.io/opea-project/charts/guardrails --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +helm install guardrails oci://ghcr.io/opea-project/charts/guardrails-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml ``` From ecb7f7bb353a78f2c88b9636f238761e2cf90edc Mon Sep 17 00:00:00 2001 From: Spycsh <39623753+Spycsh@users.noreply.github.com> Date: Fri, 7 Feb 2025 16:58:22 +0800 Subject: [PATCH 08/17] Fix web-retrievers hub client and tei endpoint issue (#1270) * fix web-retrievers 
hub client and tei endpoint issue Signed-off-by: Spycsh --- comps/web_retrievers/deployment/docker_compose/compose.yaml | 1 + comps/web_retrievers/src/README.md | 1 + comps/web_retrievers/src/requirements.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/comps/web_retrievers/deployment/docker_compose/compose.yaml b/comps/web_retrievers/deployment/docker_compose/compose.yaml index d626313eb1..276ca6ed4f 100644 --- a/comps/web_retrievers/deployment/docker_compose/compose.yaml +++ b/comps/web_retrievers/deployment/docker_compose/compose.yaml @@ -15,6 +15,7 @@ services: GOOGLE_API_KEY: ${GOOGLE_API_KEY} GOOGLE_CSE_ID: ${GOOGLE_CSE_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} restart: unless-stopped depends_on: tei-embedding-service: diff --git a/comps/web_retrievers/src/README.md b/comps/web_retrievers/src/README.md index b276a28b3e..cc96a7e4a1 100644 --- a/comps/web_retrievers/src/README.md +++ b/comps/web_retrievers/src/README.md @@ -46,6 +46,7 @@ export GOOGLE_CSE_ID=${GOOGLE_CSE_ID} export TEI_PORT=6060 export no_proxy=$host_ip,$no_proxy export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 +export TEI_EMBEDDING_ENDPOINT=http://${host_ip}:6060 docker compose -f ../deployment/docker_compose/compose.yaml up web-retriever-service tei-embedding-service -d ``` diff --git a/comps/web_retrievers/src/requirements.txt b/comps/web_retrievers/src/requirements.txt index c3b0134961..d893a630de 100644 --- a/comps/web_retrievers/src/requirements.txt +++ b/comps/web_retrievers/src/requirements.txt @@ -4,6 +4,7 @@ docarray[full] fastapi google-api-python-client>=2.100.0 html2text +huggingface-hub==0.27.1 langchain-huggingface langchain_community opentelemetry-api From fb86b5e5a04ce0393912e74b516971c1333ed754 Mon Sep 17 00:00:00 2001 From: Louie Tsai Date: Sat, 8 Feb 2025 00:58:33 -0800 Subject: [PATCH 09/17] Add Deepseek model into validated model table and add required Gaudi cards for LLM microservice (#1267) * Update README.md for Deepseek support and numbers of required gaudi cards Signed-off-by: Tsai, Louie * Update README.md Signed-off-by: Tsai, Louie --------- Signed-off-by: Tsai, Louie --- comps/llms/src/text-generation/README.md | 43 +++++++++++++++++------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/comps/llms/src/text-generation/README.md b/comps/llms/src/text-generation/README.md index 360c459dc1..ba1a31df39 100644 --- a/comps/llms/src/text-generation/README.md +++ b/comps/llms/src/text-generation/README.md @@ -8,14 +8,31 @@ Overall, this microservice offers a streamlined way to integrate large language ## Validated LLM Models -| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | -| --------------------------- | --------- | -------- | ---------- | -| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | -| [Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | -| [Llama-2-70b-chat-hf] | ✓ | - | ✓ | -| [Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | -| [Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | -| [Phi-3] | x | Limit 4K | Limit 4K | +| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | +| ------------------------------------------- | --------- | -------- | ---------- | +| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-70b-chat-hf] | ✓ | - | ✓ | +| [meta-llama/Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | +| [meta-llama/Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | +| [Phi-3] | x | Limit 4K | Limit 4K | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | ✓ | - | ✓ | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | ✓ 
| - | ✓ | + +### System Requirements for LLM Models + +| Model | Minimum number of Gaudi cards | +| ------------------------------------------- | ----------------------------- | +| [Intel/neural-chat-7b-v3-3] | 1 | +| [meta-llama/Llama-2-7b-chat-hf] | 1 | +| [meta-llama/Llama-2-70b-chat-hf] | 2 | +| [meta-llama/Meta-Llama-3-8B-Instruct] | 1 | +| [meta-llama/Meta-Llama-3-70B-Instruct] | 2 | +| [Phi-3] | x | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | 8 | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | 4 | + +> NOTE: Detailed system requirements coming soon. ## Support integrations @@ -166,9 +183,11 @@ curl http://${host_ip}:${TEXTGEN_PORT}/v1/chat/completions \ [Intel/neural-chat-7b-v3-3]: https://huggingface.co/Intel/neural-chat-7b-v3-3 -[Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -[Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -[Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct -[Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct +[meta-llama/Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf +[meta-llama/Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf +[meta-llama/Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct +[meta-llama/Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct [Phi-3]: https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3 [HuggingFace]: https://huggingface.co/ +[deepseek-ai/DeepSeek-R1-Distill-Llama-70B]: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B +[deepseek-ai/DeepSeek-R1-Distill-Qwen-32B]: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B From 0df374bc39a4e98bb5790da7985067c7be571487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20De=20Le=C3=B3n?= <111013930+daniel-de-leon-user293@users.noreply.github.com> Date: Sun, 9 Feb 2025 22:01:58 -0800 Subject: [PATCH 10/17] Update docs for LLamaGuard & WildGuard Microservice (#1259) * working README for CLI and compose Signed-off-by: Daniel Deleon * update for direct python execution Signed-off-by: Daniel Deleon * fix formatting Signed-off-by: Daniel Deleon * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * bring back depends_on condition Signed-off-by: Daniel Deleon --------- Signed-off-by: Daniel Deleon Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> --- comps/guardrails/src/guardrails/README.md | 185 ++++++++++------------ comps/third_parties/tgi/README.md | 4 +- 2 files changed, 88 insertions(+), 101 deletions(-) diff --git a/comps/guardrails/src/guardrails/README.md b/comps/guardrails/src/guardrails/README.md index 7794d9490f..15679f706e 100644 --- a/comps/guardrails/src/guardrails/README.md +++ b/comps/guardrails/src/guardrails/README.md @@ -9,9 +9,9 @@ The Guardrails Microservice now offers two primary types of guardrails: - Input Guardrails: These are applied to user inputs. An input guardrail can either reject the input, halting further processing. - Output Guardrails: These are applied to outputs generated by the LLM. An output guardrail can reject the output, preventing it from being returned to the user. 
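For a quick sense of how a caller uses these guardrails, the sketch below posts text to the `/v1/guardrails` endpoint documented in section 3 of this README and treats any "Violated policies" reply as a rejection. The port default and helper name are assumptions for illustration; equivalent curl commands appear later in this document.

```python
# Illustrative client for the guardrail microservice; not part of this repository.
import os

import requests

GUARDRAIL_URL = f"http://localhost:{os.getenv('GUARDRAIL_PORT', '9090')}/v1/guardrails"


def is_safe(text: str) -> bool:
    """Return True when the guardrail service does not flag the text."""
    resp = requests.post(
        GUARDRAIL_URL,
        json={"text": text, "parameters": {"max_new_tokens": 32}},
        timeout=30,
    )
    resp.raise_for_status()
    # Unsafe content is reported as "Violated policies: <category>, please check your input."
    return "Violated policies" not in resp.text


if __name__ == "__main__":
    print(is_safe("How do you buy a tiger in the US?"))  # expected: False
```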
-## LlamaGuard +**This microservice supports Meta's [Llama Guard](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B) and Allen Institute for AI's [WildGuard](https://huggingface.co/allenai/wildguard) models.** -We offer content moderation support utilizing Meta's [Llama Guard](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B) model. +## Llama Guard Any content that is detected in the following categories is determined as unsafe: @@ -22,111 +22,84 @@ Any content that is detected in the following categories is determined as unsafe - Regulated or Controlled Substances - Suicide & Self Harm -### 🚀1. Start Microservice with Python (Option 1) - -To start the Guardrails microservice, you need to install python packages first. +## WildGuard -#### 1.1 Install Requirements +`allenai/wildguard` was fine-tuned from `mistralai/Mistral-7B-v0.3` on their own [`allenai/wildguardmix`](https://huggingface.co/datasets/allenai/wildguardmix) dataset. Any content that is detected in the following categories is determined as unsafe: -```bash -pip install -r requirements.txt -``` +- Privacy +- Misinformation +- Harmful Language +- Malicious Uses -#### 1.2 Start TGI Gaudi Service +## Clone OPEA GenAIComps and set initial environment variables ```bash -export HF_TOKEN=${your_hf_api_token} -volume=$PWD/data -model_id="meta-llama/Meta-Llama-Guard-2-8B" -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 +git clone https://github.com/opea-project/GenAIComps.git +export OPEA_GENAICOMPS_ROOT=$(pwd)/GenAIComps +export GUARDRAIL_PORT=9090 ``` -#### 1.3 Verify the TGI Gaudi Service +## Start up the HuggingFace Text Generation Inference (TGI) Server -```bash -curl 127.0.0.1:8088/generate \ - -X POST \ - -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` +Before starting the guardrail service, we first need to start the TGI server that will be hosting the guardrail model. -#### 1.4 Start Guardrails Service +Choose one of the following before starting your TGI server. -Optional: If you have deployed a Guardrails model with TGI Gaudi Service other than default model (i.e., `meta-llama/Meta-Llama-Guard-2-8B`) [from section 1.2](#12-start-tgi-gaudi-service), you will need to add the eviornment variable `SAFETY_GUARD_MODEL_ID` containing the model id. For example, the following informs the Guardrails Service the deployed model used LlamaGuard2: +**For LlamaGuard:** ```bash export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" +export GUARDRAILS_COMPONENT_NAME=OPEA_LLAMA_GUARD ``` +Or + ```bash -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -python guardrails_tgi.py +export SAFETY_GUARD_MODEL_ID="meta-llama/LlamaGuard-7b" +export GUARDRAILS_COMPONENT_NAME=OPEA_LLAMA_GUARD ``` -### 🚀2. Start Microservice with Docker (Option 2) - -If you start an Guardrails microservice with docker, the `docker_compose_guardrails.yaml` file will automatically start a TGI gaudi service with docker. 
+_Other variations of LlamaGuard are also an option to use but are not guaranteed to work OOB._ -#### 2.1 Setup Environment Variables - -In order to start TGI and LLM services, you need to setup the following environment variables first. +**For Wild Guard:** ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -export LLM_MODEL_ID=${your_hf_llm_model} +export SAFETY_GUARD_MODEL_ID="allenai/wildguard" +export GUARDRAILS_COMPONENT_NAME=OPEA_WILD_GUARD ``` -#### 2.2 Build Docker Image - -```bash -cd ../../../../ -docker build -t opea/guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . -``` +_Note that both of these models are gated and you need to complete their form on their associated model pages first in order to use them with your HF token._ -#### 2.3 Run Docker with CLI +Follow the steps [here](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/tgi) to start the TGI server container where LLM_MODEL_ID is set to your SAFETY_GUARD_MODEL_ID like below: ```bash -docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/guardrails:latest +export LLM_MODEL_ID=$SAFETY_GUARD_MODEL_ID ``` -#### 2.4 Run Docker with Docker Compose +Once the container is starting up and loading the model, set the endpoint that you will use to make requests to the TGI server: ```bash -cd deployment/docker_compose/ -docker compose -f compose_llamaguard.yaml up -d +export SAFETY_GUARD_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" ``` -### 🚀3. Consume Guardrails Service +**Verify that the TGI Server is ready for inference** -#### 3.1 Check Service Status +First check that the TGI server successfully loaded the guardrail model. Loading the model could take up to 5-10 minutes. You can do this by running the following: ```bash -curl http://localhost:9090/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' +docker logs tgi-gaudi-server ``` -#### 3.2 Consume Guardrails Service +If the last line of the log contains something like `INFO text_generation_router::server: router/src/server.rs:2209: Connected` then your TGI server is ready and the following curl should work: ```bash -curl http://localhost:9090/v1/guardrails\ +curl localhost:${LLM_ENDPOINT_PORT}/generate \ -X POST \ - -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ -H 'Content-Type: application/json' ``` -## WildGuard - -We also offer content moderation support utilizing Allen Institute for AI's [WildGuard](https://huggingface.co/allenai/wildguard) model. - -`allenai/wildguard` was fine-tuned from `mistralai/Mistral-7B-v0.3` on their own [`allenai/wildguardmix`](https://huggingface.co/datasets/allenai/wildguardmix) dataset. Any content that is detected in the following categories is determined as unsafe: - -- Privacy -- Misinformation -- Harmful Language -- Malicious Uses +Check the logs again with the `logs` command to confirm that the curl request resulted in `Success`. ### 🚀1. Start Microservice with Python (Option 1) @@ -135,67 +108,76 @@ To start the Guardrails microservice, you need to install python packages first. 
#### 1.1 Install Requirements ```bash +pip install $OPEA_GENAICOMPS_ROOT +cd $OPEA_GENAICOMPS_ROOT/comps/guardrails/src/guardrails pip install -r requirements.txt ``` -#### 1.2 Start TGI Gaudi Service +#### 1.2 Start Guardrails Service ```bash -export HF_TOKEN=${your_hf_api_token} -volume=$PWD/data -model_id="allenai/wildguard" -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 +python opea_guardrails_microservice.py ``` -#### 1.3 Verify the TGI Gaudi Service +### 🚀2. Start Microservice with Docker (Option 2) -```bash -curl 127.0.0.1:8088/generate \ - -X POST \ - -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` +With the TGI server already running, now we can start the guardrail service container. -#### 1.4 Start Guardrails Service +#### 2.1 Build Docker Image ```bash -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -python guardrails_tgi.py +cd $OPEA_GENAICOMPS_ROOT +docker build -t opea/guardrails:latest \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -f comps/guardrails/src/guardrails/Dockerfile . ``` -### 🚀2. Start Microservice with Docker (Option 2) - -If you start an Guardrails microservice with docker, the `compose_wildguard.yaml` file will automatically start a TGI gaudi service with docker. - -#### 2.1 Setup Environment Variables +#### 2.2.a Run with Docker Compose (Option A) -In order to start TGI and LLM services, you need to setup the following environment variables first. +**To run with LLama Guard:** ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -export LLM_MODEL_ID=${your_hf_llm_model} +docker compose -f $OPEA_GENAICOMPS_ROOT/comps/guardrails/deployment/docker_compose/compose.yaml up -d llamaguard-guardrails-server ``` -#### 2.2 Build Docker Image +**To run with WildGuard:** ```bash -cd ../../../../ -docker build -t opea/guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/src/guardrails/Dockerfile . 
+docker compose -f $OPEA_GENAICOMPS_ROOT/comps/guardrails/deployment/docker_compose/compose.yaml up -d wildguard-guardrails-server ``` -#### 2.3 Run Docker with CLI +#### 2.2.b Run Docker with CLI (Option B) + +**To run with LLama Guard:** ```bash -docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e GUARDRAILS_COMPONENT_NAME="OPEA_WILD_GUARD" opea/guardrails:latest +docker run -d \ + --name="llamaguard-guardrails-server" \ + -p ${GUARDRAIL_PORT}:${GUARDRAIL_PORT} \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT \ + -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN \ + opea/guardrails:latest ``` -#### 2.4 Run Docker with Docker Compose +**To run with WildGuard:** ```bash -cd deployment/docker_compose/ -docker compose -f compose_wildguard.yaml up -d +docker run -d \ + --name="wildguard-guardrails-server" \ + -p ${GUARDRAIL_PORT}:${GUARDRAIL_PORT} \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT \ + -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN \ + -e GUARDRAILS_COMPONENT_NAME="OPEA_WILD_GUARD" \ + opea/guardrails:latest ``` ### 🚀3. Consume Guardrails Service @@ -203,7 +185,7 @@ docker compose -f compose_wildguard.yaml up -d #### 3.1 Check Service Status ```bash -curl http://localhost:9090/v1/health_check \ +curl http://localhost:${GUARDRAIL_PORT}/v1/health_check\ -X GET \ -H 'Content-Type: application/json' ``` @@ -211,8 +193,13 @@ curl http://localhost:9090/v1/health_check \ #### 3.2 Consume Guardrails Service ```bash -curl http://localhost:9090/v1/guardrails \ +curl http://localhost:${GUARDRAIL_PORT}/v1/guardrails\ -X POST \ -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ -H 'Content-Type: application/json' ``` + +This request should return text containing: +`"Violated policies: , please check your input."` + +Where `category` is `Violent Crimes` or `harmful` for `Llama-Guard-2-8B` or `wildguard`, respectively. diff --git a/comps/third_parties/tgi/README.md b/comps/third_parties/tgi/README.md index e12f6d34da..07fb28e8eb 100644 --- a/comps/third_parties/tgi/README.md +++ b/comps/third_parties/tgi/README.md @@ -19,12 +19,12 @@ Run tgi on xeon. ```bash cd deplopyment/docker_compose -docker compose -f compose.yaml tgi-server up -d +docker compose -f compose.yaml up -d tgi-server ``` Run tgi on gaudi. ```bash cd deplopyment/docker_compose -docker compose -f compose.yaml tgi-gaudi-server up -d +docker compose -f compose.yaml up -d tgi-gaudi-server ``` From b777db72c03afade52662d7bfb1d85bfa02ad51f Mon Sep 17 00:00:00 2001 From: Letong Han <106566639+letonghan@users.noreply.github.com> Date: Mon, 10 Feb 2025 16:00:55 +0800 Subject: [PATCH 11/17] Fix Dataprep Ingest Data Issue. (#1271) * Fix Dataprep Ingest Data Issue. Trace: 1. The update of `langchain_huggingface.HuggingFaceEndpointEmbeddings` caused the wrong size of embedding vectors. 2. Wrong size vectors are wrongly saved into Redis database, and the indices are not created correctly. 3. The retriever can not retrieve data from Redis using index due to the reasons above. 4. Then the RAG seems `not work`, for the file uploaded can not be retrieved from database. 
Solution: Replace all of the `langchain_huggingface.HuggingFaceEndpointEmbeddings` to `langchain_community.embeddings.HuggingFaceInferenceAPIEmbeddings`, and modify related READMEs and scirpts. Related issue: - https://github.com/opea-project/GenAIExamples/issues/1473 - https://github.com/opea-project/GenAIExamples/issues/1482 --------- Signed-off-by: letonghan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../deployment/docker_compose/compose.yaml | 5 +-- comps/dataprep/src/README_elasticsearch.md | 2 +- comps/dataprep/src/README_milvus.md | 5 ++- comps/dataprep/src/README_opensearch.md | 6 ++-- comps/dataprep/src/README_pgvector.md | 4 ++- comps/dataprep/src/README_redis.md | 5 ++- comps/dataprep/src/README_vdms.md | 6 ++-- .../src/integrations/elasticsearch.py | 31 ++++++++++++---- comps/dataprep/src/integrations/milvus.py | 36 ++++++++++++++----- .../src/integrations/neo4j_langchain.py | 3 +- comps/dataprep/src/integrations/opensearch.py | 26 +++++++++++--- comps/dataprep/src/integrations/pgvect.py | 26 +++++++++++--- comps/dataprep/src/integrations/pipecone.py | 26 +++++++++++--- comps/dataprep/src/integrations/qdrant.py | 25 ++++++++++--- comps/dataprep/src/integrations/redis.py | 22 ++++++++++-- comps/dataprep/src/integrations/vdms.py | 27 ++++++++++---- tests/dataprep/test_dataprep_milvus.sh | 1 + .../test_dataprep_neo4j_on_intel_hpu.sh | 1 + tests/dataprep/test_dataprep_qdrant.sh | 1 + 19 files changed, 199 insertions(+), 59 deletions(-) diff --git a/comps/dataprep/deployment/docker_compose/compose.yaml b/comps/dataprep/deployment/docker_compose/compose.yaml index ef54a69e0c..8fff22cd82 100644 --- a/comps/dataprep/deployment/docker_compose/compose.yaml +++ b/comps/dataprep/deployment/docker_compose/compose.yaml @@ -28,7 +28,7 @@ services: DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_ELASTICSEARCH" ES_CONNECTION_STRING: ${ES_CONNECTION_STRING} INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped depends_on: @@ -49,6 +49,7 @@ services: MILVUS_HOST: ${MILVUS_HOST} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} restart: unless-stopped depends_on: tei-embedding-serving: @@ -161,7 +162,7 @@ services: QDRANT_HOST: ${QDRANT_HOST} QDRANT_PORT: ${QDRANT_PORT} COLLECTION_NAME: ${COLLECTION_NAME} - TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped diff --git a/comps/dataprep/src/README_elasticsearch.md b/comps/dataprep/src/README_elasticsearch.md index ab4b8547b5..94d8d47ba1 100644 --- a/comps/dataprep/src/README_elasticsearch.md +++ b/comps/dataprep/src/README_elasticsearch.md @@ -50,7 +50,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest +docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e 
HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_milvus.md b/comps/dataprep/src/README_milvus.md index bcf545f72b..3e79ff6976 100644 --- a/comps/dataprep/src/README_milvus.md +++ b/comps/dataprep/src/README_milvus.md @@ -25,6 +25,7 @@ export MILVUS_HOST=${your_milvus_host_ip} export MILVUS_PORT=19530 export COLLECTION_NAME=${your_collection_name} export TEI_EMBEDDING_ENDPOINT=${your_embedding_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_huggingface_api_token} ``` ### 1.4 Start TEI Embedding Service @@ -70,13 +71,15 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ```bash export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export EMBEDDING_MODEL_ID=${your_embedding_model_id} export MILVUS_HOST=${your_host_ip} ``` ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest +docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest ``` ### 2.4 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_opensearch.md b/comps/dataprep/src/README_opensearch.md index b5d14c9a9d..b5f1c2c26c 100644 --- a/comps/dataprep/src/README_opensearch.md +++ b/comps/dataprep/src/README_opensearch.md @@ -51,7 +51,7 @@ curl localhost:$your_port/embed \ After checking that it works, set up environment variables. ```bash -export TEI_ENDPOINT="http://localhost:$your_port" +export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" ``` ### 1.4 Start Document Preparation Microservice for OpenSearch with Python Script @@ -75,7 +75,7 @@ Please refer to this [readme](../../third_parties/opensearch/src/README.md). 
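The environment block below renames `TEI_ENDPOINT` to `TEI_EMBEDDING_ENDPOINT`. Before pointing the dataprep service at it, it can be worth confirming that the TEI server is reachable and returns vectors of the expected size; a small Python version of the curl `/embed` check shown earlier in this README might look like the following sketch (the endpoint default is an assumption).

```python
# Illustrative sanity check of a running TEI embedding endpoint (not part of this patch).
import os

import requests

endpoint = os.environ.get("TEI_EMBEDDING_ENDPOINT", "http://localhost:6006")

# /info reports which model the server is hosting.
info = requests.get(f"{endpoint}/info", timeout=10).json()
print("model_id:", info["model_id"])

# /embed returns one vector per input string; the vector index must match this dimension.
vectors = requests.post(f"{endpoint}/embed", json={"inputs": "What is Deep Learning?"}, timeout=10).json()
print("embedding dimension:", len(vectors[0]))
```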
```bash export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_ENDPOINT="http://${your_ip}:6006" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006" export OPENSEARCH_URL="http://${your_ip}:9200" export INDEX_NAME=${your_index_name} export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} @@ -97,7 +97,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil - option 1: Start single-process version (for processing up to 10 files) ```bash -docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest +docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) diff --git a/comps/dataprep/src/README_pgvector.md b/comps/dataprep/src/README_pgvector.md index 92d0db577a..1fdb678e7f 100644 --- a/comps/dataprep/src/README_pgvector.md +++ b/comps/dataprep/src/README_pgvector.md @@ -38,6 +38,8 @@ Please refer to this [readme](../../third_parties/pgvector/src/README.md). ```bash export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb export INDEX_NAME=${your_index_name} +export TEI_EMBEDDING_ENDPOINT=${your_tei_embedding_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ``` ### 2.3 Build Docker Image @@ -50,7 +52,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest +docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B) diff --git a/comps/dataprep/src/README_redis.md b/comps/dataprep/src/README_redis.md index d6ff2a6b76..69f3ae39af 100644 --- a/comps/dataprep/src/README_redis.md +++ b/comps/dataprep/src/README_redis.md @@ -95,8 +95,7 @@ Please refer to this [readme](../../third_parties/redis/src/README.md). 
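The `TEI_EMBEDDING_ENDPOINT` and `HUGGINGFACEHUB_API_TOKEN` exported in the section below feed the embedder selection this patch introduces across the dataprep integrations (see the `redis.py` hunk further down). A condensed, standalone sketch of that pattern, with defaults taken from the patch:

```python
# Condensed sketch of the embedder selection pattern introduced by this patch.
import os

import requests
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings

TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "")
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")

if TEI_EMBEDDING_ENDPOINT and HUGGINGFACEHUB_API_TOKEN:
    # Ask the TEI server which model it serves, then embed through the remote endpoint.
    model_id = requests.get(f"{TEI_EMBEDDING_ENDPOINT}/info", timeout=10).json()["model_id"]
    embedder = HuggingFaceInferenceAPIEmbeddings(
        api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT
    )
else:
    # Fall back to a local embedding model.
    embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)

print(len(embedder.embed_query("hello")))  # vector size the Redis index will be built with
```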
### 2.2 Setup Environment Variables ```bash -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_ENDPOINT="http://${your_ip}:6006" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006" export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} @@ -112,7 +111,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil ### 2.4 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest +docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest ``` ### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) diff --git a/comps/dataprep/src/README_vdms.md b/comps/dataprep/src/README_vdms.md index 7571ca80d2..7318282c4e 100644 --- a/comps/dataprep/src/README_vdms.md +++ b/comps/dataprep/src/README_vdms.md @@ -69,7 +69,8 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export VDMS_HOST=${host_ip} export VDMS_PORT=55555 -export TEI_ENDPOINT=${your_tei_endpoint} +export TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export COLLECTION_NAME=${your_collection_name} export SEARCH_ENGINE="FaissFlat" export DISTANCE_STRATEGY="L2" @@ -89,7 +90,8 @@ Start single-process version (for 1-10 files processing) ```bash docker run -d --name="dataprep-vdms-server" -p 6007:6007 --runtime=runc --ipc=host \ --e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_ENDPOINT=$TEI_ENDPOINT \ +-e http_proxy=$http_proxy -e https_proxy=$https_proxy \ +-e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ -e COLLECTION_NAME=$COLLECTION_NAME -e VDMS_HOST=$VDMS_HOST -e VDMS_PORT=$VDMS_PORT \ -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_VDMS" opea/dataprep:latest ``` diff --git a/comps/dataprep/src/integrations/elasticsearch.py b/comps/dataprep/src/integrations/elasticsearch.py index ed07d157ea..83e422741e 100644 --- a/comps/dataprep/src/integrations/elasticsearch.py +++ b/comps/dataprep/src/integrations/elasticsearch.py @@ -9,10 +9,9 @@ from elasticsearch import Elasticsearch from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_core.documents import Document from langchain_elasticsearch import ElasticsearchStore -from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.dataprep.src.utils import ( @@ -37,7 +36,9 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # TEI Embedding endpoints -TEI_ENDPOINT = os.getenv("TEI_ENDPOINT", "") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API 
token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag-elastic") @@ -77,15 +78,31 @@ def create_index(self) -> None: if not self.es_client.indices.exists(index=INDEX_NAME): self.es_client.indices.create(index=INDEX_NAME) - def get_embedder(self) -> Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]: + def get_embedder(self) -> Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings]: """Obtain required Embedder.""" - if TEI_ENDPOINT: - return HuggingFaceEndpointEmbeddings(model=TEI_ENDPOINT) + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` and the `EMBED_MODEL` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." + ) + model_id = response.json()["model_id"] + embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) + return embedder else: return HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) def get_elastic_store( - self, embedder: Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings] + self, embedder: Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings] ) -> ElasticsearchStore: """Get Elasticsearch vector store.""" return ElasticsearchStore(index_name=INDEX_NAME, embedding=embedder, es_connection=self.es_client) diff --git a/comps/dataprep/src/integrations/milvus.py b/comps/dataprep/src/integrations/milvus.py index c3e3e57309..c4aecf86e4 100644 --- a/comps/dataprep/src/integrations/milvus.py +++ b/comps/dataprep/src/integrations/milvus.py @@ -10,7 +10,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings, OpenAIEmbeddings from langchain_core.documents import Document from langchain_milvus.vectorstores import Milvus from langchain_text_splitters import HTMLHeaderTextSplitter @@ -36,8 +36,11 @@ # Local Embedding model LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") # TEI configuration -TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") +EMBED_MODEL = os.environ.get("EMBED_MODEL", "BAAI/bge-base-en-v1.5") TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + # MILVUS configuration MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) @@ -75,7 +78,7 @@ def ingest_chunks_to_milvus(embeddings, file_name: str, chunks: List): except Exception as e: if logflag: logger.info(f"[ ingest chunks ] fail to ingest chunks into Milvus. 
error: {e}") - raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}: {e}") if logflag: logger.info(f"[ ingest chunks ] Docs ingested file {file_name} to Milvus collection {COLLECTION_NAME}.") @@ -189,7 +192,23 @@ def _initialize_embedder(self): # create embeddings using TEI endpoint service if logflag: logger.info(f"[ milvus embedding ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") - embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." + ) + model_id = response.json()["model_id"] + # create embeddings using TEI endpoint service + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model if logflag: @@ -274,7 +293,7 @@ async def ingest_files( search_res = search_by_file(my_milvus.col, encode_file) except Exception as e: raise HTTPException( - status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}." + status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}: {e}" ) if len(search_res) > 0: if logflag: @@ -319,7 +338,7 @@ async def ingest_files( search_res = search_by_file(my_milvus.col, encoded_link + ".txt") except Exception as e: raise HTTPException( - status_code=500, detail=f"Failed when searching in Milvus db for link {link}." + status_code=500, detail=f"Failed when searching in Milvus db for link {link}: {e}" ) if len(search_res) > 0: if logflag: @@ -375,7 +394,7 @@ async def get_files(self): try: all_data = search_all(my_milvus.col) except Exception as e: - raise HTTPException(status_code=500, detail="Failed when searching in Milvus db for all files.") + raise HTTPException(status_code=500, detail=f"Failed when searching in Milvus db for all files: {e}") # return [] if no data in db if len(all_data) == 0: @@ -422,8 +441,7 @@ async def delete_files(self, file_path: str = Body(..., embed=True)): except Exception as e: if logflag: logger.info(f"[ milvus delete ] {e}. 
Fail to delete {upload_folder}.") - raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") - + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}: {e}") if logflag: logger.info("[ milvus delete ] successfully delete all files.") diff --git a/comps/dataprep/src/integrations/neo4j_langchain.py b/comps/dataprep/src/integrations/neo4j_langchain.py index ba03437972..75b23252d8 100644 --- a/comps/dataprep/src/integrations/neo4j_langchain.py +++ b/comps/dataprep/src/integrations/neo4j_langchain.py @@ -34,10 +34,9 @@ NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") -# LLM/Embedding endpoints +# LLM endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") OPENAI_KEY = os.getenv("OPENAI_API_KEY") diff --git a/comps/dataprep/src/integrations/opensearch.py b/comps/dataprep/src/integrations/opensearch.py index 6f4b10bbd1..8df0104004 100644 --- a/comps/dataprep/src/integrations/opensearch.py +++ b/comps/dataprep/src/integrations/opensearch.py @@ -7,9 +7,8 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import OpenSearchVectorSearch -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from opensearchpy import OpenSearch @@ -79,9 +78,26 @@ def __init__(self, name: str, description: str, config: dict = None): self.upload_folder = "./uploaded_files/" super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) # Initialize embeddings - tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") - if tei_embedding_endpoint: - self.embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") + HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + self.embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: self.embeddings = HuggingFaceBgeEmbeddings(model_name=Config.EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/pgvect.py b/comps/dataprep/src/integrations/pgvect.py index 43b38e5d6d..d9eb698782 100644 --- a/comps/dataprep/src/integrations/pgvect.py +++ b/comps/dataprep/src/integrations/pgvect.py @@ -10,7 +10,7 @@ import psycopg2 from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import PGVector from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType @@ -30,6 +30,10 @@ # Embedding model EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +# TEI Embedding endpoints +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") PG_CONNECTION_STRING = os.getenv("PG_CONNECTION_STRING", "localhost") @@ -47,12 +51,26 @@ class OpeaPgvectorDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/pipecone.py b/comps/dataprep/src/integrations/pipecone.py index 33ffeea4b6..ec03174608 100644 --- a/comps/dataprep/src/integrations/pipecone.py +++ b/comps/dataprep/src/integrations/pipecone.py @@ -8,7 +8,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_pinecone import PineconeVectorStore from langchain_text_splitters import HTMLHeaderTextSplitter from pinecone import Pinecone, ServerlessSpec @@ -39,7 +39,9 @@ # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") @OpeaComponentRegistry.register("OPEA_DATAPREP_PINECONE") @@ -48,12 +50,26 @@ class OpeaPineConeDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/qdrant.py b/comps/dataprep/src/integrations/qdrant.py index e54c6c572b..62a9efa21a 100644 --- a/comps/dataprep/src/integrations/qdrant.py +++ b/comps/dataprep/src/integrations/qdrant.py @@ -7,9 +7,8 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import Qdrant -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from qdrant_client import QdrantClient @@ -38,7 +37,9 @@ # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") @OpeaComponentRegistry.register("OPEA_DATAPREP_QDRANT") @@ -47,12 +48,26 @@ class OpeaQdrantDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") self.upload_folder = "./uploaded_files/" # Create vectorstore if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/redis.py b/comps/dataprep/src/integrations/redis.py index 06cb0d7f27..a181013bcd 100644 --- a/comps/dataprep/src/integrations/redis.py +++ b/comps/dataprep/src/integrations/redis.py @@ -11,9 +11,8 @@ import redis from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores import Redis -from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from redis.commands.search.field import TextField from redis.commands.search.indexDefinition import IndexDefinition, IndexType @@ -40,6 +39,8 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # TEI Embedding endpoints TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag_redis") @@ -187,8 +188,23 @@ def ingest_chunks_to_redis(file_name: str, chunks: List): logger.info(f"[ redis ingest chunks ] file name: {file_name}") # Create vectorstore if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/comps/dataprep/src/integrations/vdms.py b/comps/dataprep/src/integrations/vdms.py index 998b23a5c7..e4085b1812 100644 --- a/comps/dataprep/src/integrations/vdms.py +++ b/comps/dataprep/src/integrations/vdms.py @@ -7,7 +7,7 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from langchain_community.vectorstores.vdms import VDMS, VDMS_Client from langchain_text_splitters import HTMLHeaderTextSplitter @@ -28,7 +28,6 @@ def getEnv(key, default_value=None): env_value = os.getenv(key, default=default_value) - print(f"{key}: {env_value}") return env_value @@ -45,7 +44,9 @@ def getEnv(key, default_value=None): # LLM/Embedding endpoints TGI_LLM_ENDPOINT = getEnv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = getEnv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = getEnv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") # chunk parameters CHUNK_SIZE = getEnv("CHUNK_SIZE", 1500) @@ -58,14 +59,28 @@ class OpeaVdmsDataprep(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) - self.tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") self.upload_folder = "./uploaded_files/" create_upload_folder(self.upload_folder) self.client = VDMS_Client(VDMS_HOST, int(VDMS_PORT)) # Create vectorstore - if self.tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] # create embeddings using TEI endpoint service - self.embedder = HuggingFaceHubEmbeddings(model=self.tei_embedding_endpoint) + self.embedder = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) else: # create embeddings using local embedding model self.embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/tests/dataprep/test_dataprep_milvus.sh b/tests/dataprep/test_dataprep_milvus.sh index 603fb671cd..498f14f6f1 100644 --- a/tests/dataprep/test_dataprep_milvus.sh +++ b/tests/dataprep/test_dataprep_milvus.sh @@ -36,6 +36,7 @@ function start_service() { export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export MILVUS_HOST=${ip_address} export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + export LOGFLAG=true service_name="dataprep-milvus tei-embedding-serving" cd $WORKPATH/comps/dataprep/deployment/docker_compose/ docker compose up ${service_name} -d diff --git a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh index 2b923bb66d..fbafda69e3 100755 --- a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh +++ b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh @@ -38,6 +38,7 @@ function start_service() { export TEI_EMBEDDER_PORT=12006 export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export EMBED_MODEL=${EMBEDDING_MODEL_ID} export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export LLM_ENDPOINT_PORT=10510 export TGI_LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}" diff --git a/tests/dataprep/test_dataprep_qdrant.sh b/tests/dataprep/test_dataprep_qdrant.sh index 818f99da24..9c31e2d7ab 100644 --- a/tests/dataprep/test_dataprep_qdrant.sh +++ b/tests/dataprep/test_dataprep_qdrant.sh @@ -26,6 +26,7 @@ function build_docker_images() { function start_service() { export host_ip=${ip_address} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export EMBED_MODEL=${EMBEDDING_MODEL_ID} export TEI_EMBEDDER_PORT="10224" export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" export COLLECTION_NAME="rag-qdrant" From 17b96729433d515d33043b0d09fb812b75f6ac6b Mon Sep 17 00:00:00 2001 From: XinyaoWa Date: Wed, 12 Feb 2025 13:53:31 +0800 Subject: [PATCH 12/17] Fix langchain and huggingface version to avoid bug in FaqGen and DocSum, remove vllm hpu triton version fix (#1275) * Fix langchain and huggingface version to avoid bug Signed-off-by: Xinyao Wang --- .github/workflows/_comps-workflow.yml | 1 - .github/workflows/push-image-build.yml | 1 - comps/llms/src/doc-summarization/requirements.txt | 6 +++--- comps/llms/src/faq-generation/requirements.txt | 6 +++--- comps/third_parties/vllm/src/build_docker_vllm.sh | 1 - tests/agent/test_agent_langchain_on_intel_hpu.sh | 1 - .../test_guardrails_hallucination_detection_on_intel_hpu.sh | 1 - tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh | 1 - tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh | 1 - .../test_llms_text-generation_service_vllm_on_intel_hpu.sh | 1 - 10 files changed, 6 insertions(+), 14 deletions(-) diff --git a/.github/workflows/_comps-workflow.yml b/.github/workflows/_comps-workflow.yml index 964d2d7284..f7e8019dbe 100644 --- a/.github/workflows/_comps-workflow.yml +++ b/.github/workflows/_comps-workflow.yml @@ -71,7 +71,6 @@ jobs: fi if [[ $(grep -c "vllm-gaudi:" ${docker_compose_yml}) != 0 ]]; then git clone --depth 
1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git - sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt fi - name: Get build list id: get-build-list diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml index fda1528065..67389a3cd4 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -96,7 +96,6 @@ jobs: fi if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git - sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt fi - name: Build Image diff --git a/comps/llms/src/doc-summarization/requirements.txt b/comps/llms/src/doc-summarization/requirements.txt index 1694618637..6bc1bb1e55 100644 --- a/comps/llms/src/doc-summarization/requirements.txt +++ b/comps/llms/src/doc-summarization/requirements.txt @@ -1,11 +1,11 @@ docarray[full] fastapi httpx==0.27.2 -huggingface_hub -langchain #==0.1.12 +huggingface_hub==0.27.1 +langchain==0.3.14 langchain-huggingface langchain-openai -langchain_community +langchain_community==0.3.14 langchainhub opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/llms/src/faq-generation/requirements.txt b/comps/llms/src/faq-generation/requirements.txt index 36257d3939..037079294b 100644 --- a/comps/llms/src/faq-generation/requirements.txt +++ b/comps/llms/src/faq-generation/requirements.txt @@ -1,10 +1,10 @@ docarray[full] fastapi -huggingface_hub -langchain +huggingface_hub==0.27.1 +langchain==0.3.14 langchain-huggingface langchain-openai -langchain_community +langchain_community==0.3.14 langchainhub opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/third_parties/vllm/src/build_docker_vllm.sh b/comps/third_parties/vllm/src/build_docker_vllm.sh index bd8df2e708..bec3a0c8f1 100644 --- a/comps/third_parties/vllm/src/build_docker_vllm.sh +++ b/comps/third_parties/vllm/src/build_docker_vllm.sh @@ -38,7 +38,6 @@ if [ "$hw_mode" = "hpu" ]; then git clone https://github.com/HabanaAI/vllm-fork.git cd ./vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy cd .. rm -rf vllm-fork diff --git a/tests/agent/test_agent_langchain_on_intel_hpu.sh b/tests/agent/test_agent_langchain_on_intel_hpu.sh index 090d1ed332..2c12354723 100644 --- a/tests/agent/test_agent_langchain_on_intel_hpu.sh +++ b/tests/agent/test_agent_langchain_on_intel_hpu.sh @@ -57,7 +57,6 @@ function build_vllm_docker_images() { fi cd ./vllm-fork git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy if [ $? 
-ne 0 ]; then echo "opea/vllm-gaudi:comps failed" diff --git a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh index 92b29827fe..d040f954a1 100644 --- a/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh +++ b/tests/guardrails/test_guardrails_hallucination_detection_on_intel_hpu.sh @@ -13,7 +13,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh index d9552e9a0d..a6096bd309 100644 --- a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh index 5d489b250d..8607f2c550 100644 --- a/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_faq-generation_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? -ne 0 ]; then echo "opea/vllm-gaudi built fail" diff --git a/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh index 7c32a8977b..ea8c9ee6ca 100644 --- a/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh +++ b/tests/llms/test_llms_text-generation_service_vllm_on_intel_hpu.sh @@ -20,7 +20,6 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ git checkout v0.6.4.post2+Gaudi-1.19.0 - sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt docker build --no-cache -f Dockerfile.hpu -t ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} --shm-size=128g . if [ $? 
-ne 0 ]; then echo "opea/vllm-gaudi built fail" From d3906cedcf5573dc5d322ddbcb80d18b941befc6 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Wed, 12 Feb 2025 14:56:55 +0800 Subject: [PATCH 13/17] update default service list (#1276) Signed-off-by: chensuyue --- .github/workflows/manual-comps-test.yml | 2 +- .github/workflows/manual-docker-publish.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/manual-comps-test.yml b/.github/workflows/manual-comps-test.yml index c3f73fb25f..7fa8c7255c 100644 --- a/.github/workflows/manual-comps-test.yml +++ b/.github/workflows/manual-comps-test.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "asr" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,animation,dataprep,embeddings,feedback_management,finetuning,guardrails,image2image,image2video,intent_detection,llms,lvms,prompt_registry,ragas,rerankings,retrievers,text2image,text2sql,third_parties,tts,vectorstores,web_retrievers]" required: true type: string build: diff --git a/.github/workflows/manual-docker-publish.yml b/.github/workflows/manual-docker-publish.yml index b7e770dedb..aae3d3ca84 100644 --- a/.github/workflows/manual-docker-publish.yml +++ b/.github/workflows/manual-docker-publish.yml @@ -7,7 +7,7 @@ on: inputs: services: default: "" - description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,feedback_management,finetuning,guardrails,knowledgegraphs,llms,lvms,nginx,prompt_registry,ragas,rerankings,retrievers,tts,web_retrievers]" + description: "List of services to test [agent,asr,chathistory,animation,dataprep,embeddings,feedback_management,finetuning,guardrails,image2image,image2video,intent_detection,llms,lvms,prompt_registry,ragas,rerankings,retrievers,text2image,text2sql,third_parties,tts,vectorstores,web_retrievers]" required: false type: string images: From f8e62164fc498a1b8763892390ed3662b9c69eac Mon Sep 17 00:00:00 2001 From: Spycsh <39623753+Spycsh@users.noreply.github.com> Date: Wed, 12 Feb 2025 15:45:14 +0800 Subject: [PATCH 14/17] fix metric id issue when init multiple Orchestrator instance (#1280) Signed-off-by: Spycsh --- comps/cores/mega/orchestrator.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 97ee2a76b3..2d1957b1b7 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -28,15 +28,13 @@ class OrchestratorMetrics: - # Need an instance ID for metric prefix because: - # - Orchestror instances are not named - # - CI creates several orchestrator instances + # Need an static class-level ID for metric prefix because: # - Prometheus requires metrics (their names) to be unique _instance_id = 0 def __init__(self) -> None: - self._instance_id += 1 - if self._instance_id > 1: + OrchestratorMetrics._instance_id += 1 + if OrchestratorMetrics._instance_id > 1: self._prefix = f"megaservice{self._instance_id}" else: self._prefix = "megaservice" From 23b2be20c642e7e439c2351f748a6b4490ae8908 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 13 Feb 2025 16:07:14 +0800 Subject: [PATCH 15/17] Fix Build latest images on push event workflow (#1282) Signed-off-by: ZePan110 --- .github/workflows/push-image-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml index 67389a3cd4..51a2a88b75 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -62,7 +62,7 @@ jobs: image-build: needs: get-build-matrix - if: ${{ fromJSON(needs.get-build-matrix.outputs.services).length != 0 }} + if: needs.get-build-matrix.outputs.services != '[]' strategy: matrix: service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }} From bef501c7aedcb5fc8998ecdb5f1af25de464c90a Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Thu, 13 Feb 2025 21:18:58 +0800 Subject: [PATCH 16/17] Fix VDMS retrieval issue (#1252) * Fix VDMS retrieval issue Signed-off-by: lvliang-intel --- comps/retrievers/src/Dockerfile | 2 +- comps/retrievers/src/integrations/vdms.py | 2 +- comps/retrievers/src/requirements.txt | 1 + .../pathway/deployment/docker_compose/compose.yaml | 6 ++++-- comps/third_parties/pathway/src/requirements.txt | 4 ++-- comps/third_parties/pathway/src/vectorstore_pathway.py | 9 +++++---- tests/retrievers/test_retrievers_elasticsearch.sh | 2 ++ tests/retrievers/test_retrievers_milvus.sh | 2 ++ tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh | 8 ++++---- tests/retrievers/test_retrievers_opensearch.sh | 2 ++ tests/retrievers/test_retrievers_pathway.sh | 2 ++ tests/retrievers/test_retrievers_pgvector.sh | 2 ++ tests/retrievers/test_retrievers_qdrant.sh | 2 ++ tests/retrievers/test_retrievers_redis.sh | 2 ++ tests/retrievers/test_retrievers_vdms.sh | 2 ++ 15 files changed, 34 insertions(+), 14 deletions(-) diff --git a/comps/retrievers/src/Dockerfile b/comps/retrievers/src/Dockerfile index 3fb6b3650e..53963d884d 100644 --- a/comps/retrievers/src/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -26,7 +26,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \ fi && \ pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \ pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/retrievers/src/requirements.txt && \ - pip install opentelemetry-api==1.27.0 opentelemetry-exporter-otlp==1.27.0 opentelemetry-sdk==1.27.0 + pip install opentelemetry-api==1.29.0 opentelemetry-exporter-otlp==1.29.0 opentelemetry-sdk==1.29.0 ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/retrievers/src/integrations/vdms.py b/comps/retrievers/src/integrations/vdms.py index b6a44fdf14..5e5b1731fa 100644 --- a/comps/retrievers/src/integrations/vdms.py +++ b/comps/retrievers/src/integrations/vdms.py @@ -48,7 +48,7 @@ def _initialize_embedder(self): from comps.third_parties.clip.src.clip_embedding import vCLIP embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64}) - if TEI_EMBEDDING_ENDPOINT: + elif TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service if logflag: logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") diff --git a/comps/retrievers/src/requirements.txt b/comps/retrievers/src/requirements.txt index a04fef1771..511bcc744f 100644 --- a/comps/retrievers/src/requirements.txt +++ b/comps/retrievers/src/requirements.txt @@ -3,6 +3,7 @@ cairosvg docarray[full] docx2txt easyocr +einops fastapi future graspologic diff --git a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml index 35dc90c32e..9c1ead2b94 100644 --- a/comps/third_parties/pathway/deployment/docker_compose/compose.yaml +++ 
b/comps/third_parties/pathway/deployment/docker_compose/compose.yaml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 include: - - ../../../tei/deployment/docker_compose/compose.yaml + - ../../../tei/deployment/docker_compose/compose.yaml services: pathway-db: @@ -12,13 +12,15 @@ services: - "${PATHWAY_PORT:-6379}:${PATHWAY_PORT:-6379}" volumes: - "${PATHWAY_VOLUME:-../../src/README.md}:/app/data/README.md" - network_mode: host environment: + no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} PATHWAY_HOST: ${PATHWAY_HOST_DB} PATHWAY_PORT: ${PATHWAY_PORT} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "sleep 30 && exit 0"] interval: 1s diff --git a/comps/third_parties/pathway/src/requirements.txt b/comps/third_parties/pathway/src/requirements.txt index ef1bcb44bc..e552e247ff 100644 --- a/comps/third_parties/pathway/src/requirements.txt +++ b/comps/third_parties/pathway/src/requirements.txt @@ -1,7 +1,7 @@ langchain langchain-community -langchain_huggingface -langchain_openai +openai pathway[xpack-llm] sentence-transformers +tiktoken unstructured[all-docs] >= 0.16 diff --git a/comps/third_parties/pathway/src/vectorstore_pathway.py b/comps/third_parties/pathway/src/vectorstore_pathway.py index 22a23a2414..1b9d207edb 100644 --- a/comps/third_parties/pathway/src/vectorstore_pathway.py +++ b/comps/third_parties/pathway/src/vectorstore_pathway.py @@ -7,8 +7,7 @@ import nltk import pathway as pw from langchain import text_splitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings -from langchain_huggingface import HuggingFaceEndpointEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings from pathway.xpacks.llm.parsers import ParseUnstructured from pathway.xpacks.llm.vector_store import VectorStoreServer @@ -40,7 +39,7 @@ port = int(os.getenv("PATHWAY_PORT", 8666)) EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") if __name__ == "__main__": @@ -48,7 +47,9 @@ if tei_embedding_endpoint: # create embeddings using TEI endpoint service logging.info(f"Initializing the embedder from tei_embedding_endpoint: {tei_embedding_endpoint}") - embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=EMBED_MODEL, api_url=tei_embedding_endpoint + ) else: # create embeddings using local embedding model embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) diff --git a/tests/retrievers/test_retrievers_elasticsearch.sh b/tests/retrievers/test_retrievers_elasticsearch.sh index 60996a44ec..a5fd53fb72 100644 --- a/tests/retrievers/test_retrievers_elasticsearch.sh +++ b/tests/retrievers/test_retrievers_elasticsearch.sh @@ -79,6 +79,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=elasticsearch-vector-db") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_milvus.sh b/tests/retrievers/test_retrievers_milvus.sh index 507f43c5af..05bd69601a 100644 --- a/tests/retrievers/test_retrievers_milvus.sh +++ b/tests/retrievers/test_retrievers_milvus.sh @@ -83,6 +83,8 @@ function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=tei-embedding-serving") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh index f6857f35cb..a819e2e485 100644 --- a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh +++ b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh @@ -46,8 +46,8 @@ function start_service() { export RETRIEVER_PORT=11635 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export DATA_PATH="/data2/cache" - export MAX_INPUT_TOKENS=1024 - export MAX_TOTAL_TOKENS=3000 + export MAX_INPUT_TOKENS=4096 + export MAX_TOTAL_TOKENS=8192 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" @@ -67,7 +67,7 @@ function start_service() { docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \ -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" \ - -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps + -e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e MAX_INPUT_LEN=$MAX_INPUT_TOKENS -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps sleep 1m @@ -152,7 +152,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-*") + cid=$(docker ps -aq --filter "name=test-comps-*" --filter "name=neo4j-apoc" --filter "name=tgi-gaudi-server" --filter "name=tei-embedding-serving") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans diff --git a/tests/retrievers/test_retrievers_opensearch.sh b/tests/retrievers/test_retrievers_opensearch.sh index 7a5fc0aeb2..ae49c41a90 100644 --- a/tests/retrievers/test_retrievers_opensearch.sh +++ b/tests/retrievers/test_retrievers_opensearch.sh @@ -75,6 +75,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=opensearch-vector-db" --filter "name=tei-embedding-serving") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pathway.sh b/tests/retrievers/test_retrievers_pathway.sh index 86fadaa812..3dbc2bb301 100644 --- a/tests/retrievers/test_retrievers_pathway.sh +++ b/tests/retrievers/test_retrievers_pathway.sh @@ -69,6 +69,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pathway-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_pgvector.sh b/tests/retrievers/test_retrievers_pgvector.sh index 021d81a0c2..2a51a3e91d 100644 --- a/tests/retrievers/test_retrievers_pgvector.sh +++ b/tests/retrievers/test_retrievers_pgvector.sh @@ -64,6 +64,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=pgvector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_qdrant.sh b/tests/retrievers/test_retrievers_qdrant.sh index da2d343ffc..e50642ac0b 100644 --- a/tests/retrievers/test_retrievers_qdrant.sh +++ b/tests/retrievers/test_retrievers_qdrant.sh @@ -59,6 +59,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} --remove-orphans + cid=$(docker ps -aq --filter "name=qdrant-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_redis.sh b/tests/retrievers/test_retrievers_redis.sh index 0964049f98..aa2bbe61fc 100644 --- a/tests/retrievers/test_retrievers_redis.sh +++ b/tests/retrievers/test_retrievers_redis.sh @@ -131,6 +131,8 @@ function validate_mm_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=redis-vector-db") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { diff --git a/tests/retrievers/test_retrievers_vdms.sh b/tests/retrievers/test_retrievers_vdms.sh index cd2b41b53e..732fd91134 100644 --- a/tests/retrievers/test_retrievers_vdms.sh +++ b/tests/retrievers/test_retrievers_vdms.sh @@ -78,6 +78,8 @@ function validate_microservice() { function stop_docker() { cd $WORKPATH/comps/retrievers/deployment/docker_compose docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans + cid=$(docker ps -aq --filter "name=retriever-vdms*" --filter "name=vdms-vector-db" --filter "name=tei-embedding-serving") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } function main() { From d1dfd0ef24da221b5ebfef99104c3efdac6d2793 Mon Sep 17 00:00:00 2001 From: Spycsh <39623753+Spycsh@users.noreply.github.com> Date: Thu, 13 Feb 2025 22:39:47 +0800 Subject: [PATCH 17/17] Align mongo related chathistory/feedbackmanagement/promptregistry image names with examples (#1284) Align mongo related chathistory/feedbackmanagement/promptregistry image names with examples Signed-off-by: Spycsh Co-authored-by: Liang Lv --- .github/workflows/docker/compose/chathistory-compose.yaml | 4 ++-- comps/chathistory/deployment/docker_compose/compose.yaml | 2 +- comps/chathistory/src/README.md | 2 +- tests/chathistory/test_chathistory_mongo.sh | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/docker/compose/chathistory-compose.yaml b/.github/workflows/docker/compose/chathistory-compose.yaml index 3991a99734..d3f35db8e6 100644 --- a/.github/workflows/docker/compose/chathistory-compose.yaml +++ b/.github/workflows/docker/compose/chathistory-compose.yaml @@ -3,7 +3,7 @@ # this file should be run in the root of the repo services: - chathistory-mongo-server: + chathistory-mongo: build: dockerfile: comps/chathistory/src/Dockerfile - image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} diff --git a/comps/chathistory/deployment/docker_compose/compose.yaml b/comps/chathistory/deployment/docker_compose/compose.yaml index db9812e692..0e0a5c9ad2 100644 --- a/comps/chathistory/deployment/docker_compose/compose.yaml +++ b/comps/chathistory/deployment/docker_compose/compose.yaml @@ -15,7 +15,7 @@ services: command: mongod --quiet --logpath /dev/null chathistory-mongo: - image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} container_name: chathistory-mongo-server ports: - "${CHATHISTORY_PORT:-6012}:6012" diff --git a/comps/chathistory/src/README.md b/comps/chathistory/src/README.md index 5d753fdec3..3cdf5bf270 100644 --- a/comps/chathistory/src/README.md +++ b/comps/chathistory/src/README.md @@ -23,7 +23,7 @@ export COLLECTION_NAME=${COLLECTION_NAME} ```bash cd ../../../../ -docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . +docker build -t opea/chathistory-mongo:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . ``` ### Run Docker with CLI diff --git a/tests/chathistory/test_chathistory_mongo.sh b/tests/chathistory/test_chathistory_mongo.sh index 9f32165be7..4bb098d79c 100644 --- a/tests/chathistory/test_chathistory_mongo.sh +++ b/tests/chathistory/test_chathistory_mongo.sh @@ -16,12 +16,12 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/chathistory-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . + docker build --no-cache -t opea/chathistory-mongo:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/src/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/chathistory-mongo-server built fail" + echo "opea/chathistory-mongo built fail" exit 1 else - echo "opea/chathistory-mongo-server built successful" + echo "opea/chathistory-mongo built successful" fi }