diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index 7b5d8b0024..2b2e35a508 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -1,4 +1,34 @@
 {
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "hf-serverless": [
     "aiohttp",
     "aiosqlite",
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 93f4adfb3d..92e1f7dbf6 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Ollama Distribution

 ```{toctree}
@@ -79,11 +82,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-ollama \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index 1638e9b113..b2d28be1ba 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2
@@ -107,10 +110,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md
index 52d1cd9629..1992799907 100644
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@@ -16,9 +16,10 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | inference | `remote::sambanova` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |


 ### Environment Variables
@@ -32,13 +33,13 @@ The following environment variables can be configured:

 The following models are available by default:

-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct`
-- `meta-llama/Llama-3.2-1B-Instruct`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
+- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
+- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
+- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
+- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
+- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`


 ### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md
index 5a709d0a89..ba5dee77f1 100644
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@@ -1,3 +1,7 @@
+---
+orphan: true
+---
+
 # TGI Distribution

 ```{toctree}
@@ -98,10 +102,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-tgi \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 707f5be7a7..2d5c8fc77f 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Together Distribution

 ```{toctree}
diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml
index d6da478d14..ca5ffe618b 100644
--- a/llama_stack/templates/sambanova/build.yaml
+++ b/llama_stack/templates/sambanova/build.yaml
@@ -1,12 +1,10 @@
 version: '2'
-name: sambanova
 distribution_spec:
   description: Use SambaNova.AI for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::sambanova
-    memory:
+    vector_io:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector
@@ -16,4 +14,9 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::rag-runtime
 image_type: conda
diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml
index 03c8ea44ff..31f47e0c12 100644
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@@ -1,21 +1,20 @@
 version: '2'
 image_name: sambanova
-docker_image: null
-conda_env: sambanova
 apis:
 - agents
 - inference
-- memory
 - safety
 - telemetry
+- tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1/
+      url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -23,6 +22,12 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -38,46 +43,63 @@ providers:
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
     config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-8B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-70B-Instruct
+  model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: null
+  model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_model_id: Meta-Llama-3.1-405B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-1B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-3B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-11B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-90B-Vision-Instruct
+  model_type: llm
 shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
+- shield_id: meta-llama/Llama-Guard-3-8B
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []
diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py
index 8c231617b1..389e2a6c5d 100644
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@@ -18,10 +18,16 @@ def get_distribution_template() -> DistributionTemplate:

     providers = {
         "inference": ["remote::sambanova"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+        ],
     }

     inference_provider = Provider(
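
The net effect of this patch is to rename the `memory` API to `vector_io` and give the SambaNova template a `tool_runtime` section, matching the other distributions touched above. As a quick way to exercise the rebuilt template, here is a minimal sketch; it assumes the standard `llama stack build` / `llama stack run` CLI flow used by the other distro docs, and the port value is illustrative rather than taken from this diff:

```bash
# Build the sambanova template into a conda environment
# (build.yaml above declares image_type: conda).
llama stack build --template sambanova --image-type conda

# Run the generated config; SAMBANOVA_API_KEY fills in the
# ${env.SAMBANOVA_API_KEY} reference in run.yaml's inference provider.
export LLAMA_STACK_PORT=5001  # illustrative port, not from this diff
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
```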