run distro_codegen
hardikjshah committed Jan 24, 2025
1 parent 09f0c5f commit fdeac78
Showing 9 changed files with 123 additions and 34 deletions.
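These nine files are generated rather than hand-edited: the commit title says the author re-ran the distribution code generator, which rewrites each template's build.yaml, run.yaml, and docs page, plus the top-level distributions/dependencies.json, from the Python template definitions (such as llama_stack/templates/sambanova/sambanova.py at the end of this diff). A minimal sketch of reproducing such a run, assuming the generator lives at llama_stack/scripts/distro_codegen.py (the path is an assumption, not confirmed by this page):

```bash
# Sketch only: the generator script path below is assumed.
git clone https://github.com/meta-llama/llama-stack.git
cd llama-stack
python llama_stack/scripts/distro_codegen.py  # regenerates build.yaml, run.yaml, docs, dependencies.json
git diff --stat                               # should report something like "9 files changed"
```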
30 changes: 30 additions & 0 deletions distributions/dependencies.json
@@ -1,4 +1,34 @@
 {
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "hf-serverless": [
     "aiohttp",
     "aiosqlite",
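Each string in dependencies.json is a pip requirement, and the last two sambanova entries embed extra pip arguments (`--no-deps` and `--index-url ...`), so they cannot be treated as bare package names. A hedged sketch of consuming the list, assuming jq is available (this loop is illustrative, not the project's actual install tooling):

```bash
# Illustrative only: install the sambanova dependency set entry by entry.
jq -r '.sambanova[]' distributions/dependencies.json | while read -r req; do
  # Entries such as "torch --index-url https://download.pytorch.org/whl/cpu"
  # carry pip flags, so rely on word splitting of the unquoted variable.
  pip install $req
done
```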
9 changes: 8 additions & 1 deletion docs/source/distributions/self_hosted_distro/ollama.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Ollama Distribution

 ```{toctree}
@@ -79,11 +82,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-ollama \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
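Two of the additions above recur across the doc pages in this commit. The `orphan: true` front matter tells Sphinx the page is deliberately absent from any toctree, which suppresses the corresponding build warning. The `cd /path/to/llama-stack` line matters because the `-v ./llama_stack/templates/...` bind mount is relative to the current directory and only resolves inside a repository checkout. The snippet also assumes `LLAMA_STACK_PORT` is already exported; a setup sketch (8321 is an arbitrary example value, not a documented default):

```bash
# Assumed setup for the docker command above; the port value is an example.
export LLAMA_STACK_PORT=8321
git clone https://github.com/meta-llama/llama-stack.git
cd llama-stack  # makes ./llama_stack/templates/ollama/run-with-safety.yaml resolve
```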
10 changes: 9 additions & 1 deletion docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2
@@ -107,10 +110,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
17 changes: 9 additions & 8 deletions docs/source/distributions/self_hosted_distro/sambanova.md
@@ -16,9 +16,10 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | inference | `remote::sambanova` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |


 ### Environment Variables
@@ -32,13 +33,13 @@ The following environment variables can be configured:

 The following models are available by default:

-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct`
-- `meta-llama/Llama-3.2-1B-Instruct`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
+- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
+- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
+- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
+- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
+- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`


 ### Prerequisite: API Keys
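Each model entry above now lists the Llama Stack model ID followed, in parentheses, by the SambaNova-side provider model ID it maps to; clients request the `meta-llama/...` name and the provider translates it. A sketch of checking that mapping against a running stack, assuming the `llama-stack-client` CLI is installed and the server is up locally:

```bash
# Sketch: inspect the registered models and their provider-side aliases.
pip install llama-stack-client
llama-stack-client configure --endpoint "http://localhost:$LLAMA_STACK_PORT"
llama-stack-client models list  # the provider-side alias should appear alongside each model_id
```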
11 changes: 10 additions & 1 deletion docs/source/distributions/self_hosted_distro/tgi.md
@@ -1,3 +1,7 @@
+---
+orphan: true
+---
+
 # TGI Distribution

 ```{toctree}
@@ -98,10 +102,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-tgi \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
3 changes: 3 additions & 0 deletions docs/source/distributions/self_hosted_distro/together.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Together Distribution

 ```{toctree}
9 changes: 6 additions & 3 deletions llama_stack/templates/sambanova/build.yaml
@@ -1,12 +1,10 @@
 version: '2'
-name: sambanova
 distribution_spec:
   description: Use SambaNova.AI for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::sambanova
-    memory:
+    vector_io:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector
@@ -16,4 +14,9 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::rag-runtime
 image_type: conda
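This build.yaml is the input `llama stack build` uses to assemble the distribution, and `image_type: conda` selects a conda environment build. A sketch of rebuilding it locally from the template, assuming the `llama` CLI from the llama-stack package:

```bash
# Sketch: rebuild the sambanova distribution from its template definition.
pip install llama-stack
llama stack build --template sambanova --image-type conda
```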
60 changes: 41 additions & 19 deletions llama_stack/templates/sambanova/run.yaml
@@ -1,28 +1,33 @@
 version: '2'
 image_name: sambanova
-docker_image: null
-conda_env: sambanova
 apis:
 - agents
 - inference
-- memory
 - safety
 - telemetry
+- tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1/
+      url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
       kvstore:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -38,46 +43,63 @@ providers:
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
 metadata_store:
   namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
+  provider_model_id: Meta-Llama-3.1-8B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
+  provider_model_id: Meta-Llama-3.1-70B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: null
+  model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
+  provider_model_id: Meta-Llama-3.1-405B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: null
+  provider_model_id: Meta-Llama-3.2-1B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: null
+  provider_model_id: Meta-Llama-3.2-3B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: null
+  provider_model_id: Llama-3.2-11B-Vision-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: null
+  provider_model_id: Llama-3.2-90B-Vision-Instruct
   model_type: llm
 shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
+- shield_id: meta-llama/Llama-Guard-3-8B
+vector_dbs: []
 datasets: []
 scoring_fns: []
+eval_tasks: []
+tool_groups: []
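The `${env.VAR}` placeholders in this config are substituted from the environment when the server starts, and the `${env.VAR:default}` form (for example `${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}`) falls back to the value after the colon when the variable is unset. A launch sketch under that reading, with a placeholder key:

```bash
# Sketch: only the API key has no default, so it must be exported.
export SAMBANOVA_API_KEY=your-key-here  # placeholder value
llama stack run llama_stack/templates/sambanova/run.yaml \
  --port "${LLAMA_STACK_PORT:-8321}"    # 8321 is an example fallback
```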
8 changes: 7 additions & 1 deletion llama_stack/templates/sambanova/sambanova.py
@@ -18,10 +18,16 @@
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::sambanova"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+        ],
     }

     inference_provider = Provider(
