Skip to content

Commit

Permalink
add dynamic clients for all APIs (#348)
Browse files Browse the repository at this point in the history
* add dynamic clients for all APIs

* fix openapi generator

* inference + memory + agents tests now pass with "remote" providers

* Add docstring which fixes openapi generator :/
  • Loading branch information
ashwinb authored Oct 31, 2024
1 parent f04b566 commit 37b330b
Show file tree
Hide file tree
Showing 11 changed files with 345 additions and 79 deletions.
15 changes: 14 additions & 1 deletion docs/openapi_generator/pyopenapi/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,20 @@ async def get_object(self, uuid: str, version: int) -> Object:
)
else:
event_type = None
response_type = return_type

def process_type(t):
    """Map an endpoint return annotation to the type exposed in the OpenAPI spec.

    An ``AsyncIterator[X]`` return (server-sent events) has no OpenAPI
    representation, so it collapses to its item type ``X``; ``Union`` members
    are mapped recursively; anything else passes through unchanged.
    """
    origin = typing.get_origin(t)
    if origin is collections.abc.AsyncIterator:
        # NOTE(ashwin): this is SSE and there is no way to represent it. Either we
        # make it a List or use the item type — we choose the latter.
        return typing.get_args(t)[0]
    elif origin is typing.Union:
        # Rebuild the union through the public subscription API rather than the
        # private typing._UnionGenericAlias constructor, which is an unstable
        # internal that has changed across CPython versions.
        return typing.Union[tuple(process_type(arg) for arg in typing.get_args(t))]
    else:
        return t

response_type = process_type(return_type)

# set HTTP request method based on type of request and presence of payload
if not request_params:
Expand Down
44 changes: 26 additions & 18 deletions docs/resources/llama-stack-spec.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-30 16:17:03.919702"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-31 14:28:52.128905"
},
"servers": [
{
Expand Down Expand Up @@ -320,11 +320,18 @@
"post": {
"responses": {
"200": {
"description": "OK",
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
"content": {
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
"oneOf": [
{
"$ref": "#/components/schemas/Turn"
},
{
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
}
]
}
}
}
Expand Down Expand Up @@ -4002,7 +4009,8 @@
"additionalProperties": false,
"required": [
"event"
]
],
"title": "streamed agent turn completion response."
},
"AgentTurnResponseTurnCompletePayload": {
"type": "object",
Expand Down Expand Up @@ -7054,29 +7062,26 @@
}
],
"tags": [
{
"name": "Inference"
},
{
"name": "Memory"
},
{
"name": "Inspect"
"name": "Inference"
},
{
"name": "PostTraining"
"name": "Eval"
},
{
"name": "Models"
"name": "MemoryBanks"
},
{
"name": "Scoring"
"name": "Models"
},
{
"name": "DatasetIO"
"name": "BatchInference"
},
{
"name": "BatchInference"
"name": "PostTraining"
},
{
"name": "Agents"
Expand All @@ -7085,19 +7090,22 @@
"name": "Shields"
},
{
"name": "MemoryBanks"
"name": "Telemetry"
},
{
"name": "Datasets"
"name": "Inspect"
},
{
"name": "DatasetIO"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "Eval"
"name": "Datasets"
},
{
"name": "Telemetry"
"name": "Scoring"
},
{
"name": "ScoringFunctions"
Expand Down Expand Up @@ -7307,7 +7315,7 @@
},
{
"name": "AgentTurnResponseStreamChunk",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgentTurnResponseStreamChunk\" />"
"description": "streamed agent turn completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgentTurnResponseStreamChunk\" />"
},
{
"name": "AgentTurnResponseTurnCompletePayload",
Expand Down
35 changes: 21 additions & 14 deletions docs/resources/llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ components:
$ref: '#/components/schemas/AgentTurnResponseEvent'
required:
- event
title: streamed agent turn completion response.
type: object
AgentTurnResponseTurnCompletePayload:
additionalProperties: false
Expand Down Expand Up @@ -2997,7 +2998,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-10-30 16:17:03.919702"
\ draft and subject to change.\n Generated at 2024-10-31 14:28:52.128905"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
Expand Down Expand Up @@ -3190,8 +3191,11 @@ paths:
content:
text/event-stream:
schema:
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
description: OK
oneOf:
- $ref: '#/components/schemas/Turn'
- $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
description: A single turn in an interaction with an Agentic System. **OR**
streamed agent turn completion response.
tags:
- Agents
/agents/turn/get:
Expand Down Expand Up @@ -4276,21 +4280,21 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: Inference
- name: Memory
- name: Inspect
- name: PostTraining
- name: Inference
- name: Eval
- name: MemoryBanks
- name: Models
- name: Scoring
- name: DatasetIO
- name: BatchInference
- name: PostTraining
- name: Agents
- name: Shields
- name: MemoryBanks
- name: Datasets
- name: SyntheticDataGeneration
- name: Eval
- name: Telemetry
- name: Inspect
- name: DatasetIO
- name: SyntheticDataGeneration
- name: Datasets
- name: Scoring
- name: ScoringFunctions
- name: Safety
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
Expand Down Expand Up @@ -4451,8 +4455,11 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/AgentTurnResponseStepStartPayload"
/>
name: AgentTurnResponseStepStartPayload
- description: <SchemaDefinition schemaRef="#/components/schemas/AgentTurnResponseStreamChunk"
/>
- description: 'streamed agent turn completion response.
<SchemaDefinition schemaRef="#/components/schemas/AgentTurnResponseStreamChunk"
/>'
name: AgentTurnResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/AgentTurnResponseTurnCompletePayload"
/>
Expand Down
5 changes: 4 additions & 1 deletion llama_stack/apis/agents/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from enum import Enum
from typing import (
Any,
AsyncIterator,
Dict,
List,
Literal,
Expand Down Expand Up @@ -405,6 +406,8 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):

@json_schema_type
class AgentTurnResponseStreamChunk(BaseModel):
    """streamed agent turn completion response."""

    # Single server-sent event emitted while an agent turn is being processed.
    # NOTE: the docstring above is intentionally terse — it is picked up by the
    # OpenAPI generator as the schema title/description; changing it changes
    # the generated spec.
    event: AgentTurnResponseEvent


Expand Down Expand Up @@ -434,7 +437,7 @@ async def create_agent_turn(
],
attachments: Optional[List[Attachment]] = None,
stream: Optional[bool] = False,
) -> AgentTurnResponseStreamChunk: ...
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...

@webmethod(route="/agents/turn/get")
async def get_agents_turn(
Expand Down
16 changes: 13 additions & 3 deletions llama_stack/apis/inference/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,15 @@

from enum import Enum

from typing import List, Literal, Optional, Protocol, runtime_checkable, Union
from typing import (
AsyncIterator,
List,
Literal,
Optional,
Protocol,
runtime_checkable,
Union,
)

from llama_models.schema_utils import json_schema_type, webmethod

Expand Down Expand Up @@ -224,7 +232,7 @@ async def completion(
response_format: Optional[ResponseFormat] = None,
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: ...

@webmethod(route="/inference/chat_completion")
async def chat_completion(
Expand All @@ -239,7 +247,9 @@ async def chat_completion(
response_format: Optional[ResponseFormat] = None,
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
) -> Union[
ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
]: ...

@webmethod(route="/inference/embeddings")
async def embeddings(
Expand Down
Loading

0 comments on commit 37b330b

Please sign in to comment.