Skip to content

Commit

Permalink
add dynamic clients for all APIs (#348)
Browse files Browse the repository at this point in the history
* add dynamic clients for all APIs

* fix openapi generator

* inference + memory + agents tests now pass with "remote" providers

* Add docstring which fixes openapi generator :/
  • Loading branch information
ashwinb authored Oct 31, 2024
1 parent f04b566 commit 37b330b
Show file tree
Hide file tree
Showing 11 changed files with 345 additions and 79 deletions.
15 changes: 14 additions & 1 deletion docs/openapi_generator/pyopenapi/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,20 @@ async def get_object(self, uuid: str, version: int) -> Object:
)
else:
event_type = None
response_type = return_type

def process_type(t):
    """Map an endpoint return annotation to the type exposed in the OpenAPI spec.

    An ``AsyncIterator[X]`` return (server-sent events) has no OpenAPI
    representation, so it collapses to its item type ``X``; ``Union`` members
    are mapped recursively; anything else passes through unchanged.
    """
    origin = typing.get_origin(t)
    if origin is collections.abc.AsyncIterator:
        # NOTE(ashwin): this is SSE and there is no way to represent it. Either we
        # make it a List or use the item type — we choose the latter.
        return typing.get_args(t)[0]
    elif origin is typing.Union:
        # Rebuild the union through the public subscription API rather than the
        # private typing._UnionGenericAlias constructor, which is an unstable
        # internal that has changed across CPython versions.
        return typing.Union[tuple(process_type(arg) for arg in typing.get_args(t))]
    else:
        return t

response_type = process_type(return_type)

# set HTTP request method based on type of request and presence of payload
if not request_params:
Expand Down
44 changes: 26 additions & 18 deletions docs/resources/llama-stack-spec.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-30 16:17:03.919702"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-31 14:28:52.128905"
},
"servers": [
{
Expand Down Expand Up @@ -320,11 +320,18 @@
"post": {
"responses": {
"200": {
"description": "OK",
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
"content": {
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
"oneOf": [
{
"$ref": "#/components/schemas/Turn"
},
{
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
}
]
}
}
}
Expand Down Expand Up @@ -4002,7 +4009,8 @@
"additionalProperties": false,
"required": [
"event"
]
],
"title": "streamed agent turn completion response."
},
"AgentTurnResponseTurnCompletePayload": {
"type": "object",
Expand Down Expand Up @@ -7054,29 +7062,26 @@
}
],
"tags": [
{
"name": "Inference"
},
{
"name": "Memory"
},
{
"name": "Inspect"
"name": "Inference"
},
{
"name": "PostTraining"
"name": "Eval"
},
{
"name": "Models"
"name": "MemoryBanks"
},
{
"name": "Scoring"
"name": "Models"
},
{
"name": "DatasetIO"
"name": "BatchInference"
},
{
"name": "BatchInference"
"name": "PostTraining"
},
{
"name": "Agents"
Expand All @@ -7085,19 +7090,22 @@
"name": "Shields"
},
{
"name": "MemoryBanks"
"name": "Telemetry"
},
{
"name": "Datasets"
"name": "Inspect"
},
{
"name": "DatasetIO"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "Eval"
"name": "Datasets"
},
{
"name": "Telemetry"
"name": "Scoring"
},
{
"name": "ScoringFunctions"
Expand Down Expand Up @@ -7307,7 +7315,7 @@
},
{
"name": "AgentTurnResponseStreamChunk",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgentTurnResponseStreamChunk\" />"
"description": "streamed agent turn completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgentTurnResponseStreamChunk\" />"
},
{
"name": "AgentTurnResponseTurnCompletePayload",
Expand Down
35 changes: 21 additions & 14 deletions docs/resources/llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ components:
$ref: '#/components/schemas/AgentTurnResponseEvent'
required:
- event
title: streamed agent turn completion response.
type: object
AgentTurnResponseTurnCompletePayload:
additionalProperties: false
Expand Down Expand Up @@ -2997,7 +2998,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-10-30 16:17:03.919702"
\ draft and subject to change.\n Generated at 2024-10-31 14:28:52.128905"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
Expand Down Expand Up @@ -3190,8 +3191,11 @@ paths:
content:
text/event-stream:
schema:
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
description: OK
oneOf:
- $ref: '#/components/schemas/Turn'
- $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
description: A single turn in an interaction with an Agentic System. **OR**
streamed agent turn completion response.
tags:
- Agents
/agents/turn/get:
Expand Down Expand Up @@ -4276,21 +4280,21 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: Inference
- name: Memory
- name: Inspect
- name: PostTraining
- name: Inference
- name: Eval
- name: MemoryBanks
- name: Models
- name: Scoring
- name: DatasetIO
- name: BatchInference
- name: PostTraining
- name: Agents
- name: Shields
- name: MemoryBanks
- name: Datasets
- name: SyntheticDataGeneration
- name: Eval
- name: Telemetry
- name: Inspect
- name: DatasetIO
- name: SyntheticDataGeneration
- name: Datasets
- name: Scoring
- name: ScoringFunctions
- name: Safety
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
Expand Down Expand Up @@ -4451,8 +4455,11 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/AgentTurnResponseStepStartPayload"
/>
name: AgentTurnResponseStepStartPayload
- description: <SchemaDefinition schemaRef="#/components/schemas/AgentTurnResponseStreamChunk"
/>
- description: 'streamed agent turn completion response.
<SchemaDefinition schemaRef="#/components/schemas/AgentTurnResponseStreamChunk"
/>'
name: AgentTurnResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/AgentTurnResponseTurnCompletePayload"
/>
Expand Down
5 changes: 4 additions & 1 deletion llama_stack/apis/agents/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from enum import Enum
from typing import (
Any,
AsyncIterator,
Dict,
List,
Literal,
Expand Down Expand Up @@ -405,6 +406,8 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):

@json_schema_type
class AgentTurnResponseStreamChunk(BaseModel):
    """streamed agent turn completion response."""

    # Single server-sent event emitted while an agent turn is being processed.
    # NOTE: the docstring above is intentionally terse — it is picked up by the
    # OpenAPI generator as the schema title/description; changing it changes
    # the generated spec.
    event: AgentTurnResponseEvent


Expand Down Expand Up @@ -434,7 +437,7 @@ async def create_agent_turn(
],
attachments: Optional[List[Attachment]] = None,
stream: Optional[bool] = False,
) -> AgentTurnResponseStreamChunk: ...
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...

@webmethod(route="/agents/turn/get")
async def get_agents_turn(
Expand Down
16 changes: 13 additions & 3 deletions llama_stack/apis/inference/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,15 @@

from enum import Enum

from typing import List, Literal, Optional, Protocol, runtime_checkable, Union
from typing import (
AsyncIterator,
List,
Literal,
Optional,
Protocol,
runtime_checkable,
Union,
)

from llama_models.schema_utils import json_schema_type, webmethod

Expand Down Expand Up @@ -224,7 +232,7 @@ async def completion(
response_format: Optional[ResponseFormat] = None,
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: ...

@webmethod(route="/inference/chat_completion")
async def chat_completion(
Expand All @@ -239,7 +247,9 @@ async def chat_completion(
response_format: Optional[ResponseFormat] = None,
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
) -> Union[
ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
]: ...

@webmethod(route="/inference/embeddings")
async def embeddings(
Expand Down
Loading

0 comments on commit 37b330b

Please sign in to comment.