From 94bb0d87bc6b5b3ee77f402d3fc5c9b6cf3e9dec Mon Sep 17 00:00:00 2001
From: "Ankush Pala ankush@lastmileai.dev" <>
Date: Mon, 29 Jan 2024 17:13:22 -0500
Subject: [PATCH 1/2] [python] Claude Model Parser

Model Parser for Claude on AWS Bedrock

Claude on Bedrock can be called with either
- the `anthropic_bedrock` Python library, or
- AWS's `Boto3` library.

We chose the `anthropic_bedrock` library because it has good abstractions and type hints.

Claude on Bedrock only supports Text Completions; turn-style [prompts are not supported](https://docs.anthropic.com/claude/reference/claude-on-amazon-bedrock#:~:text=Messages%20in%20Amazon%20Bedrock).

- Added the Claude model parser as a core parser.
- Added a dependency on `anthropic_bedrock`.

## Test plan
Using the prompt schema from the diff on top of this one.

https://github.com/lastmile-ai/aiconfig/assets/141073967/29c1faa7-7d13-412f-8606-9ad556eb1c52
---
 python/requirements.txt                       |   1 +
 python/src/aiconfig/Config.py                 |   2 +
 python/src/aiconfig/default_parsers/claude.py | 300 ++++++++++++++++++
 3 files changed, 303 insertions(+)
 create mode 100644 python/src/aiconfig/default_parsers/claude.py

diff --git a/python/requirements.txt b/python/requirements.txt
index f6f686c44..5518e11bc 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,4 +1,5 @@
 # Editor server
+anthropic_bedrock
 black
 flake8
 flask-cors
diff --git a/python/src/aiconfig/Config.py b/python/src/aiconfig/Config.py
index 3472b7a4c..a78f98ea1 100644
--- a/python/src/aiconfig/Config.py
+++ b/python/src/aiconfig/Config.py
@@ -1,6 +1,7 @@
 import json
 import os
 from typing import Any, Dict, List, Literal, Optional, Tuple
+from aiconfig.default_parsers.claude import ClaudeBedrockModelParser
 
 import requests
 import yaml
@@ -53,6 +54,7 @@
     DefaultAnyscaleEndpointParser("AnyscaleEndpoint")
 )
 ModelParserRegistry.register_model_parser(GeminiModelParser("gemini-pro"), ["gemini-pro"])
+ModelParserRegistry.register_model_parser(ClaudeBedrockModelParser())
 ModelParserRegistry.register_model_parser(HuggingFaceTextGenerationParser())
 for model in gpt_models_extra:
     ModelParserRegistry.register_model_parser(DefaultOpenAIParser(model))
diff --git a/python/src/aiconfig/default_parsers/claude.py b/python/src/aiconfig/default_parsers/claude.py
new file mode 100644
index 000000000..eb8b803ed
--- /dev/null
+++ b/python/src/aiconfig/default_parsers/claude.py
@@ -0,0 +1,300 @@
+import copy
+import json
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
+
+from aiconfig.callback import CallbackEvent
+from aiconfig.default_parsers.parameterized_model_parser import (
+    ParameterizedModelParser,
+)
+from aiconfig.model_parser import InferenceOptions
+from aiconfig.schema import ExecuteResult, Output, Prompt, PromptMetadata
+from aiconfig.util.params import resolve_prompt
+from anthropic_bedrock import AI_PROMPT, HUMAN_PROMPT, AnthropicBedrock, Stream
+from anthropic_bedrock.types import Completion
+
+if TYPE_CHECKING:
+    from aiconfig.Config import AIConfigRuntime
+
+
+class ClaudeBedrockModelParser(ParameterizedModelParser):
+    """
+    A ModelParser for the Claude API on AWS Bedrock.
+
+    Claude on Bedrock does not support the Messages API (turn-style completion); it only supports Text Completions.
+    See https://docs.anthropic.com/claude/reference/claude-on-amazon-bedrock#list-available-models:~:text=Messages%20in%20Amazon%20Bedrock
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Client will be set in the run method.
+        # This avoids having to set the API key in the constructor.
+        self.client = None
+
+    def id(self) -> str:
+        return "ClaudeBedrockModelParser"
+
+    async def serialize(
+        self,
+        prompt_name: str,
+        data: Dict[Any, Any],
+        ai_config: "AIConfigRuntime",
+        parameters: Optional[dict[Any, Any]] = None,
+        **kwargs,
+    ) -> list[Prompt]:
+        """
+        Defines how a prompt and model inference settings get serialized in the .aiconfig.
+
+        Args:
+            prompt_name (str): The name to give the serialized prompt.
+            data (dict): Completion params for the Claude Text Completions API, including the prompt text.
+            parameters (dict, optional): Parameters to attach to the prompt's metadata.
+
+        Returns:
+            list[Prompt]: Serialized representation of the prompt and inference settings.
+        """
+        await ai_config.callback_manager.run_callbacks(
+            CallbackEvent(
+                "on_serialize_start",
+                __name__,
+                {
+                    "prompt_name": prompt_name,
+                    "data": data,
+                    "parameters": parameters,
+                    "kwargs": kwargs,
+                },
+            )
+        )
+
+        # Assume data is completion params for the Claude Text Completions API
+        prompt_input = data["prompt"]
+
+        settings = copy.deepcopy(data)
+        # The prompt is handled above, so remove it from the settings
+        settings.pop("prompt", None)
+
+        model_metadata = ai_config.get_model_metadata(settings, self.id())
+
+        prompts: list[Prompt] = []
+
+        prompt = Prompt(
+            name=prompt_name,
+            input=prompt_input,
+            metadata=PromptMetadata(
+                model=model_metadata, parameters=parameters, **kwargs
+            ),
+        )
+
+        prompts.append(prompt)
+
+        await ai_config.callback_manager.run_callbacks(
+            CallbackEvent(
+                "on_serialize_complete", __name__, {"result": prompts}
+            )
+        )
+
+        return prompts
+
+    async def deserialize(
+        self,
+        prompt: Prompt,
+        aiconfig: "AIConfigRuntime",
+        params: Optional[dict[Any, Any]] = {},
+    ) -> dict[Any, Any]:
+        """
+        Defines how to parse a prompt in the .aiconfig for a particular model
+        and constructs the completion params for that model.
+
+        Args:
+            prompt (Prompt): The prompt to resolve.
+            aiconfig (AIConfigRuntime): The AIConfig that the prompt belongs to.
+            params (dict, optional): Parameters to substitute into the prompt.
+
+        Returns:
+            dict: Model-specific completion parameters.
+        """
+        await aiconfig.callback_manager.run_callbacks(
+            CallbackEvent(
+                "on_deserialize_start",
+                __name__,
+                {"prompt": prompt, "params": params},
+            )
+        )
+        # Build Completion params
+        model_settings = self.get_model_settings(prompt, aiconfig)
+
+        completion_data = refine_chat_completion_params(
+            model_settings, aiconfig, prompt
+        )
+
+        resolved_prompt = resolve_prompt(prompt, params, aiconfig)
+
+        # Claude is trained with RLHF, so the prompt must be wrapped in the Human/Assistant markers.
+        # See https://docs.anthropic.com/claude/docs/introduction-to-prompt-design#human--assistant-formatting
+        completion_data[
+            "prompt"
+        ] = f"{HUMAN_PROMPT} {resolved_prompt}{AI_PROMPT}"
+
+        await aiconfig.callback_manager.run_callbacks(
+            CallbackEvent(
+                "on_deserialize_complete",
+                __name__,
+                {"output": completion_data},
+            )
+        )
+
+        return completion_data
+
+    async def run_inference(
+        self,
+        prompt: Prompt,
+        aiconfig: "AIConfigRuntime",
+        options: Optional[InferenceOptions] = None,
+        parameters: Dict[Any, Any] = {},
+    ) -> List[Output]:
+        await aiconfig.callback_manager.run_callbacks(
+            CallbackEvent(
+                "on_run_start",
+                __name__,
+                {
+                    "prompt": prompt,
+                    "options": options,
+                    "parameters": parameters,
+                },
+            )
+        )
+
+        if self.client is None:
+            # AWS credentials can come either from the environment or from ~/.aws/credentials.
+            # Let Anthropic's client handle AWS credential validation; it happens on the API call, not on client construction.
+            self.client = AnthropicBedrock()
+
+        completion_data = await self.deserialize(prompt, aiconfig, parameters)
+
+        # If streaming is enabled in the runtime options or the config, stream. Otherwise, don't stream.
+        stream = True  # Default value
+        if options is not None and options.stream is not None:
+            stream = options.stream
+        elif "stream" in completion_data:
+            stream = completion_data["stream"]
+
+        completion_data["stream"] = stream
+
+        response = self.client.completions.create(**completion_data)  # type: ignore (pyright doesn't understand response object)
+
+        output = None
+        if stream:
+            output = construct_stream_output(response, options)  # type: ignore
+        else:
+            output = construct_output(response)  # type: ignore
+
+        # rewrite or extend list of outputs?
+        prompt.outputs = [output]
+
+        await aiconfig.callback_manager.run_callbacks(
+            CallbackEvent(
+                "on_run_complete", __name__, {"result": prompt.outputs}
+            )
+        )
+        return prompt.outputs
+
+    def get_output_text(
+        self,
+        prompt: Prompt,
+        aiconfig: "AIConfigRuntime",
+        output: Optional[Output] = None,
+    ) -> str:
+        if output is None:
+            output = aiconfig.get_latest_output(prompt)
+
+        if output is None:
+            return ""
+
+        if output.output_type == "execute_result":
+            output_data = output.data
+            if isinstance(output_data, str):
+                return output_data
+
+            # Claude outputs should only ever be in string
+            # format, so we shouldn't get here, but just being safe
+            return json.dumps(output_data, indent=2)
+        return ""
+
+
+def refine_chat_completion_params(
+    model_settings: Dict[Any, Any], aiconfig: "AIConfigRuntime", prompt: Prompt
+) -> Dict[Any, Any]:
+    # Completion parameters to be used for Claude's Text Completions API.
+    # See https://docs.anthropic.com/claude/reference/complete_post
+    # The prompt is handled separately; streaming is handled separately.
+    supported_keys = {
+        "max_tokens_to_sample",
+        "metadata",
+        "model",
+        "stop_sequences",
+        "temperature",
+        "top_k",
+        "top_p",
+    }
+
+    completion_data: Dict[str, Any] = {}
+    for key in supported_keys:
+        if key in model_settings:
+            completion_data[key] = model_settings[key]
+
+    # Explicitly set the model to use if not already specified
+    if completion_data.get("model") is None:
+        model_name = aiconfig.get_model_name(prompt)
+        completion_data["model"] = model_name
+
+    return completion_data
+
+
+def construct_output(response: Completion) -> Output:
+    """
+    Constructs the output for a non-streaming Text Completions response.
+
+    The response contains text-based output.
+    See https://github.com/anthropics/anthropic-bedrock-python/blob/728669a89e08b2337c876906a57cbd88d0b7b282/src/anthropic_bedrock/types/completion.py#L9
+    """
+    return ExecuteResult(
+        output_type="execute_result",
+        data=response.completion,
+        execution_count=0,
+        metadata=response.model_dump(),
+    )
+
+
+def construct_stream_output(
+    response: Stream[Completion], options: Union[InferenceOptions, None]
+) -> Output:
+    """
+    Constructs the output for a streaming response.
+
+    Args:
+        response: Stream of completions
+        options (InferenceOptions): The inference options. Used to determine the stream callback.
+    """
+    accumulated_message = ""
+    output = None
+    # The Claude Bedrock API doesn't support multiple outputs.
+    # See https://docs.anthropic.com/claude/reference/complete_post for more info.
+    index = 0
+    metadata = (
+        {}
+    )  # TODO: extract the completion stop reason from the response and add it to the metadata
+
+    for iteration in response:
+        new_text = iteration.completion
+
+        accumulated_message += new_text
+
+        if options is not None and isinstance(
+            options.stream_callback, Callable
+        ):
+            options.stream_callback(new_text, accumulated_message, index)
+
+        output = ExecuteResult(
+            output_type="execute_result",
+            data=accumulated_message,
+            execution_count=index,
+            metadata=metadata,
+        )
+
+    return output

From 21d2574519664eb9df488524a5830f149880c360 Mon Sep 17 00:00:00 2001
From: "Ankush Pala ankush@lastmileai.dev" <>
Date: Mon, 29 Jan 2024 17:13:23 -0500
Subject: [PATCH 2/2] [editor] Claude Bedrock Prompt Schema

## Test plan
https://github.com/lastmile-ai/aiconfig/assets/141073967/29c1faa7-7d13-412f-8606-9ad556eb1c52
---
 .../ClaudeBedrockPromptSchema.ts              | 71 +++++++++++++++++++
 .../editor/client/src/utils/promptUtils.ts    |  3 +
 2 files changed, 74 insertions(+)
 create mode 100644 python/src/aiconfig/editor/client/src/shared/prompt_schemas/ClaudeBedrockPromptSchema.ts

diff --git a/python/src/aiconfig/editor/client/src/shared/prompt_schemas/ClaudeBedrockPromptSchema.ts b/python/src/aiconfig/editor/client/src/shared/prompt_schemas/ClaudeBedrockPromptSchema.ts
new file mode 100644
index 000000000..7d23e93a3
--- /dev/null
+++ b/python/src/aiconfig/editor/client/src/shared/prompt_schemas/ClaudeBedrockPromptSchema.ts
@@ -0,0 +1,71 @@
+import { PromptSchema } from "../../utils/promptUtils";
+
+export const ClaudeBedrockPromptSchema: PromptSchema = {
+  // See https://docs.anthropic.com/claude/reference/complete_post
+  // for settings and defaults. The settings below are the supported settings specified in the
+  // ClaudeBedrockModelParser refine_chat_completion_params implementation.
+  input: {
+    type: "string",
+  },
+  model_settings: {
+    type: "object",
+    properties: {
+      model: {
+        type: "string",
+      },
+      max_tokens_to_sample: {
+        type: "number",
+        description: `The maximum number of tokens to generate before stopping.
+        Note that the model may stop before reaching this maximum; this only sets an upper bound on the number of tokens generated.`,
+      },
+      stop_sequences: {
+        type: "array",
+        items: {
+          type: "string",
+        },
+        description: `Sequences that will cause the model to stop generating.`,
+      },
+      stream: {
+        type: "boolean",
+        default: true,
+        description: `If true, the completion is streamed token by token. If false, it is returned in a single response.`,
+      },
+      temperature: {
+        type: "number",
+        minimum: 0.0,
+        maximum: 1.0,
+        description: `Amount of randomness injected into the response.`,
+      },
+      top_p: {
+        type: "number",
+        minimum: 0.0,
+        maximum: 1.0,
+        description: `In nucleus sampling, we compute the cumulative distribution over all the options for each subsequent token in decreasing probability order and cut it off once it reaches a particular probability specified by top_p.
+        You should either alter temperature or top_p, but not both.`,
+      },
+      top_k: {
+        type: "number",
+        description: `Only sample from the top K options for each subsequent token.
+        Used to remove "long tail" low probability responses.`,
+      },
+      metadata: {
+        type: "object",
+        properties: {
+          user_id: {
+            type: "string",
+          },
+        },
+        description: `An object describing metadata about the request. (Claude specific)`,
+      },
+    },
+    required: ["model", "max_tokens_to_sample", "stop_sequences"],
+  },
+  prompt_metadata: {
+    type: "object",
+    properties: {
+      remember_chat_context: {
+        type: "boolean",
+      },
+    },
+  },
+};
diff --git a/python/src/aiconfig/editor/client/src/utils/promptUtils.ts b/python/src/aiconfig/editor/client/src/utils/promptUtils.ts
index ef4eb77a6..968aa874c 100644
--- a/python/src/aiconfig/editor/client/src/utils/promptUtils.ts
+++ b/python/src/aiconfig/editor/client/src/utils/promptUtils.ts
@@ -18,6 +18,7 @@ import { HuggingFaceTextGenerationRemoteInferencePromptSchema } from "../shared/
 import { HuggingFaceTextSummarizationRemoteInferencePromptSchema } from "../shared/prompt_schemas/HuggingFaceTextSummarizationRemoteInferencePromptSchema";
 import { HuggingFaceTextTranslationRemoteInferencePromptSchema } from "../shared/prompt_schemas/HuggingFaceTextTranslationRemoteInferencePromptSchema";
 import { HuggingFaceImage2TextRemoteInferencePromptSchema } from "../shared/prompt_schemas/HuggingFaceImage2TextRemoteInferencePromptSchema";
+import { ClaudeBedrockPromptSchema } from "../shared/prompt_schemas/ClaudeBedrockPromptSchema";
 
 /**
  * Get the name of the model for the specified prompt. The name will either be specified in the prompt's
@@ -81,6 +82,8 @@ export const PROMPT_SCHEMAS: Record<string, PromptSchema> = {
   "dall-e-2": DalleImageGenerationParserPromptSchema,
   "dall-e-3": DalleImageGenerationParserPromptSchema,
 
+  "ClaudeBedrockModelParser": ClaudeBedrockPromptSchema,
+
   HuggingFaceImage2TextRemoteInference:
     HuggingFaceImage2TextRemoteInferencePromptSchema,
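
For reviewers: a minimal sketch of the raw `anthropic_bedrock` Text Completions calls that `ClaudeBedrockModelParser` wraps (deserialize builds the `{HUMAN_PROMPT} ... {AI_PROMPT}` prompt string; run_inference makes the call). It assumes AWS credentials are available via the environment or `~/.aws/credentials`, and "anthropic.claude-v2" is a placeholder Bedrock model ID.

# Non-streaming call: mirrors construct_output(), which reads response.completion.
from anthropic_bedrock import AI_PROMPT, HUMAN_PROMPT, AnthropicBedrock

client = AnthropicBedrock()

response = client.completions.create(
    model="anthropic.claude-v2",  # placeholder Bedrock model ID
    max_tokens_to_sample=256,
    prompt=f"{HUMAN_PROMPT} Tell me a joke about clouds.{AI_PROMPT}",
)
print(response.completion)

# Streaming call: mirrors construct_stream_output(), which accumulates
# the .completion text of each streamed chunk.
stream = client.completions.create(
    model="anthropic.claude-v2",
    max_tokens_to_sample=256,
    prompt=f"{HUMAN_PROMPT} Tell me a joke about clouds.{AI_PROMPT}",
    stream=True,
)
accumulated = ""
for chunk in stream:
    accumulated += chunk.completion
print(accumulated)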