diff --git a/.embedmeignore b/.embedmeignore
new file mode 100644
index 00000000..ca5368c3
--- /dev/null
+++ b/.embedmeignore
@@ -0,0 +1 @@
+docs/README.md
diff --git a/.env.template b/.env.template
index ed796c50..d3137938 100644
--- a/.env.template
+++ b/.env.template
@@ -7,46 +7,56 @@ BEE_FRAMEWORK_LOG_SINGLE_LINE="false"
# BEE_FRAMEWORK_INSTRUMENTATION_ENABLED=true
# BEE_FRAMEWORK_INSTRUMENTATION_IGNORED_KEYS=
-# For WatsonX LLM Adapter
+# For Watsonx LLM Adapter
+# WATSONX_CHAT_MODEL=""
+# WATSONX_EMBEDDING_MODEL=""
# WATSONX_API_KEY=""
# WATSONX_PROJECT_ID=""
-# WATSONX_REGION="us-south"
+# WATSONX_SPACE_ID=""
+# WATSONX_VERSION=""
+# WATSONX_REGION=""
# For Ollama LLM Adapter
-# OLLAMA_HOST="http://0.0.0.0:11434"
-# OLLAMA_MODEL="deepseek-r1:8b"
+# OLLAMA_CHAT_MODEL=""
+# OLLAMA_EMBEDDING_MODEL=""
+# OLLAMA_BASE_URL=""
# For OpenAI LLM Adapter
+# OPENAI_CHAT_MODEL=""
+# OPENAI_EMBEDDING_MODEL=""
+# OPENAI_API_ENDPOINT=""
# OPENAI_API_KEY=""
+# OPENAI_API_HEADERS=""
# For Azure OpenAI LLM Adapter
-# AZURE_OPENAI_API_VERSION=""
-# AZURE_OPENAI_API_DEPLOYMENT=""
+# AZURE_OPENAI_CHAT_MODEL=""
+# AZURE_OPENAI_EMBEDDING_MODEL=""
# AZURE_OPENAI_API_KEY=""
# AZURE_OPENAI_API_ENDPOINT=""
+# AZURE_OPENAI_API_RESOURCE=""
+# AZURE_OPENAI_API_VERSION=""
# For Groq LLM Adapter
+# GROQ_CHAT_MODEL=""
+# GROQ_EMBEDDING_MODEL=""
+# GROQ_API_HOST=""
# GROQ_API_KEY=""
-# For IBM VLLM LLM Adapter
-# IBM_VLLM_URL=""
-# IBM_VLLM_ROOT_CERT=""
-# IBM_VLLM_CERT_CHAIN=""
-# IBM_VLLM_PRIVATE_KEY=""
-
-# For IBM RITS LLM Adapter
-# IBM_RITS_URL=""
-# IBM_RITS_API_KEY=""
-# IBM_RITS_MODEL=ibm-granite/granite-3.0-8b-instruct
-
-# LLM Provider, used for some of the example agents
-# (watsonx/ollama/openai/groq/ibmvllm/ibmrits)
-# LLM_BACKEND="ollama"
-
-# For GCP VertexAI Adapter
+# For Google Vertex Adapter
+# GOOGLE_VERTEX_CHAT_MODEL=""
+# GOOGLE_VERTEX_EMBEDDING_MODEL=""
+# GOOGLE_VERTEX_PROJECT=""
+# GOOGLE_VERTEX_ENDPOINT=""
+# GOOGLE_VERTEX_LOCATION=""
# GOOGLE_APPLICATION_CREDENTIALS=""
-# GCP_VERTEXAI_PROJECT=""
-# GCP_VERTEXAI_LOCATION=""
+
+# For Amazon Bedrock
+# AWS_CHAT_MODEL=""
+# AWS_EMBEDDING_MODEL=""
+# AWS_ACCESS_KEY_ID=""
+# AWS_SECRET_ACCESS_KEY=""
+# AWS_REGION=""
+# AWS_SESSION_TOKEN=""
# Tools
# CODE_INTERPRETER_URL="http://127.0.0.1:50081"
@@ -63,4 +73,5 @@ BEE_FRAMEWORK_LOG_SINGLE_LINE="false"
# ELASTICSEARCH_API_KEY=""
## Third-party services
-# TAVILY_API_KEY=your-api-key-here
\ No newline at end of file
+# TAVILY_API_KEY=your-api-key-here
+
diff --git a/.github/workflows/examples-tests.yml b/.github/workflows/examples-tests.yml
index 6fd4ddc0..0839bee5 100644
--- a/.github/workflows/examples-tests.yml
+++ b/.github/workflows/examples-tests.yml
@@ -48,7 +48,7 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_SEARCH_CSE_ID }}
- # TODO: enable WatsonX later
+ # TODO: enable Watsonx later
# WATSONX_API_KEY: ${{ secrets.WATSONX_API_KEY }}
# WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
# WATSONX_SPACE_ID: ${{ secrets.WATSONX_SPACE_ID }}
diff --git a/.gitignore b/.gitignore
index 42e82e55..6c439d3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,3 @@
-scripts/ibm_vllm_generate_protos/dist
-scripts/ibm_vllm_generate_protos/dts
-scripts/ibm_vllm_generate_protos/types
-
### Node template
# Logs
logs
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 01b54124..3e3e4c2a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -59,26 +59,7 @@ yarn install --immutable
yarn prepare
```
-5. **Setup environmental variables:** To run E2E Tests, you should set the following variables in your `.env` file in the repository's root.
-
-```bash
-# At least one provider API key or an OLLAMA_HOST must be defined!
-OPENAI_API_KEY=""
-GROQ_API_KEY=""
-WATSONX_API_KEY=""
-WATSONX_PROJECT_ID=""
-OLLAMA_HOST=""
-AZURE_OPENAI_API_VERSION=""
-AZURE_OPENAI_DEPLOYMENT=""
-AZURE_OPENAI_API_KEY=""
-AZURE_OPENAI_API_ENDPOINT=""
-GOOGLE_APPLICATION_CREDENTIALS=""
-GCP_VERTEXAI_PROJECT=""
-GCP_VERTEXAI_LOCATION=""
-
-WATSONX_SPACE_ID="" # optional
-WATSONX_DEPLOYMENT_ID="" # optional
-```
+5. **Set up environment variables:** To run E2E tests, set the required environment variables in your `.env` file (see `.env.template` in the repository root).
6. **Follow Conventional Commit Messages:** We use [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/#summary) to structure our commit messages. This helps maintain a clean and manageable commit history. Please use the following format:
diff --git a/README.md b/README.md
index 48281b6a..376c1d53 100644
--- a/README.md
+++ b/README.md
@@ -14,10 +14,11 @@
Open-source framework for building, deploying, and serving powerful multi-agent workflows at scale.
-π **Bee Agent Framework** is an open-source TypeScript library for building **production-ready multi-agent systems**. Pick from a variety of [π LLM providers](/docs/llms.md#providers-adapters), customize the [π prompt templates](/docs/templates.md), create [π€ agents](/docs/agents.md), equip agents with pre-made [π οΈ tools](/docs/tools.md), and orchestrate [π€π€π€ multi-agent workflows](/docs/workflows.md)! πͺ
+π **Bee Agent Framework** is an open-source TypeScript library for building **production-ready multi-agent systems**. Pick from a variety of [π AI Providers](/docs/backend.md), customize the [π prompt templates](/docs/templates.md), create [π€ agents](/docs/agents.md), equip agents with pre-made [π οΈ tools](/docs/tools.md), and orchestrate [π€π€π€ multi-agent workflows](/docs/workflows.md)! πͺ
## Latest updates
+- π **2025-02-07**: Introduced [Backend](/docs/backend.md) module to simplify working with AI services (chat, embedding). See [migration guide](/docs/migration_guide.md).
- π§ **2025-01-28**: Added support for [DeepSeek R1](https://api-docs.deepseek.com/news/news250120), check out the [Competitive Analysis Workflow example](https://github.com/i-am-bee/bee-agent-framework/tree/main/examples/workflows/competitive-analysis)
- π **2025-01-09**:
- Introduced [Workflows](/docs/workflows.md), a way of building multi-agent systems.
@@ -30,7 +31,7 @@ For a full changelog, see the [releases page](https://github.com/i-am-bee/bee-ag
## Why pick Bee?
- βοΈ **Battle-tested.** Bee Agent Framework is at the core of [BeeAI](https://iambee.ai), a powerful platform for building chat assistants and custom AI-powered apps. BeeAI is in a closed beta, but already used by hundreds of users. And it's [fully open-source](https://github.com/i-am-bee/bee-ui) too!
-- π **Production-grade.** In an actual product, you have to reduce token spend through [memory strategies](/docs/memory.md), store and restore the agent state through [(de)serialization](/docs/serialization.md), generate [structured output](/examples/llms/structured.ts), or execute generated code in a [sandboxed environment](https://github.com/i-am-bee/bee-code-interpreter). Leave all that to Bee and focus on building!
+- π **Production-grade.** In an actual product, you have to reduce token spend through [memory strategies](/docs/memory.md), store and restore the agent state through [(de)serialization](/docs/serialization.md), generate [structured output](/examples/backend/structured.ts), or execute generated code in a [sandboxed environment](https://github.com/i-am-bee/bee-code-interpreter). Leave all that to Bee and focus on building!
- π€ **Built for open-source models.** Pick any LLM you want β including small and open-source models. The framework is designed to perform robustly with [Granite](https://www.ibm.com/granite/docs/) and [Llama 3.x](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct). A full agentic workflow can run on your laptop!
- π’ **Bee cares about the sad path too.** Real-world applications encounter errors and failures. Bee lets you observe the full agent workflow through [events](/docs/emitter.md), collect [telemetry](/docs/instrumentation.md), [log](/docs/logger.md) diagnostic data, and throws clear and well-defined [exceptions](/docs/errors.md). Bees may be insects, but not bugs!
- π³ **A part of something greater.** Bee isn't just a framework, but a full ecosystem. Use [Bee UI](https://github.com/i-am-bee/bee-ui) to chat with your agents visually. [Bee Observe](https://github.com/i-am-bee/bee-observe) collects and manages telemetry. [Bee Code Interpreter](https://github.com/i-am-bee/bee-code-interpreter) runs generated code safely in a secure sandbox. The Bee ecosystem also integrates with [Model Context Protocol](https://i-am-bee.github.io/bee-agent-framework/#/tools?id=using-the-mcptool-class), allowing interoperability with the wider agent ecosystem!
@@ -47,7 +48,7 @@ import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMem
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { AgentWorkflow } from "bee-agent-framework/experimental/workflows/agent";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
+import { Message, Role } from "bee-agent-framework/llms/primitives/message";
import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
const workflow = new AgentWorkflow();
@@ -77,7 +78,7 @@ workflow.addAgent({
const memory = new UnconstrainedMemory();
await memory.add(
- BaseMessage.of({
+ Message.of({
role: Role.USER,
text: "What is the capital of France and what is the current weather there?",
meta: { createdAt: new Date() },
@@ -119,16 +120,16 @@ yarn add bee-agent-framework
```ts
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
-const llm = new OllamaChatLLM(); // default is llama3.1 (8B), it is recommended to use 70B model
+const llm = new OllamaChatModel("llama3.1"); // llama3.1 (8B) is used here; a larger model (e.g. 70B) is recommended for better results
const agent = new BeeAgent({
llm, // for more explore 'bee-agent-framework/adapters'
- memory: new TokenMemory({ llm }), // for more explore 'bee-agent-framework/memory'
+ memory: new TokenMemory(), // for more explore 'bee-agent-framework/memory'
tools: [new DuckDuckGoSearchTool(), new OpenMeteoTool()], // for more explore 'bee-agent-framework/tools'
});
@@ -174,22 +175,21 @@ console.log(`Agent π€ : `, response.result.text);
The source directory (`src`) provides numerous modules that one can use.
-| Name | Description |
-| ------------------------------------------------ | ------------------------------------------------------------------------------------------- |
-| [**agents**](/docs/agents.md) | Base classes defining the common interface for agent. |
-| [**workflows**](/docs/workflows.md) | Build agentic applications in a declarative way via [workflows](/docs/workflows.md). |
-| [**llms**](/docs/llms.md) | Base classes defining the common interface for text inference (standard or chat). |
-| [**template**](/docs/templates.md) | Prompt Templating system based on `Mustache` with various improvements. |
-| [**memory**](/docs/memory.md) | Various types of memories to use with agent. |
-| [**tools**](/docs/tools.md) | Tools that an agent can use. |
-| [**cache**](/docs/cache.md) | Preset of different caching approaches that can be used together with tools. |
-| [**errors**](/docs/errors.md) | Error classes and helpers to catch errors fast. |
-| [**adapters**](/docs/llms.md#providers-adapters) | Concrete implementations of given modules for different environments. |
-| [**logger**](/docs/logger.md) | Core component for logging all actions within the framework. |
-| [**serializer**](/docs/serialization.md) | Core component for the ability to serialize/deserialize modules into the serialized format. |
-| [**version**](/docs/version.md) | Constants representing the framework (e.g., latest version) |
-| [**emitter**](/docs/emitter.md) | Bringing visibility to the system by emitting events. |
-| **internals** | Modules used by other modules within the framework. |
+| Name | Description |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------- |
+| [**agents**](/docs/agents.md)            | Base classes defining the common interface for agents.                                       |
+| [**workflows**](/docs/workflows.md) | Build agentic applications in a declarative way via [workflows](/docs/workflows.md). |
+| [**backend**](/docs/backend.md)          | Functionality related to AI models (chat, embedding, image, tool calling, ...).              |
+| [**template**](/docs/templates.md) | Prompt Templating system based on `Mustache` with various improvements. |
+| [**memory**](/docs/memory.md) | Various types of memories to use with agent. |
+| [**tools**](/docs/tools.md) | Tools that an agent can use. |
+| [**cache**](/docs/cache.md) | Preset of different caching approaches that can be used together with tools. |
+| [**errors**](/docs/errors.md) | Error classes and helpers to catch errors fast. |
+| [**logger**](/docs/logger.md) | Core component for logging all actions within the framework. |
+| [**serializer**](/docs/serialization.md) | Core component for the ability to serialize/deserialize modules into the serialized format. |
+| [**version**](/docs/version.md) | Constants representing the framework (e.g., latest version) |
+| [**emitter**](/docs/emitter.md) | Bringing visibility to the system by emitting events. |
+| **internals** | Modules used by other modules within the framework. |
To see more in-depth explanation see [overview](/docs/overview.md).
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index 01b54124..3e3e4c2a 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -59,26 +59,7 @@ yarn install --immutable
yarn prepare
```
-5. **Setup environmental variables:** To run E2E Tests, you should set the following variables in your `.env` file in the repository's root.
-
-```bash
-# At least one provider API key or an OLLAMA_HOST must be defined!
-OPENAI_API_KEY=""
-GROQ_API_KEY=""
-WATSONX_API_KEY=""
-WATSONX_PROJECT_ID=""
-OLLAMA_HOST=""
-AZURE_OPENAI_API_VERSION=""
-AZURE_OPENAI_DEPLOYMENT=""
-AZURE_OPENAI_API_KEY=""
-AZURE_OPENAI_API_ENDPOINT=""
-GOOGLE_APPLICATION_CREDENTIALS=""
-GCP_VERTEXAI_PROJECT=""
-GCP_VERTEXAI_LOCATION=""
-
-WATSONX_SPACE_ID="" # optional
-WATSONX_DEPLOYMENT_ID="" # optional
-```
+5. **Set up environment variables:** To run E2E tests, set the required environment variables in your `.env` file (see `.env.template` in the repository root).
6. **Follow Conventional Commit Messages:** We use [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/#summary) to structure our commit messages. This helps maintain a clean and manageable commit history. Please use the following format:
diff --git a/docs/README.md b/docs/README.md
index 48281b6a..376c1d53 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -14,10 +14,11 @@
Open-source framework for building, deploying, and serving powerful multi-agent workflows at scale.
-π **Bee Agent Framework** is an open-source TypeScript library for building **production-ready multi-agent systems**. Pick from a variety of [π LLM providers](/docs/llms.md#providers-adapters), customize the [π prompt templates](/docs/templates.md), create [π€ agents](/docs/agents.md), equip agents with pre-made [π οΈ tools](/docs/tools.md), and orchestrate [π€π€π€ multi-agent workflows](/docs/workflows.md)! πͺ
+π **Bee Agent Framework** is an open-source TypeScript library for building **production-ready multi-agent systems**. Pick from a variety of [π AI Providers](/docs/backend.md), customize the [π prompt templates](/docs/templates.md), create [π€ agents](/docs/agents.md), equip agents with pre-made [π οΈ tools](/docs/tools.md), and orchestrate [π€π€π€ multi-agent workflows](/docs/workflows.md)! πͺ
## Latest updates
+- π **2025-02-07**: Introduced [Backend](/docs/backend.md) module to simplify working with AI services (chat, embedding). See [migration guide](/docs/migration_guide.md).
- π§ **2025-01-28**: Added support for [DeepSeek R1](https://api-docs.deepseek.com/news/news250120), check out the [Competitive Analysis Workflow example](https://github.com/i-am-bee/bee-agent-framework/tree/main/examples/workflows/competitive-analysis)
- π **2025-01-09**:
- Introduced [Workflows](/docs/workflows.md), a way of building multi-agent systems.
@@ -30,7 +31,7 @@ For a full changelog, see the [releases page](https://github.com/i-am-bee/bee-ag
## Why pick Bee?
- βοΈ **Battle-tested.** Bee Agent Framework is at the core of [BeeAI](https://iambee.ai), a powerful platform for building chat assistants and custom AI-powered apps. BeeAI is in a closed beta, but already used by hundreds of users. And it's [fully open-source](https://github.com/i-am-bee/bee-ui) too!
-- π **Production-grade.** In an actual product, you have to reduce token spend through [memory strategies](/docs/memory.md), store and restore the agent state through [(de)serialization](/docs/serialization.md), generate [structured output](/examples/llms/structured.ts), or execute generated code in a [sandboxed environment](https://github.com/i-am-bee/bee-code-interpreter). Leave all that to Bee and focus on building!
+- π **Production-grade.** In an actual product, you have to reduce token spend through [memory strategies](/docs/memory.md), store and restore the agent state through [(de)serialization](/docs/serialization.md), generate [structured output](/examples/backend/structured.ts), or execute generated code in a [sandboxed environment](https://github.com/i-am-bee/bee-code-interpreter). Leave all that to Bee and focus on building!
- π€ **Built for open-source models.** Pick any LLM you want β including small and open-source models. The framework is designed to perform robustly with [Granite](https://www.ibm.com/granite/docs/) and [Llama 3.x](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct). A full agentic workflow can run on your laptop!
- π’ **Bee cares about the sad path too.** Real-world applications encounter errors and failures. Bee lets you observe the full agent workflow through [events](/docs/emitter.md), collect [telemetry](/docs/instrumentation.md), [log](/docs/logger.md) diagnostic data, and throws clear and well-defined [exceptions](/docs/errors.md). Bees may be insects, but not bugs!
- π³ **A part of something greater.** Bee isn't just a framework, but a full ecosystem. Use [Bee UI](https://github.com/i-am-bee/bee-ui) to chat with your agents visually. [Bee Observe](https://github.com/i-am-bee/bee-observe) collects and manages telemetry. [Bee Code Interpreter](https://github.com/i-am-bee/bee-code-interpreter) runs generated code safely in a secure sandbox. The Bee ecosystem also integrates with [Model Context Protocol](https://i-am-bee.github.io/bee-agent-framework/#/tools?id=using-the-mcptool-class), allowing interoperability with the wider agent ecosystem!
@@ -47,7 +48,7 @@ import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMem
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { AgentWorkflow } from "bee-agent-framework/experimental/workflows/agent";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
+import { Message, Role } from "bee-agent-framework/llms/primitives/message";
import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
const workflow = new AgentWorkflow();
@@ -77,7 +78,7 @@ workflow.addAgent({
const memory = new UnconstrainedMemory();
await memory.add(
- BaseMessage.of({
+ Message.of({
role: Role.USER,
text: "What is the capital of France and what is the current weather there?",
meta: { createdAt: new Date() },
@@ -119,16 +120,16 @@ yarn add bee-agent-framework
```ts
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
-const llm = new OllamaChatLLM(); // default is llama3.1 (8B), it is recommended to use 70B model
+const llm = new OllamaChatModel("llama3.1"); // llama3.1 (8B) is used here; a larger model (e.g. 70B) is recommended for better results
const agent = new BeeAgent({
llm, // for more explore 'bee-agent-framework/adapters'
- memory: new TokenMemory({ llm }), // for more explore 'bee-agent-framework/memory'
+ memory: new TokenMemory(), // for more explore 'bee-agent-framework/memory'
tools: [new DuckDuckGoSearchTool(), new OpenMeteoTool()], // for more explore 'bee-agent-framework/tools'
});
@@ -174,22 +175,21 @@ console.log(`Agent π€ : `, response.result.text);
The source directory (`src`) provides numerous modules that one can use.
-| Name | Description |
-| ------------------------------------------------ | ------------------------------------------------------------------------------------------- |
-| [**agents**](/docs/agents.md) | Base classes defining the common interface for agent. |
-| [**workflows**](/docs/workflows.md) | Build agentic applications in a declarative way via [workflows](/docs/workflows.md). |
-| [**llms**](/docs/llms.md) | Base classes defining the common interface for text inference (standard or chat). |
-| [**template**](/docs/templates.md) | Prompt Templating system based on `Mustache` with various improvements. |
-| [**memory**](/docs/memory.md) | Various types of memories to use with agent. |
-| [**tools**](/docs/tools.md) | Tools that an agent can use. |
-| [**cache**](/docs/cache.md) | Preset of different caching approaches that can be used together with tools. |
-| [**errors**](/docs/errors.md) | Error classes and helpers to catch errors fast. |
-| [**adapters**](/docs/llms.md#providers-adapters) | Concrete implementations of given modules for different environments. |
-| [**logger**](/docs/logger.md) | Core component for logging all actions within the framework. |
-| [**serializer**](/docs/serialization.md) | Core component for the ability to serialize/deserialize modules into the serialized format. |
-| [**version**](/docs/version.md) | Constants representing the framework (e.g., latest version) |
-| [**emitter**](/docs/emitter.md) | Bringing visibility to the system by emitting events. |
-| **internals** | Modules used by other modules within the framework. |
+| Name | Description |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------- |
+| [**agents**](/docs/agents.md)            | Base classes defining the common interface for agents.                                       |
+| [**workflows**](/docs/workflows.md) | Build agentic applications in a declarative way via [workflows](/docs/workflows.md). |
+| [**backend**](/docs/backend.md)          | Functionality related to AI models (chat, embedding, image, tool calling, ...).              |
+| [**template**](/docs/templates.md) | Prompt Templating system based on `Mustache` with various improvements. |
+| [**memory**](/docs/memory.md) | Various types of memories to use with agent. |
+| [**tools**](/docs/tools.md) | Tools that an agent can use. |
+| [**cache**](/docs/cache.md) | Preset of different caching approaches that can be used together with tools. |
+| [**errors**](/docs/errors.md) | Error classes and helpers to catch errors fast. |
+| [**logger**](/docs/logger.md) | Core component for logging all actions within the framework. |
+| [**serializer**](/docs/serialization.md) | Core component for the ability to serialize/deserialize modules into the serialized format. |
+| [**version**](/docs/version.md) | Constants representing the framework (e.g., latest version) |
+| [**emitter**](/docs/emitter.md) | Bringing visibility to the system by emitting events. |
+| **internals** | Modules used by other modules within the framework. |
To see more in-depth explanation see [overview](/docs/overview.md).
diff --git a/docs/_sidebar.md b/docs/_sidebar.md
index db1540a9..390a61e6 100644
--- a/docs/_sidebar.md
+++ b/docs/_sidebar.md
@@ -4,13 +4,14 @@
- [Overview](overview.md)
- [Examples](examples.md)
- [Tutorials](tutorials.md)
+ - [Migration Guide](migration_guide.md)
- [Changelog](CHANGELOG.md)
- Modules
- [Agents](agents.md)
- [Workflows](workflows.md)
- - [LLMs](llms.md)
+ - [Backend](backend.md)
- [Templates](templates.md)
- [Memory](memory.md)
- [Emitter](emitter.md)
diff --git a/docs/agents.md b/docs/agents.md
index e1992622..edd97f20 100644
--- a/docs/agents.md
+++ b/docs/agents.md
@@ -47,12 +47,12 @@ In the following example, we will transform the knowledge gained into code.
```ts
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
const agent = new BeeAgent({
- llm: new OllamaChatLLM(), // for more explore 'bee-agent-framework/adapters'
+ llm: new OllamaChatModel("llama3.1"), // for more explore 'bee-agent-framework/adapters'
memory: new UnconstrainedMemory(), // for more explore 'bee-agent-framework/memory'
tools: [new OpenMeteoTool()], // for more explore 'bee-agent-framework/tools'
});
diff --git a/docs/backend.md b/docs/backend.md
new file mode 100644
index 00000000..ff202da6
--- /dev/null
+++ b/docs/backend.md
@@ -0,0 +1,280 @@
+# Backend
+
+> [!TIP]
+>
+> Concrete implementations live under `bee-agent-framework/adapters/<provider>/backend`.
+>
+> Base abstractions live under `bee-agent-framework/backend`.
+
+The backend module is an umbrella module that encapsulates a unified way to work with the following functionalities:
+
+- Chat Models (via the `ChatModel` class)
+- Embedding Models (via the `EmbeddingModel` class)
+- Audio Models (coming soon)
+- Image Models (coming soon)
+
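+For a quick look at the unified interface, here is a minimal sketch (it only restates the `fromName` calls shown in the sections below, using the same example models):
+
+```ts
+import { ChatModel, EmbeddingModel } from "bee-agent-framework/backend/core";
+
+// both classes share the same "provider:model" naming scheme
+const chat = await ChatModel.fromName("ollama:llama3.1");
+const embedding = await EmbeddingModel.fromName("ollama:nomic-embed-text");
+```
+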
+## Providers (implementations)
+
+The following table lists the supported providers.
+
+| Name | Chat | Embedding | Dependency | Environment Variables |
+| ---------------- | :--: | :-------: | ------------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `Ollama`         |  ✅  |    ✅     | `ollama-ai-provider`     | OLLAMA_CHAT_MODEL<br>OLLAMA_EMBEDDING_MODEL<br>OLLAMA_BASE_URL                                                                                                          |
+| `OpenAI`         |  ✅  |    ✅     | `@ai-sdk/openai`         | OPENAI_CHAT_MODEL<br>OPENAI_EMBEDDING_MODEL<br>OPENAI_API_ENDPOINT<br>OPENAI_API_KEY<br>OPENAI_API_HEADERS                                                              |
+| `Groq`           |  ✅  |    ✅     | `@ai-sdk/groq`           | GROQ_CHAT_MODEL<br>GROQ_EMBEDDING_MODEL<br>GROQ_API_HOST<br>GROQ_API_KEY                                                                                                |
+| `Amazon Bedrock` |  ✅  |    ✅     | `@ai-sdk/amazon-bedrock` | AWS_CHAT_MODEL<br>AWS_EMBEDDING_MODEL<br>AWS_ACCESS_KEY_ID<br>AWS_SECRET_ACCESS_KEY<br>AWS_REGION<br>AWS_SESSION_TOKEN                                                  |
+| `Google Vertex`  |  ✅  |    ✅     | `@ai-sdk/google-vertex`  | GOOGLE_VERTEX_CHAT_MODEL<br>GOOGLE_VERTEX_EMBEDDING_MODEL<br>GOOGLE_VERTEX_PROJECT<br>GOOGLE_VERTEX_ENDPOINT<br>GOOGLE_VERTEX_LOCATION                                  |
+| `Watsonx`        |  ✅  |    ✅     | `@ibm-cloud/watsonx-ai`  | WATSONX_CHAT_MODEL<br>WATSONX_EMBEDDING_MODEL<br>WATSONX_API_KEY<br>WATSONX_PROJECT_ID<br>WATSONX_SPACE_ID<br>WATSONX_VERSION<br>WATSONX_REGION                         |
+| `Azure OpenAI`   |  ✅  |    ✅     | `@ai-sdk/azure`          | AZURE_OPENAI_CHAT_MODEL<br>AZURE_OPENAI_EMBEDDING_MODEL<br>AZURE_OPENAI_API_KEY<br>AZURE_OPENAI_API_ENDPOINT<br>AZURE_OPENAI_API_RESOURCE<br>AZURE_OPENAI_API_VERSION   |
+
+> [!TIP]
+>
+> If you don't see your provider, please raise an issue [here](https://github.com/i-am-bee/bee-agent-framework/discussions). Meanwhile, you can use the [LangChain adapter](/examples/backend/providers/langchain.ts).
+
+### Initialization
+
+```ts
+import { Backend } from "bee-agent-framework/backend/core";
+
+const backend = await Backend.fromProvider("watsonx"); // use a provider's name from the table above and ensure all its ENVs are set
+console.log(backend.chat.modelId); // uses provider's default model or the one specified via env
+console.log(backend.embedding.modelId); // uses provider's default model or the one specified via env
+```
+
+All providers' examples can be found in [examples/backend/providers](/examples/backend/providers).
+
+## Chat Model
+
+The `ChatModel` class represents a Chat Large Language Model and can be instantiated in one of the following ways.
+
+```ts
+import { ChatModel } from "bee-agent-framework/backend/core";
+
+const model = await ChatModel.fromName("ollama:llama3.1");
+console.log(model.providerId); // ollama
+console.log(model.modelId); // llama3.1
+```
+
+Alternatively, you can create a concrete provider's chat model directly.
+
+```ts
+import { OpenAIChatModel } from "bee-agent-framework/adapters/openai/backend/chat";
+
+const model = new OpenAIChatModel(
+ "gpt-4o",
+ {
+ // optional provider settings
+ reasoningEffort: "low",
+ parallelToolCalls: false,
+ },
+ {
+ // optional provider client settings
+ baseURL: "your_custom_endpoint",
+ apiKey: "your_api_key",
+ compatibility: "compatible",
+ headers: {
+ CUSTOM_HEADER: "...",
+ },
+ },
+);
+```
+
+### Configuration
+
+```ts
+import { ChatModel } from "bee-agent-framework/backend/core";
+import { SlidingCache } from "bee-agent-framework/cache/slidingCache";
+
+const model = await ChatModel.fromName("watsonx:ibm/granite-3-8b-instruct");
+model.config({
+ parameters: {
+ maxTokens: 300,
+ temperature: 0.15,
+ topP: 1,
+ frequencyPenalty: 1.1,
+ topK: 1,
+ n: 1,
+ presencePenalty: 1,
+ seed: 7777,
+ stopSequences: ["\n\n"],
+ },
+ cache: new SlidingCache({
+ size: 25,
+ }),
+});
+```
+
+### Generation
+
+```ts
+import { ChatModel, UserMessage } from "bee-agent-framework/backend/core";
+
+const model = await ChatModel.fromName("ollama:llama3.1");
+const response = await model.create({
+ messages: [new UserMessage("Hello world!")],
+});
+console.log(response.getTextContent());
+```
+
+> [!NOTE]
+>
+> Execution parameters (those passed to `model.create({...})`) take precedence over the ones defined via `config`.
+
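+For illustration, a minimal sketch of this precedence (hypothetical values; it assumes execution parameters such as `temperature` and `maxTokens` may be passed to `create` alongside `messages`):
+
+```ts
+import { ChatModel, UserMessage } from "bee-agent-framework/backend/core";
+
+const model = await ChatModel.fromName("ollama:llama3.1");
+
+// defaults configured on the model instance
+model.config({ parameters: { temperature: 0.7, maxTokens: 300 } });
+
+// per-call values win: this request runs with temperature 0,
+// while maxTokens still falls back to the configured 300
+const response = await model.create({
+  messages: [new UserMessage("Reply with a single word.")],
+  temperature: 0,
+});
+console.log(response.getTextContent());
+```
+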
+### Stream
+
+```ts
+import { ChatModel, UserMessage } from "bee-agent-framework/backend/core";
+
+const model = await ChatModel.fromName("ollama:llama3.1");
+const response = await model
+ .create({
+ messages: [new UserMessage("Hello world!")],
+ stream: true,
+ })
+ .observe((emitter) => {
+ emitter.on("update", ({ value }) => {
+ console.log("token", value.getTextContent());
+ });
+ });
+
+console.log("Finish Reason:", response.finishReason);
+console.log("Token Usage:", response.usage);
+```
+
+### Structured Generation
+
+
+
+```ts
+import { ChatModel, UserMessage } from "bee-agent-framework/backend/core";
+import { z } from "zod";
+
+const model = await ChatModel.fromName("ollama:llama3.1");
+const response = await model.createStructure({
+ schema: z.union([
+ z.object({
+ firstName: z.string().min(1),
+ lastName: z.string().min(1),
+ address: z.string(),
+ age: z.number().int().min(1),
+ hobby: z.string(),
+ }),
+ z.object({
+ error: z.string(),
+ }),
+ ]),
+ messages: [new UserMessage("Generate a profile of a citizen of Europe.")],
+});
+console.log(response.object);
+```
+
+_Source: [examples/backend/structured.ts](/examples/backend/structured.ts)_
+
+### Tool Calling
+
+
+
+```ts
+import "dotenv/config";
+import {
+ ChatModel,
+ Message,
+ SystemMessage,
+ ToolMessage,
+ UserMessage,
+} from "bee-agent-framework/backend/core";
+import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
+import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
+import { AnyTool, ToolOutput } from "bee-agent-framework/tools/base";
+
+const model = await ChatModel.fromName("ollama:llama3.1");
+const tools: AnyTool[] = [new DuckDuckGoSearchTool(), new OpenMeteoTool()];
+const messages: Message[] = [
+ new SystemMessage("You are a helpful assistant. Use tools to provide a correct answer."),
+ new UserMessage("What's the fastest marathon time?"),
+];
+
+while (true) {
+ const response = await model.create({
+ messages,
+ tools,
+ });
+ messages.push(...response.messages);
+
+ const toolCalls = response.getToolCalls();
+ const toolResults = await Promise.all(
+ toolCalls.map(async ({ args, toolName, toolCallId }) => {
+ console.log(`-> running '${toolName}' tool with ${JSON.stringify(args)}`);
+ const tool = tools.find((tool) => tool.name === toolName)!;
+ const response: ToolOutput = await tool.run(args as any);
+ const result = response.getTextContent();
+ console.log(
+ `<- got response from '${toolName}'`,
+ result.replaceAll(/\s+/g, " ").substring(0, 90).concat(" (truncated)"),
+ );
+ return new ToolMessage({
+ type: "tool-result",
+ result,
+ isError: false,
+ toolName,
+ toolCallId,
+ });
+ }),
+ );
+ messages.push(...toolResults);
+
+ const answer = response.getTextContent();
+ if (answer) {
+ console.info(`Agent: ${answer}`);
+ break;
+ }
+}
+```
+
+_Source: [examples/backend/toolCalling.ts](/examples/backend/toolCalling.ts)_
+
+## Embedding Model
+
+The `EmbeddingModel` class represents an Embedding Model and can be instantiated in one of the following ways.
+
+```ts
+import { EmbeddingModel } from "bee-agent-framework/backend/core";
+
+const model = await EmbeddingModel.fromName("watsonx:ibm/granite-embedding-107m-multilingual");
+console.log(model.providerId); // watsonx
+console.log(model.modelId); // ibm/granite-embedding-107m-multilingual
+```
+
+Alternatively, you can create a concrete provider's embedding model directly.
+
+```ts
+import { OpenAIEmbeddingModel } from "bee-agent-framework/adapters/openai/backend/embedding";
+
+const model = new OpenAIEmbeddingModel(
+ "text-embedding-3-large",
+ {
+ dimensions: 512,
+ maxEmbeddingsPerCall: 5,
+ },
+ {
+ baseURL: "your_custom_endpoint",
+ compatibility: "compatible",
+ headers: {
+ CUSTOM_HEADER: "...",
+ },
+ },
+);
+```
+
+### Usage
+
+```ts
+import { EmbeddingModel } from "bee-agent-framework/backend/core";
+
+const model = await EmbeddingModel.fromName("ollama:nomic-embed-text");
+const response = await model.create({
+ values: ["Hello world!", "Hello Bee!"],
+});
+console.log(response.values);
+console.log(response.embeddings);
+```
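+
+As a follow-up usage sketch (assuming `embeddings` is an array of number vectors aligned with the input `values`), the returned vectors can be compared directly, for example via cosine similarity:
+
+```ts
+import { EmbeddingModel } from "bee-agent-framework/backend/core";
+
+const model = await EmbeddingModel.fromName("ollama:nomic-embed-text");
+const { embeddings } = await model.create({ values: ["Hello world!", "Hello Bee!"] });
+
+// cosine similarity between the two input sentences
+const [a, b] = embeddings;
+const dot = a.reduce((sum, x, i) => sum + x * b[i], 0);
+const norm = (v: number[]) => Math.sqrt(v.reduce((sum, x) => sum + x * x, 0));
+console.log("similarity:", dot / (norm(a) * norm(b)));
+```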
diff --git a/docs/cache.md b/docs/cache.md
index 3b91aec8..b1d3c8fe 100644
--- a/docs/cache.md
+++ b/docs/cache.md
@@ -104,34 +104,34 @@ _Source: [examples/cache/toolCache.ts](/examples/cache/toolCache.ts)_
```ts
import { SlidingCache } from "bee-agent-framework/cache/slidingCache";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
+import { UserMessage } from "bee-agent-framework/backend/message";
-const llm = new OllamaChatLLM({
- modelId: "llama3.1",
- parameters: {
- temperature: 0,
- num_predict: 50,
- },
+const llm = new OllamaChatModel("llama3.1");
+llm.config({
cache: new SlidingCache({
size: 50,
}),
+ parameters: {
+ maxTokens: 25,
+ },
});
console.info(await llm.cache.size()); // 0
-const first = await llm.generate([BaseMessage.of({ role: "user", text: "Who was Alan Turing?" })]);
+const first = await llm.create({
+ messages: [new UserMessage("Who was Alan Turing?")],
+});
// upcoming requests with the EXACTLY same input will be retrieved from the cache
console.info(await llm.cache.size()); // 1
-const second = await llm.generate([BaseMessage.of({ role: "user", text: "Who was Alan Turing?" })]);
-console.info(first === second); // true
+const second = await llm.create({
+ messages: [new UserMessage("Who was Alan Turing?")],
+});
+console.info(first.getTextContent() === second.getTextContent()); // true
+console.info(await llm.cache.size()); // 1
```
_Source: [examples/cache/llmCache.ts](/examples/cache/llmCache.ts)_
-> [!TIP]
->
-> Caching for non-chat LLMs works exactly the same way.
-
## Cache types
The framework provides multiple out-of-the-box cache implementations.
diff --git a/docs/emitter.md b/docs/emitter.md
index 7f9c439d..69be79c5 100644
--- a/docs/emitter.md
+++ b/docs/emitter.md
@@ -81,7 +81,7 @@ _Source: [examples/emitter/advanced.ts](/examples/emitter/advanced.ts)_
```ts
import { Callback, Emitter } from "bee-agent-framework/emitter/emitter";
-import { BaseLLM } from "bee-agent-framework/llms/base";
+import { ChatModel } from "bee-agent-framework/backend/chat";
interface Events {
update: Callback<{ data: string }>;
@@ -102,7 +102,7 @@ emitter.match("*.*", async (data, event) => {});
// Match events by providing a filter function
emitter.match(
- (event) => event.creator instanceof BaseLLM,
+ (event) => event.creator instanceof ChatModel,
async (data, event) => {},
);
@@ -164,10 +164,10 @@ Typically, you consume out-of-the-box modules that use the `Emitter` concept on
```ts
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const agent = new BeeAgent({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
memory: new UnconstrainedMemory(),
tools: [],
});
@@ -191,7 +191,7 @@ _Source: [examples/emitter/agentMatchers.ts](/examples/emitter/agentMatchers.ts)
> [!IMPORTANT]
>
-> The `observe` method is also supported on [`Tools`](./tools.md) and [`LLMs`](./llms.md).
+> The `observe` method is also supported on [`Tools`](./tools.md) and [`Backend`](./backend.md).
> [!TIP]
>
diff --git a/docs/examples.md b/docs/examples.md
index 08f9b89b..3b603313 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -43,7 +43,6 @@ This repository contains examples demonstrating the usage of the Bee Agent Frame
- [`decoratorCacheComplex.ts`](/examples/cache/decoratorCacheComplex.ts): Complex cache decorator example
- [`fileCache.ts`](/examples/cache/fileCache.ts): File-based caching
- [`fileCacheCustomProvider.ts`](/examples/cache/fileCacheCustomProvider.ts): Custom provider for file cache
-- [`llmCache.ts`](/examples/cache/llmCache.ts): Caching for language models
- [`slidingCache.ts`](/examples/cache/slidingCache.ts): Sliding window cache implementation
- [`toolCache.ts`](/examples/cache/toolCache.ts): Caching for tools
- [`unconstrainedCache.ts`](/examples/cache/unconstrainedCache.ts): Unconstrained cache example
@@ -62,23 +61,17 @@ This repository contains examples demonstrating the usage of the Bee Agent Frame
## LLMs (Language Models)
-- [`chat.ts`](/examples/llms/chat.ts): Chat-based language model usage
-- [`chatCallback.ts`](/examples/llms/chatCallback.ts): Callbacks for chat models
-- [`chatStream.ts`](/examples/llms/chatStream.ts): Streaming with chat models
-- [`structured.ts`](/examples/llms/structured.ts): Structured output from language models
-- [`text.ts`](/examples/llms/text.ts): Text-based language model usage
+- [`chat.ts`](/examples/backend/chat.ts): Chat-based language model usage
+- [`chatStream.ts`](/examples/backend/chatStream.ts): Streaming and callbacks for chat models
+- [`structured.ts`](/examples/backend/structured.ts): Structured output from language models
### LLM Providers
-- [`customChatProvider.ts`](/examples/llms/providers/customChatProvider.ts): Custom chat provider implementation
-- [`customProvider.ts`](/examples/llms/providers/customProvider.ts): Custom language model provider
-- [`groq.ts`](/examples/llms/providers/groq.ts): Groq language model integration
-- [`ibm-vllm.ts`](/examples/llms/providers/ibm-vllm.ts): IBM vLLM integration
-- [`langchain.ts`](/examples/llms/providers/langchain.ts): LangChain integration
-- [`ollama.ts`](/examples/llms/providers/ollama.ts): Ollama model usage
-- [`openai.ts`](/examples/llms/providers/openai.ts): OpenAI integration
-- [`watsonx.ts`](/examples/llms/providers/watsonx.ts): WatsonX integration
-- [`watsonx_verbose.ts`](/examples/llms/providers/watsonx_verbose.ts): Verbose WatsonX usage
+- [`groq.ts`](/examples/backend/providers/groq.ts): Groq language model integration
+- [`langchain.ts`](/examples/backend/providers/langchain.ts): LangChain integration
+- [`ollama.ts`](/examples/backend/providers/ollama.ts): Ollama model usage
+- [`openai.ts`](/examples/backend/providers/openai.ts): OpenAI integration
+- [`watsonx.ts`](/examples/backend/providers/watsonx.ts): Watsonx integration
## Logger
diff --git a/docs/integrations.md b/docs/integrations.md
index 82e99b53..6c72777e 100644
--- a/docs/integrations.md
+++ b/docs/integrations.md
@@ -7,20 +7,20 @@ Bee Agent Framework is open-source framework for building, deploying, and servin
```ts
+import "dotenv/config";
import { DuckDuckGoSearch as LangChainDDG } from "@langchain/community/tools/duckduckgo_search";
-import { ChatMessage as LangChainMessage } from "@langchain/core/messages";
import { createReactAgent as createLangGraphReactAgent } from "@langchain/langgraph/prebuilt";
-import { ChatOllama as LangChainOllamaChat } from "@langchain/ollama";
-import { OllamaChatLLM as BeeOllamaChat } from "bee-agent-framework/adapters/ollama/chat";
-import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { Workflow } from "bee-agent-framework/experimental/workflows/workflow";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { z } from "zod";
+import { createConsoleReader } from "examples/helpers/io.js";
+import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
+import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
+import { ChatOllama as LangChainOllamaChat } from "@langchain/ollama";
import { ReadOnlyMemory } from "bee-agent-framework/memory/base";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
-import "dotenv/config";
-import { createConsoleReader } from "examples/helpers/io.js";
-import { z } from "zod";
+import { Message } from "bee-agent-framework/backend/message";
+import { ChatMessage as LangChainMessage } from "@langchain/core/messages";
+import { ChatModel } from "bee-agent-framework/backend/chat";
const workflow = new Workflow({
schema: z.object({ memory: z.instanceof(ReadOnlyMemory), answer: z.string().default("") }),
@@ -30,7 +30,7 @@ const workflow = new Workflow({
}))
.addStep("bee", async (state, ctx) => {
const beeAgent = new BeeAgent({
- llm: new BeeOllamaChat({ modelId: "llama3.1" }),
+ llm: await ChatModel.fromName("ollama:llama3.1"),
tools: [new DuckDuckGoSearchTool()],
memory: state.memory,
});
@@ -38,8 +38,7 @@ const workflow = new Workflow({
{ prompt: null },
{ signal: ctx.signal, execution: { maxIterations: 5 } },
);
- const answer = response.result.text;
- return { next: Workflow.END, update: { answer } };
+ return { next: Workflow.END, update: { answer: response.result.text } };
})
.addStep("langgraph", async (state, ctx) => {
const langGraphAgent = createLangGraphReactAgent({
@@ -54,7 +53,7 @@ const workflow = new Workflow({
},
{ signal: ctx.signal, recursionLimit: 5 },
);
- const answer = String(response.messages.at(-1)?.content);
+ const answer = response.messages.map((msg) => String(msg.content)).join("");
return { next: Workflow.END, update: { answer } };
});
@@ -62,11 +61,11 @@ const memory = new UnconstrainedMemory();
const reader = createConsoleReader();
for await (const { prompt } of reader) {
- await memory.add(BaseMessage.of({ role: "user", text: prompt }));
+ await memory.add(Message.of({ role: "user", text: prompt }));
const { result, steps } = await workflow.run({ memory: memory.asReadOnly() });
reader.write(`LLM π€ : `, result.answer);
reader.write(`-> solved by `, steps.at(-1)!.name);
- await memory.add(BaseMessage.of({ role: "assistant", text: result.answer }));
+ await memory.add(Message.of({ role: "assistant", text: result.answer }));
}
```
diff --git a/docs/llms.md b/docs/llms.md
deleted file mode 100644
index 48c4cfa3..00000000
--- a/docs/llms.md
+++ /dev/null
@@ -1,243 +0,0 @@
-# LLMs (inference)
-
-> [!TIP]
->
-> Location for concrete implementations within the framework `bee-agent-framework/adapters`.
->
-> Location for base abstraction within the framework `bee-agent-framework/llms`.
-
-A Large Language Model (LLM) is an AI designed to understand and generate human-like text.
-Trained on extensive text data, LLMs learn language patterns, grammar, context, and basic reasoning to perform tasks like text completion, translation, summarization, and answering questions.
-
-To unify differences between various APIs, the framework defines a common interfaceβa set of actions that can be performed with it.
-
-## Providers (adapters)
-
-| Name | LLM | Chat LLM | Structured output (constrained decoding) |
-| ------------------------------------------------------------------------- | -------------------------- | --------------------------------------------- | ---------------------------------------- |
-| `WatsonX`                                                                  | ✅                          | ⚠️ (model specific template must be provided)  | ❌                                        |
-| `Ollama`                                                                   | ✅                          | ✅                                              | ⚠️ (JSON only)                            |
-| `OpenAI`                                                                   | ❌                          | ✅                                              | ⚠️ (JSON schema only)                     |
-| `Azure OpenAI`                                                             | ❌                          | ✅                                              | ⚠️ (JSON schema only)                     |
-| `LangChain`                                                                | ⚠️ (depends on a provider)  | ⚠️ (depends on a provider)                      | ❌                                        |
-| `Groq`                                                                     | ❌                          | ✅                                              | ⚠️ (JSON object only)                     |
-| `AWS Bedrock`                                                              | ❌                          | ✅                                              | ⚠️ (JSON only) - model specific           |
-| `VertexAI`                                                                 | ✅                          | ✅                                              | ⚠️ (JSON only)                            |
-| ➕ [Request](https://github.com/i-am-bee/bee-agent-framework/discussions)  |                             |                                                 |                                           |
-
-All providers' examples can be found in [examples/llms/providers](/examples/llms/providers).
-
-Are you interested in creating your own adapter? Jump to the [adding a new provider](#adding-a-new-provider-adapter) section.
-
-## Usage
-
-### Plain text generation
-
-
-
-```ts
-import "dotenv/config.js";
-import { createConsoleReader } from "examples/helpers/io.js";
-import { WatsonXLLM } from "bee-agent-framework/adapters/watsonx/llm";
-
-const llm = new WatsonXLLM({
- modelId: "google/flan-ul2",
- projectId: process.env.WATSONX_PROJECT_ID,
- apiKey: process.env.WATSONX_API_KEY,
- region: process.env.WATSONX_REGION, // (optional) default is us-south
- parameters: {
- decoding_method: "greedy",
- max_new_tokens: 50,
- },
-});
-
-const reader = createConsoleReader();
-const prompt = await reader.prompt();
-const response = await llm.generate(prompt);
-reader.write(`LLM π€ (text) : `, response.getTextContent());
-reader.close();
-```
-
-_Source: [examples/llms/text.ts](/examples/llms/text.ts)_
-
-> [!NOTE]
->
-> The `generate` method returns a class that extends the base [`BaseLLMOutput`](/src/llms/base.ts) class.
-> This class allows you to retrieve the response as text using the `getTextContent` method and other useful metadata.
-
-> [!TIP]
->
-> You can enable streaming communication (internally) by passing `{ stream: true }` as a second parameter to the `generate` method.
-
-### Chat text generation
-
-
-
-```ts
-import "dotenv/config.js";
-import { createConsoleReader } from "examples/helpers/io.js";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-
-const llm = new OllamaChatLLM();
-
-const reader = createConsoleReader();
-
-for await (const { prompt } of reader) {
- const response = await llm.generate([
- BaseMessage.of({
- role: Role.USER,
- text: prompt,
- }),
- ]);
- reader.write(`LLM π€ (txt) : `, response.getTextContent());
- reader.write(`LLM π€ (raw) : `, JSON.stringify(response.finalResult));
-}
-```
-
-_Source: [examples/llms/chat.ts](/examples/llms/chat.ts)_
-
-> [!NOTE]
->
-> The `generate` method returns a class that extends the base [`ChatLLMOutput`](/src/llms/chat.ts) class.
-> This class allows you to retrieve the response as text using the `getTextContent` method and other useful metadata.
-> To retrieve all messages (chunks) access the `messages` property (getter).
-
-> [!TIP]
->
-> You can enable streaming communication (internally) by passing `{ stream: true }` as a second parameter to the `generate` method.
-
-#### Streaming
-
-
-
-```ts
-import "dotenv/config.js";
-import { createConsoleReader } from "examples/helpers/io.js";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-
-const llm = new OllamaChatLLM();
-
-const reader = createConsoleReader();
-
-for await (const { prompt } of reader) {
- for await (const chunk of llm.stream([
- BaseMessage.of({
- role: Role.USER,
- text: prompt,
- }),
- ])) {
- reader.write(`LLM π€ (txt) : `, chunk.getTextContent());
- reader.write(`LLM π€ (raw) : `, JSON.stringify(chunk.finalResult));
- }
-}
-```
-
-_Source: [examples/llms/chatStream.ts](/examples/llms/chatStream.ts)_
-
-#### Callback (Emitter)
-
-
-
-```ts
-import "dotenv/config.js";
-import { createConsoleReader } from "examples/helpers/io.js";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-
-const llm = new OllamaChatLLM();
-
-const reader = createConsoleReader();
-
-for await (const { prompt } of reader) {
- const response = await llm
- .generate(
- [
- BaseMessage.of({
- role: Role.USER,
- text: prompt,
- }),
- ],
- {},
- )
- .observe((emitter) =>
- emitter.match("*", (data, event) => {
- reader.write(`LLM π€ (event: ${event.name})`, JSON.stringify(data));
-
- // if you want to close the stream prematurely, just uncomment the following line
- // callbacks.abort()
- }),
- );
-
- reader.write(`LLM π€ (txt) : `, response.getTextContent());
- reader.write(`LLM π€ (raw) : `, JSON.stringify(response.finalResult));
-}
-```
-
-_Source: [examples/llms/chatCallback.ts](/examples/llms/chatCallback.ts)_
-
-### Structured generation
-
-
-
-```ts
-import "dotenv/config.js";
-import { z } from "zod";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-import { JsonDriver } from "bee-agent-framework/llms/drivers/json";
-
-const llm = new OllamaChatLLM();
-const driver = new JsonDriver(llm);
-const response = await driver.generate(
- z.union([
- z.object({
- firstName: z.string().min(1),
- lastName: z.string().min(1),
- address: z.string(),
- age: z.number().int().min(1),
- hobby: z.string(),
- }),
- z.object({
- error: z.string(),
- }),
- ]),
- [
- BaseMessage.of({
- role: Role.USER,
- text: "Generate a profile of a citizen of Europe.",
- }),
- ],
-);
-console.info(response);
-```
-
-_Source: [examples/llms/structured.ts](/examples/llms/structured.ts)_
-
-## Adding a new provider (adapter)
-
-To use an inference provider that is not mentioned in our providers list feel free to [create a request](https://github.com/i-am-bee/bee-agent-framework/discussions).
-
-If approved and you want to create it on your own, you must do the following things. Let's assume the name of your provider is `Custom.`
-
-- Base location within the framework: `bee-agent-framework/adapters/custom`
- - Text LLM (filename): `llm.ts` ([example implementation](/examples/llms/providers/customProvider.ts))
- - Chat LLM (filename): `chat.ts` ([example implementation](/examples/llms/providers/customChatProvider.ts))
-
-> [!IMPORTANT]
->
-> If the target provider provides an SDK, use it.
-
-> [!IMPORTANT]
->
-> All provider-related dependencies (if any) must be included in `devDependencies` and `peerDependencies` in the [`package.json`](/package.json).
-
-> [!TIP]
->
-> To simplify work with the target RestAPI feel free to use the helper [`RestfulClient`](/src/internals/fetcher.ts) class.
-> The client usage can be seen in the WatsonX LLM Adapter [here](/src/adapters/watsonx/llm.ts).
-
-> [!TIP]
->
-> Parsing environment variables should be done via helper functions (`parseEnv` / `hasEnv` / `getEnv`) that can be found [here](/src/internals/env.ts).
diff --git a/docs/logger.md b/docs/logger.md
index 3bc394f9..2d453c74 100644
--- a/docs/logger.md
+++ b/docs/logger.md
@@ -43,10 +43,10 @@ The [Logger](/src/logger/logger.ts) seamlessly integrates with agents in the fra
```ts
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { Logger } from "bee-agent-framework/logger/logger";
import { Emitter } from "bee-agent-framework/emitter/emitter";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
// Set up logging
Logger.defaults.pretty = true;
@@ -59,12 +59,12 @@ const logger = Logger.root.child({
// Log events emitted during agent execution
Emitter.root.match("*.*", (data, event) => {
const logLevel = event.path.includes(".run.") ? "trace" : "info";
- logger[logLevel](`Event '${event.path}' triggered by '${event.creator.constructor.name}'.`);
+ logger[logLevel](`Event '${event.path}' triggered by '${event.creator.constructor.name}'`);
});
// Create and run an agent
const agent = new BeeAgent({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
memory: new UnconstrainedMemory(),
tools: [],
});
diff --git a/docs/memory.md b/docs/memory.md
index bd095854..15be94ea 100644
--- a/docs/memory.md
+++ b/docs/memory.md
@@ -14,23 +14,15 @@ Memory in the context of an agent refers to the system's capability to store, re
```ts
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { AssistantMessage, SystemMessage, UserMessage } from "bee-agent-framework/backend/message";
const memory = new UnconstrainedMemory();
// Single message
-await memory.add(
- BaseMessage.of({
- role: "system",
- text: `You are a helpful assistant.`,
- }),
-);
+await memory.add(new SystemMessage(`You are a helpful assistant.`));
// Multiple messages
-await memory.addMany([
- BaseMessage.of({ role: "user", text: `What can you do?` }),
- BaseMessage.of({ role: "assistant", text: `Everything!` }),
-]);
+await memory.addMany([new UserMessage(`What can you do?`), new AssistantMessage(`Everything!`)]);
console.info(memory.isEmpty()); // false
console.info(memory.messages); // prints all saved messages
@@ -45,23 +37,23 @@ _Source: [examples/memory/base.ts](/examples/memory/base.ts)_
```ts
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const memory = new UnconstrainedMemory();
await memory.addMany([
- BaseMessage.of({
+ Message.of({
role: "system",
text: `Always respond very concisely.`,
}),
- BaseMessage.of({ role: "user", text: `Give me first 5 prime numbers.` }),
+ Message.of({ role: "user", text: `Give me first 5 prime numbers.` }),
]);
// Generate response
-const llm = new OllamaChatLLM();
-const response = await llm.generate(memory.messages);
-await memory.add(BaseMessage.of({ role: "assistant", text: response.getTextContent() }));
+const llm = new OllamaChatModel("llama3.1");
+const response = await llm.create({ messages: memory.messages });
+await memory.add(Message.of({ role: "assistant", text: response.getTextContent() }));
console.log(`Conversation history`);
for (const message of memory) {
@@ -82,11 +74,11 @@ _Source: [examples/memory/llmMemory.ts](/examples/memory/llmMemory.ts)_
```ts
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const agent = new BeeAgent({
memory: new UnconstrainedMemory(),
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
tools: [],
});
await agent.run({ prompt: "Hello world!" });
@@ -126,11 +118,11 @@ Unlimited in size.
```ts
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
const memory = new UnconstrainedMemory();
await memory.add(
- BaseMessage.of({
+ Message.of({
role: "user",
text: `Hello world!`,
}),
@@ -151,7 +143,7 @@ Keeps last `k` entries in the memory. The oldest ones are deleted (unless specif
```ts
import { SlidingMemory } from "bee-agent-framework/memory/slidingMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
const memory = new SlidingMemory({
size: 3, // (required) number of messages that can be in the memory at a single moment
@@ -162,11 +154,11 @@ const memory = new SlidingMemory({
},
});
-await memory.add(BaseMessage.of({ role: "system", text: "You are a guide through France." }));
-await memory.add(BaseMessage.of({ role: "user", text: "What is the capital?" }));
-await memory.add(BaseMessage.of({ role: "assistant", text: "Paris" }));
-await memory.add(BaseMessage.of({ role: "user", text: "What language is spoken there?" })); // removes the first user's message
-await memory.add(BaseMessage.of({ role: "assistant", text: "French" })); // removes the first assistant's message
+await memory.add(Message.of({ role: "system", text: "You are a guide through France." }));
+await memory.add(Message.of({ role: "user", text: "What is the capital?" }));
+await memory.add(Message.of({ role: "assistant", text: "Paris" }));
+await memory.add(Message.of({ role: "user", text: "What language is spoken there?" })); // removes the first user's message
+await memory.add(Message.of({ role: "assistant", text: "French" })); // removes the first assistant's message
console.info(memory.isEmpty()); // false
console.log(memory.messages.length); // 3
@@ -184,13 +176,10 @@ If overflow occurs, the oldest message will be removed.
```ts
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { Message } from "bee-agent-framework/backend/message";
-const llm = new OllamaChatLLM();
const memory = new TokenMemory({
- llm,
- maxTokens: undefined, // optional (default is inferred from the passed LLM instance),
+ maxTokens: undefined, // optional (default is 128k),
capacityThreshold: 0.75, // maxTokens*capacityThreshold = threshold where we start removing old messages
syncThreshold: 0.25, // maxTokens*syncThreshold = threshold where we start to use a real tokenization endpoint instead of guessing the number of tokens
handlers: {
@@ -202,8 +191,8 @@ const memory = new TokenMemory({
},
});
-await memory.add(BaseMessage.of({ role: "system", text: "You are a helpful assistant." }));
-await memory.add(BaseMessage.of({ role: "user", text: "Hello world!" }));
+await memory.add(Message.of({ role: "system", text: "You are a helpful assistant." }));
+await memory.add(Message.of({ role: "user", text: "Hello world!" }));
console.info(memory.isDirty); // is the consumed token count estimated or retrieved via the tokenize endpoint?
console.log(memory.tokensUsed); // number of used tokens
@@ -220,25 +209,19 @@ Only a single summarization of the conversation is preserved. Summarization is u
```ts
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
import { SummarizeMemory } from "bee-agent-framework/memory/summarizeMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const memory = new SummarizeMemory({
- llm: new OllamaChatLLM({
- modelId: "llama3.1",
- parameters: {
- temperature: 0,
- num_predict: 250,
- },
- }),
+ llm: new OllamaChatModel("llama3.1"),
});
await memory.addMany([
- BaseMessage.of({ role: "system", text: "You are a guide through France." }),
- BaseMessage.of({ role: "user", text: "What is the capital?" }),
- BaseMessage.of({ role: "assistant", text: "Paris" }),
- BaseMessage.of({ role: "user", text: "What language is spoken there?" }),
+ Message.of({ role: "system", text: "You are a guide through France." }),
+ Message.of({ role: "user", text: "What is the capital?" }),
+ Message.of({ role: "assistant", text: "Paris" }),
+ Message.of({ role: "user", text: "What language is spoken there?" }),
]);
console.info(memory.isEmpty()); // false
@@ -256,19 +239,19 @@ To create your memory implementation, you must implement the `BaseMemory` class.
```ts
import { BaseMemory } from "bee-agent-framework/memory/base";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
import { NotImplementedError } from "bee-agent-framework/errors";
export class MyMemory extends BaseMemory {
- get messages(): readonly BaseMessage[] {
+ get messages(): readonly Message[] {
throw new NotImplementedError("Method not implemented.");
}
- add(message: BaseMessage, index?: number): Promise {
+ add(message: Message, index?: number): Promise {
throw new NotImplementedError("Method not implemented.");
}
- delete(message: BaseMessage): Promise {
+ delete(message: Message): Promise {
throw new NotImplementedError("Method not implemented.");
}
diff --git a/docs/migration_guide.md b/docs/migration_guide.md
new file mode 100644
index 00000000..e4719585
--- /dev/null
+++ b/docs/migration_guide.md
@@ -0,0 +1,154 @@
+# Migration Guide
+
+## 0.0.X -> 0.1.0 (2025-02-11)
+
+### Summary
+
+- The `ChatLLM` class was replaced by the `ChatModel` class, and embedding functionality is now provided by the `EmbeddingModel` class.
+- The `BaseMessage` class was replaced by `Message` and its subtypes (`UserMessage`, `AssistantMessage`, `SystemMessage`, `ToolMessage`).
+- `TokenMemory` no longer uses an `LLM` instance to infer `maxTokens`; set it manually if needed (see the sketch below).
+- Tokenization has been removed.
+- The non-chat LLM class (`LLM`) has been removed.
+- The `IBMvLLM` adapter has been removed.
+- Parsers were moved from `bee-agent-framework/agents/parsers` to `bee-agent-framework/parsers`.
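+
+For `TokenMemory`, the limit that used to be inferred from the passed LLM now has to be provided (or defaulted) explicitly. A minimal sketch of the change; the explicit `maxTokens` value below is only illustrative:
+
+```ts
+import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
+
+// Before: `new TokenMemory({ llm })` inferred `maxTokens` from the LLM instance.
+// Now: pass `maxTokens` yourself, or omit it to fall back to the default (128k).
+const memory = new TokenMemory({ maxTokens: 128_000 });
+```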
+
+### Models
+
+#### Before
+
+```ts
+import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+
+const model = new OllamaChatLLM({
+ modelId: "llama3.1",
+});
+
+const response = await model.generate(
+ [
+ BaseMessage.of({
+ role: "user",
+ text: "Hello Bee!",
+ }),
+ ],
+ {
+ stream: true,
+ },
+);
+console.log(response.getTextContent());
+```
+
+#### Now
+
+```ts
+import { ChatModel, UserMessage } from "bee-agent-framework/backend/core";
+
+const model = await ChatModel.fromName("ollama:llama3.1");
+const response = await model.create({
+ messages: [new UserMessage("Hello Bee!")],
+});
+console.log(response.getTextContent());
+```
+
+Alternatively, you can instantiate the provider directly:
+
+```ts
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
+import { UserMessage } from "bee-agent-framework/backend/core";
+
+const model = new OllamaChatModel("llama3.1");
+const response = await model.create({
+ messages: [new UserMessage("Hello Bee!")],
+});
+console.log(response.getTextContent());
+```
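+
+The embedding counterpart follows the same pattern: instead of calling `embed` on an LLM instance, you create an `EmbeddingModel`. A minimal sketch, assuming the Ollama `nomic-embed-text` model is available locally:
+
+```ts
+import { EmbeddingModel } from "bee-agent-framework/backend/embedding";
+
+const embedder = await EmbeddingModel.fromName("ollama:nomic-embed-text");
+const { embeddings } = await embedder.create({
+  values: ["King", "Queen"],
+});
+console.info(embeddings.length); // 2
+```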
+
+More examples can be found in the [Backend Documentation Page](/docs/backend.md).
+
+### Messages
+
+The `BaseMessage` class was replaced by `Message` and its subtypes (`UserMessage`, `AssistantMessage`, `SystemMessage`, `ToolMessage`).
+
+#### Before
+
+```ts
+import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+const a = BaseMessage.of({ role: "user", text: "hello", meta: { createdAt: new Date() } });
+```
+
+#### Now
+
+```ts
+import { Message } from "bee-agent-framework/backend/core";
+
+const a = Message.of({ role: "user", text: "Hello", meta: { createdAt: new Date() } });
+```
+
+The new version supports more complex content types.
+
+```ts
+import fs from "node:fs";
+import { UserMessage } from "bee-agent-framework/backend/core";
+
+// passing a file content part directly to the constructor
+const msg = new UserMessage({
+ type: "file",
+ data: await fs.promises.readFile("document.txt"),
+ mimeType: "text/plain",
+});
+```
+
+```ts
+import { UserMessage, AssistantMessage, SystemMessage } from "bee-agent-framework/backend/core";
+const a = new UserMessage("Hello assistant!");
+const b = new AssistantMessage("Hello user!");
+const c = new SystemMessage("You are a helpful assistant.");
+```
+
+More examples can be found in the [Backend Documentation Page](/docs/backend.md).
+
+### Serialization
+
+The following methods of the `Serializable` class are now asynchronous.
+
+- `serialize`
+- `deserialize`
+- `createSnapshot`
+- `loadSnapshot`
+
+The same applies to the following static methods.
+
+- `fromSerialized`
+- `fromSnapshot`
+
+#### Before
+
+```ts
+import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
+
+const a = new TokenMemory();
+const json = a.serialize();
+
+const b = TokenMemory.fromSerialized(json);
+```
+
+#### Now
+
+```ts
+import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
+
+const a = new TokenMemory();
+const json = await a.serialize();
+
+const b = await TokenMemory.fromSerialized(json);
+```
diff --git a/docs/overview.md b/docs/overview.md
index 97ee6860..28d5e21b 100644
--- a/docs/overview.md
+++ b/docs/overview.md
@@ -4,22 +4,21 @@
The source directory (`src`) provides numerous modules that one can use.
-| Name | Description |
-| -------------------------------------------- | ------------------------------------------------------------------------------------------- |
-| [**agents**](./agents.md) | Base classes defining the common interface for agent. |
-| [**llms**](./llms.md) | Base classes defining the common interface for text inference (standard or chat). |
-| [**template**](./templates.md) | Prompt Templating system based on `Mustache` with various improvements. |
-| [**memory**](./memory.md) | Various types of memories to use with agent. |
-| [**tools**](./tools.md) | Tools that an agent can use. |
-| [**cache**](./cache.md) | Preset of different caching approaches that can be used together with tools. |
-| [**errors**](./errors.md) | Base framework error classes used by each module. |
-| [**adapters**](./llms.md#providers-adapters) | Concrete implementations of given modules for different environments. |
-| [**logger**](./logger.md) | Core component for logging all actions within the framework. |
-| [**serializer**](./serialization.md) | Core component for the ability to serialize/deserialize modules into the serialized format. |
-| [**version**](./version.md) | Constants representing the framework (e.g., the latest version) |
-| [**emitter**](./emitter.md) | Bringing visibility to the system by emitting events. |
-| [**instrumentation**](./instrumentation.md) | Integrate monitoring tools into your application. |
-| **internals** | Modules used by other modules within the framework. |
+| Name | Description |
+| ------------------------------------------- | ------------------------------------------------------------------------------------------- |
+| [**agents**](./agents.md)                   | Base classes defining the common interface for agents.                                       |
+| [**backend**](/docs/backend.md)             | Functionality related to AI models (chat, embedding, image, tool calling, ...)               |
+| [**template**](./templates.md) | Prompt Templating system based on `Mustache` with various improvements. |
+| [**memory**](./memory.md)                   | Various types of memories to use with agents.                                                |
+| [**tools**](./tools.md) | Tools that an agent can use. |
+| [**cache**](./cache.md) | Preset of different caching approaches that can be used together with tools. |
+| [**errors**](./errors.md) | Base framework error classes used by each module. |
+| [**logger**](./logger.md) | Core component for logging all actions within the framework. |
+| [**serializer**](./serialization.md) | Core component for the ability to serialize/deserialize modules into the serialized format. |
+| [**version**](./version.md) | Constants representing the framework (e.g., the latest version) |
+| [**emitter**](./emitter.md) | Bringing visibility to the system by emitting events. |
+| [**instrumentation**](./instrumentation.md) | Integrate monitoring tools into your application. |
+| **internals** | Modules used by other modules within the framework. |
### Emitter
@@ -31,7 +30,7 @@ Moved to a [standalone page](instrumentation.md).
### LLMs
-Moved to a [standalone page](llms.md).
+Moved to a [standalone page](backend.md).
### Templates
diff --git a/docs/serialization.md b/docs/serialization.md
index 21a79639..6f09fd6a 100644
--- a/docs/serialization.md
+++ b/docs/serialization.md
@@ -13,8 +13,8 @@ Serialization is a difficult task, and JavaScript does not provide a magic tool
import { Serializer } from "bee-agent-framework/serializer/serializer";
const original = new Date("2024-01-01T00:00:00.000Z");
-const serialized = Serializer.serialize(original);
-const deserialized = Serializer.deserialize(serialized);
+const serialized = await Serializer.serialize(original);
+const deserialized = await Serializer.deserialize(serialized);
console.info(deserialized instanceof Date); // true
console.info(original.toISOString() === deserialized.toISOString()); // true
@@ -42,27 +42,15 @@ See the direct usage on the following memory example.
```ts
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-
-const llm = new OllamaChatLLM();
-const memory = new TokenMemory({ llm });
-await memory.addMany([
- BaseMessage.of({
- role: "user",
- text: "What is your name?",
- }),
-]);
-
-const serialized = memory.serialize();
-const deserialized = TokenMemory.fromSerialized(serialized);
-
-await deserialized.add(
- BaseMessage.of({
- role: "assistant",
- text: "Bee",
- }),
-);
+import { AssistantMessage, UserMessage } from "bee-agent-framework/backend/message";
+
+const memory = new TokenMemory();
+await memory.add(new UserMessage("What is your name?"));
+
+const serialized = await memory.serialize();
+const deserialized = await TokenMemory.fromSerialized(serialized);
+
+await deserialized.add(new AssistantMessage("Bee"));
```
_Source: [examples/serialization/memory.ts](/examples/serialization/memory.ts)_
@@ -95,8 +83,8 @@ Serializer.register(MyClass, {
});
const instance = new MyClass("Bee");
-const serialized = Serializer.serialize(instance);
-const deserialized = Serializer.deserialize(serialized);
+const serialized = await Serializer.serialize(instance);
+const deserialized = await Serializer.deserialize(serialized);
console.info(instance);
console.info(deserialized);
@@ -133,8 +121,8 @@ class MyClass extends Serializable {
}
const instance = new MyClass("Bee");
-const serialized = instance.serialize();
-const deserialized = MyClass.fromSerialized(serialized);
+const serialized = await instance.serialize();
+const deserialized = await MyClass.fromSerialized(serialized);
console.info(instance);
console.info(deserialized);
@@ -152,15 +140,15 @@ _Source: [examples/serialization/customInternal.ts](/examples/serialization/cust
```ts
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { UserMessage } from "bee-agent-framework/backend/message";
// String containing serialized `UnconstrainedMemory` instance with one message in it.
-const serialized = `{"__version":"0.0.0","__root":{"__serializer":true,"__class":"Object","__ref":"5","__value":{"target":"UnconstrainedMemory","snapshot":{"__serializer":true,"__class":"Object","__ref":"4","__value":{"messages":{"__serializer":true,"__class":"Array","__ref":"1","__value":[{"__serializer":true,"__class":"BaseMessage","__ref":"2","__value":{"role":"user","text":"Serialization is amazing, isn't?","meta":{"__serializer":true,"__class":"Undefined","__ref":"3"}}}]}}}}}}`;
+const serialized = `{"__version":"0.0.0","__root":{"__serializer":true,"__class":"Object","__ref":"18","__value":{"target":"UnconstrainedMemory","snapshot":{"__serializer":true,"__class":"Object","__ref":"17","__value":{"messages":{"__serializer":true,"__class":"Array","__ref":"1","__value":[{"__serializer":true,"__class":"SystemMessage","__ref":"2","__value":{"content":{"__serializer":true,"__class":"Array","__ref":"3","__value":[{"__serializer":true,"__class":"Object","__ref":"4","__value":{"type":"text","text":"You are a helpful assistant."}}]},"meta":{"__serializer":true,"__class":"Object","__ref":"5","__value":{"createdAt":{"__serializer":true,"__class":"Date","__ref":"6","__value":"2025-02-06T14:51:01.459Z"}}},"role":"system"}},{"__serializer":true,"__class":"UserMessage","__ref":"7","__value":{"content":{"__serializer":true,"__class":"Array","__ref":"8","__value":[{"__serializer":true,"__class":"Object","__ref":"9","__value":{"type":"text","text":"Hello!"}}]},"meta":{"__serializer":true,"__class":"Object","__ref":"10","__value":{"createdAt":{"__serializer":true,"__class":"Date","__ref":"11","__value":"2025-02-06T14:51:01.459Z"}}},"role":"user"}},{"__serializer":true,"__class":"AssistantMessage","__ref":"12","__value":{"content":{"__serializer":true,"__class":"Array","__ref":"13","__value":[{"__serializer":true,"__class":"Object","__ref":"14","__value":{"type":"text","text":"Hello, how can I help you?"}}]},"meta":{"__serializer":true,"__class":"Object","__ref":"15","__value":{"createdAt":{"__serializer":true,"__class":"Date","__ref":"16","__value":"2025-02-06T14:51:01.459Z"}}},"role":"assistant"}}]}}}}}}`;
-// If `BaseMessage` was not imported the serialization would fail because the `BaseMessage` had no chance to register itself.
-const memory = UnconstrainedMemory.fromSerialized(serialized, {
+// If `UserMessage` was not imported, deserialization would fail because the class would have had no chance to register itself.
+const memory = await UnconstrainedMemory.fromSerialized(serialized, {
// this part can be omitted if all classes used in the serialized string are imported (and have `static` register block) or at least one initiated
- extraClasses: [BaseMessage],
+ extraClasses: [UserMessage],
});
console.info(memory.messages);
```
diff --git a/docs/tools.md b/docs/tools.md
index 0906c056..e4f8b04f 100644
--- a/docs/tools.md
+++ b/docs/tools.md
@@ -98,13 +98,13 @@ _Source: [examples/tools/advanced.ts](/examples/tools/advanced.ts)_
```ts
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { ArXivTool } from "bee-agent-framework/tools/arxiv";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const agent = new BeeAgent({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
memory: new UnconstrainedMemory(),
tools: [new ArXivTool()],
});
@@ -483,7 +483,7 @@ import { MCPTool } from "bee-agent-framework/tools/mcp";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
// Create MCP Client
const client = new Client(
@@ -508,7 +508,7 @@ try {
// Server usually supports several tools, use the factory for automatic discovery
const tools = await MCPTool.fromClient(client);
const agent = new BeeAgent({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
memory: new UnconstrainedMemory(),
tools,
});
diff --git a/docs/tutorials.md b/docs/tutorials.md
index 26df981d..97dff260 100644
--- a/docs/tutorials.md
+++ b/docs/tutorials.md
@@ -64,7 +64,7 @@ Now, copy and paste the following code into `agent_slack.ts` module. Then, follo
import { MCPTool } from "bee-agent-framework/tools/mcp";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
@@ -104,7 +104,7 @@ try {
// Create Bee agent
const agent = new BeeAgent({
// We're using LLM ran locally via Ollama
- llm: new OllamaChatLLM({ modelId: "llama3.1" }),
+ llm: new OllamaChatModel("llama3.1"),
// Besides the Slack tools, we also provide DDG tool for web search
tools: [new OpenMeteoTool(), ...filteredSlackTools],
memory: new UnconstrainedMemory(),
diff --git a/docs/workflows.md b/docs/workflows.md
index 57df3728..151e3d22 100644
--- a/docs/workflows.md
+++ b/docs/workflows.md
@@ -97,25 +97,24 @@ _Source: [examples/workflows/nesting.ts](/examples/workflows/nesting.ts)_
import "dotenv/config";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { z } from "zod";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { JsonDriver } from "bee-agent-framework/llms/drivers/json";
+import { Message, UserMessage } from "bee-agent-framework/backend/message";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { ReadOnlyMemory } from "bee-agent-framework/memory/base";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { Workflow } from "bee-agent-framework/experimental/workflows/workflow";
import { createConsoleReader } from "examples/helpers/io.js";
-import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
+import { GroqChatModel } from "bee-agent-framework/adapters/groq/backend/chat";
const schema = z.object({
- answer: z.instanceof(BaseMessage).optional(),
+ answer: z.instanceof(Message).optional(),
memory: z.instanceof(ReadOnlyMemory),
});
const workflow = new Workflow({ schema: schema })
.addStep("simpleAgent", async (state) => {
const simpleAgent = new BeeAgent({
- llm: new GroqChatLLM(),
+ llm: new GroqChatModel("llama-3.3-70b-versatile"),
tools: [],
memory: state.memory,
});
@@ -128,18 +127,18 @@ const workflow = new Workflow({ schema: schema })
};
})
.addStrictStep("critique", schema.required(), async (state) => {
- const llm = new GroqChatLLM();
- const { parsed: critiqueResponse } = await new JsonDriver(llm).generate(
- z.object({ score: z.number().int().min(0).max(100) }),
- [
- BaseMessage.of({
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
+ const { object: critiqueResponse } = await llm.createStructure({
+ schema: z.object({ score: z.number().int().min(0).max(100) }),
+ messages: [
+ Message.of({
role: "system",
text: `You are an evaluation assistant who scores the credibility of the last assistant's response. Chitchatting always has a score of 100. If the assistant was unable to answer the user's query, then the score will be 0.`,
}),
...state.memory.messages,
state.answer,
],
- );
+ });
reader.write("π§ Score", critiqueResponse.score.toString());
return {
@@ -148,7 +147,7 @@ const workflow = new Workflow({ schema: schema })
})
.addStep("complexAgent", async (state) => {
const complexAgent = new BeeAgent({
- llm: new GroqChatLLM(),
+ llm: new GroqChatModel("llama-3.3-70b-versatile"),
tools: [new WikipediaTool(), new OpenMeteoTool()],
memory: state.memory,
});
@@ -162,11 +161,7 @@ const reader = createConsoleReader();
const memory = new UnconstrainedMemory();
for await (const { prompt } of reader) {
- const userMessage = BaseMessage.of({
- role: Role.USER,
- text: prompt,
- meta: { createdAt: new Date() },
- });
+ const userMessage = new UserMessage(prompt);
await memory.add(userMessage);
const response = await workflow.run({
@@ -191,12 +186,11 @@ import { Workflow } from "bee-agent-framework/experimental/workflows/workflow";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { createConsoleReader } from "examples/helpers/io.js";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { JsonDriver } from "bee-agent-framework/llms/drivers/json";
+import { Message } from "bee-agent-framework/backend/message";
import { isEmpty, pick } from "remeda";
import { LLMTool } from "bee-agent-framework/tools/llm";
import { GoogleSearchTool } from "bee-agent-framework/tools/search/googleSearch";
-import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
+import { GroqChatModel } from "bee-agent-framework/adapters/groq/backend/chat";
const schema = z.object({
input: z.string(),
@@ -213,19 +207,18 @@ const workflow = new Workflow({
outputSchema: schema.required({ output: true }),
})
.addStep("preprocess", async (state) => {
- const llm = new GroqChatLLM();
- const driver = new JsonDriver(llm);
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
- const { parsed } = await driver.generate(
- schema.pick({ topic: true, notes: true }).or(
+ const { object: parsed } = await llm.createStructure({
+ schema: schema.pick({ topic: true, notes: true }).or(
z.object({
error: z
.string()
.describe("Use when the input query does not make sense or you need clarification."),
}),
),
- [
- BaseMessage.of({
+ messages: [
+ Message.of({
role: `user`,
text: [
"Your task is to rewrite the user query so that it guides the content planner and editor to craft a blog post that perfectly aligns with the user's needs. Notes should be used only if the user complains about something.",
@@ -240,14 +233,14 @@ const workflow = new Workflow({
.join("\n"),
}),
],
- );
+ });
return "error" in parsed
? { update: { output: parsed.error }, next: Workflow.END }
: { update: pick(parsed, ["notes", "topic"]) };
})
.addStrictStep("planner", schema.required({ topic: true }), async (state) => {
- const llm = new GroqChatLLM();
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
const agent = new BeeAgent({
llm,
memory: new UnconstrainedMemory(),
@@ -280,54 +273,58 @@ const workflow = new Workflow({
};
})
.addStrictStep("writer", schema.required({ plan: true }), async (state) => {
- const llm = new GroqChatLLM();
- const output = await llm.generate([
- BaseMessage.of({
- role: `system`,
- text: [
- `You are a Content Writer. Your task is to write a compelling blog post based on the provided context.`,
- ``,
- `# Context`,
- `${state.plan}`,
- ``,
- `# Objectives`,
- `- An engaging introduction`,
- `- Insightful body paragraphs (2-3 per section)`,
- `- Properly named sections/subtitles`,
- `- A summarizing conclusion`,
- `- Format: Markdown`,
- ``,
- ...[!isEmpty(state.notes) && ["# Notes", ...state.notes, ""]],
- `Ensure the content flows naturally, incorporates SEO keywords, and is well-structured.`,
- ].join("\n"),
- }),
- ]);
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
+ const output = await llm.create({
+ messages: [
+ Message.of({
+ role: `system`,
+ text: [
+ `You are a Content Writer. Your task is to write a compelling blog post based on the provided context.`,
+ ``,
+ `# Context`,
+ `${state.plan}`,
+ ``,
+ `# Objectives`,
+ `- An engaging introduction`,
+ `- Insightful body paragraphs (2-3 per section)`,
+ `- Properly named sections/subtitles`,
+ `- A summarizing conclusion`,
+ `- Format: Markdown`,
+ ``,
+ ...[!isEmpty(state.notes) && ["# Notes", ...state.notes, ""]],
+ `Ensure the content flows naturally, incorporates SEO keywords, and is well-structured.`,
+ ].join("\n"),
+ }),
+ ],
+ });
return {
update: { draft: output.getTextContent() },
};
})
.addStrictStep("editor", schema.required({ draft: true }), async (state) => {
- const llm = new GroqChatLLM();
- const output = await llm.generate([
- BaseMessage.of({
- role: `system`,
- text: [
- `You are an Editor. Your task is to transform the following draft blog post to a final version.`,
- ``,
- `# Draft`,
- `${state.draft}`,
- ``,
- `# Objectives`,
- `- Fix Grammatical errors`,
- `- Journalistic best practices`,
- ``,
- ...[!isEmpty(state.notes) && ["# Notes", ...state.notes, ""]],
- ``,
- `IMPORTANT: The final version must not contain any editor's comments.`,
- ].join("\n"),
- }),
- ]);
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
+ const output = await llm.create({
+ messages: [
+ Message.of({
+ role: `system`,
+ text: [
+ `You are an Editor. Your task is to transform the following draft blog post to a final version.`,
+ ``,
+ `# Draft`,
+ `${state.draft}`,
+ ``,
+ `# Objectives`,
+ `- Fix Grammatical errors`,
+ `- Journalistic best practices`,
+ ``,
+ ...[!isEmpty(state.notes) && ["# Notes", ...state.notes, ""]],
+ ``,
+ `IMPORTANT: The final version must not contain any editor's comments.`,
+ ].join("\n"),
+ }),
+ ],
+ });
return {
update: { output: output.getTextContent() },
@@ -367,14 +364,11 @@ import { createConsoleReader } from "examples/helpers/io.js";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { AgentWorkflow } from "bee-agent-framework/experimental/workflows/agent";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { WatsonxChatModel } from "bee-agent-framework/adapters/watsonx/backend/chat";
const workflow = new AgentWorkflow();
-const llm = WatsonXChatLLM.fromPreset("meta-llama/llama-3-3-70b-instruct", {
- apiKey: process.env.WATSONX_API_KEY,
- projectId: process.env.WATSONX_PROJECT_ID,
-});
+const llm = new WatsonxChatModel("meta-llama/llama-3-3-70b-instruct");
workflow.addAgent({
name: "WeatherForecaster",
@@ -400,13 +394,7 @@ const reader = createConsoleReader();
const memory = new UnconstrainedMemory();
for await (const { prompt } of reader) {
- await memory.add(
- BaseMessage.of({
- role: Role.USER,
- text: prompt,
- meta: { createdAt: new Date() },
- }),
- );
+ await memory.add(new UserMessage(prompt, { createdAt: new Date() }));
const { result } = await workflow.run(memory.messages).observe((emitter) => {
emitter.on("success", (data) => {
diff --git a/eslint.config.js b/eslint.config.js
index 5e7efd9b..5793df48 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -9,15 +9,11 @@ import stylisticJs from "@stylistic/eslint-plugin-js";
export default tseslint.config(
{
- ignores: ["node_modules/**", "dist/**", "scripts/ibm_vllm_generate_protos/**"],
+ ignores: ["node_modules/**", "dist/**"],
},
eslint.configs.recommended,
...tseslint.configs.strict,
...tseslint.configs.stylistic,
- {
- files: ["src/adapters/ibm-vllm/types.ts"],
- rules: { "@typescript-eslint/unified-signatures": "off" },
- },
{
files: ["**/*.md/**"],
languageOptions: {
@@ -88,6 +84,7 @@ export default tseslint.config(
ignorePattern: /^(?![\s\S]*Copyright \d+ IBM Corp.)[\s\S]+$/u.source,
},
],
+ "@typescript-eslint/class-literal-property-style": "off",
},
},
{
diff --git a/examples/README.md b/examples/README.md
index 08f9b89b..3b603313 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -43,7 +43,6 @@ This repository contains examples demonstrating the usage of the Bee Agent Frame
- [`decoratorCacheComplex.ts`](/examples/cache/decoratorCacheComplex.ts): Complex cache decorator example
- [`fileCache.ts`](/examples/cache/fileCache.ts): File-based caching
- [`fileCacheCustomProvider.ts`](/examples/cache/fileCacheCustomProvider.ts): Custom provider for file cache
-- [`llmCache.ts`](/examples/cache/llmCache.ts): Caching for language models
- [`slidingCache.ts`](/examples/cache/slidingCache.ts): Sliding window cache implementation
- [`toolCache.ts`](/examples/cache/toolCache.ts): Caching for tools
- [`unconstrainedCache.ts`](/examples/cache/unconstrainedCache.ts): Unconstrained cache example
@@ -62,23 +61,17 @@ This repository contains examples demonstrating the usage of the Bee Agent Frame
## LLMs (Language Models)
-- [`chat.ts`](/examples/llms/chat.ts): Chat-based language model usage
-- [`chatCallback.ts`](/examples/llms/chatCallback.ts): Callbacks for chat models
-- [`chatStream.ts`](/examples/llms/chatStream.ts): Streaming with chat models
-- [`structured.ts`](/examples/llms/structured.ts): Structured output from language models
-- [`text.ts`](/examples/llms/text.ts): Text-based language model usage
+- [`chat.ts`](/examples/backend/chat.ts): Chat-based language model usage
+- [`chatStream.ts`](/examples/backend/chatStream.ts): Streaming and event callbacks for chat models
+- [`structured.ts`](/examples/backend/structured.ts): Structured output from language models
### LLM Providers
-- [`customChatProvider.ts`](/examples/llms/providers/customChatProvider.ts): Custom chat provider implementation
-- [`customProvider.ts`](/examples/llms/providers/customProvider.ts): Custom language model provider
-- [`groq.ts`](/examples/llms/providers/groq.ts): Groq language model integration
-- [`ibm-vllm.ts`](/examples/llms/providers/ibm-vllm.ts): IBM vLLM integration
-- [`langchain.ts`](/examples/llms/providers/langchain.ts): LangChain integration
-- [`ollama.ts`](/examples/llms/providers/ollama.ts): Ollama model usage
-- [`openai.ts`](/examples/llms/providers/openai.ts): OpenAI integration
-- [`watsonx.ts`](/examples/llms/providers/watsonx.ts): WatsonX integration
-- [`watsonx_verbose.ts`](/examples/llms/providers/watsonx_verbose.ts): Verbose WatsonX usage
+- [`groq.ts`](/examples/backend/providers/groq.ts): Groq language model integration
+- [`langchain.ts`](/examples/backend/providers/langchain.ts): LangChain integration
+- [`ollama.ts`](/examples/backend/providers/ollama.ts): Ollama model usage
+- [`openai.ts`](/examples/backend/providers/openai.ts): OpenAI integration
+- [`watsonx.ts`](/examples/backend/providers/watsonx.ts): Watsonx integration
## Logger
diff --git a/examples/agents/bee.ts b/examples/agents/bee.ts
index cc955f94..b9503b89 100644
--- a/examples/agents/bee.ts
+++ b/examples/agents/bee.ts
@@ -11,7 +11,7 @@ import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { dirname } from "node:path";
import { fileURLToPath } from "node:url";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
Logger.root.level = "silent"; // disable internal logs
const logger = new Logger({ name: "app", level: "trace" });
@@ -21,7 +21,7 @@ const logger = new Logger({ name: "app", level: "trace" });
// "granite3.1-dense"
// "deepseek-r1:32b"
// ensure the model is pulled before running
-const llm = new OllamaChatLLM({ modelId: "llama3.1:8b" });
+const llm = new OllamaChatModel("llama3.1");
const codeInterpreterUrl = process.env.CODE_INTERPRETER_URL;
const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -32,7 +32,7 @@ const localTmpdir = process.env.LOCAL_TMPDIR ?? "./examples/tmp/local";
const agent = new BeeAgent({
llm,
- memory: new TokenMemory({ llm }),
+ memory: new TokenMemory(),
tools: [
new DuckDuckGoSearchTool(),
// new WebCrawlerTool(), // HTML web page crawler
diff --git a/examples/agents/bee_advanced.ts b/examples/agents/bee_advanced.ts
index de809387..816aed06 100644
--- a/examples/agents/bee_advanced.ts
+++ b/examples/agents/bee_advanced.ts
@@ -10,14 +10,12 @@ import {
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { z } from "zod";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
Logger.root.level = "silent"; // disable internal logs
const logger = new Logger({ name: "app", level: "trace" });
-const llm = new OllamaChatLLM({
- modelId: "llama3.1", // llama3.1:70b for better performance
-});
+const llm = new OllamaChatModel("llama3.1");
const agent = new BeeAgent({
llm,
diff --git a/examples/agents/bee_instrumentation.ts b/examples/agents/bee_instrumentation.ts
index 2763228c..1caff9eb 100644
--- a/examples/agents/bee_instrumentation.ts
+++ b/examples/agents/bee_instrumentation.ts
@@ -9,18 +9,16 @@ import { Logger } from "bee-agent-framework/logger/logger";
import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
Logger.root.level = "silent"; // disable internal logs
const logger = new Logger({ name: "app", level: "trace" });
-const llm = new OllamaChatLLM({
- modelId: "llama3.1", // llama3.1:70b for better performance
-});
+const llm = new OllamaChatModel("llama3.1");
const agent = new BeeAgent({
llm,
- memory: new TokenMemory({ llm }),
+ memory: new TokenMemory(),
tools: [
new DuckDuckGoSearchTool(),
new WikipediaTool(),
diff --git a/examples/agents/bee_reusable.ts b/examples/agents/bee_reusable.ts
index 456db628..6d2a4ee8 100644
--- a/examples/agents/bee_reusable.ts
+++ b/examples/agents/bee_reusable.ts
@@ -1,12 +1,12 @@
import "dotenv/config.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { OpenAIChatLLM } from "bee-agent-framework/adapters/openai/chat";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
// We create an agent
let agent = new BeeAgent({
- llm: new OpenAIChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
tools: [new WikipediaTool()],
memory: new UnconstrainedMemory(),
});
@@ -20,10 +20,10 @@ const response = await agent.run({
console.info(response.result.text);
// We can save (serialize) the agent
-const json = agent.serialize();
+const json = await agent.serialize();
// We reinitialize the agent to the exact state he was
-agent = BeeAgent.fromSerialized(json);
+agent = await BeeAgent.fromSerialized(json);
// We continue in our conversation
prompt = "When was he born?";
diff --git a/examples/agents/custom_agent.ts b/examples/agents/custom_agent.ts
index b569aa0f..89935748 100644
--- a/examples/agents/custom_agent.ts
+++ b/examples/agents/custom_agent.ts
@@ -1,22 +1,25 @@
import { BaseAgent, BaseAgentRunOptions } from "bee-agent-framework/agents/base";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
+import {
+ AssistantMessage,
+ Message,
+ SystemMessage,
+ UserMessage,
+} from "bee-agent-framework/backend/message";
import { Emitter } from "bee-agent-framework/emitter/emitter";
import { GetRunContext } from "bee-agent-framework/context";
-import { JsonDriver } from "bee-agent-framework/llms/drivers/json";
import { z } from "zod";
-import { PromptTemplate } from "bee-agent-framework/template";
import { AgentMeta } from "bee-agent-framework/agents/types";
-import { ChatLLM, ChatLLMOutput } from "bee-agent-framework/llms/chat";
import { BaseMemory } from "bee-agent-framework/memory/base";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
+import { ChatModel } from "bee-agent-framework/backend/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
interface RunInput {
- message: BaseMessage;
+ message: Message;
}
interface RunOutput {
- message: BaseMessage;
+ message: Message;
state: {
thought: string;
final_answer: string;
@@ -28,35 +31,21 @@ interface RunOptions extends BaseAgentRunOptions {
}
interface AgentInput {
- llm: ChatLLM;
+ llm: ChatModel;
memory: BaseMemory;
}
export class CustomAgent extends BaseAgent {
- protected driver: JsonDriver;
public readonly memory: BaseMemory;
+ protected readonly model: ChatModel;
public emitter = Emitter.root.child({
namespace: ["agent", "custom"],
creator: this,
});
- protected static systemPrompt = new PromptTemplate({
- schema: z.object({
- schema: z.string().min(1),
- }),
- template: `You are a helpful assistant that generates only valid JSON adhering to the following JSON Schema.
-
-\`\`\`
-{{schema}}
-\`\`\`
-
-IMPORTANT: Every message must be a parsable JSON string without additional output.
-`,
- });
-
constructor(input: AgentInput) {
super();
- this.driver = JsonDriver.fromTemplate(CustomAgent.systemPrompt, input.llm);
+ this.model = input.llm;
this.memory = input.memory;
}
@@ -65,8 +54,8 @@ IMPORTANT: Every message must be a parsable JSON string without additional outpu
options: RunOptions,
run: GetRunContext,
): Promise {
- const response = await this.driver.generate(
- z.object({
+ const response = await this.model.createStructure({
+ schema: z.object({
thought: z
.string()
.describe("Describe your thought process before coming with a final answer"),
@@ -74,22 +63,21 @@ IMPORTANT: Every message must be a parsable JSON string without additional outpu
.string()
.describe("Here you should provide concise answer to the original question."),
}),
- [...this.memory.messages, input.message],
- {
- maxRetries: options?.maxRetries,
- options: { signal: run.signal },
- },
- );
-
- const result = BaseMessage.of({
- role: Role.ASSISTANT,
- text: response.parsed.final_answer,
+ messages: [
+        new SystemMessage("You are a helpful assistant. Always use JSON format for your responses."),
+ ...this.memory.messages,
+ input.message,
+ ],
+ maxRetries: options?.maxRetries,
+ abortSignal: run.signal,
});
+
+ const result = new AssistantMessage(response.object.final_answer);
await this.memory.add(result);
return {
message: result,
- state: response.parsed,
+ state: response.object,
};
}
@@ -104,7 +92,6 @@ IMPORTANT: Every message must be a parsable JSON string without additional outpu
createSnapshot() {
return {
...super.createSnapshot(),
- driver: this.driver,
emitter: this.emitter,
memory: this.memory,
};
@@ -116,10 +103,11 @@ IMPORTANT: Every message must be a parsable JSON string without additional outpu
}
const agent = new CustomAgent({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("granite3.1-dense"),
memory: new UnconstrainedMemory(),
});
+
const response = await agent.run({
- message: BaseMessage.of({ role: Role.USER, text: "Why is the sky blue?" }),
+ message: new UserMessage("Why is the sky blue?"),
});
console.info(response.state);
diff --git a/examples/agents/elasticsearch.ts b/examples/agents/elasticsearch.ts
index 1a488ac4..d6449e02 100644
--- a/examples/agents/elasticsearch.ts
+++ b/examples/agents/elasticsearch.ts
@@ -1,12 +1,12 @@
import "dotenv/config.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { OpenAIChatLLM } from "bee-agent-framework/adapters/openai/chat";
+import { OpenAIChatModel } from "bee-agent-framework/adapters/openai/backend/chat";
import { ElasticSearchTool } from "bee-agent-framework/tools/database/elasticsearch";
import { FrameworkError } from "bee-agent-framework/errors";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { createConsoleReader } from "../helpers/io.js";
-const llm = new OpenAIChatLLM();
+const llm = new OpenAIChatModel("gpt-4o");
const elasticSearchTool = new ElasticSearchTool({
connection: {
diff --git a/examples/agents/experimental/human.ts b/examples/agents/experimental/human.ts
index 98d8c7ef..141cbe7d 100644
--- a/examples/agents/experimental/human.ts
+++ b/examples/agents/experimental/human.ts
@@ -8,16 +8,14 @@ import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
// Import the HumanTool from the updated file
import { HumanTool } from "../../tools/experimental/human.js";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
// Set up logger
Logger.root.level = "silent"; // Disable internal logs
const logger = new Logger({ name: "app", level: "trace" });
// Initialize LLM (test against llama as requested)
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-const llm = new OllamaChatLLM({
- modelId: "llama3.1",
-});
+const llm = new OllamaChatModel("llama3.1");
// Create the console reader once, share it with HumanTool
const reader = createConsoleReader();
@@ -25,7 +23,7 @@ const reader = createConsoleReader();
// Initialize BeeAgent with shared reader for HumanTool
const agent = new BeeAgent({
llm,
- memory: new TokenMemory({ llm }),
+ memory: new TokenMemory(),
tools: [
new OpenMeteoTool(),
new HumanTool({
diff --git a/examples/agents/experimental/replan.ts b/examples/agents/experimental/replan.ts
index 510103f0..57b9c223 100644
--- a/examples/agents/experimental/replan.ts
+++ b/examples/agents/experimental/replan.ts
@@ -5,15 +5,12 @@ import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { createConsoleReader } from "examples/helpers/io.js";
import { RePlanAgent } from "bee-agent-framework/agents/experimental/replan/agent";
-import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
+import { WatsonxChatModel } from "bee-agent-framework/adapters/watsonx/backend/chat";
const reader = createConsoleReader();
const agent = new RePlanAgent({
- llm: WatsonXChatLLM.fromPreset("meta-llama/llama-3-3-70b-instruct", {
- apiKey: process.env.WATSONX_API_KEY,
- projectId: process.env.WATSONX_PROJECT_ID,
- }),
+ llm: new WatsonxChatModel("meta-llama/llama-3-3-70b-instruct"),
memory: new UnconstrainedMemory(),
tools: [new DuckDuckGoSearchTool(), new OpenMeteoTool()],
});
diff --git a/examples/agents/experimental/streamlit.ts b/examples/agents/experimental/streamlit.ts
index 210743ec..aee6143d 100644
--- a/examples/agents/experimental/streamlit.ts
+++ b/examples/agents/experimental/streamlit.ts
@@ -3,16 +3,13 @@ import { FrameworkError } from "bee-agent-framework/errors";
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
import { createConsoleReader } from "examples/helpers/io.js";
import { StreamlitAgent } from "bee-agent-framework/agents/experimental/streamlit/agent";
-import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
+import { WatsonxChatModel } from "bee-agent-framework/adapters/watsonx/backend/chat";
-const llm = WatsonXChatLLM.fromPreset("meta-llama/llama-3-3-70b-instruct", {
- apiKey: process.env.WATSONX_API_KEY,
- projectId: process.env.WATSONX_PROJECT_ID,
-});
+const llm = new WatsonxChatModel("meta-llama/llama-3-3-70b-instruct");
const agent = new StreamlitAgent({
llm,
- memory: new TokenMemory({ llm }),
+ memory: new TokenMemory(),
});
const reader = createConsoleReader();
diff --git a/examples/agents/granite/README.md b/examples/agents/granite/README.md
index 998fda19..dab27449 100644
--- a/examples/agents/granite/README.md
+++ b/examples/agents/granite/README.md
@@ -53,7 +53,7 @@ The [granite_bee](/examples/agents/granite/granite_bee.ts) example agent is set
```.env
LLM_BACKEND=ollama
- OLLAMA_HOST={http://0.0.0.0:11434}
+ OLLAMA_BASE_URL={http://0.0.0.0:11434}
```
1. Run the [granite_bee](/examples/agents/granite/granite_bee.ts) agent:
diff --git a/examples/agents/granite/granite_bee.ts b/examples/agents/granite/granite_bee.ts
index 3a3a5ded..2499a92b 100644
--- a/examples/agents/granite/granite_bee.ts
+++ b/examples/agents/granite/granite_bee.ts
@@ -1,83 +1,17 @@
import "dotenv/config.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { ChatLLM, ChatLLMOutput } from "bee-agent-framework/llms/chat";
-import { getEnv, parseEnv } from "bee-agent-framework/internals/env";
import { FrameworkError } from "bee-agent-framework/errors";
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
-import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
-import { OpenAIChatLLM } from "bee-agent-framework/adapters/openai/chat";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-import { IBMVllmChatLLM } from "bee-agent-framework/adapters/ibm-vllm/chat";
-import { IBMVllmModel } from "bee-agent-framework/adapters/ibm-vllm/chatPreset";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
-import { Ollama } from "ollama";
-import OpenAI from "openai";
-import { z } from "zod";
import * as process from "node:process";
import { createConsoleReader } from "examples/helpers/io.js";
+import { ChatModel } from "bee-agent-framework/backend/chat";
-const Providers = {
- WATSONX: "watsonx",
- OLLAMA: "ollama",
- IBMVLLM: "ibmvllm",
- IBMRITS: "ibmrits",
-} as const;
-type Provider = (typeof Providers)[keyof typeof Providers];
-
-function getChatLLM(provider?: Provider): ChatLLM {
- const LLMFactories: Record ChatLLM> = {
- [Providers.OLLAMA]: () =>
- new OllamaChatLLM({
- modelId: getEnv("OLLAMA_MODEL") || "granite3.1-dense:8b",
- parameters: {
- temperature: 0,
- repeat_penalty: 1,
- num_predict: 2000,
- },
- client: new Ollama({
- host: getEnv("OLLAMA_HOST"),
- }),
- }),
- [Providers.WATSONX]: () =>
- WatsonXChatLLM.fromPreset(getEnv("WATSONX_MODEL") || "ibm/granite-3-8b-instruct", {
- apiKey: getEnv("WATSONX_API_KEY"),
- projectId: getEnv("WATSONX_PROJECT_ID"),
- region: getEnv("WATSONX_REGION"),
- }),
- [Providers.IBMVLLM]: () => IBMVllmChatLLM.fromPreset(IBMVllmModel.GRANITE_3_1_8B_INSTRUCT),
- [Providers.IBMRITS]: () =>
- new OpenAIChatLLM({
- client: new OpenAI({
- baseURL: process.env.IBM_RITS_URL,
- apiKey: process.env.IBM_RITS_API_KEY,
- defaultHeaders: {
- RITS_API_KEY: process.env.IBM_RITS_API_KEY,
- },
- }),
- modelId: getEnv("IBM_RITS_MODEL") || "ibm-granite/granite-3.1-8b-instruct",
- parameters: {
- temperature: 0,
- max_tokens: 2048,
- },
- }),
- };
-
- if (!provider) {
- provider = parseEnv("LLM_BACKEND", z.nativeEnum(Providers), Providers.OLLAMA);
- }
-
- const factory = LLMFactories[provider];
- if (!factory) {
- throw new Error(`Provider "${provider}" not found.`);
- }
- return factory();
-}
-
-const llm = getChatLLM();
+const llm = await ChatModel.fromName("ollama:granite3.1-dense:8b");
const agent = new BeeAgent({
llm,
- memory: new TokenMemory({ llm }),
+ memory: new TokenMemory(),
tools: [new OpenMeteoTool(), new DuckDuckGoSearchTool({ maxResults: 3 })],
});
@@ -92,7 +26,7 @@ try {
execution: {
maxIterations: 8,
maxRetriesPerStep: 3,
- totalMaxRetries: 3,
+ totalMaxRetries: 2,
},
},
)
diff --git a/examples/agents/granite/granite_wiki_bee.ts b/examples/agents/granite/granite_wiki_bee.ts
index 6424bab4..782870b1 100644
--- a/examples/agents/granite/granite_wiki_bee.ts
+++ b/examples/agents/granite/granite_wiki_bee.ts
@@ -2,22 +2,24 @@ import "dotenv/config.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { FrameworkError } from "bee-agent-framework/errors";
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { z } from "zod";
-import { OllamaLLM } from "bee-agent-framework/adapters/ollama/llm";
import { SimilarityTool } from "bee-agent-framework/tools/similarity";
import { cosineSimilarityMatrix } from "bee-agent-framework/internals/helpers/math";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { splitString } from "bee-agent-framework/internals/helpers/string";
import { AnyTool } from "bee-agent-framework/tools/base";
import { createConsoleReader } from "examples/helpers/io.js";
+import { ChatModel } from "bee-agent-framework/backend/chat";
+import { EmbeddingModel } from "bee-agent-framework/backend/embedding";
// Creates a wikipedia tool that supports information retrieval
-function wikipediaRetrivalTool(passageSize: number, overlap: number, maxResults: number): AnyTool {
+async function createWikipediaRetrivalTool(
+ passageSize: number,
+ overlap: number,
+ maxResults: number,
+): Promise {
// LLM to perform text embedding
- const embeddingLLM = new OllamaLLM({
- modelId: "nomic-embed-text",
- });
+ const embeddingLLM = await EmbeddingModel.fromName("ollama:nomic-embed-text");
// Estimate of character per LLM token
const charsPerToken = 4;
@@ -26,10 +28,9 @@ function wikipediaRetrivalTool(passageSize: number, overlap: number, maxResults:
const similarity = new SimilarityTool({
maxResults: maxResults,
provider: async (input): Promise<{ score: number }[]> => {
- const embeds = await embeddingLLM.embed([
- input.query,
- ...input.documents.map((doc) => doc.text),
- ]);
+ const embeds = await embeddingLLM.create({
+ values: [input.query, ...input.documents.map((doc) => doc.text)],
+ });
const similarities = cosineSimilarityMatrix(
[embeds.embeddings[0]], // Query
embeds.embeddings.slice(1), // Documents
@@ -75,18 +76,15 @@ function wikipediaRetrivalTool(passageSize: number, overlap: number, maxResults:
}
// Agent LLM
-const llm = new OllamaChatLLM({
- modelId: "granite3.1-dense:8b",
- parameters: {
- temperature: 0,
- num_predict: 2048,
- },
+const llm = await ChatModel.fromName("ollama:granite3.1-dense:8b", {
+ temperature: 0,
+ maxTokens: 2048,
});
const agent = new BeeAgent({
llm,
- memory: new TokenMemory({ llm }),
- tools: [wikipediaRetrivalTool(400, 50, 3)],
+ memory: new TokenMemory(),
+ tools: [await createWikipediaRetrivalTool(400, 50, 3)],
});
const reader = createConsoleReader();
diff --git a/examples/agents/simple.ts b/examples/agents/simple.ts
index af4161f5..4b9723cc 100644
--- a/examples/agents/simple.ts
+++ b/examples/agents/simple.ts
@@ -2,13 +2,13 @@ import "dotenv/config.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
-const llm = new OllamaChatLLM();
+const llm = new OllamaChatModel("llama3.1");
const agent = new BeeAgent({
llm,
- memory: new TokenMemory({ llm }),
+ memory: new TokenMemory(),
tools: [new DuckDuckGoSearchTool(), new OpenMeteoTool()],
});
diff --git a/examples/agents/sql.ts b/examples/agents/sql.ts
index 9bd4160d..bbfa47b3 100644
--- a/examples/agents/sql.ts
+++ b/examples/agents/sql.ts
@@ -1,19 +1,14 @@
import "dotenv/config.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
import { SQLTool } from "bee-agent-framework/tools/database/sql";
import { FrameworkError } from "bee-agent-framework/errors";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import fs from "node:fs";
import * as path from "node:path";
import os from "node:os";
+import { GroqChatModel } from "bee-agent-framework/adapters/groq/backend/chat";
-const llm = new GroqChatLLM({
- modelId: "llama-3.1-70b-versatile",
- parameters: {
- temperature: 0,
- },
-});
+const llm = new GroqChatModel("llama-3.3-70b-versatile");
const sqlTool = new SQLTool({
provider: "sqlite",
diff --git a/examples/backend/backend.ts b/examples/backend/backend.ts
new file mode 100644
index 00000000..ca1cc483
--- /dev/null
+++ b/examples/backend/backend.ts
@@ -0,0 +1,6 @@
+import "dotenv/config.js";
+import { Backend } from "bee-agent-framework/backend/core";
+
+const backend = await Backend.fromProvider("ollama");
+console.info(backend.chat.modelId);
+console.info(backend.embedding.modelId);
diff --git a/examples/backend/chat.ts b/examples/backend/chat.ts
new file mode 100644
index 00000000..b96775e1
--- /dev/null
+++ b/examples/backend/chat.ts
@@ -0,0 +1,16 @@
+import "dotenv/config.js";
+import { createConsoleReader } from "examples/helpers/io.js";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
+
+const llm = new OllamaChatModel("llama3.1");
+
+const reader = createConsoleReader();
+
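+// Read prompts from the console and send each one as a single user message (non-streaming).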
+for await (const { prompt } of reader) {
+ const response = await llm.create({
+ messages: [new UserMessage(prompt)],
+ });
+  reader.write(`LLM 🤖 (txt) : `, response.getTextContent());
+  reader.write(`LLM 🤖 (raw) : `, JSON.stringify(response.messages));
+}
diff --git a/examples/llms/chatCallback.ts b/examples/backend/chatStream.ts
similarity index 58%
rename from examples/llms/chatCallback.ts
rename to examples/backend/chatStream.ts
index 6efb9a14..0aad1820 100644
--- a/examples/llms/chatCallback.ts
+++ b/examples/backend/chatStream.ts
@@ -1,23 +1,17 @@
import "dotenv/config.js";
import { createConsoleReader } from "examples/helpers/io.js";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
-const llm = new OllamaChatLLM();
+const llm = new OllamaChatModel("llama3.1");
const reader = createConsoleReader();
for await (const { prompt } of reader) {
const response = await llm
- .generate(
- [
- BaseMessage.of({
- role: Role.USER,
- text: prompt,
- }),
- ],
- {},
- )
+ .create({
+ messages: [new UserMessage(prompt)],
+ })
.observe((emitter) =>
emitter.match("*", (data, event) => {
      reader.write(`LLM 🤖 (event: ${event.name})`, JSON.stringify(data));
@@ -28,5 +22,5 @@ for await (const { prompt } of reader) {
);
  reader.write(`LLM 🤖 (txt) : `, response.getTextContent());
-  reader.write(`LLM 🤖 (raw) : `, JSON.stringify(response.finalResult));
+  reader.write(`LLM 🤖 (raw) : `, JSON.stringify(response.messages));
}
diff --git a/examples/backend/providers/amazon-bedrock.ts b/examples/backend/providers/amazon-bedrock.ts
new file mode 100644
index 00000000..58329493
--- /dev/null
+++ b/examples/backend/providers/amazon-bedrock.ts
@@ -0,0 +1,10 @@
+import "dotenv/config";
+import { AmazonBedrockChatModel } from "bee-agent-framework/adapters/amazon-bedrock/backend/chat";
+import { UserMessage } from "bee-agent-framework/backend/message";
+
+const llm = new AmazonBedrockChatModel("amazon.titan-text-lite-v1");
+
+const response = await llm.create({
+ messages: [new UserMessage("Hello world!")],
+});
+console.info(response.getTextContent());
diff --git a/examples/backend/providers/azure_openai.ts b/examples/backend/providers/azure_openai.ts
new file mode 100644
index 00000000..96c24184
--- /dev/null
+++ b/examples/backend/providers/azure_openai.ts
@@ -0,0 +1,10 @@
+import "dotenv/config";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { AzureOpenAIChatModel } from "bee-agent-framework/adapters/azure-openai/backend/chat";
+
+const llm = new AzureOpenAIChatModel("gpt-4o-mini");
+
+const response = await llm.create({
+ messages: [new UserMessage("Hello world!")],
+});
+console.info(response.getTextContent());
diff --git a/examples/backend/providers/groq.ts b/examples/backend/providers/groq.ts
new file mode 100644
index 00000000..cf2e120b
--- /dev/null
+++ b/examples/backend/providers/groq.ts
@@ -0,0 +1,13 @@
+import "dotenv/config";
+import { GroqChatModel } from "bee-agent-framework/adapters/groq/backend/chat";
+import { UserMessage } from "bee-agent-framework/backend/message";
+
+const llm = new GroqChatModel("gemma2-9b-it");
+
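+// Sampling options such as temperature, maxTokens, and topP can be passed directly alongside the messages.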
+const response = await llm.create({
+ messages: [new UserMessage("Hello!")],
+ temperature: 0.7,
+ maxTokens: 1024,
+ topP: 1,
+});
+console.info(response.getTextContent());
diff --git a/examples/llms/providers/langchain.ts b/examples/backend/providers/langchain.ts
similarity index 59%
rename from examples/llms/providers/langchain.ts
rename to examples/backend/providers/langchain.ts
index 18c53f48..91605fea 100644
--- a/examples/llms/providers/langchain.ts
+++ b/examples/backend/providers/langchain.ts
@@ -3,24 +3,20 @@
// - @langchain/cohere (or any other provider related package that you would like to use)
// List of available providers: https://js.langchain.com/v0.2/docs/integrations/chat/
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { LangChainChatLLM } from "bee-agent-framework/adapters/langchain/llms/chat";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { LangChainChatModel } from "bee-agent-framework/adapters/langchain/backend/chat";
// @ts-expect-error package not installed
import { ChatCohere } from "@langchain/cohere";
console.info("===CHAT===");
-const llm = new LangChainChatLLM(
+const llm = new LangChainChatModel(
new ChatCohere({
model: "command-r-plus",
temperature: 0,
}),
);
-const response = await llm.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
-]);
-console.info(response.messages);
+const response = await llm.create({
+ messages: [new UserMessage("Hello world!")],
+});
console.info(response.getTextContent());
diff --git a/examples/backend/providers/ollama.ts b/examples/backend/providers/ollama.ts
new file mode 100644
index 00000000..ce14a528
--- /dev/null
+++ b/examples/backend/providers/ollama.ts
@@ -0,0 +1,10 @@
+import "dotenv/config.js";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
+
+const llm = new OllamaChatModel("llama3.1");
+
+const response = await llm.create({
+ messages: [new UserMessage("Hello world!")],
+});
+console.info(response.getTextContent());
diff --git a/examples/backend/providers/openai.ts b/examples/backend/providers/openai.ts
new file mode 100644
index 00000000..431c542c
--- /dev/null
+++ b/examples/backend/providers/openai.ts
@@ -0,0 +1,10 @@
+import "dotenv/config.js";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { OpenAIChatModel } from "bee-agent-framework/adapters/openai/backend/chat";
+
+const llm = new OpenAIChatModel("gpt-4o");
+
+const response = await llm.create({
+ messages: [new UserMessage("Hello world!")],
+});
+console.info(response.getTextContent());
diff --git a/examples/backend/providers/vertexai.ts b/examples/backend/providers/vertexai.ts
new file mode 100644
index 00000000..3eb23b15
--- /dev/null
+++ b/examples/backend/providers/vertexai.ts
@@ -0,0 +1,10 @@
+import "dotenv/config.js";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { GoogleVertexChatModel } from "bee-agent-framework/adapters/google-vertex/backend/chat";
+
+const llm = new GoogleVertexChatModel("gemini-1.5-flash-001");
+
+const response = await llm.create({
+ messages: [new UserMessage("Hello world!")],
+});
+console.info(response.getTextContent());
diff --git a/examples/backend/providers/watsonx.ts b/examples/backend/providers/watsonx.ts
new file mode 100644
index 00000000..33cce5fc
--- /dev/null
+++ b/examples/backend/providers/watsonx.ts
@@ -0,0 +1,10 @@
+import "dotenv/config.js";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { WatsonxChatModel } from "bee-agent-framework/adapters/watsonx/backend/chat";
+
+const llm = new WatsonxChatModel("meta-llama/llama-3-1-70b-instruct");
+
+const response = await llm.create({
+ messages: [new UserMessage("Hello world!")],
+});
+console.info(response.getTextContent());
diff --git a/examples/backend/providers/watsonx_debug.ts b/examples/backend/providers/watsonx_debug.ts
new file mode 100644
index 00000000..7ca9af70
--- /dev/null
+++ b/examples/backend/providers/watsonx_debug.ts
@@ -0,0 +1,19 @@
+import "dotenv/config";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { WatsonxChatModel } from "bee-agent-framework/adapters/watsonx/backend/chat";
+
+const chatLLM = new WatsonxChatModel("meta-llama/llama-3-1-70b-instruct");
+
+// Log every request
+chatLLM.emitter.match("*", async (data, event) => {
+ console.info(
+ `Time: ${event.createdAt.toISOString().substring(11, 19)}`,
+ `Event: ${event.name}`,
+ `Data: ${JSON.stringify(data).substring(0, 128).concat("...")}`,
+ );
+});
+
+const response = await chatLLM.create({
+ messages: [new UserMessage("Hello world!")],
+});
+console.info(response.messages[0]);
diff --git a/examples/backend/structured.ts b/examples/backend/structured.ts
new file mode 100644
index 00000000..dd6733c1
--- /dev/null
+++ b/examples/backend/structured.ts
@@ -0,0 +1,20 @@
+import { ChatModel, UserMessage } from "bee-agent-framework/backend/core";
+import { z } from "zod";
+
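+// createStructure asks the model for output matching the given Zod schema;
+// the parsed result is exposed as `response.object`.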
+const model = await ChatModel.fromName("ollama:llama3.1");
+const response = await model.createStructure({
+ schema: z.union([
+ z.object({
+ firstName: z.string().min(1),
+ lastName: z.string().min(1),
+ address: z.string(),
+ age: z.number().int().min(1),
+ hobby: z.string(),
+ }),
+ z.object({
+ error: z.string(),
+ }),
+ ]),
+ messages: [new UserMessage("Generate a profile of a citizen of Europe.")],
+});
+console.log(response.object);
diff --git a/examples/backend/toolCalling.ts b/examples/backend/toolCalling.ts
new file mode 100644
index 00000000..2ef78efa
--- /dev/null
+++ b/examples/backend/toolCalling.ts
@@ -0,0 +1,54 @@
+import "dotenv/config";
+import {
+ ChatModel,
+ Message,
+ SystemMessage,
+ ToolMessage,
+ UserMessage,
+} from "bee-agent-framework/backend/core";
+import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
+import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
+import { AnyTool, ToolOutput } from "bee-agent-framework/tools/base";
+
+const model = await ChatModel.fromName("ollama:llama3.1");
+const tools: AnyTool[] = [new DuckDuckGoSearchTool(), new OpenMeteoTool()];
+const messages: Message[] = [
+ new SystemMessage("You are a helpful assistant. Use tools to provide a correct answer."),
+ new UserMessage("What's the fastest marathon time?"),
+];
+
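+// Tool-calling loop: ask the model for a completion, run any tools it requests,
+// feed the results back, and stop once it replies with a plain-text answer.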
+while (true) {
+ const response = await model.create({
+ messages,
+ tools,
+ });
+ messages.push(...response.messages);
+
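+  // Execute every requested tool call and wrap each result in a ToolMessage.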
+ const toolCalls = response.getToolCalls();
+ const toolResults = await Promise.all(
+ toolCalls.map(async ({ args, toolName, toolCallId }) => {
+ console.log(`-> running '${toolName}' tool with ${JSON.stringify(args)}`);
+ const tool = tools.find((tool) => tool.name === toolName)!;
+ const response: ToolOutput = await tool.run(args as any);
+ const result = response.getTextContent();
+ console.log(
+ `<- got response from '${toolName}'`,
+ result.replaceAll(/\s+/g, " ").substring(0, 90).concat(" (truncated)"),
+ );
+ return new ToolMessage({
+ type: "tool-result",
+ result,
+ isError: false,
+ toolName,
+ toolCallId,
+ });
+ }),
+ );
+ messages.push(...toolResults);
+
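+  // A non-empty text answer means the model is done calling tools.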
+ const answer = response.getTextContent();
+ if (answer) {
+ console.info(`Agent: ${answer}`);
+ break;
+ }
+}
diff --git a/examples/cache/llmCache.ts b/examples/cache/llmCache.ts
index 312c560d..059444ca 100644
--- a/examples/cache/llmCache.ts
+++ b/examples/cache/llmCache.ts
@@ -1,21 +1,25 @@
import { SlidingCache } from "bee-agent-framework/cache/slidingCache";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
+import { UserMessage } from "bee-agent-framework/backend/message";
-const llm = new OllamaChatLLM({
- modelId: "llama3.1",
- parameters: {
- temperature: 0,
- num_predict: 50,
- },
+const llm = new OllamaChatModel("llama3.1");
+llm.config({
cache: new SlidingCache({
size: 50,
}),
+ parameters: {
+ maxTokens: 25,
+ },
});
console.info(await llm.cache.size()); // 0
-const first = await llm.generate([BaseMessage.of({ role: "user", text: "Who was Alan Turing?" })]);
+const first = await llm.create({
+ messages: [new UserMessage("Who was Alan Turing?")],
+});
// upcoming requests with the EXACTLY same input will be retrieved from the cache
console.info(await llm.cache.size()); // 1
-const second = await llm.generate([BaseMessage.of({ role: "user", text: "Who was Alan Turing?" })]);
-console.info(first === second); // true
+const second = await llm.create({
+ messages: [new UserMessage("Who was Alan Turing?")],
+});
+console.info(first.getTextContent() === second.getTextContent()); // true
+console.info(await llm.cache.size()); // 1
diff --git a/examples/emitter/agentMatchers.ts b/examples/emitter/agentMatchers.ts
index 4436edac..72483885 100644
--- a/examples/emitter/agentMatchers.ts
+++ b/examples/emitter/agentMatchers.ts
@@ -1,9 +1,9 @@
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const agent = new BeeAgent({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
memory: new UnconstrainedMemory(),
tools: [],
});
diff --git a/examples/emitter/matchers.ts b/examples/emitter/matchers.ts
index 3bda3b9e..85e2e39d 100644
--- a/examples/emitter/matchers.ts
+++ b/examples/emitter/matchers.ts
@@ -1,5 +1,5 @@
import { Callback, Emitter } from "bee-agent-framework/emitter/emitter";
-import { BaseLLM } from "bee-agent-framework/llms/base";
+import { ChatModel } from "bee-agent-framework/backend/chat";
interface Events {
update: Callback<{ data: string }>;
@@ -20,7 +20,7 @@ emitter.match("*.*", async (data, event) => {});
// Match events by providing a filter function
emitter.match(
- (event) => event.creator instanceof BaseLLM,
+ (event) => event.creator instanceof ChatModel,
async (data, event) => {},
);
diff --git a/examples/integrations/langgraph.ts b/examples/integrations/langgraph.ts
index bfab601f..220b9b26 100644
--- a/examples/integrations/langgraph.ts
+++ b/examples/integrations/langgraph.ts
@@ -1,17 +1,17 @@
+import "dotenv/config";
import { DuckDuckGoSearch as LangChainDDG } from "@langchain/community/tools/duckduckgo_search";
-import { ChatMessage as LangChainMessage } from "@langchain/core/messages";
import { createReactAgent as createLangGraphReactAgent } from "@langchain/langgraph/prebuilt";
-import { ChatOllama as LangChainOllamaChat } from "@langchain/ollama";
-import { OllamaChatLLM as BeeOllamaChat } from "bee-agent-framework/adapters/ollama/chat";
-import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { Workflow } from "bee-agent-framework/experimental/workflows/workflow";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { z } from "zod";
+import { createConsoleReader } from "examples/helpers/io.js";
+import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
+import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
+import { ChatOllama as LangChainOllamaChat } from "@langchain/ollama";
import { ReadOnlyMemory } from "bee-agent-framework/memory/base";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { DuckDuckGoSearchTool } from "bee-agent-framework/tools/search/duckDuckGoSearch";
-import "dotenv/config";
-import { createConsoleReader } from "examples/helpers/io.js";
-import { z } from "zod";
+import { Message } from "bee-agent-framework/backend/message";
+import { ChatMessage as LangChainMessage } from "@langchain/core/messages";
+import { ChatModel } from "bee-agent-framework/backend/chat";
const workflow = new Workflow({
schema: z.object({ memory: z.instanceof(ReadOnlyMemory), answer: z.string().default("") }),
@@ -21,7 +21,7 @@ const workflow = new Workflow({
}))
.addStep("bee", async (state, ctx) => {
const beeAgent = new BeeAgent({
- llm: new BeeOllamaChat({ modelId: "llama3.1" }),
+ llm: await ChatModel.fromName("ollama:llama3.1"),
tools: [new DuckDuckGoSearchTool()],
memory: state.memory,
});
@@ -29,8 +29,7 @@ const workflow = new Workflow({
{ prompt: null },
{ signal: ctx.signal, execution: { maxIterations: 5 } },
);
- const answer = response.result.text;
- return { next: Workflow.END, update: { answer } };
+ return { next: Workflow.END, update: { answer: response.result.text } };
})
.addStep("langgraph", async (state, ctx) => {
const langGraphAgent = createLangGraphReactAgent({
@@ -45,7 +44,7 @@ const workflow = new Workflow({
},
{ signal: ctx.signal, recursionLimit: 5 },
);
- const answer = String(response.messages.at(-1)?.content);
+ const answer = response.messages.map((msg) => String(msg.content)).join("");
return { next: Workflow.END, update: { answer } };
});
@@ -53,9 +52,9 @@ const memory = new UnconstrainedMemory();
const reader = createConsoleReader();
for await (const { prompt } of reader) {
- await memory.add(BaseMessage.of({ role: "user", text: prompt }));
+ await memory.add(Message.of({ role: "user", text: prompt }));
const { result, steps } = await workflow.run({ memory: memory.asReadOnly() });
  reader.write(`LLM 🤖 : `, result.answer);
reader.write(`-> solved by `, steps.at(-1)!.name);
- await memory.add(BaseMessage.of({ role: "assistant", text: result.answer }));
+ await memory.add(Message.of({ role: "assistant", text: result.answer }));
}
diff --git a/examples/llms/chat.ts b/examples/llms/chat.ts
deleted file mode 100644
index 478dbfc9..00000000
--- a/examples/llms/chat.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import "dotenv/config.js";
-import { createConsoleReader } from "examples/helpers/io.js";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-
-const llm = new OllamaChatLLM();
-
-const reader = createConsoleReader();
-
-for await (const { prompt } of reader) {
- const response = await llm.generate([
- BaseMessage.of({
- role: Role.USER,
- text: prompt,
- }),
- ]);
-  reader.write(`LLM 🤖 (txt) : `, response.getTextContent());
-  reader.write(`LLM 🤖 (raw) : `, JSON.stringify(response.finalResult));
-}
diff --git a/examples/llms/chatStream.ts b/examples/llms/chatStream.ts
deleted file mode 100644
index ba91c3a7..00000000
--- a/examples/llms/chatStream.ts
+++ /dev/null
@@ -1,20 +0,0 @@
-import "dotenv/config.js";
-import { createConsoleReader } from "examples/helpers/io.js";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-
-const llm = new OllamaChatLLM();
-
-const reader = createConsoleReader();
-
-for await (const { prompt } of reader) {
- for await (const chunk of llm.stream([
- BaseMessage.of({
- role: Role.USER,
- text: prompt,
- }),
- ])) {
-    reader.write(`LLM 🤖 (txt) : `, chunk.getTextContent());
-    reader.write(`LLM 🤖 (raw) : `, JSON.stringify(chunk.finalResult));
- }
-}
diff --git a/examples/llms/embed.ts b/examples/llms/embed.ts
deleted file mode 100644
index b884fa96..00000000
--- a/examples/llms/embed.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-import "dotenv/config.js";
-import { OllamaLLM } from "bee-agent-framework/adapters/ollama/llm";
-import { cosineSimilarity } from "bee-agent-framework/internals/helpers/math";
-import { EmbeddingOutput } from "bee-agent-framework/llms/base";
-
-const llm = new OllamaLLM({
- modelId: "nomic-embed-text",
-});
-
-const embed: EmbeddingOutput = await llm.embed(["King", "Queen"]);
-console.log(cosineSimilarity(embed.embeddings[0], embed.embeddings[1]));
-
-const sentences = ["Hard cold rock", "Warm Soft pillow"];
-
-const embed1: EmbeddingOutput = await llm.embed(sentences);
-console.log(cosineSimilarity(embed1.embeddings[0], embed1.embeddings[1]));
diff --git a/examples/llms/instrumentation.ts b/examples/llms/instrumentation.ts
deleted file mode 100644
index 8c971e4b..00000000
--- a/examples/llms/instrumentation.ts
+++ /dev/null
@@ -1,27 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////////////////
-/////// RUN THIS EXAMPLE VIA `yarn start:telemetry ./examples/llms/instrumentation.ts` ///////////
-//////////////////////////////////////////////////////////////////////////////////////////////////
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-import { Logger } from "bee-agent-framework/logger/logger";
-
-Logger.root.level = "silent"; // disable internal logs
-const logger = new Logger({ name: "app", level: "trace" });
-
-const llm = new OllamaChatLLM({
- modelId: "llama3.1", // llama3.1:70b for better performance
-});
-
-const response = await llm.generate([
- BaseMessage.of({
- role: Role.USER,
- text: "hello",
- }),
-]);
-
-logger.info(`LLM 🤖 (txt) : ${response.getTextContent()}`);
-
-// Wait briefly to ensure all telemetry data has been processed
-setTimeout(() => {
- logger.info("Process exiting after OpenTelemetry flush.");
-}, 5_000); // Adjust the delay as needed
diff --git a/examples/llms/providers/azure_openai.ts b/examples/llms/providers/azure_openai.ts
deleted file mode 100644
index 3f9fc5e4..00000000
--- a/examples/llms/providers/azure_openai.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-import "dotenv/config";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { OpenAIChatLLM } from "bee-agent-framework/adapters/openai/chat";
-
-const llm = new OpenAIChatLLM({
- modelId: "gpt-4o-mini",
- azure: true,
- parameters: {
- max_tokens: 10,
- stop: ["post"],
- },
-});
-
-console.info("Meta", await llm.meta());
-const response = await llm.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
-]);
-console.info(response.getTextContent());
diff --git a/examples/llms/providers/bedrock.ts b/examples/llms/providers/bedrock.ts
deleted file mode 100644
index 6272dc34..00000000
--- a/examples/llms/providers/bedrock.ts
+++ /dev/null
@@ -1,22 +0,0 @@
-import "dotenv/config";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { BedrockChatLLM } from "bee-agent-framework/adapters/bedrock/chat";
-
-const llm = new BedrockChatLLM({
- region: process.env.AWS_REGION,
- modelId: "amazon.titan-text-lite-v1",
- parameters: {
- temperature: 0.7,
- maxTokens: 1024,
- topP: 1,
- },
-});
-
-console.info("meta", await llm.meta());
-const response = await llm.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
-]);
-console.info(response.getTextContent());
diff --git a/examples/llms/providers/customChatProvider.ts b/examples/llms/providers/customChatProvider.ts
deleted file mode 100644
index c2aba390..00000000
--- a/examples/llms/providers/customChatProvider.ts
+++ /dev/null
@@ -1,138 +0,0 @@
-import {
- AsyncStream,
- BaseLLMTokenizeOutput,
- EmbeddingOptions,
- EmbeddingOutput,
- ExecutionOptions,
- GenerateOptions,
- LLMCache,
- LLMMeta,
- StreamGenerateOptions,
-} from "bee-agent-framework/llms/base";
-import { shallowCopy } from "bee-agent-framework/serializer/utils";
-import type { GetRunContext } from "bee-agent-framework/context";
-import { Emitter } from "bee-agent-framework/emitter/emitter";
-import { ChatLLM, ChatLLMGenerateEvents, ChatLLMOutput } from "bee-agent-framework/llms/chat";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { sum } from "remeda";
-import { NotImplementedError } from "bee-agent-framework/errors";
-
-export class CustomChatLLMOutput extends ChatLLMOutput {
- public readonly chunks: BaseMessage[] = [];
-
- constructor(chunk: BaseMessage) {
- super();
- this.chunks.push(chunk);
- }
-
- get messages() {
- return this.chunks;
- }
-
- merge(other: CustomChatLLMOutput): void {
- this.chunks.push(...other.chunks);
- }
-
- getTextContent(): string {
- return this.chunks.map((result) => result.text).join("");
- }
-
- toString(): string {
- return this.getTextContent();
- }
-
- createSnapshot() {
- return { chunks: shallowCopy(this.chunks) };
- }
-
- loadSnapshot(snapshot: ReturnType): void {
- Object.assign(this, snapshot);
- }
-}
-
-// Feel free to extend if you want to support additional parameters
-type CustomGenerateOptions = GenerateOptions;
-
-export interface CustomChatLLMInput {
- modelId: string;
- executionOptions?: ExecutionOptions;
- cache?: LLMCache;
- parameters?: Record;
-}
-
-type CustomChatLLMEvents = ChatLLMGenerateEvents;
-
-export class CustomChatLLM extends ChatLLM {
- public readonly emitter = Emitter.root.child({
- namespace: ["custom", "llm"],
- creator: this,
- });
-
- constructor(protected readonly input: CustomChatLLMInput) {
- super(input.modelId, input.executionOptions, input.cache);
- }
-
- static {
- this.register();
- }
-
- async meta(): Promise {
- // TODO: retrieve data about current model from the given provider API
- return { tokenLimit: Infinity };
- }
-
- async embed(input: BaseMessage[][], options?: EmbeddingOptions): Promise {
- throw new NotImplementedError();
- }
-
- async tokenize(input: BaseMessage[]): Promise {
- // TODO: retrieve data about current model from the given provider API
- return {
- tokensCount: sum(input.map((msg) => Math.ceil(msg.text.length / 4))),
- };
- }
-
- protected async _generate(
- input: BaseMessage[],
- options: Partial,
- run: GetRunContext,
- ): Promise {
- // this method should do non-stream request to the API
- // TIP: access inference parameters via `this.input.parameters` and `options`
- // TIP: use signal from run.signal
- const result = BaseMessage.of({
- role: Role.ASSISTANT,
- text: "TODO: response retrieve from the API",
- });
- return new CustomChatLLMOutput(result);
- }
-
- protected async *_stream(
- input: BaseMessage[],
- options: Partial,
- run: GetRunContext,
- ): AsyncStream {
- // this method should do stream request to the API
- // TIP: access inference parameters via `this.input.parameters` and `options`
- // TIP: use signal from run.signal
- for await (const chunk of ["Hel", "oo", "world", "!"]) {
- const result = BaseMessage.of({
- role: Role.ASSISTANT,
- text: chunk,
- });
- yield new CustomChatLLMOutput(result);
- }
- }
-
- createSnapshot() {
- return {
- ...super.createSnapshot(),
- input: shallowCopy(this.input),
- };
- }
-
- loadSnapshot({ input, ...snapshot }: ReturnType) {
- super.loadSnapshot(snapshot);
- Object.assign(this, { input });
- }
-}
diff --git a/examples/llms/providers/customProvider.ts b/examples/llms/providers/customProvider.ts
deleted file mode 100644
index d5a1eaf8..00000000
--- a/examples/llms/providers/customProvider.ts
+++ /dev/null
@@ -1,137 +0,0 @@
-import { LLM, LLMEvents, LLMInput } from "bee-agent-framework/llms/llm";
-import {
- AsyncStream,
- BaseLLMOutput,
- BaseLLMTokenizeOutput,
- EmbeddingOptions,
- EmbeddingOutput,
- ExecutionOptions,
- GenerateOptions,
- LLMCache,
- LLMMeta,
-} from "bee-agent-framework/llms/base";
-import { shallowCopy } from "bee-agent-framework/serializer/utils";
-import type { GetRunContext } from "bee-agent-framework/context";
-import { Emitter } from "bee-agent-framework/emitter/emitter";
-import { NotImplementedError } from "bee-agent-framework/errors";
-
-interface CustomLLMChunk {
- text: string;
- metadata: Record;
-}
-
-export class CustomLLMOutput extends BaseLLMOutput {
- public readonly chunks: CustomLLMChunk[] = [];
-
- constructor(chunk: CustomLLMChunk) {
- super();
- this.chunks.push(chunk);
- }
-
- merge(other: CustomLLMOutput): void {
- this.chunks.push(...other.chunks);
- }
-
- getTextContent(): string {
- return this.chunks.map((result) => result.text).join("");
- }
-
- toString(): string {
- return this.getTextContent();
- }
-
- createSnapshot() {
- return { chunks: shallowCopy(this.chunks) };
- }
-
- loadSnapshot(snapshot: ReturnType): void {
- Object.assign(this, snapshot);
- }
-}
-
-// Feel free to extend if you want to support additional parameters
-type CustomGenerateOptions = GenerateOptions;
-
-export interface CustomLLMInput {
- modelId: string;
- executionOptions?: ExecutionOptions;
- cache?: LLMCache;
- parameters?: Record;
-}
-
-type CustomLLMEvents = LLMEvents;
-
-export class CustomLLM extends LLM {
- public readonly emitter = Emitter.root.child({
- namespace: ["custom", "llm"],
- creator: this,
- });
-
- constructor(protected readonly input: CustomLLMInput) {
- super(input.modelId, input.executionOptions, input.cache);
- }
-
- static {
- this.register();
- }
-
- async meta(): Promise {
- // TODO: retrieve data about current model from the given provider API
- return { tokenLimit: Infinity };
- }
-
- async embed(input: LLMInput[], options?: EmbeddingOptions): Promise {
- throw new NotImplementedError();
- }
-
- async tokenize(input: LLMInput): Promise {
- // TODO: retrieve data about current model from the given provider API
- return {
- tokensCount: Math.ceil(input.length / 4),
- };
- }
-
- protected async _generate(
- input: LLMInput,
- options: Partial,
- run: GetRunContext,
- ): Promise {
- // this method should do non-stream request to the API
- // TIP: access inference parameters via `this.input.parameters` and `options`
- // TIP: use signal from run.signal
- const result: CustomLLMChunk = {
- text: "...",
- metadata: {},
- };
- return new CustomLLMOutput(result);
- }
-
- protected async *_stream(
- input: LLMInput,
- options: Partial,
- run: GetRunContext,
- ): AsyncStream {
- // this method should do stream request to the API
- // TIP: access inference parameters via `this.input.parameters` and `options`
- // TIP: use signal from run.signal
- for await (const chunk of ["Hel", "oo", "world", "!"]) {
- const result: CustomLLMChunk = {
- text: chunk,
- metadata: {},
- };
- yield new CustomLLMOutput(result);
- }
- }
-
- createSnapshot() {
- return {
- ...super.createSnapshot(),
- input: shallowCopy(this.input),
- };
- }
-
- loadSnapshot({ input, ...snapshot }: ReturnType) {
- super.loadSnapshot(snapshot);
- Object.assign(this, { input });
- }
-}
diff --git a/examples/llms/providers/groq.ts b/examples/llms/providers/groq.ts
deleted file mode 100644
index 5edec49b..00000000
--- a/examples/llms/providers/groq.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-import "dotenv/config";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
-
-const llm = new GroqChatLLM({
- modelId: "gemma2-9b-it",
- parameters: {
- temperature: 0.7,
- max_tokens: 1024,
- top_p: 1,
- },
-});
-
-console.info("Meta", await llm.meta());
-const response = await llm.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
-]);
-console.info(response.getTextContent());
diff --git a/examples/llms/providers/ibm-vllm.ts b/examples/llms/providers/ibm-vllm.ts
deleted file mode 100644
index 50480c63..00000000
--- a/examples/llms/providers/ibm-vllm.ts
+++ /dev/null
@@ -1,44 +0,0 @@
-import "dotenv/config.js";
-import { IBMvLLM } from "bee-agent-framework/adapters/ibm-vllm/llm";
-import { IBMVllmChatLLM } from "bee-agent-framework/adapters/ibm-vllm/chat";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { Client } from "bee-agent-framework/adapters/ibm-vllm/client";
-
-const client = new Client();
-{
- console.info("===RAW===");
- const llm = new IBMvLLM({
- client,
- modelId: "meta-llama/llama-3-1-70b-instruct",
- });
-
- console.info("Meta", await llm.meta());
-
- const response = await llm.generate("Hello world!", {
- stream: false,
- });
- console.info(response.text);
-}
-
-{
- console.info("===CHAT===");
- const llm = IBMVllmChatLLM.fromPreset("meta-llama/llama-3-1-70b-instruct", { client });
-
- console.info("Meta", await llm.meta());
-
- const response = await llm.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
- ]);
- console.info(response.messages);
-}
-
-{
- console.info("===EMBEDDING===");
- const llm = new IBMvLLM({ client, modelId: "baai/bge-large-en-v1.5" });
-
- const response = await llm.embed([`Hello world!`, `Hello family!`]);
- console.info(response);
-}
diff --git a/examples/llms/providers/ollama.ts b/examples/llms/providers/ollama.ts
deleted file mode 100644
index ec7232ee..00000000
--- a/examples/llms/providers/ollama.ts
+++ /dev/null
@@ -1,64 +0,0 @@
-import { OllamaLLM } from "bee-agent-framework/adapters/ollama/llm";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { Ollama } from "ollama";
-
-{
- console.info("===RAW===");
- const llm = new OllamaLLM({
- modelId: "llama3.1",
- parameters: {
- num_predict: 10,
- stop: ["post"],
- },
- });
-
- console.info("Meta", await llm.meta());
-
- const response = await llm.generate("Hello world!", {
- stream: true,
- });
- console.info(response.finalResult);
-}
-
-{
- console.info("===CHAT===");
- const llm = new OllamaChatLLM({
- modelId: "llama3.1",
- parameters: {
- num_predict: 10,
- temperature: 0,
- },
- });
-
- console.info("Meta", await llm.meta());
-
- const response = await llm.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
- ]);
- console.info(response.finalResult);
-}
-
-{
- console.info("===REMOTE OLLAMA===");
- const llm = new OllamaChatLLM({
- modelId: "llama3.1",
- client: new Ollama({
- // use the IP for the server you have ollama running on
- host: process.env.OLLAMA_HOST || "http://127.0.0.1:11434",
- }),
- });
-
- console.info("Meta", await llm.meta());
-
- const response = await llm.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
- ]);
- console.info(response.finalResult);
-}
diff --git a/examples/llms/providers/openai.ts b/examples/llms/providers/openai.ts
deleted file mode 100644
index b56d0459..00000000
--- a/examples/llms/providers/openai.ts
+++ /dev/null
@@ -1,20 +0,0 @@
-import "dotenv/config";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { OpenAIChatLLM } from "bee-agent-framework/adapters/openai/chat";
-
-const llm = new OpenAIChatLLM({
- modelId: "gpt-4o",
- parameters: {
- max_tokens: 10,
- stop: ["post"],
- },
-});
-
-console.info("Meta", await llm.meta());
-const response = await llm.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
-]);
-console.info(response.getTextContent());
diff --git a/examples/llms/providers/vertexai.ts b/examples/llms/providers/vertexai.ts
deleted file mode 100644
index 8080bf6a..00000000
--- a/examples/llms/providers/vertexai.ts
+++ /dev/null
@@ -1,47 +0,0 @@
-import "dotenv/config.js";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { VertexAILLM } from "bee-agent-framework/adapters/vertexai/llm";
-import { VertexAIChatLLM } from "bee-agent-framework/adapters/vertexai/chat";
-
-const project = process.env.GCP_VERTEXAI_PROJECT;
-const location = process.env.GCP_VERTEXAI_LOCATION;
-
-if (!project || !location) {
- throw new Error("No ENVs has been set!");
-}
-
-{
- console.info("===RAW===");
- const llm = new VertexAILLM({
- modelId: "gemini-1.5-flash-001",
- project,
- location,
- parameters: {},
- });
-
- console.info("Meta", await llm.meta());
-
- const response = await llm.generate("Hello world!", {
- stream: true,
- });
- console.info(response.getTextContent());
-}
-
-{
- console.info("===CHAT===");
- const llm = new VertexAIChatLLM({
- modelId: "gemini-1.5-flash-001",
- project,
- location,
- });
-
- console.info("Meta", await llm.meta());
-
- const response = await llm.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
- ]);
- console.info(response.getTextContent());
-}
diff --git a/examples/llms/providers/watsonx.ts b/examples/llms/providers/watsonx.ts
deleted file mode 100644
index b390b340..00000000
--- a/examples/llms/providers/watsonx.ts
+++ /dev/null
@@ -1,23 +0,0 @@
-import "dotenv/config";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
-
-const chatLLM = WatsonXChatLLM.fromPreset("meta-llama/llama-3-1-70b-instruct", {
- apiKey: process.env.WATSONX_API_KEY,
- projectId: process.env.WATSONX_PROJECT_ID,
- region: process.env.WATSONX_REGION, // (optional) default is us-south
- parameters: {
- decoding_method: "greedy",
- max_new_tokens: 50,
- },
-});
-
-console.info("Meta", await chatLLM.meta());
-
-const response = await chatLLM.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
-]);
-console.info(response.messages[0]);
diff --git a/examples/llms/providers/watsonx_debug.ts b/examples/llms/providers/watsonx_debug.ts
deleted file mode 100644
index f4989883..00000000
--- a/examples/llms/providers/watsonx_debug.ts
+++ /dev/null
@@ -1,47 +0,0 @@
-import "dotenv/config";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
-
-const chatLLM = WatsonXChatLLM.fromPreset("meta-llama/llama-3-1-70b-instruct", {
- apiKey: process.env.WATSONX_API_KEY,
- projectId: process.env.WATSONX_PROJECT_ID,
- region: process.env.WATSONX_REGION, // (optional) default is us-south
- parameters: {
- decoding_method: "greedy",
- max_new_tokens: 50,
- },
-});
-
-// Log every request
-chatLLM.llm.client.emitter.match("*", async (data, event) => {
- console.info(
- `Time: ${event.createdAt.toISOString().substring(11, 19)}`,
- `Event: ${event.name}`,
- `Data: ${JSON.stringify(data).substring(0, 128).concat("...")}`,
- );
-});
-
-chatLLM.llm.client.emitter.on("fetchStart", async (data, event) => {
- console.info(`Fetching ${data.input.url}`);
- // You can also change the 'data' object
-});
-
-chatLLM.llm.client.emitter.on("streamStart", async (data, event) => {
- console.info(`Streaming ${data.input.url}`);
- // You can also change the 'data' object
-});
-
-console.info("Meta", await chatLLM.meta());
-
-const response = await chatLLM.generate(
- [
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
- ],
- {
- stream: true,
- },
-);
-console.info(response.messages[0]);
diff --git a/examples/llms/providers/watsonx_verbose.ts b/examples/llms/providers/watsonx_verbose.ts
deleted file mode 100644
index 95654059..00000000
--- a/examples/llms/providers/watsonx_verbose.ts
+++ /dev/null
@@ -1,59 +0,0 @@
-import "dotenv/config";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
-import { WatsonXLLM } from "bee-agent-framework/adapters/watsonx/llm";
-import { PromptTemplate } from "bee-agent-framework/template";
-import { z } from "zod";
-
-const template = new PromptTemplate({
- schema: z.object({
- messages: z.array(z.record(z.array(z.string()))),
- }),
- template: `{{#messages}}{{#system}}<|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
-{{system}}<|eot_id|>{{/system}}{{#user}}<|start_header_id|>user<|end_header_id|>
-
-{{user}}<|eot_id|>{{/user}}{{#assistant}}<|start_header_id|>assistant<|end_header_id|>
-
-{{assistant}}<|eot_id|>{{/assistant}}{{#ipython}}<|start_header_id|>ipython<|end_header_id|>
-
-{{ipython}}<|eot_id|>{{/ipython}}{{/messages}}<|start_header_id|>assistant<|end_header_id|>
-`,
-});
-
-const llm = new WatsonXLLM({
- modelId: "meta-llama/llama-3-1-70b-instruct",
- projectId: process.env.WATSONX_PROJECT_ID,
- apiKey: process.env.WATSONX_API_KEY,
- region: process.env.WATSONX_REGION, // (optional) default is us-south
- parameters: {
- decoding_method: "greedy",
- max_new_tokens: 50,
- },
-});
-
-const chatLLM = new WatsonXChatLLM({
- llm,
- config: {
- messagesToPrompt(messages: BaseMessage[]) {
- return template.render({
- messages: messages.map((message) => ({
- system: message.role === "system" ? [message.text] : [],
- user: message.role === "user" ? [message.text] : [],
- assistant: message.role === "assistant" ? [message.text] : [],
- ipython: message.role === "ipython" ? [message.text] : [],
- })),
- });
- },
- },
-});
-
-console.info("Meta", await chatLLM.meta());
-
-const response = await chatLLM.generate([
- BaseMessage.of({
- role: "user",
- text: "Hello world!",
- }),
-]);
-console.info(response.messages[0]);
diff --git a/examples/llms/structured.ts b/examples/llms/structured.ts
deleted file mode 100644
index 5292a5fd..00000000
--- a/examples/llms/structured.ts
+++ /dev/null
@@ -1,29 +0,0 @@
-import "dotenv/config.js";
-import { z } from "zod";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-import { JsonDriver } from "bee-agent-framework/llms/drivers/json";
-
-const llm = new OllamaChatLLM();
-const driver = new JsonDriver(llm);
-const response = await driver.generate(
- z.union([
- z.object({
- firstName: z.string().min(1),
- lastName: z.string().min(1),
- address: z.string(),
- age: z.number().int().min(1),
- hobby: z.string(),
- }),
- z.object({
- error: z.string(),
- }),
- ]),
- [
- BaseMessage.of({
- role: Role.USER,
- text: "Generate a profile of a citizen of Europe.",
- }),
- ],
-);
-console.info(response);
diff --git a/examples/llms/text.ts b/examples/llms/text.ts
deleted file mode 100644
index ccf1654e..00000000
--- a/examples/llms/text.ts
+++ /dev/null
@@ -1,20 +0,0 @@
-import "dotenv/config.js";
-import { createConsoleReader } from "examples/helpers/io.js";
-import { WatsonXLLM } from "bee-agent-framework/adapters/watsonx/llm";
-
-const llm = new WatsonXLLM({
- modelId: "google/flan-ul2",
- projectId: process.env.WATSONX_PROJECT_ID,
- apiKey: process.env.WATSONX_API_KEY,
- region: process.env.WATSONX_REGION, // (optional) default is us-south
- parameters: {
- decoding_method: "greedy",
- max_new_tokens: 50,
- },
-});
-
-const reader = createConsoleReader();
-const prompt = await reader.prompt();
-const response = await llm.generate(prompt);
-reader.write(`LLM 🤖 (text) : `, response.getTextContent());
-reader.close();
diff --git a/examples/logger/agent.ts b/examples/logger/agent.ts
index 76b1ff76..7cce8dca 100644
--- a/examples/logger/agent.ts
+++ b/examples/logger/agent.ts
@@ -1,8 +1,8 @@
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { Logger } from "bee-agent-framework/logger/logger";
import { Emitter } from "bee-agent-framework/emitter/emitter";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
// Set up logging
Logger.defaults.pretty = true;
@@ -15,12 +15,12 @@ const logger = Logger.root.child({
// Log events emitted during agent execution
Emitter.root.match("*.*", (data, event) => {
const logLevel = event.path.includes(".run.") ? "trace" : "info";
- logger[logLevel](`Event '${event.path}' triggered by '${event.creator.constructor.name}'.`);
+ logger[logLevel](`Event '${event.path}' triggered by '${event.creator.constructor.name}'`);
});
// Create and run an agent
const agent = new BeeAgent({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
memory: new UnconstrainedMemory(),
tools: [],
});
diff --git a/examples/memory/agentMemory.ts b/examples/memory/agentMemory.ts
index 49bc8813..a6e17ca4 100644
--- a/examples/memory/agentMemory.ts
+++ b/examples/memory/agentMemory.ts
@@ -1,10 +1,10 @@
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const agent = new BeeAgent({
memory: new UnconstrainedMemory(),
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
tools: [],
});
await agent.run({ prompt: "Hello world!" });
diff --git a/examples/memory/base.ts b/examples/memory/base.ts
index d181f735..a3c98fc9 100644
--- a/examples/memory/base.ts
+++ b/examples/memory/base.ts
@@ -1,21 +1,13 @@
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { AssistantMessage, SystemMessage, UserMessage } from "bee-agent-framework/backend/message";
const memory = new UnconstrainedMemory();
// Single message
-await memory.add(
- BaseMessage.of({
- role: "system",
- text: `You are a helpful assistant.`,
- }),
-);
+await memory.add(new SystemMessage(`You are a helpful assistant.`));
// Multiple messages
-await memory.addMany([
- BaseMessage.of({ role: "user", text: `What can you do?` }),
- BaseMessage.of({ role: "assistant", text: `Everything!` }),
-]);
+await memory.addMany([new UserMessage(`What can you do?`), new AssistantMessage(`Everything!`)]);
console.info(memory.isEmpty()); // false
console.info(memory.messages); // prints all saved messages
diff --git a/examples/memory/custom.ts b/examples/memory/custom.ts
index 5c443375..b0bafdbf 100644
--- a/examples/memory/custom.ts
+++ b/examples/memory/custom.ts
@@ -1,17 +1,17 @@
import { BaseMemory } from "bee-agent-framework/memory/base";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
import { NotImplementedError } from "bee-agent-framework/errors";
export class MyMemory extends BaseMemory {
- get messages(): readonly BaseMessage[] {
+ get messages(): readonly Message[] {
throw new NotImplementedError("Method not implemented.");
}
- add(message: BaseMessage, index?: number): Promise {
+ add(message: Message, index?: number): Promise {
throw new NotImplementedError("Method not implemented.");
}
- delete(message: BaseMessage): Promise {
+ delete(message: Message): Promise {
throw new NotImplementedError("Method not implemented.");
}
diff --git a/examples/memory/llmMemory.ts b/examples/memory/llmMemory.ts
index 9282aec3..7e767ee8 100644
--- a/examples/memory/llmMemory.ts
+++ b/examples/memory/llmMemory.ts
@@ -1,20 +1,20 @@
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const memory = new UnconstrainedMemory();
await memory.addMany([
- BaseMessage.of({
+ Message.of({
role: "system",
text: `Always respond very concisely.`,
}),
- BaseMessage.of({ role: "user", text: `Give me first 5 prime numbers.` }),
+ Message.of({ role: "user", text: `Give me first 5 prime numbers.` }),
]);
// Generate response
-const llm = new OllamaChatLLM();
-const response = await llm.generate(memory.messages);
-await memory.add(BaseMessage.of({ role: "assistant", text: response.getTextContent() }));
+const llm = new OllamaChatModel("llama3.1");
+const response = await llm.create({ messages: memory.messages });
+await memory.add(Message.of({ role: "assistant", text: response.getTextContent() }));
console.log(`Conversation history`);
for (const message of memory) {
diff --git a/examples/memory/slidingMemory.ts b/examples/memory/slidingMemory.ts
index ada69718..43a0a261 100644
--- a/examples/memory/slidingMemory.ts
+++ b/examples/memory/slidingMemory.ts
@@ -1,5 +1,5 @@
import { SlidingMemory } from "bee-agent-framework/memory/slidingMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
const memory = new SlidingMemory({
size: 3, // (required) number of messages that can be in the memory at a single moment
@@ -10,11 +10,11 @@ const memory = new SlidingMemory({
},
});
-await memory.add(BaseMessage.of({ role: "system", text: "You are a guide through France." }));
-await memory.add(BaseMessage.of({ role: "user", text: "What is the capital?" }));
-await memory.add(BaseMessage.of({ role: "assistant", text: "Paris" }));
-await memory.add(BaseMessage.of({ role: "user", text: "What language is spoken there?" })); // removes the first user's message
-await memory.add(BaseMessage.of({ role: "assistant", text: "French" })); // removes the first assistant's message
+await memory.add(Message.of({ role: "system", text: "You are a guide through France." }));
+await memory.add(Message.of({ role: "user", text: "What is the capital?" }));
+await memory.add(Message.of({ role: "assistant", text: "Paris" }));
+await memory.add(Message.of({ role: "user", text: "What language is spoken there?" })); // removes the first user's message
+await memory.add(Message.of({ role: "assistant", text: "French" })); // removes the first assistant's message
console.info(memory.isEmpty()); // false
console.log(memory.messages.length); // 3
diff --git a/examples/memory/summarizeMemory.ts b/examples/memory/summarizeMemory.ts
index c823d72d..ae8d37c0 100644
--- a/examples/memory/summarizeMemory.ts
+++ b/examples/memory/summarizeMemory.ts
@@ -1,22 +1,16 @@
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
import { SummarizeMemory } from "bee-agent-framework/memory/summarizeMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const memory = new SummarizeMemory({
- llm: new OllamaChatLLM({
- modelId: "llama3.1",
- parameters: {
- temperature: 0,
- num_predict: 250,
- },
- }),
+ llm: new OllamaChatModel("llama3.1"),
});
await memory.addMany([
- BaseMessage.of({ role: "system", text: "You are a guide through France." }),
- BaseMessage.of({ role: "user", text: "What is the capital?" }),
- BaseMessage.of({ role: "assistant", text: "Paris" }),
- BaseMessage.of({ role: "user", text: "What language is spoken there?" }),
+ Message.of({ role: "system", text: "You are a guide through France." }),
+ Message.of({ role: "user", text: "What is the capital?" }),
+ Message.of({ role: "assistant", text: "Paris" }),
+ Message.of({ role: "user", text: "What language is spoken there?" }),
]);
console.info(memory.isEmpty()); // false
diff --git a/examples/memory/tokenMemory.ts b/examples/memory/tokenMemory.ts
index c02ff4cf..1ede1114 100644
--- a/examples/memory/tokenMemory.ts
+++ b/examples/memory/tokenMemory.ts
@@ -1,11 +1,8 @@
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { Message } from "bee-agent-framework/backend/message";
-const llm = new OllamaChatLLM();
const memory = new TokenMemory({
- llm,
- maxTokens: undefined, // optional (default is inferred from the passed LLM instance),
+ maxTokens: undefined, // optional (default is 128k),
capacityThreshold: 0.75, // maxTokens*capacityThreshold = threshold where we start removing old messages
syncThreshold: 0.25, // maxTokens*syncThreshold = threshold where we start to use a real tokenization endpoint instead of guessing the number of tokens
handlers: {
@@ -17,8 +14,8 @@ const memory = new TokenMemory({
},
});
-await memory.add(BaseMessage.of({ role: "system", text: "You are a helpful assistant." }));
-await memory.add(BaseMessage.of({ role: "user", text: "Hello world!" }));
+await memory.add(Message.of({ role: "system", text: "You are a helpful assistant." }));
+await memory.add(Message.of({ role: "user", text: "Hello world!" }));
console.info(memory.isDirty); // is the consumed token count estimated or retrieved via the tokenize endpoint?
console.log(memory.tokensUsed); // number of used tokens
diff --git a/examples/memory/unconstrainedMemory.ts b/examples/memory/unconstrainedMemory.ts
index aa5f6ee0..21dee55c 100644
--- a/examples/memory/unconstrainedMemory.ts
+++ b/examples/memory/unconstrainedMemory.ts
@@ -1,9 +1,9 @@
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
const memory = new UnconstrainedMemory();
await memory.add(
- BaseMessage.of({
+ Message.of({
role: "user",
text: `Hello world!`,
}),
diff --git a/examples/serialization/base.ts b/examples/serialization/base.ts
index e0ae9046..0258301a 100644
--- a/examples/serialization/base.ts
+++ b/examples/serialization/base.ts
@@ -1,8 +1,8 @@
import { Serializer } from "bee-agent-framework/serializer/serializer";
const original = new Date("2024-01-01T00:00:00.000Z");
-const serialized = Serializer.serialize(original);
-const deserialized = Serializer.deserialize(serialized);
+const serialized = await Serializer.serialize(original);
+const deserialized = await Serializer.deserialize(serialized);
console.info(deserialized instanceof Date); // true
console.info(original.toISOString() === deserialized.toISOString()); // true
diff --git a/examples/serialization/context.ts b/examples/serialization/context.ts
index 77580cf5..5b1f8f64 100644
--- a/examples/serialization/context.ts
+++ b/examples/serialization/context.ts
@@ -1,12 +1,12 @@
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { UserMessage } from "bee-agent-framework/backend/message";
// String containing serialized `UnconstrainedMemory` instance with one message in it.
-const serialized = `{"__version":"0.0.0","__root":{"__serializer":true,"__class":"Object","__ref":"5","__value":{"target":"UnconstrainedMemory","snapshot":{"__serializer":true,"__class":"Object","__ref":"4","__value":{"messages":{"__serializer":true,"__class":"Array","__ref":"1","__value":[{"__serializer":true,"__class":"BaseMessage","__ref":"2","__value":{"role":"user","text":"Serialization is amazing, isn't?","meta":{"__serializer":true,"__class":"Undefined","__ref":"3"}}}]}}}}}}`;
+const serialized = `{"__version":"0.0.0","__root":{"__serializer":true,"__class":"Object","__ref":"18","__value":{"target":"UnconstrainedMemory","snapshot":{"__serializer":true,"__class":"Object","__ref":"17","__value":{"messages":{"__serializer":true,"__class":"Array","__ref":"1","__value":[{"__serializer":true,"__class":"SystemMessage","__ref":"2","__value":{"content":{"__serializer":true,"__class":"Array","__ref":"3","__value":[{"__serializer":true,"__class":"Object","__ref":"4","__value":{"type":"text","text":"You are a helpful assistant."}}]},"meta":{"__serializer":true,"__class":"Object","__ref":"5","__value":{"createdAt":{"__serializer":true,"__class":"Date","__ref":"6","__value":"2025-02-06T14:51:01.459Z"}}},"role":"system"}},{"__serializer":true,"__class":"UserMessage","__ref":"7","__value":{"content":{"__serializer":true,"__class":"Array","__ref":"8","__value":[{"__serializer":true,"__class":"Object","__ref":"9","__value":{"type":"text","text":"Hello!"}}]},"meta":{"__serializer":true,"__class":"Object","__ref":"10","__value":{"createdAt":{"__serializer":true,"__class":"Date","__ref":"11","__value":"2025-02-06T14:51:01.459Z"}}},"role":"user"}},{"__serializer":true,"__class":"AssistantMessage","__ref":"12","__value":{"content":{"__serializer":true,"__class":"Array","__ref":"13","__value":[{"__serializer":true,"__class":"Object","__ref":"14","__value":{"type":"text","text":"Hello, how can I help you?"}}]},"meta":{"__serializer":true,"__class":"Object","__ref":"15","__value":{"createdAt":{"__serializer":true,"__class":"Date","__ref":"16","__value":"2025-02-06T14:51:01.459Z"}}},"role":"assistant"}}]}}}}}}`;
-// If `BaseMessage` was not imported the serialization would fail because the `BaseMessage` had no chance to register itself.
-const memory = UnconstrainedMemory.fromSerialized(serialized, {
+// If `Message` was not imported, deserialization would fail because `Message` would have had no chance to register itself.
+const memory = await UnconstrainedMemory.fromSerialized(serialized, {
// this part can be omitted if all classes used in the serialized string are imported (and have `static` register block) or at least one initiated
- extraClasses: [BaseMessage],
+ extraClasses: [UserMessage],
});
console.info(memory.messages);
diff --git a/examples/serialization/customExternal.ts b/examples/serialization/customExternal.ts
index 35f534f3..a6b3d342 100644
--- a/examples/serialization/customExternal.ts
+++ b/examples/serialization/customExternal.ts
@@ -18,8 +18,8 @@ Serializer.register(MyClass, {
});
const instance = new MyClass("Bee");
-const serialized = Serializer.serialize(instance);
-const deserialized = Serializer.deserialize(serialized);
+const serialized = await Serializer.serialize(instance);
+const deserialized = await Serializer.deserialize(serialized);
console.info(instance);
console.info(deserialized);
diff --git a/examples/serialization/customInternal.ts b/examples/serialization/customInternal.ts
index 66dcdeea..902a3ac5 100644
--- a/examples/serialization/customInternal.ts
+++ b/examples/serialization/customInternal.ts
@@ -22,8 +22,8 @@ class MyClass extends Serializable {
}
const instance = new MyClass("Bee");
-const serialized = instance.serialize();
-const deserialized = MyClass.fromSerialized(serialized);
+const serialized = await instance.serialize();
+const deserialized = await MyClass.fromSerialized(serialized);
console.info(instance);
console.info(deserialized);
diff --git a/examples/serialization/memory.ts b/examples/serialization/memory.ts
index 2f7de42f..ad336d33 100644
--- a/examples/serialization/memory.ts
+++ b/examples/serialization/memory.ts
@@ -1,22 +1,10 @@
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { AssistantMessage, UserMessage } from "bee-agent-framework/backend/message";
-const llm = new OllamaChatLLM();
-const memory = new TokenMemory({ llm });
-await memory.addMany([
- BaseMessage.of({
- role: "user",
- text: "What is your name?",
- }),
-]);
+const memory = new TokenMemory();
+await memory.add(new UserMessage("What is your name?"));
-const serialized = memory.serialize();
-const deserialized = TokenMemory.fromSerialized(serialized);
+const serialized = await memory.serialize();
+const deserialized = await TokenMemory.fromSerialized(serialized);
-await deserialized.add(
- BaseMessage.of({
- role: "assistant",
- text: "Bee",
- }),
-);
+await deserialized.add(new AssistantMessage("Bee"));
diff --git a/examples/tools/agent.ts b/examples/tools/agent.ts
index 97923428..da508904 100644
--- a/examples/tools/agent.ts
+++ b/examples/tools/agent.ts
@@ -1,10 +1,10 @@
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { ArXivTool } from "bee-agent-framework/tools/arxiv";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const agent = new BeeAgent({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
memory: new UnconstrainedMemory(),
tools: [new ArXivTool()],
});
diff --git a/examples/tools/llm.ts b/examples/tools/llm.ts
index d27121f5..d9fb4e9b 100644
--- a/examples/tools/llm.ts
+++ b/examples/tools/llm.ts
@@ -1,19 +1,19 @@
import "dotenv/config";
import { LLMTool } from "bee-agent-framework/tools/llm";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { Tool } from "bee-agent-framework/tools/base";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
+import { Message } from "bee-agent-framework/backend/message";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
const memory = new UnconstrainedMemory();
await memory.addMany([
- BaseMessage.of({ role: "system", text: "You are a helpful assistant." }),
- BaseMessage.of({ role: "user", text: "Hello!" }),
- BaseMessage.of({ role: "assistant", text: "Hello user. I am here to help you." }),
+ Message.of({ role: "system", text: "You are a helpful assistant." }),
+ Message.of({ role: "user", text: "Hello!" }),
+ Message.of({ role: "assistant", text: "Hello user. I am here to help you." }),
]);
const tool = new LLMTool({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
});
const response = await tool
diff --git a/examples/tools/mcp.ts b/examples/tools/mcp.ts
index f2208b81..41563e27 100644
--- a/examples/tools/mcp.ts
+++ b/examples/tools/mcp.ts
@@ -3,7 +3,7 @@ import { MCPTool } from "bee-agent-framework/tools/mcp";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
// Create MCP Client
const client = new Client(
@@ -28,7 +28,7 @@ try {
  // A server usually supports several tools; use the factory for automatic discovery
const tools = await MCPTool.fromClient(client);
const agent = new BeeAgent({
- llm: new OllamaChatLLM(),
+ llm: new OllamaChatModel("llama3.1"),
memory: new UnconstrainedMemory(),
tools,
});
diff --git a/examples/tools/openapi.ts b/examples/tools/openapi.ts
index 35beebe4..beb29fdb 100644
--- a/examples/tools/openapi.ts
+++ b/examples/tools/openapi.ts
@@ -1,15 +1,13 @@
import "dotenv/config.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { OpenAPITool } from "bee-agent-framework/tools/openapi";
import * as fs from "fs";
import { dirname } from "node:path";
import { fileURLToPath } from "node:url";
+import { ChatModel } from "bee-agent-framework/backend/chat";
-const llm = new OllamaChatLLM({
- modelId: "llama3.1", // llama3.1:70b for better performance
-});
+const llm = await ChatModel.fromName("ollama:llama3.1");
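+// ChatModel.fromName resolves the provider from the "provider:model" identifier, here the Ollama backend with the "llama3.1" model.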
const __dirname = dirname(fileURLToPath(import.meta.url));
const openApiSchema = await fs.promises.readFile(
@@ -19,7 +17,7 @@ const openApiSchema = await fs.promises.readFile(
const agent = new BeeAgent({
llm,
- memory: new TokenMemory({ llm }),
+ memory: new TokenMemory(),
tools: [new OpenAPITool({ openApiSchema })],
});
diff --git a/examples/workflows/agent.ts b/examples/workflows/agent.ts
index 8aaed823..211f3812 100644
--- a/examples/workflows/agent.ts
+++ b/examples/workflows/agent.ts
@@ -1,25 +1,24 @@
import "dotenv/config";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { z } from "zod";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { JsonDriver } from "bee-agent-framework/llms/drivers/json";
+import { Message, UserMessage } from "bee-agent-framework/backend/message";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { ReadOnlyMemory } from "bee-agent-framework/memory/base";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { Workflow } from "bee-agent-framework/experimental/workflows/workflow";
import { createConsoleReader } from "examples/helpers/io.js";
-import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
+import { GroqChatModel } from "bee-agent-framework/adapters/groq/backend/chat";
const schema = z.object({
- answer: z.instanceof(BaseMessage).optional(),
+ answer: z.instanceof(Message).optional(),
memory: z.instanceof(ReadOnlyMemory),
});
const workflow = new Workflow({ schema: schema })
.addStep("simpleAgent", async (state) => {
const simpleAgent = new BeeAgent({
- llm: new GroqChatLLM(),
+ llm: new GroqChatModel("llama-3.3-70b-versatile"),
tools: [],
memory: state.memory,
});
@@ -32,18 +31,18 @@ const workflow = new Workflow({ schema: schema })
};
})
.addStrictStep("critique", schema.required(), async (state) => {
- const llm = new GroqChatLLM();
- const { parsed: critiqueResponse } = await new JsonDriver(llm).generate(
- z.object({ score: z.number().int().min(0).max(100) }),
- [
- BaseMessage.of({
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
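+    // createStructure replaces the former JsonDriver: the response is validated against the given Zod schema and exposed as "object".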
+ const { object: critiqueResponse } = await llm.createStructure({
+ schema: z.object({ score: z.number().int().min(0).max(100) }),
+ messages: [
+ Message.of({
role: "system",
text: `You are an evaluation assistant who scores the credibility of the last assistant's response. Chitchatting always has a score of 100. If the assistant was unable to answer the user's query, then the score will be 0.`,
}),
...state.memory.messages,
state.answer,
],
- );
+ });
  reader.write("🧠 Score", critiqueResponse.score.toString());
return {
@@ -52,7 +51,7 @@ const workflow = new Workflow({ schema: schema })
})
.addStep("complexAgent", async (state) => {
const complexAgent = new BeeAgent({
- llm: new GroqChatLLM(),
+ llm: new GroqChatModel("llama-3.3-70b-versatile"),
tools: [new WikipediaTool(), new OpenMeteoTool()],
memory: state.memory,
});
@@ -66,11 +65,7 @@ const reader = createConsoleReader();
const memory = new UnconstrainedMemory();
for await (const { prompt } of reader) {
- const userMessage = BaseMessage.of({
- role: Role.USER,
- text: prompt,
- meta: { createdAt: new Date() },
- });
+ const userMessage = new UserMessage(prompt);
await memory.add(userMessage);
const response = await workflow.run({
diff --git a/examples/workflows/competitive-analysis/prompts.ts b/examples/workflows/competitive-analysis/prompts.ts
index 882e801d..cdbbd96e 100644
--- a/examples/workflows/competitive-analysis/prompts.ts
+++ b/examples/workflows/competitive-analysis/prompts.ts
@@ -45,13 +45,7 @@ Key Analysis Points:
- Market positioning
- Potential impact
-Provide concise, actionable insights about the competitor.
-
-Return JSON with:
-{
- "key_insights": ["Critical findings"],
- "unique_capabilities": ["Standout features"]
-}`,
+Provide concise, actionable insights about the competitor.`,
});
export const reflectionSchema = z.object({
diff --git a/examples/workflows/competitive-analysis/state.ts b/examples/workflows/competitive-analysis/state.ts
index 66f1e3a3..f002a628 100644
--- a/examples/workflows/competitive-analysis/state.ts
+++ b/examples/workflows/competitive-analysis/state.ts
@@ -1,5 +1,5 @@
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
import { z } from "zod";
+import { Message } from "bee-agent-framework/backend/message";
export const InputSchema = z.object({
industry: z.string(),
@@ -17,7 +17,7 @@ export const StateSchema = z.object({
competitorFindings: z.record(z.string(), z.array(z.string())).default({}),
researchLoopCount: z.number().default(0),
runningSummary: z.string().optional(),
- answer: z.instanceof(BaseMessage).optional(),
+ answer: z.instanceof(Message).optional(),
reflectionFeedback: z.array(z.string()).optional(),
reflectionIteration: z.number().default(0),
maxReflectionIterations: z.number().default(3),
diff --git a/examples/workflows/competitive-analysis/utils.ts b/examples/workflows/competitive-analysis/utils.ts
index 5a886291..5ec948b7 100644
--- a/examples/workflows/competitive-analysis/utils.ts
+++ b/examples/workflows/competitive-analysis/utils.ts
@@ -1,21 +1,12 @@
import { TavilySearchResults } from "@langchain/community/tools/tavily_search";
-import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { getEnv } from "bee-agent-framework/internals/env";
import "dotenv/config";
-import { Ollama } from "ollama";
import { State } from "./state.js";
import { Steps } from "./workflow.js";
+import { OllamaChatModel } from "bee-agent-framework/adapters/ollama/backend/chat";
export function getChatLLM() {
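+  // Falls back to "deepseek-r1:8b" when OLLAMA_CHAT_MODEL is unset; connection details such as OLLAMA_BASE_URL are read from the environment (see .env.template).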
- return new OllamaChatLLM({
- modelId: getEnv("OLLAMA_MODEL") || "deepseek-r1:8b",
- parameters: {
- temperature: 0,
- },
- client: new Ollama({
- host: getEnv("OLLAMA_HOST") || "http://0.0.0.0:11434",
- }),
- });
+ return new OllamaChatModel(getEnv("OLLAMA_CHAT_MODEL") || "deepseek-r1:8b");
}
export interface SearchResult {
diff --git a/examples/workflows/competitive-analysis/workflow.ts b/examples/workflows/competitive-analysis/workflow.ts
index 37928798..74f839da 100644
--- a/examples/workflows/competitive-analysis/workflow.ts
+++ b/examples/workflows/competitive-analysis/workflow.ts
@@ -1,6 +1,4 @@
import { Workflow } from "bee-agent-framework/experimental/workflows/workflow";
-import { JsonDriver } from "bee-agent-framework/llms/drivers/json";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
import {
categorizationPromptTemplate,
competitorsPromptTemplate,
@@ -12,6 +10,7 @@ import {
import { State, StateSchema } from "./state.js";
import { deduplicateAndFormatSources, formatSources, getChatLLM, tavilySearch } from "./utils.js";
import { isEmpty, mapValues } from "remeda";
+import { AssistantMessage, SystemMessage } from "bee-agent-framework/backend/message";
export enum Steps {
GENERATE_COMPETITORS = "GENERATE_COMPETITORS",
@@ -34,21 +33,22 @@ async function generateCompetitors(state: State) {
}
const llm = getChatLLM();
- const llmJsonMode = new JsonDriver(llm);
- const result = await llmJsonMode.generate(competitorsSchema, [
- BaseMessage.of({
- role: Role.SYSTEM,
- text: competitorsPromptTemplate.render({
- industry: state.industry,
- specifiedCompetitors: undefined,
- }),
- }),
- ]);
+ const result = await llm.createStructure({
+ schema: competitorsSchema,
+ messages: [
+ new SystemMessage(
+ competitorsPromptTemplate.render({
+ industry: state.industry,
+ specifiedCompetitors: undefined,
+ }),
+ ),
+ ],
+ });
return {
update: {
- competitors: result.parsed.competitors,
- runningSummary: result.parsed.overview,
+ competitors: result.object.competitors,
+ runningSummary: result.object.overview,
competitorFindings: mapValues(state.competitors, () => []),
},
};
@@ -98,22 +98,23 @@ async function categorizeFindings(state: State) {
}
const llm = getChatLLM();
- const llmJsonMode = new JsonDriver(llm);
- const result = await llmJsonMode.generate(findingsSchema, [
- BaseMessage.of({
- role: Role.SYSTEM,
- text: categorizationPromptTemplate.render({
- competitor: state.currentCompetitor,
- searchResults: state.webResearchResults[state.webResearchResults.length - 1],
- }),
- }),
- ]);
+ const result = await llm.createStructure({
+ schema: findingsSchema,
+ messages: [
+ new SystemMessage(
+ categorizationPromptTemplate.render({
+ competitor: state.currentCompetitor,
+ searchResults: state.webResearchResults[state.webResearchResults.length - 1],
+ }),
+ ),
+ ],
+ });
const updatedFindings = {
...state.competitorFindings,
[state.currentCompetitor]: [
- ...(result.parsed.key_insights || []),
- ...(result.parsed.unique_capabilities || []),
+ ...(result.object.key_insights || []),
+ ...(result.object.unique_capabilities || []),
],
};
@@ -142,7 +143,7 @@ ${competitorSections}
${state.sourcesGathered.join("\n")}`;
return {
- update: { answer: BaseMessage.of({ role: Role.ASSISTANT, text: finalSummary }) },
+ update: { answer: new AssistantMessage(finalSummary) },
next: Steps.REFLECTION,
};
}
@@ -153,21 +154,21 @@ async function reflectAndImprove(state: State) {
}
const llm = getChatLLM();
- const llmJsonMode = new JsonDriver(llm);
-
- const result = await llmJsonMode.generate(reflectionSchema, [
- BaseMessage.of({
- role: Role.SYSTEM,
- text: reflectionPromptTemplate.render({
- analysis: state.answer.text,
- previous_feedback: state.reflectionFeedback,
- }),
- }),
- ]);
+ const result = await llm.createStructure({
+ schema: reflectionSchema,
+ messages: [
+ new SystemMessage(
+ reflectionPromptTemplate.render({
+ analysis: state.answer.text,
+ previous_feedback: state.reflectionFeedback,
+ }),
+ ),
+ ],
+ });
- const feedback = [...(result.parsed.critique || []), ...(result.parsed.suggestions || [])];
+ const feedback = [...(result.object.critique || []), ...(result.object.suggestions || [])];
- if (result.parsed.should_iterate && state.reflectionIteration < state.maxReflectionIterations) {
+ if (result.object.should_iterate && state.reflectionIteration < state.maxReflectionIterations) {
return {
update: {
reflectionFeedback: feedback,
@@ -177,10 +178,9 @@ async function reflectAndImprove(state: State) {
};
}
- const finalAnalysis = BaseMessage.of({
- role: Role.ASSISTANT,
- text: `${state.answer.text}\n\n## Reflection Notes\n${feedback.map((f) => `* ${f}`).join("\n")}`,
- });
+ const finalAnalysis = new AssistantMessage(
+ `${state.answer.text}\n\n## Reflection Notes\n${feedback.map((f) => `* ${f}`).join("\n")}`,
+ );
return {
update: { answer: finalAnalysis },
diff --git a/examples/workflows/contentCreator.ts b/examples/workflows/contentCreator.ts
index e25fa499..6958d3a4 100644
--- a/examples/workflows/contentCreator.ts
+++ b/examples/workflows/contentCreator.ts
@@ -4,12 +4,11 @@ import { Workflow } from "bee-agent-framework/experimental/workflows/workflow";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { createConsoleReader } from "examples/helpers/io.js";
-import { BaseMessage } from "bee-agent-framework/llms/primitives/message";
-import { JsonDriver } from "bee-agent-framework/llms/drivers/json";
+import { Message } from "bee-agent-framework/backend/message";
import { isEmpty, pick } from "remeda";
import { LLMTool } from "bee-agent-framework/tools/llm";
import { GoogleSearchTool } from "bee-agent-framework/tools/search/googleSearch";
-import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
+import { GroqChatModel } from "bee-agent-framework/adapters/groq/backend/chat";
const schema = z.object({
input: z.string(),
@@ -26,19 +25,18 @@ const workflow = new Workflow({
outputSchema: schema.required({ output: true }),
})
.addStep("preprocess", async (state) => {
- const llm = new GroqChatLLM();
- const driver = new JsonDriver(llm);
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
- const { parsed } = await driver.generate(
- schema.pick({ topic: true, notes: true }).or(
+ const { object: parsed } = await llm.createStructure({
+ schema: schema.pick({ topic: true, notes: true }).or(
z.object({
error: z
.string()
.describe("Use when the input query does not make sense or you need clarification."),
}),
),
- [
- BaseMessage.of({
+ messages: [
+ Message.of({
role: `user`,
text: [
"Your task is to rewrite the user query so that it guides the content planner and editor to craft a blog post that perfectly aligns with the user's needs. Notes should be used only if the user complains about something.",
@@ -53,14 +51,14 @@ const workflow = new Workflow({
.join("\n"),
}),
],
- );
+ });
return "error" in parsed
? { update: { output: parsed.error }, next: Workflow.END }
: { update: pick(parsed, ["notes", "topic"]) };
})
.addStrictStep("planner", schema.required({ topic: true }), async (state) => {
- const llm = new GroqChatLLM();
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
const agent = new BeeAgent({
llm,
memory: new UnconstrainedMemory(),
@@ -93,54 +91,58 @@ const workflow = new Workflow({
};
})
.addStrictStep("writer", schema.required({ plan: true }), async (state) => {
- const llm = new GroqChatLLM();
- const output = await llm.generate([
- BaseMessage.of({
- role: `system`,
- text: [
- `You are a Content Writer. Your task is to write a compelling blog post based on the provided context.`,
- ``,
- `# Context`,
- `${state.plan}`,
- ``,
- `# Objectives`,
- `- An engaging introduction`,
- `- Insightful body paragraphs (2-3 per section)`,
- `- Properly named sections/subtitles`,
- `- A summarizing conclusion`,
- `- Format: Markdown`,
- ``,
- ...[!isEmpty(state.notes) && ["# Notes", ...state.notes, ""]],
- `Ensure the content flows naturally, incorporates SEO keywords, and is well-structured.`,
- ].join("\n"),
- }),
- ]);
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
+ const output = await llm.create({
+ messages: [
+ Message.of({
+ role: `system`,
+ text: [
+ `You are a Content Writer. Your task is to write a compelling blog post based on the provided context.`,
+ ``,
+ `# Context`,
+ `${state.plan}`,
+ ``,
+ `# Objectives`,
+ `- An engaging introduction`,
+ `- Insightful body paragraphs (2-3 per section)`,
+ `- Properly named sections/subtitles`,
+ `- A summarizing conclusion`,
+ `- Format: Markdown`,
+ ``,
+ ...[!isEmpty(state.notes) && ["# Notes", ...state.notes, ""]],
+ `Ensure the content flows naturally, incorporates SEO keywords, and is well-structured.`,
+ ].join("\n"),
+ }),
+ ],
+ });
return {
update: { draft: output.getTextContent() },
};
})
.addStrictStep("editor", schema.required({ draft: true }), async (state) => {
- const llm = new GroqChatLLM();
- const output = await llm.generate([
- BaseMessage.of({
- role: `system`,
- text: [
- `You are an Editor. Your task is to transform the following draft blog post to a final version.`,
- ``,
- `# Draft`,
- `${state.draft}`,
- ``,
- `# Objectives`,
- `- Fix Grammatical errors`,
- `- Journalistic best practices`,
- ``,
- ...[!isEmpty(state.notes) && ["# Notes", ...state.notes, ""]],
- ``,
- `IMPORTANT: The final version must not contain any editor's comments.`,
- ].join("\n"),
- }),
- ]);
+ const llm = new GroqChatModel("llama-3.3-70b-versatile");
+ const output = await llm.create({
+ messages: [
+ Message.of({
+ role: `system`,
+ text: [
+ `You are an Editor. Your task is to transform the following draft blog post to a final version.`,
+ ``,
+ `# Draft`,
+ `${state.draft}`,
+ ``,
+ `# Objectives`,
+            `- Fix grammatical errors`,
+ `- Journalistic best practices`,
+ ``,
+ ...[!isEmpty(state.notes) && ["# Notes", ...state.notes, ""]],
+ ``,
+ `IMPORTANT: The final version must not contain any editor's comments.`,
+ ].join("\n"),
+ }),
+ ],
+ });
return {
update: { output: output.getTextContent() },
diff --git a/examples/workflows/multiAgents.ts b/examples/workflows/multiAgents.ts
index c38472cf..fd100cbd 100644
--- a/examples/workflows/multiAgents.ts
+++ b/examples/workflows/multiAgents.ts
@@ -4,14 +4,11 @@ import { createConsoleReader } from "examples/helpers/io.js";
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { AgentWorkflow } from "bee-agent-framework/experimental/workflows/agent";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { WatsonxChatModel } from "bee-agent-framework/adapters/watsonx/backend/chat";
const workflow = new AgentWorkflow();
-const llm = WatsonXChatLLM.fromPreset("meta-llama/llama-3-3-70b-instruct", {
- apiKey: process.env.WATSONX_API_KEY,
- projectId: process.env.WATSONX_PROJECT_ID,
-});
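+// Watsonx credentials (API key, project/space ID, region) are now read from the WATSONX_* environment variables rather than passed to the constructor.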
+const llm = new WatsonxChatModel("meta-llama/llama-3-3-70b-instruct");
workflow.addAgent({
name: "WeatherForecaster",
@@ -37,13 +34,7 @@ const reader = createConsoleReader();
const memory = new UnconstrainedMemory();
for await (const { prompt } of reader) {
- await memory.add(
- BaseMessage.of({
- role: Role.USER,
- text: prompt,
- meta: { createdAt: new Date() },
- }),
- );
+ await memory.add(new UserMessage(prompt, { createdAt: new Date() }));
const { result } = await workflow.run(memory.messages).observe((emitter) => {
emitter.on("success", (data) => {
diff --git a/examples/workflows/multiAgentsSimple.ts b/examples/workflows/multiAgentsSimple.ts
index fe090437..b05532bc 100644
--- a/examples/workflows/multiAgentsSimple.ts
+++ b/examples/workflows/multiAgentsSimple.ts
@@ -3,8 +3,8 @@ import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMem
import { OpenMeteoTool } from "bee-agent-framework/tools/weather/openMeteo";
import { WikipediaTool } from "bee-agent-framework/tools/search/wikipedia";
import { AgentWorkflow } from "bee-agent-framework/experimental/workflows/agent";
-import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
-import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
+import { UserMessage } from "bee-agent-framework/backend/message";
+import { WatsonxChatModel } from "bee-agent-framework/adapters/watsonx/backend/chat";
const workflow = new AgentWorkflow();
@@ -12,14 +12,14 @@ workflow.addAgent({
name: "Researcher",
instructions: "You are a researcher assistant. Respond only if you can provide a useful answer.",
tools: [new WikipediaTool()],
- llm: new GroqChatLLM(),
+ llm: new WatsonxChatModel("meta-llama/llama-3-3-70b-instruct"),
});
workflow.addAgent({
name: "WeatherForecaster",
instructions: "You are a weather assistant. Respond only if you can provide a useful answer.",
tools: [new OpenMeteoTool()],
- llm: new GroqChatLLM(),
+ llm: new WatsonxChatModel("meta-llama/llama-3-3-70b-instruct"),
execution: { maxIterations: 3 },
});
@@ -27,16 +27,14 @@ workflow.addAgent({
name: "Solver",
instructions:
"Your task is to provide the most useful final answer based on the assistants' responses which all are relevant. Ignore those where assistant do not know.",
- llm: new GroqChatLLM(),
+ llm: new WatsonxChatModel("meta-llama/llama-3-3-70b-instruct"),
});
const memory = new UnconstrainedMemory();
await memory.add(
- BaseMessage.of({
- role: Role.USER,
- text: "What is the capital of France and what is the current weather there?",
- meta: { createdAt: new Date() },
+ new UserMessage("What is the capital of France and what is the current weather there?", {
+ createdAt: new Date(),
}),
);
diff --git a/package.json b/package.json
index 9b08d51e..a2dadddb 100644
--- a/package.json
+++ b/package.json
@@ -137,7 +137,7 @@
},
"scripts": {
"clean": "rimraf dist",
- "build": "yarn clean && yarn ts:check && NODE_OPTIONS='--max-old-space-size=8192' tsup && cp -r src/adapters/ibm-vllm/proto dist/adapters/ibm-vllm",
+ "build": "yarn clean && yarn ts:check && NODE_OPTIONS='--max-old-space-size=8192' tsup",
"ts:check": "tsc --noEmit && tsc -p tsconfig.examples.json --noEmit",
"start": "tsx --tsconfig tsconfig.examples.json",
"start:bee": "yarn start -- examples/agents/bee.ts",
@@ -162,21 +162,19 @@
"copyright": "./scripts/copyright.sh",
"copyright:check": "TYPE=check ./scripts/copyright.sh",
"release": "release-it",
- "ibm-vllm:generate-types": "./scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh",
"_ensure_env": "cp -n .env.template .env || true"
},
"dependencies": {
"@ai-zen/node-fetch-event-source": "^2.1.4",
"@opentelemetry/api": "^1.9.0",
"@streamparser/json": "^0.0.21",
+ "ai": "^4.1.24",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",
"duck-duck-scrape": "^2.2.7",
"fast-xml-parser": "^4.5.0",
"header-generator": "^2.1.57",
"joplin-turndown-plugin-gfm": "^1.0.12",
- "js-yaml": "^4.1.0",
- "json-schema-to-typescript": "^15.0.3",
"jsonrepair": "^3.11.1",
"mathjs": "^14.0.0",
"mustache": "^4.2.0",
@@ -195,68 +193,60 @@
"zod-to-json-schema": "^3.23.5"
},
"peerDependencies": {
- "@aws-sdk/client-bedrock-runtime": "^3.687.0",
+ "@ai-sdk/amazon-bedrock": "^1.1.5",
+ "@ai-sdk/azure": "^1.1.2",
+ "@ai-sdk/google-vertex": "^2.1.6",
+ "@ai-sdk/groq": "^1.1.6",
+ "@ai-sdk/openai": "^1.1.2",
+ "@aws-sdk/client-bedrock-runtime": "^3.706.0",
"@elastic/elasticsearch": "^8.0.0",
- "@google-cloud/vertexai": "*",
"@googleapis/customsearch": "^3.2.0",
- "@grpc/grpc-js": "^1.11.3",
- "@grpc/proto-loader": "^0.7.13",
"@langchain/community": ">=0.2.28",
"@langchain/core": ">=0.2.27",
"@modelcontextprotocol/sdk": "^1.0.4",
"@zilliz/milvus2-sdk-node": "^2.4.9",
- "google-auth-library": "*",
- "groq-sdk": "^0.7.0",
- "ollama": "^0.5.11",
- "openai": "^4.67.3",
- "openai-chat-tokens": "^0.2.8",
+ "ollama-ai-provider": "^1.2.0",
"sequelize": "^6.37.3",
"yaml": "^2.6.1"
},
"peerDependenciesMeta": {
- "@aws-sdk/client-bedrock-runtime": {
+ "@ai-sdk/amazon-bedrock": {
"optional": true
},
- "@elastic/elasticsearch": {
+ "@ai-sdk/azure": {
"optional": true
},
- "@google-cloud/vertexai": {
- "optional": true
- },
- "@googleapis/customsearch": {
+ "@ai-sdk/google-vertex": {
"optional": true
},
- "@grpc/grpc-js": {
+ "@ai-sdk/groq": {
"optional": true
},
- "@grpc/proto-loader": {
+ "@ai-sdk/openai": {
"optional": true
},
- "@langchain/community": {
- "optional": true
- },
- "@langchain/core": {
+ "@elastic/elasticsearch": {
"optional": true
},
- "@modelcontextprotocol/sdk": {
+ "@googleapis/customsearch": {
"optional": true
},
- "@zilliz/milvus2-sdk-node": {
+ "@ibm-cloud/watsonx-ai": {
"optional": true
},
- "google-auth-library": {
+ "@langchain/community": {
"optional": true
},
- "groq-sdk": {
+ "@langchain/core": {
"optional": true
},
- "ollama": {
+ "@modelcontextprotocol/sdk": {
"optional": true
},
- "openai": {
+ "@zilliz/milvus2-sdk-node": {
"optional": true
},
- "openai-chat-tokens": {
+ "ibm-cloud-sdk-core": {
"optional": true
},
"sequelize": {
@@ -267,20 +257,22 @@
}
},
"devDependencies": {
- "@aws-sdk/client-bedrock-runtime": "^3.706.0",
+ "@ai-sdk/amazon-bedrock": "^1.1.5",
+ "@ai-sdk/azure": "^1.1.2",
+ "@ai-sdk/google-vertex": "^2.1.6",
+ "@ai-sdk/groq": "^1.1.6",
+ "@ai-sdk/openai": "^1.1.2",
"@commitlint/cli": "^19.6.0",
"@commitlint/config-conventional": "^19.6.0",
"@elastic/elasticsearch": "^8.16.2",
"@eslint/js": "^9.16.0",
"@eslint/markdown": "^6.2.1",
- "@google-cloud/vertexai": "^1.9.2",
"@googleapis/customsearch": "^3.2.0",
- "@grpc/grpc-js": "^1.12.4",
- "@grpc/proto-loader": "^0.7.13",
- "@langchain/community": "~0.3.17",
- "@langchain/core": "~0.3.22",
- "@langchain/langgraph": "^0.2.39",
- "@langchain/ollama": "^0.1.4",
+ "@ibm-cloud/watsonx-ai": "^1.4.0",
+ "@langchain/community": "~0.3.28",
+ "@langchain/core": "~0.3.37",
+ "@langchain/langgraph": "^0.2.44",
+ "@langchain/ollama": "^0.1.5",
"@modelcontextprotocol/sdk": "^1.0.4",
"@opentelemetry/instrumentation": "^0.56.0",
"@opentelemetry/resources": "^1.29.0",
@@ -309,16 +301,12 @@
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-unused-imports": "^4.1.4",
"glob": "^11.0.0",
- "groq-sdk": "^0.9.0",
"husky": "^9.1.7",
+ "ibm-cloud-sdk-core": "^5.1.2",
"langchain": "~0.3.6",
"linkinator": "^6.1.2",
"lint-staged": "^15.2.10",
- "ollama": "^0.5.11",
- "openai": "^4.76.0",
- "openai-chat-tokens": "^0.2.8",
- "openapi-fetch": "^0.13.3",
- "openapi-typescript": "^7.4.4",
+ "ollama-ai-provider": "^1.2.0",
"picocolors": "^1.1.1",
"pino-pretty": "^13.0.0",
"pino-test": "^1.1.0",
@@ -328,13 +316,11 @@
"sequelize": "^6.37.5",
"sqlite3": "^5.1.7",
"strip-ansi": "^7.1.0",
- "temp-dir": "^3.0.0",
- "tsc-files": "^1.1.4",
- "tsup": "^8.3.5",
+ "tsup": "^8.3.6",
"tsx": "^4.19.2",
- "typescript": "^5.7.2",
+ "typescript": "^5.7.3",
"typescript-eslint": "^8.18.1",
- "vite-tsconfig-paths": "^5.1.3",
+ "vite-tsconfig-paths": "^5.1.4",
"vitest": "^2.1.8",
"yaml": "^2.6.1"
}
diff --git a/scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh b/scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh
deleted file mode 100755
index af15d266..00000000
--- a/scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-# Copyright 2025 IBM Corp.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-GRPC_PROTO_PATH="./src/adapters/ibm-vllm/proto"
-GRPC_TYPES_PATH="./src/adapters/ibm-vllm/types.ts"
-
-SCRIPT_DIR="$(dirname "$0")"
-OUTPUT_RELATIVE_PATH="dist/merged.d.ts"
-GRPC_TYPES_TMP_PATH="types"
-
-rm -f "$GRPC_TYPES_PATH"
-
-rm -rf "${SCRIPT_DIR}"/{dist,dts,types}
-
-
-yarn run proto-loader-gen-types \
- --defaults \
- --keepCase \
- --oneofs \
- --longs=Number \
- --enums=String \
- --grpcLib=@grpc/grpc-js \
- --"outDir=${SCRIPT_DIR}/${GRPC_TYPES_TMP_PATH}" \
- "${GRPC_PROTO_PATH}"/*.proto
-
-
-cd "$SCRIPT_DIR"
- ENTRY="$(basename "$OUTPUT_RELATIVE_PATH" ".d.ts")" tsup --dts-only
- sed -i.bak '$ d' "$OUTPUT_RELATIVE_PATH"
- sed -i.bak -E "s/^interface/export interface/" "$OUTPUT_RELATIVE_PATH"
- sed -i.bak -E "s/^type/export type/" "$OUTPUT_RELATIVE_PATH"
-cd -
-
-mv "$SCRIPT_DIR/$OUTPUT_RELATIVE_PATH" "$GRPC_TYPES_PATH"
-rm -rf "${SCRIPT_DIR}"/{dist,dts,types}
-
-yarn run lint:fix "${GRPC_TYPES_PATH}"
-yarn prettier --write "${GRPC_TYPES_PATH}"
-yarn copyright "${GRPC_TYPES_PATH}"
diff --git a/scripts/ibm_vllm_generate_protos/package.json b/scripts/ibm_vllm_generate_protos/package.json
deleted file mode 100644
index d25b1a7b..00000000
--- a/scripts/ibm_vllm_generate_protos/package.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "name": "ibm-vllm-proto-types",
- "type": "module",
- "version": "1.0.0",
- "typings": "./types/generation.d.ts"
-}
diff --git a/scripts/ibm_vllm_generate_protos/tsconfig.proto.json b/scripts/ibm_vllm_generate_protos/tsconfig.proto.json
deleted file mode 100644
index 0e4a32c2..00000000
--- a/scripts/ibm_vllm_generate_protos/tsconfig.proto.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- "compilerOptions": {
- "moduleResolution": "node",
- "rootDir": ".",
- "baseUrl": ".",
- "target": "ESNext",
- "module": "ESNext",
- "outDir": "dist",
- "declaration": true,
- "emitDeclarationOnly": true,
- "skipLibCheck": true,
- "sourceMap": false
- }
-}
diff --git a/scripts/ibm_vllm_generate_protos/tsup.config.ts b/scripts/ibm_vllm_generate_protos/tsup.config.ts
deleted file mode 100644
index 565fbc82..00000000
--- a/scripts/ibm_vllm_generate_protos/tsup.config.ts
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Copyright 2025 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import { defineConfig } from "tsup";
-import fs from "node:fs";
-
-if (!process.env.ENTRY) {
- throw new Error(`Entry file was not provided!`);
-}
-const target = `types/${process.env.ENTRY}.ts`;
-await fs.promises.writeFile(
- target,
- [
- `export { ProtoGrpcType as A } from "./caikit_runtime_Nlp.js"`,
- `export { ProtoGrpcType as B } from "./generation.js"`,
- ].join("\n"),
-);
-
-export default defineConfig({
- entry: [target],
- tsconfig: "./tsconfig.proto.json",
- sourcemap: false,
- dts: true,
- format: ["esm"],
- treeshake: true,
- legacyOutput: false,
- skipNodeModulesBundle: true,
- bundle: true,
- splitting: false,
- silent: false,
- clean: true,
-});
diff --git a/src/adapters/amazon-bedrock/backend/chat.ts b/src/adapters/amazon-bedrock/backend/chat.ts
new file mode 100644
index 00000000..bb71a04e
--- /dev/null
+++ b/src/adapters/amazon-bedrock/backend/chat.ts
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import {
+ AmazonBedrockClient,
+ AmazonBedrockClientSettings,
+} from "@/adapters/amazon-bedrock/backend/client.js";
+import { VercelChatModel } from "@/adapters/vercel/backend/chat.js";
+import { getEnv } from "@/internals/env.js";
+import { AmazonBedrockProvider } from "@ai-sdk/amazon-bedrock";
+
+type AmazonBedrockParameters = Parameters<AmazonBedrockProvider["languageModel"]>;
+export type AmazonBedrockChatModelId = NonNullable<AmazonBedrockParameters[0]>;
+export type AmazonBedrockChatModelSettings = NonNullable<AmazonBedrockParameters[1]>;
+
+export class AmazonBedrockChatModel extends VercelChatModel {
+ constructor(
+ modelId: AmazonBedrockChatModelId = getEnv("AWS_CHAT_MODEL", "meta.llama3-70b-instruct-v1:0"),
+ settings: AmazonBedrockChatModelSettings = {},
+ client?: AmazonBedrockClient | AmazonBedrockClientSettings,
+ ) {
+ const model = AmazonBedrockClient.ensure(client).instance.languageModel(modelId, settings);
+ super(model);
+ }
+}
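+
+// Usage sketch (assumes AWS credentials and region are provided via the AWS_* environment variables):
+//   const llm = new AmazonBedrockChatModel(); // model id falls back to AWS_CHAT_MODEL, then "meta.llama3-70b-instruct-v1:0"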
diff --git a/tests/e2e/adapters/ollama/test.ts b/src/adapters/amazon-bedrock/backend/client.ts
similarity index 55%
rename from tests/e2e/adapters/ollama/test.ts
rename to src/adapters/amazon-bedrock/backend/client.ts
index 83c7b1d9..897e0d44 100644
--- a/tests/e2e/adapters/ollama/test.ts
+++ b/src/adapters/amazon-bedrock/backend/client.ts
@@ -14,24 +14,22 @@
* limitations under the License.
*/
-import { OllamaChatLLM } from "@/adapters/ollama/chat.js";
-import { Ollama } from "ollama";
+import {
+ createAmazonBedrock,
+ AmazonBedrockProviderSettings,
+ AmazonBedrockProvider,
+} from "@ai-sdk/amazon-bedrock";
+import { BackendClient } from "@/backend/client.js";
-const host = process.env.OLLAMA_HOST;
+export type AmazonBedrockClientSettings = AmazonBedrockProviderSettings;
-describe.runIf(Boolean(host))("Ollama LLM", () => {
- const createLLM = () => {
- return new OllamaChatLLM({
- modelId: "llama3.1",
- client: new Ollama({
- host,
- }),
+export class AmazonBedrockClient extends BackendClient<
+ AmazonBedrockClientSettings,
+ AmazonBedrockProvider
+> {
+ protected create(): AmazonBedrockProvider {
+ return createAmazonBedrock({
+ ...this.settings,
});
- };
-
- it("Retrieves version", async () => {
- const llm = createLLM();
- const version = await llm.version();
- expect(version).toBeDefined();
- });
-});
+ }
+}
diff --git a/src/adapters/amazon-bedrock/backend/embedding.ts b/src/adapters/amazon-bedrock/backend/embedding.ts
new file mode 100644
index 00000000..c5bb3d89
--- /dev/null
+++ b/src/adapters/amazon-bedrock/backend/embedding.ts
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { VercelEmbeddingModel } from "@/adapters/vercel/backend/embedding.js";
+import { AmazonBedrockProvider } from "@ai-sdk/amazon-bedrock";
+import { getEnv } from "@/internals/env.js";
+import {
+ AmazonBedrockClient,
+ AmazonBedrockClientSettings,
+} from "@/adapters/amazon-bedrock/backend/client.js";
+
+type Params = Parameters<AmazonBedrockProvider["embedding"]>;
+export type BedrockEmbeddingModelId = NonNullable<Params[0]>;
+export type BedrockEmbeddingSettings = NonNullable<Params[1]>;
+
+export class BedrockEmbeddingModel extends VercelEmbeddingModel {
+ constructor(
+ modelId: BedrockEmbeddingModelId = getEnv("AWS_EMBEDDING_MODEL", "amazon.titan-embed-text-v1"),
+ settings: BedrockEmbeddingSettings = {},
+ client?: AmazonBedrockClient | AmazonBedrockClientSettings,
+ ) {
+ const model = AmazonBedrockClient.ensure(client).instance.embedding(modelId, settings);
+ super(model);
+ }
+}
diff --git a/src/adapters/azure-openai/backend/chat.ts b/src/adapters/azure-openai/backend/chat.ts
new file mode 100644
index 00000000..6b1cd660
--- /dev/null
+++ b/src/adapters/azure-openai/backend/chat.ts
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { VercelChatModel } from "@/adapters/vercel/backend/chat.js";
+import type {
+ AzureOpenAIProvider as VercelAzureOpenAIProvider,
+ AzureOpenAIProviderSettings as VercelAzureOpenAIProviderSettings,
+} from "@ai-sdk/azure";
+import { AzureOpenAIClient } from "@/adapters/azure-openai/backend/client.js";
+import { getEnv } from "@/internals/env.js";
+
+type AzureOpenAIParameters = Parameters<VercelAzureOpenAIProvider["chat"]>;
+export type AzureOpenAIChatModelId = NonNullable<AzureOpenAIParameters[0]>;
+export type AzureOpenAIChatModelSettings = NonNullable<AzureOpenAIParameters[1]>;
+
+export class AzureOpenAIChatModel extends VercelChatModel {
+ constructor(
+ modelId: AzureOpenAIChatModelId = getEnv("AZURE_OPENAI_CHAT_MODEL", "gpt-4o"),
+ settings: AzureOpenAIChatModelSettings = {},
+ client?: VercelAzureOpenAIProviderSettings | AzureOpenAIClient,
+ ) {
+ const model = AzureOpenAIClient.ensure(client).instance.chat(modelId, settings);
+ super(model);
+ }
+}
diff --git a/src/adapters/azure-openai/backend/client.ts b/src/adapters/azure-openai/backend/client.ts
new file mode 100644
index 00000000..ccd14dbf
--- /dev/null
+++ b/src/adapters/azure-openai/backend/client.ts
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { AzureOpenAIProvider, AzureOpenAIProviderSettings, createAzure } from "@ai-sdk/azure";
+import { getEnv } from "@/internals/env.js";
+import { BackendClient } from "@/backend/client.js";
+
+export type AzureOpenAIClientSettings = AzureOpenAIProviderSettings;
+
+export class AzureOpenAIClient extends BackendClient<
+ AzureOpenAIClientSettings,
+ AzureOpenAIProvider
+> {
+ protected create(options?: AzureOpenAIClientSettings): AzureOpenAIProvider {
+ return createAzure({
+ ...options,
+ apiKey: options?.apiKey || getEnv("AZURE_OPENAI_API_KEY"),
+ baseURL: options?.baseURL || getEnv("AZURE_OPENAI_API_ENDPOINT"),
+ resourceName: options?.resourceName || getEnv("AZURE_OPENAI_API_RESOURCE"),
+ apiVersion: options?.apiVersion || getEnv("AZURE_OPENAI_API_VERSION"),
+ });
+ }
+}
diff --git a/src/adapters/azure-openai/backend/embedding.ts b/src/adapters/azure-openai/backend/embedding.ts
new file mode 100644
index 00000000..8945f23c
--- /dev/null
+++ b/src/adapters/azure-openai/backend/embedding.ts
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { VercelEmbeddingModel } from "@/adapters/vercel/backend/embedding.js";
+import {
+ AzureOpenAIClient,
+ AzureOpenAIClientSettings,
+} from "@/adapters/azure-openai/backend/client.js";
+import { getEnv } from "@/internals/env.js";
+import { AzureOpenAIProvider as VercelAzureOpenAIProviderSettings } from "@ai-sdk/azure";
+
+type AzureOpenAIParameters = Parameters<VercelAzureOpenAIProviderSettings["textEmbeddingModel"]>;
+export type AzureOpenAIEmbeddingModelId = NonNullable<AzureOpenAIParameters[0]>;
+export type AzureOpenAIEmbeddingModelSettings = Record<string, any>;
+
+export class AzureOpenAIEmbeddingModel extends VercelEmbeddingModel {
+ constructor(
+ modelId: AzureOpenAIEmbeddingModelId = getEnv(
+ "AZURE_OPENAI_EMBEDDING_MODEL",
+ "text-embedding-3-small",
+ ),
+ settings: AzureOpenAIEmbeddingModelSettings = {},
+ client?: AzureOpenAIClient | AzureOpenAIClientSettings,
+ ) {
+ const model = AzureOpenAIClient.ensure(client).instance.textEmbeddingModel(modelId, settings);
+ super(model);
+ }
+}
diff --git a/src/adapters/bedrock/chat.test.ts b/src/adapters/bedrock/chat.test.ts
deleted file mode 100644
index 100189f6..00000000
--- a/src/adapters/bedrock/chat.test.ts
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Copyright 2025 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import { verifyDeserialization } from "@tests/e2e/utils.js";
-import { BedrockChatLLM } from "@/adapters/bedrock/chat.js";
-import { BedrockRuntimeClient } from "@aws-sdk/client-bedrock-runtime";
-
-describe("Bedrock ChatLLM", () => {
- const getInstance = () => {
- return new BedrockChatLLM({
- modelId: "amazon.titan-text-lite-v1",
- client: new BedrockRuntimeClient({ region: "us-east-1" }),
- });
- };
-
- it("Serializes", async () => {
- const instance = getInstance();
- const serialized = instance.serialize();
- const deserialized = BedrockChatLLM.fromSerialized(serialized);
- verifyDeserialization(instance, deserialized);
- });
-});
diff --git a/src/adapters/bedrock/chat.ts b/src/adapters/bedrock/chat.ts
deleted file mode 100644
index 5cafc808..00000000
--- a/src/adapters/bedrock/chat.ts
+++ /dev/null
@@ -1,313 +0,0 @@
-/**
- * Copyright 2025 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import {
- AsyncStream,
- BaseLLMTokenizeOutput,
- EmbeddingOptions,
- EmbeddingOutput,
- ExecutionOptions,
- GenerateOptions,
- LLMCache,
- LLMMeta,
- StreamGenerateOptions,
-} from "@/llms/base.js";
-import { shallowCopy } from "@/serializer/utils.js";
-import { ChatLLM, ChatLLMGenerateEvents, ChatLLMOutput } from "@/llms/chat.js";
-import { BaseMessage, Role } from "@/llms/primitives/message.js";
-import { Emitter } from "@/emitter/emitter.js";
-import type { AwsCredentialIdentity, Provider } from "@aws-sdk/types";
-import {
- BedrockRuntimeClient as Client,
- InvokeModelCommand,
- ConverseCommand,
- ConverseCommandOutput,
- ConverseStreamCommand,
- ContentBlockDeltaEvent,
- InferenceConfiguration,
- Message as BedrockMessage,
- SystemContentBlock as BedrockSystemContentBlock,
-} from "@aws-sdk/client-bedrock-runtime";
-import { GetRunContext } from "@/context.js";
-import { Serializer } from "@/serializer/serializer.js";
-import { omitUndefined } from "@/internals/helpers/object.js";
-
-type Response = ContentBlockDeltaEvent | ConverseCommandOutput;
-
-export interface BedrockEmbeddingOptions extends EmbeddingOptions {
-  body?: Record<string, any>;
-}
-
-export class ChatBedrockOutput extends ChatLLMOutput {
- public readonly responses: Response[];
-
- constructor(response: Response) {
- super();
- this.responses = [response];
- }
-
- static {
- this.register();
- }
-
- get messages() {
- return this.responses.flatMap((response) => {
- if ("delta" in response && response.delta?.text) {
- return [
- BaseMessage.of({
- role: Role.ASSISTANT,
- text: response.delta.text,
- }),
- ];
- } else if ("output" in response && response.output?.message?.content) {
- return response.output.message.content
- .filter((choice) => choice?.text)
- .map((choice) =>
- BaseMessage.of({
- role: Role.ASSISTANT,
- text: choice.text!,
- }),
- );
- }
- return [];
- });
- }
-
- getTextContent() {
- return this.messages.map((msg) => msg.text).join("");
- }
-
- merge(other: ChatBedrockOutput) {
- this.responses.push(...other.responses);
- }
-
- toString() {
- return this.getTextContent();
- }
-
- createSnapshot() {
- return {
- responses: shallowCopy(this.responses),
- };
- }
-
-  loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>) {
- Object.assign(this, snapshot);
- }
-}
-
-interface Input {
- modelId?: string;
- region?: string;
- client?: Client;
- credentials?: AwsCredentialIdentity | Provider;
- parameters?: InferenceConfiguration;
- executionOptions?: ExecutionOptions;
- cache?: LLMCache;
-}
-
-export type BedrockChatLLMEvents = ChatLLMGenerateEvents<ChatBedrockOutput>;
-
-export class BedrockChatLLM extends ChatLLM<ChatBedrockOutput> {
-  public readonly emitter = Emitter.root.child<BedrockChatLLMEvents>({
- namespace: ["bedrock", "chat_llm"],
- creator: this,
- });
-
- public readonly client: Client;
-  public readonly parameters: Partial<InferenceConfiguration>;
-
- constructor({
- client,
- modelId = "amazon.titan-text-lite-v1",
- region = "us-east-1",
- credentials,
- parameters = {
- temperature: 0,
- },
- executionOptions = {},
- cache,
- }: Input = {}) {
- super(modelId, executionOptions, cache);
- this.client = client ?? new Client({ region: region, credentials: credentials });
- this.parameters = parameters ?? {};
- }
-
- static {
- this.register();
- Serializer.register(Client, {
- toPlain: (value) => ({
- config: {
- region: value.config.region,
- credentials: value.config.credentials,
- },
- }),
- fromPlain: (value) =>
- new Client({
- region: value.config.region,
- credentials: value.config.credentials,
- }),
- });
- }
-
-  async meta(): Promise<LLMMeta> {
- if (this.modelId.includes("titan-text-premier")) {
- return { tokenLimit: 3 * 1024 };
- } else if (
- this.modelId.includes("titan-text-express") ||
- this.modelId.includes("anthropic.claude-v2") ||
- this.modelId.includes("anthropic.claude-instant-v1") ||
- this.modelId.includes("anthropic.claude-3-sonnet") ||
- this.modelId.includes("anthropic.claude-3-haiku") ||
- this.modelId.includes("anthropic.claude-3-opus") ||
- this.modelId.includes("meta.llama2") ||
- this.modelId.includes("cohere.command-text") ||
- this.modelId.includes("cohere.command-light")
- ) {
- return { tokenLimit: 4 * 1024 };
- } else if (
- this.modelId.includes("titan-text-lite") ||
- this.modelId.includes("anthropic.claude-3-5-sonnet") ||
- this.modelId.includes("anthropic.claude-3-5-haiku") ||
- this.modelId.includes("meta.llama3-8b") ||
- this.modelId.includes("meta.llama3-70b") ||
- this.modelId.includes("ai21.j2")
- ) {
- return { tokenLimit: 8 * 1024 };
- } else if (
- this.modelId.includes("mistral.mistral-7b") ||
- this.modelId.includes("mistral.mixtral-8x7b") ||
- this.modelId.includes("mistral.mistral-small")
- ) {
- return { tokenLimit: 32 * 1024 };
- } else if (
- this.modelId.includes("meta.llama3-1") ||
- this.modelId.includes("meta.llama3-2") ||
- this.modelId.includes("mistral.mistral-large") ||
- this.modelId.includes("cohere.command-r")
- ) {
- return { tokenLimit: 128 * 1024 };
- } else if (this.modelId.includes("ai21.jamba")) {
- return { tokenLimit: 256 * 1024 };
- }
-
- return {
- tokenLimit: Infinity,
- };
- }
-
- async embed(
- input: BaseMessage[][],
- options: BedrockEmbeddingOptions = {},
-  ): Promise<EmbeddingOutput> {
- const command = new InvokeModelCommand({
- modelId: this.modelId,
- contentType: "application/json",
- accept: "application/json",
- body: JSON.stringify(
- omitUndefined({
- inputText: input.flat().map((msg) => msg.text),
- ...options?.body,
- }),
- ),
- });
-
- const response = await this.client.send(command, { abortSignal: options?.signal });
- const jsonString = new TextDecoder().decode(response.body);
- return JSON.parse(jsonString);
- }
-
-  async tokenize(input: BaseMessage[]): Promise<BaseLLMTokenizeOutput> {
- const contentLength = input.reduce((acc, msg) => acc + msg.text.length, 0);
- return { tokensCount: Math.ceil(contentLength / 4) };
- }
-
- protected async _generate(
- input: BaseMessage[],
-    _options: Partial<GenerateOptions>,
-    run: GetRunContext<typeof this>,
-  ): Promise<ChatBedrockOutput> {
- const { conversation, systemMessage } = this.convertToConverseMessages(input);
- const command = new ConverseCommand({
- modelId: this.modelId,
- messages: conversation,
- system: systemMessage,
- ...this.parameters,
- });
- const response = await this.client.send(command, { abortSignal: run.signal });
- return new ChatBedrockOutput(response);
- }
-
- protected async *_stream(
- input: BaseMessage[],
- _options: StreamGenerateOptions | undefined,
-    run: GetRunContext<typeof this>,
-  ): AsyncStream<ChatBedrockOutput> {
- const { conversation, systemMessage } = this.convertToConverseMessages(input);
- const command = new ConverseStreamCommand({
- modelId: this.modelId,
- messages: conversation,
- system: systemMessage,
- ...this.parameters,
- });
- const response = await this.client.send(command, { abortSignal: run.signal });
- for await (const chunk of response?.stream || []) {
- if (chunk.contentBlockDelta) {
- yield new ChatBedrockOutput(chunk.contentBlockDelta);
- }
- }
- }
-
- createSnapshot() {
- return {
- ...super.createSnapshot(),
- client: this.client,
- modelId: this.modelId,
- parameters: shallowCopy(this.parameters),
- };
- }
-
- protected convertToConverseMessages(messages: BaseMessage[]): {
- conversation: BedrockMessage[];
- systemMessage: BedrockSystemContentBlock[];
- } {
- const systemMessage: BedrockSystemContentBlock[] = messages
- .filter((msg) => msg.role === Role.SYSTEM)
- .map((msg) => ({ text: msg.text }));
-
- const converseMessages: BedrockMessage[] = messages
- .filter((msg) => msg.role !== Role.SYSTEM)
- .map((msg) => ({
- role: msg.role === Role.USER ? Role.USER : Role.ASSISTANT,
- content: [{ text: msg.text }],
- }));
-
-    const conversation = converseMessages.reduce<BedrockMessage[]>(
- (messageList, currentMessage) => {
- const lastMessage = messageList[messageList.length - 1];
- if (lastMessage && lastMessage !== currentMessage && lastMessage.role === Role.USER) {
- lastMessage.content = lastMessage.content!.concat(currentMessage.content!);
- } else {
- messageList.push(currentMessage);
- }
-
- return messageList;
- },
- [],
- );
- return { conversation, systemMessage };
- }
-}
diff --git a/src/adapters/dummy/backend/chat.ts b/src/adapters/dummy/backend/chat.ts
new file mode 100644
index 00000000..a6391f0b
--- /dev/null
+++ b/src/adapters/dummy/backend/chat.ts
@@ -0,0 +1,67 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import {
+ ChatModel,
+ ChatModelEvents,
+ ChatModelOutput,
+ ChatModelInput,
+ ChatModelParameters,
+} from "@/backend/chat.js";
+import { GetRunContext } from "@/context.js";
+import { Emitter } from "@/emitter/emitter.js";
+import { NotImplementedError } from "@/errors.js";
+
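+/** Minimal placeholder ChatModel (e.g. for tests): both generation methods throw NotImplementedError. */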
+export class DummyChatModel extends ChatModel {
+  public readonly emitter = Emitter.root.child<ChatModelEvents>({
+ namespace: ["backend", "dummy", "chat"],
+ creator: this,
+ });
+
+ constructor(
+ public readonly modelId = "dummy",
+ public readonly parameters: ChatModelParameters = {},
+ ) {
+ super();
+ }
+
+ get providerId(): string {
+ return "dummy";
+ }
+
+  protected _create(_input: ChatModelInput, _run: GetRunContext<typeof this>): Promise<ChatModelOutput> {
+ throw new NotImplementedError();
+ }
+
+ protected _createStream(
+ _input: ChatModelInput,
+    _run: GetRunContext<typeof this>,
+  ): AsyncGenerator<ChatModelOutput> {
+ throw new NotImplementedError();
+ }
+
+ createSnapshot() {
+ return { ...super.createSnapshot(), modelId: this.modelId };
+ }
+
+  loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>): void {
+ Object.assign(this, snapshot);
+ }
+
+ static {
+ this.register();
+ }
+}
diff --git a/src/adapters/dummy/backend/embedding.ts b/src/adapters/dummy/backend/embedding.ts
new file mode 100644
index 00000000..2b759f37
--- /dev/null
+++ b/src/adapters/dummy/backend/embedding.ts
@@ -0,0 +1,55 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { GetRunContext } from "@/context.js";
+import { Emitter } from "@/emitter/emitter.js";
+import { NotImplementedError } from "@/errors.js";
+import {
+ EmbeddingModel,
+ EmbeddingModelEvents,
+ EmbeddingModelInput,
+ EmbeddingModelOutput,
+} from "@/backend/embedding.js";
+
+export class DummyEmbeddingModel extends EmbeddingModel {
+  public readonly emitter = Emitter.root.child<EmbeddingModelEvents>({
+ namespace: ["backend", "dummy", "embedding"],
+ creator: this,
+ });
+
+ constructor(public readonly modelId = "dummy") {
+ super();
+ }
+
+ get providerId(): string {
+ return "dummy";
+ }
+
+ protected _create(
+ _input: EmbeddingModelInput,
+    _run: GetRunContext<typeof this>,
+  ): Promise<EmbeddingModelOutput> {
+ throw new NotImplementedError();
+ }
+
+ createSnapshot() {
+ return { ...super.createSnapshot(), modelId: this.modelId };
+ }
+
+  loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>): void {
+ Object.assign(this, snapshot);
+ }
+}
diff --git a/src/adapters/google-vertex/backend/chat.ts b/src/adapters/google-vertex/backend/chat.ts
new file mode 100644
index 00000000..2ee7db99
--- /dev/null
+++ b/src/adapters/google-vertex/backend/chat.ts
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { GoogleVertexProvider } from "@ai-sdk/google-vertex";
+import { VercelChatModel } from "@/adapters/vercel/backend/chat.js";
+import {
+ GoogleVertexClient,
+ GoogleVertexClientSettings,
+} from "@/adapters/google-vertex/backend/client.js";
+import { getEnv } from "@/internals/env.js";
+
+type GoogleVertexParameters = Parameters<GoogleVertexProvider["languageModel"]>;
+export type GoogleVertexChatModelId = NonNullable<GoogleVertexParameters[0]>;
+export type GoogleVertexChatModelSettings = NonNullable<GoogleVertexParameters[1]>;
+
+export class GoogleVertexChatModel extends VercelChatModel {
+ constructor(
+ modelId: GoogleVertexChatModelId = getEnv("GOOGLE_VERTEX_CHAT_MODEL", "gemini-1.5-pro"),
+ settings: GoogleVertexChatModelSettings = {},
+ client?: GoogleVertexClientSettings | GoogleVertexClient,
+ ) {
+ const model = GoogleVertexClient.ensure(client).instance.languageModel(modelId, settings);
+ super(model);
+ }
+}
diff --git a/src/adapters/google-vertex/backend/client.ts b/src/adapters/google-vertex/backend/client.ts
new file mode 100644
index 00000000..75dd764b
--- /dev/null
+++ b/src/adapters/google-vertex/backend/client.ts
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { getEnv } from "@/internals/env.js";
+import {
+ createVertex,
+ GoogleVertexProvider,
+ GoogleVertexProviderSettings,
+} from "@ai-sdk/google-vertex";
+import { BackendClient } from "@/backend/client.js";
+
+export type GoogleVertexClientSettings = GoogleVertexProviderSettings;
+
+export class GoogleVertexClient extends BackendClient<
+ GoogleVertexClientSettings,
+ GoogleVertexProvider
+> {
+ protected create(): GoogleVertexProvider {
+ return createVertex({
+ ...this.settings,
+ project: this.settings?.project || getEnv("GOOGLE_VERTEX_PROJECT"),
+ baseURL: this.settings?.baseURL || getEnv("GOOGLE_VERTEX_ENDPOINT"),
+ location: this.settings?.baseURL || getEnv("GOOGLE_VERTEX_LOCATION"),
+ });
+ }
+}
diff --git a/src/adapters/google-vertex/backend/embedding.ts b/src/adapters/google-vertex/backend/embedding.ts
new file mode 100644
index 00000000..5fa4d0c2
--- /dev/null
+++ b/src/adapters/google-vertex/backend/embedding.ts
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { GoogleVertexClient, GoogleVertexClientSettings } from "./client.js";
+import { VercelEmbeddingModel } from "@/adapters/vercel/backend/embedding.js";
+import { getEnv } from "@/internals/env.js";
+import { GoogleVertexProvider } from "@ai-sdk/google-vertex";
+
+type GoogleVertexParameters = Parameters<GoogleVertexProvider["textEmbeddingModel"]>;
+export type GoogleVertexChatModelId = NonNullable<GoogleVertexParameters[0]>;
+export type GoogleVertexChatModelSettings = Record<string, any>;
+
+export class GoogleVertexEmbeddingModel extends VercelEmbeddingModel {
+ constructor(
+ modelId: GoogleVertexChatModelId = getEnv(
+ "GOOGLE_VERTEX_EMBEDDING_MODEL",
+ "text-embedding-004",
+ ),
+ _settings: GoogleVertexChatModelSettings = {},
+ client?: GoogleVertexClient | GoogleVertexClientSettings,
+ ) {
+ const model = GoogleVertexClient.ensure(client).instance.textEmbeddingModel(modelId);
+ super(model);
+ }
+}
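
The three Google Vertex files follow the same layout as the other Vercel-based backends: a BackendClient subclass owns provider creation, and the chat/embedding classes resolve their model ids from the environment. A minimal construction sketch (hypothetical values; not part of this diff):

import { GoogleVertexChatModel } from "@/adapters/google-vertex/backend/chat.js";
import { GoogleVertexEmbeddingModel } from "@/adapters/google-vertex/backend/embedding.js";

// Model ids fall back to GOOGLE_VERTEX_CHAT_MODEL / GOOGLE_VERTEX_EMBEDDING_MODEL,
// then to "gemini-1.5-pro" / "text-embedding-004".
const chat = new GoogleVertexChatModel();
const embedding = new GoogleVertexEmbeddingModel();

// Client settings can also be passed explicitly instead of relying on env vars:
const custom = new GoogleVertexChatModel("gemini-1.5-flash", {}, {
  project: "my-project", // hypothetical project and location
  location: "us-central1",
});
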
diff --git a/src/adapters/groq/backend/chat.ts b/src/adapters/groq/backend/chat.ts
new file mode 100644
index 00000000..e0c4c730
--- /dev/null
+++ b/src/adapters/groq/backend/chat.ts
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { VercelChatModel } from "@/adapters/vercel/backend/chat.js";
+import { GroqClient, GroqClientSettings } from "@/adapters/groq/backend/client.js";
+import { getEnv } from "@/internals/env.js";
+import { GroqProvider } from "@ai-sdk/groq";
+
+type GroqParameters = Parameters<GroqProvider["languageModel"]>;
+export type GroqChatModelId = NonNullable<GroqParameters[0]>;
+export type GroqChatModelSettings = NonNullable<GroqParameters[1]>;
+
+export class GroqChatModel extends VercelChatModel {
+ constructor(
+ modelId: GroqChatModelId = getEnv("GROQ_CHAT_MODEL", "gemma2-9b-it"),
+ settings: GroqChatModelSettings = {},
+ client?: GroqClientSettings | GroqClient,
+ ) {
+ const model = GroqClient.ensure(client).instance.languageModel(modelId, settings);
+ super(model);
+ }
+
+ static {
+ this.register();
+ }
+}
diff --git a/src/llms/llm.ts b/src/adapters/groq/backend/client.ts
similarity index 53%
rename from src/llms/llm.ts
rename to src/adapters/groq/backend/client.ts
index 066ab736..2cbfc220 100644
--- a/src/llms/llm.ts
+++ b/src/adapters/groq/backend/client.ts
@@ -14,19 +14,18 @@
* limitations under the License.
*/
-import { BaseLLM, BaseLLMOutput, BaseLLMEvents, GenerateOptions } from "./base.js";
-import { Emitter } from "@/emitter/emitter.js";
+import { createGroq, GroqProvider, GroqProviderSettings } from "@ai-sdk/groq";
+import { BackendClient } from "@/backend/client.js";
+import { getEnv } from "@/internals/env.js";
-export type LLMInput = string;
+export type GroqClientSettings = GroqProviderSettings;
-export type LLMEvents<TOutput extends BaseLLMOutput> = BaseLLMEvents<
- LLMInput,
- TOutput
->;
-
-export abstract class LLM<
- TOutput extends BaseLLMOutput,
- TGenerateOptions extends GenerateOptions = GenerateOptions,
-> extends BaseLLM<LLMInput, TOutput, TGenerateOptions> {
-  public abstract readonly emitter: Emitter<LLMEvents<TOutput>>;
+export class GroqClient extends BackendClient<GroqClientSettings, GroqProvider> {
+ protected create(settings?: GroqClientSettings): GroqProvider {
+ return createGroq({
+ ...settings,
+ baseURL: getEnv("GROQ_API_BASE_URL"),
+ apiKey: getEnv("GROQ_API_KEY"),
+ });
+ }
}
diff --git a/src/adapters/groq/backend/embedding.ts b/src/adapters/groq/backend/embedding.ts
new file mode 100644
index 00000000..b9a55b7c
--- /dev/null
+++ b/src/adapters/groq/backend/embedding.ts
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2025 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { VercelEmbeddingModel } from "@/adapters/vercel/backend/embedding.js";
+import { getEnv } from "@/internals/env.js";
+import { GroqClient, GroqClientSettings } from "@/adapters/groq/backend/client.js";
+import { ValueError } from "@/errors.js";
+import { GroqProvider } from "@ai-sdk/groq";
+
+type GroqParameters = Parameters<GroqProvider["textEmbeddingModel"]>;
+export type GroqEmbeddingModelId = NonNullable<GroqParameters[0]>;
+export type GroqEmbeddingModelSettings = Record<string, any>;
+
+export class GroqEmbeddingModel extends VercelEmbeddingModel {
+ constructor(
+ modelId: GroqEmbeddingModelId = getEnv("GROQ_EMBEDDING_MODEL", ""),
+ _settings: GroqEmbeddingModelSettings = {},
+ client?: GroqClientSettings | GroqClient,
+ ) {
+ if (!modelId) {
+ throw new ValueError("Missing modelId!");
+ }
+ const model = GroqClient.ensure(client).instance.textEmbeddingModel(modelId);
+ super(model);
+ }
+}
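
The Groq backend mirrors the same pattern, with one difference visible above: GroqEmbeddingModel has no default model id, so it throws a ValueError when neither the constructor argument nor GROQ_EMBEDDING_MODEL is set. A minimal sketch (hypothetical model id; not part of this diff):

import { GroqChatModel } from "@/adapters/groq/backend/chat.js";
import { GroqEmbeddingModel } from "@/adapters/groq/backend/embedding.js";

const chat = new GroqChatModel(); // GROQ_CHAT_MODEL or "gemma2-9b-it"
const embedding = new GroqEmbeddingModel("my-embedding-model"); // an empty id would throw ValueError
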
diff --git a/src/adapters/groq/chat.ts b/src/adapters/groq/chat.ts
deleted file mode 100644
index a63f4375..00000000
--- a/src/adapters/groq/chat.ts
+++ /dev/null
@@ -1,258 +0,0 @@
-/**
- * Copyright 2025 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import {
- AsyncStream,
- BaseLLMTokenizeOutput,
- EmbeddingOptions,
- EmbeddingOutput,
- ExecutionOptions,
- GenerateOptions,
- LLMCache,
- LLMMeta,
- StreamGenerateOptions,
-} from "@/llms/base.js";
-import { shallowCopy } from "@/serializer/utils.js";
-import { ChatLLM, ChatLLMGenerateEvents, ChatLLMOutput } from "@/llms/chat.js";
-import { BaseMessage, RoleType } from "@/llms/primitives/message.js";
-import { Emitter } from "@/emitter/emitter.js";
-import { ClientOptions, Groq as Client } from "groq-sdk";
-import { GetRunContext } from "@/context.js";
-import { Serializer } from "@/serializer/serializer.js";
-import { getPropStrict } from "@/internals/helpers/object.js";
-import { ChatCompletionCreateParams } from "groq-sdk/resources/chat/completions";
-
-type Parameters = Omit;
-type Response = Omit;
-
-export class ChatGroqOutput extends ChatLLMOutput {
- public readonly responses: Response[];
-
- constructor(response: Response) {
- super();
- this.responses = [response];
- }
-
- static {
- this.register();
- }
-
- get messages() {
- return this.responses
- .flatMap((response) => response.choices)
- .flatMap((choice) =>
- BaseMessage.of({
- role: choice.delta.role as RoleType,
- text: choice.delta.content!,
- }),
- );
- }
-
- getTextContent(): string {
- return this.messages.map((msg) => msg.text).join("\n");
- }
-
- merge(other: ChatGroqOutput): void {
- this.responses.push(...other.responses);
- }
-
- toString(): string {
- return this.getTextContent();
- }
-
- createSnapshot() {
- return {
- responses: shallowCopy(this.responses),
- };
- }
-
-  loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>): void {
- Object.assign(this, snapshot);
- }
-}
-
-interface Input {
- modelId?: string;
- client?: Client;
- parameters?: Parameters;
- executionOptions?: ExecutionOptions;
-  cache?: LLMCache<ChatGroqOutput>;
-}
-
-export type GroqChatLLMEvents = ChatLLMGenerateEvents<ChatGroqOutput>;
-
-export class GroqChatLLM extends ChatLLM<ChatGroqOutput> {
-  public readonly emitter = Emitter.root.child<GroqChatLLMEvents>({
- namespace: ["groq", "chat_llm"],
- creator: this,
- });
-
- public readonly client: Client;
-  public readonly parameters: Partial<Parameters>;
-
- constructor({
- client,
- modelId = "llama-3.3-70b-versatile",
- parameters = {
- temperature: 0,
- },
- executionOptions = {},
- cache,
- }: Input = {}) {
- super(modelId, executionOptions, cache);
- this.client = client ?? new Client();
- this.parameters = parameters ?? {};
- }
-
- static {
- this.register();
- Serializer.register(Client, {
- toPlain: (value) => ({
- options: getPropStrict(value, "_options") as ClientOptions,
- }),
- fromPlain: (value) => new Client(value.options),
- });
- }
-
-  async meta(): Promise<LLMMeta> {
- if (
- this.modelId.includes("gemma") ||
- this.modelId.includes("llama3") ||
- this.modelId.includes("llama-guard") ||
- this.modelId.includes("-preview") ||
- this.modelId.includes("-specdec")
- ) {
- return { tokenLimit: 8 * 1024 };
- } else if (this.modelId.includes("llava-v1.5")) {
- return { tokenLimit: 4 * 1024 };
- } else if (
- this.modelId.includes("llama-3.1-70b") ||
- this.modelId.includes("llama-3.1-8b") ||
- this.modelId.includes("llama-3.3-70b")
- ) {
- return { tokenLimit: 128 * 1024 };
- } else if (this.modelId.includes("mixtral-8x7b")) {
- return { tokenLimit: 32 * 1024 };
- }
-
- return {
- tokenLimit: Infinity,
- };
- }
-
-  async embed(input: BaseMessage[][], options?: EmbeddingOptions): Promise<EmbeddingOutput> {
- const { data } = await this.client.embeddings.create(
- {
- model: this.modelId,
- input: input.flatMap((msgs) => msgs.map((msg) => msg.text)) as string[],
- encoding_format: "float",
- },
- {
- signal: options?.signal,
- stream: false,
- },
- );
- return { embeddings: data.map(({ embedding }) => embedding as number[]) };
- }
-
-  async tokenize(input: BaseMessage[]): Promise<BaseLLMTokenizeOutput> {
- const contentLength = input.reduce((acc, msg) => acc + msg.text.length, 0);
-
- return {
- tokensCount: Math.ceil(contentLength / 4),
- };
- }
-
- protected _prepareRequest(
- input: BaseMessage[],
- options: GenerateOptions,
- ): ChatCompletionCreateParams {
- return {
- ...this.parameters,
- model: this.modelId,
- stream: false,
- messages: input.map(
- (message) =>
- ({
- role: message.role,
- content: message.text,
- }) as Client.Chat.ChatCompletionMessageParam,
- ),
- ...(options?.guided?.json && {
- response_format: {
- type: "json_object",
- },
- }),
- };
- }
-
- protected async _generate(
- input: BaseMessage[],
- options: GenerateOptions,
-    run: GetRunContext<typeof this>,
-  ): Promise<ChatGroqOutput> {
- const response = await this.client.chat.completions.create(
- {
- ...this._prepareRequest(input, options),
- stream: false,
- },
- {
- signal: run.signal,
- },
- );
- return new ChatGroqOutput({
- id: response.id,
- model: response.model,
- created: response.created,
- system_fingerprint: response.system_fingerprint,
- choices: response.choices.map(
- (choice) =>
- ({
- delta: choice.message,
- index: choice.index,
- logprobs: choice.logprobs,
- finish_reason: choice.finish_reason,
- }) as Client.Chat.ChatCompletionChunk.Choice,
- ),
- });
- }
-
- protected async *_stream(
- input: BaseMessage[],
-    options: Partial<StreamGenerateOptions>,
-    run: GetRunContext<typeof this>,
-  ): AsyncStream<ChatGroqOutput> {
- for await (const chunk of await this.client.chat.completions.create(
- {
- ...this._prepareRequest(input, options),
- stream: true,
- },
- {
- signal: run.signal,
- },
- )) {
- yield new ChatGroqOutput(chunk);
- }
- }
-
- createSnapshot() {
- return {
- ...super.createSnapshot(),
- parameters: shallowCopy(this.parameters),
- client: this.client,
- };
- }
-}
diff --git a/src/adapters/ibm-vllm/chat.ts b/src/adapters/ibm-vllm/chat.ts
deleted file mode 100644
index 225298db..00000000
--- a/src/adapters/ibm-vllm/chat.ts
+++ /dev/null
@@ -1,203 +0,0 @@
-/**
- * Copyright 2025 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import { isFunction, isObjectType } from "remeda";
-
-import {
- IBMvLLM,
- IBMvLLMEmbeddingOptions,
- IBMvLLMGenerateOptions,
- IBMvLLMOutput,
- IBMvLLMParameters,
-} from "./llm.js";
-
-import { Cache } from "@/cache/decoratorCache.js";
-import { BaseMessage, Role } from "@/llms/primitives/message.js";
-import { Emitter } from "@/emitter/emitter.js";
-import { ChatLLM, ChatLLMGenerateEvents, ChatLLMOutput } from "@/llms/chat.js";
-import {
- AsyncStream,
- BaseLLMTokenizeOutput,
- EmbeddingOutput,
- LLMCache,
- LLMError,
- LLMMeta,
-} from "@/llms/base.js";
-import { transformAsyncIterable } from "@/internals/helpers/stream.js";
-import { shallowCopy } from "@/serializer/utils.js";
-import { IBMVllmChatLLMPreset, IBMVllmChatLLMPresetModel } from "@/adapters/ibm-vllm/chatPreset.js";
-import { Client } from "./client.js";
-import { GetRunContext } from "@/context.js";
-
-export class GrpcChatLLMOutput extends ChatLLMOutput {
- public readonly raw: IBMvLLMOutput;
-
- constructor(rawOutput: IBMvLLMOutput) {
- super();
- this.raw = rawOutput;
- }
-
- @Cache()
- get messages(): BaseMessage[] {
- const text = this.raw.getTextContent();
- return [
- BaseMessage.of({
- role: Role.ASSISTANT,
- text,
- meta: this.raw.meta,
- }),
- ];
- }
-
- merge(other: GrpcChatLLMOutput): void {
- Cache.getInstance(this, "messages").clear();
- this.raw.merge(other.raw);
- }
-
- getTextContent(): string {
- const [message] = this.messages;
- return message.text;
- }
-
- toString(): string {
- return this.getTextContent();
- }
-
- createSnapshot() {
- return {
- raw: shallowCopy(this.raw),
- };
- }
-
-  loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>) {
- Object.assign(this, snapshot);
- }
-}
-
-export interface IBMVllmInputConfig {
- messagesToPrompt: (messages: BaseMessage[]) => string;
-}
-
-export interface GrpcChatLLMInput {
- llm: IBMvLLM;
- config: IBMVllmInputConfig;
-  cache?: LLMCache<GrpcChatLLMOutput>;
-}
-
-export type IBMVllmChatEvents = ChatLLMGenerateEvents<GrpcChatLLMOutput>;
-
-export class IBMVllmChatLLM extends ChatLLM<GrpcChatLLMOutput> {
-  public readonly emitter = new Emitter<IBMVllmChatEvents>({
- namespace: ["ibm_vllm", "chat_llm"],
- creator: this,
- });
-
- public readonly llm: IBMvLLM;
- protected readonly config: IBMVllmInputConfig;
-
- constructor({ llm, config, cache }: GrpcChatLLMInput) {
- super(llm.modelId, llm.executionOptions, cache);
- this.llm = llm;
- this.config = config;
- }
-
- static {
- this.register();
- }
-
-  async meta(): Promise<LLMMeta> {
- return this.llm.meta();
- }
-
-  async embed(input: BaseMessage[][], options?: IBMvLLMEmbeddingOptions): Promise<EmbeddingOutput> {
- const inputs = input.map((messages) => this.messagesToPrompt(messages));
- return this.llm.embed(inputs, options);
- }
-
- createSnapshot() {
- return {
- ...super.createSnapshot(),
- modelId: this.modelId,
- executionOptions: this.executionOptions,
- llm: this.llm,
- config: shallowCopy(this.config),
- };
- }
-
-  async tokenize(messages: BaseMessage[]): Promise<BaseLLMTokenizeOutput> {
- const prompt = this.messagesToPrompt(messages);
- return this.llm.tokenize(prompt);
- }
-
- protected async _generate(
- messages: BaseMessage[],
- options: IBMvLLMGenerateOptions | undefined,
-    run: GetRunContext<typeof this>,
-  ): Promise<GrpcChatLLMOutput> {
- const prompt = this.messagesToPrompt(messages);
- // @ts-expect-error protected property
- const rawResponse = await this.llm._generate(prompt, options, run);
- return new GrpcChatLLMOutput(rawResponse);
- }
-
- protected async *_stream(
- messages: BaseMessage[],
- options: IBMvLLMGenerateOptions | undefined,
-    run: GetRunContext<typeof this>,
-  ): AsyncStream<GrpcChatLLMOutput> {
- const prompt = this.messagesToPrompt(messages);
- // @ts-expect-error protected property
- const response = this.llm._stream(prompt, options, run);
- return yield* transformAsyncIterable(response, (output) => new GrpcChatLLMOutput(output));
- }
-
- messagesToPrompt(messages: BaseMessage[]) {
- return this.config.messagesToPrompt(messages);
- }
-
- static fromPreset(
- modelId: IBMVllmChatLLMPresetModel,
- overrides?: {
- client?: Client;
- parameters?: IBMvLLMParameters | ((value: IBMvLLMParameters) => IBMvLLMParameters);
- },
- ) {
- const presetFactory = IBMVllmChatLLMPreset[modelId];
- if (!presetFactory) {
- throw new LLMError(`Model "${modelId}" does not exist in preset.`);
- }
-
- const preset = presetFactory();
- let parameters = preset.base.parameters ?? {};
- if (overrides) {
- if (isFunction(overrides.parameters)) {
- parameters = overrides.parameters(parameters);
- } else if (isObjectType(overrides.parameters)) {
- parameters = overrides.parameters;
- }
- }
-
- return new IBMVllmChatLLM({
- config: preset.chat,
- llm: new IBMvLLM({
- ...preset.base,
- ...overrides,
- parameters,
- modelId,
- }),
- });
- }
-}
diff --git a/src/adapters/ibm-vllm/chatPreset.ts b/src/adapters/ibm-vllm/chatPreset.ts
deleted file mode 100644
index 887c0d7d..00000000
--- a/src/adapters/ibm-vllm/chatPreset.ts
+++ /dev/null
@@ -1,142 +0,0 @@
-/**
- * Copyright 2025 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import { LLMChatTemplates } from "@/adapters/shared/llmChatTemplates.js";
-import { IBMVllmInputConfig } from "./chat.js";
-import { IBMvLLMInput } from "./llm.js";
-
-interface IBMVllmChatLLMPreset {
- chat: IBMVllmInputConfig;
- base: IBMvLLMInput;
-}
-
-export const IBMVllmModel = {
- LLAMA_3_3_70B_INSTRUCT: "meta-llama/llama-3-3-70b-instruct",
- LLAMA_3_1_405B_INSTRUCT_FP8: "meta-llama/llama-3-1-405b-instruct-fp8",
- LLAMA_3_1_70B_INSTRUCT: "meta-llama/llama-3-1-70b-instruct",
- LLAMA_3_1_8B_INSTRUCT: "meta-llama/llama-3-1-8b-instruct",
- GRANITE_3_1_8B_INSTRUCT: "ibm-granite/granite-3-1-8b-instruct",
-} as const;
-export type IBMVllmModel = (typeof IBMVllmModel)[keyof typeof IBMVllmModel];
-
-export const IBMVllmChatLLMPreset = {
- [IBMVllmModel.LLAMA_3_3_70B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
- const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3.3");
- return {
- base: {
- modelId: IBMVllmModel.LLAMA_3_3_70B_INSTRUCT,
- parameters: {
- method: "GREEDY",
- stopping: {
- stop_sequences: [...parameters.stop_sequence],
- include_stop_sequence: false,
- max_new_tokens: 2048,
- },
- decoding: {
- repetition_penalty: 1,
- },
- },
- },
- chat: {
- messagesToPrompt: messagesToPrompt(template),
- },
- };
- },
- [IBMVllmModel.LLAMA_3_1_405B_INSTRUCT_FP8]: (): IBMVllmChatLLMPreset => {
- const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3.1");
- return {
- base: {
- modelId: IBMVllmModel.LLAMA_3_1_70B_INSTRUCT,
- parameters: {
- method: "GREEDY",
- stopping: {
- stop_sequences: [...parameters.stop_sequence],
- include_stop_sequence: false,
- max_new_tokens: 2048,
- },
- decoding: {
- repetition_penalty: 1,
- },
- },
- },
- chat: {
- messagesToPrompt: messagesToPrompt(template),
- },
- };
- },
- [IBMVllmModel.LLAMA_3_1_70B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
- const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3.1");
- return {
- base: {
- modelId: IBMVllmModel.LLAMA_3_1_70B_INSTRUCT,
- parameters: {
- method: "GREEDY",
- stopping: {
- stop_sequences: [...parameters.stop_sequence],
- include_stop_sequence: false,
- max_new_tokens: 2048,
- },
- decoding: {
- repetition_penalty: 1,
- },
- },
- },
- chat: {
- messagesToPrompt: messagesToPrompt(template),
- },
- };
- },
- [IBMVllmModel.LLAMA_3_1_8B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
- const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3");
- return {
- base: {
- modelId: IBMVllmModel.LLAMA_3_1_8B_INSTRUCT,
- parameters: {
- method: "GREEDY",
- stopping: {
- stop_sequences: [...parameters.stop_sequence],
- include_stop_sequence: false,
- max_new_tokens: 2048,
- },
- },
- },
- chat: {
- messagesToPrompt: messagesToPrompt(template),
- },
- };
- },
- [IBMVllmModel.GRANITE_3_1_8B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
- const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("granite3.1-Instruct");
- return {
- base: {
- modelId: IBMVllmModel.GRANITE_3_1_8B_INSTRUCT,
- parameters: {
- method: "GREEDY",
- stopping: {
- stop_sequences: [...parameters.stop_sequence],
- include_stop_sequence: false,
- max_new_tokens: 2048,
- },
- },
- },
- chat: {
- messagesToPrompt: messagesToPrompt(template),
- },
- };
- },
-} as const satisfies Record<IBMVllmModel, () => IBMVllmChatLLMPreset>;
-
-export type IBMVllmChatLLMPresetModel = keyof typeof IBMVllmChatLLMPreset;
diff --git a/src/adapters/ibm-vllm/client.ts b/src/adapters/ibm-vllm/client.ts
deleted file mode 100644
index de4f751f..00000000
--- a/src/adapters/ibm-vllm/client.ts
+++ /dev/null
@@ -1,269 +0,0 @@
-/**
- * Copyright 2025 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import grpc, {
- CallOptions as GRPCCallOptions,
- ClientOptions as GRPCClientOptions,
- ClientReadableStream,
- ClientUnaryCall,
- Metadata,
-} from "@grpc/grpc-js";
-
-import * as R from "remeda";
-// eslint-disable-next-line no-restricted-imports
-import { UnaryCallback } from "@grpc/grpc-js/build/src/client.js";
-import { FrameworkError, ValueError } from "@/errors.js";
-import protoLoader, { Options } from "@grpc/proto-loader";
-
-import {
- BatchedGenerationRequest,
- BatchedGenerationResponse__Output,
- BatchedTokenizeRequest,
- BatchedTokenizeResponse__Output,
- type EmbeddingTasksRequest,
- GenerationRequest__Output,
- ModelInfoRequest,
- ModelInfoResponse__Output,
- ProtoGrpcType as GenerationProtoGentypes,
- ProtoGrpcType$1 as CaikitProtoGentypes,
- SingleGenerationRequest,
- EmbeddingResults__Output,
- type SubtypeConstructor,
-} from "@/adapters/ibm-vllm/types.js";
-import { parseEnv } from "@/internals/env.js";
-import { z } from "zod";
-import { Cache } from "@/cache/decoratorCache.js";
-import { Serializable } from "@/internals/serializable.js";
-import PQueue from "p-queue-compat";
-import { getProp } from "@/internals/helpers/object.js";
-
-const GENERATION_PROTO_PATH = new URL("./proto/generation.proto", import.meta.url);
-const NLP_PROTO_PATH = new URL("./proto/caikit_runtime_Nlp.proto", import.meta.url);
-
-interface ClientOptions {
- modelRouterSubdomain?: string;
- url: string;
- credentials: {
- rootCert: string;
- certChain: string;
- privateKey: string;
- };
- grpcClientOptions: GRPCClientOptions;
- clientShutdownDelay: number;
- limits?: {
- concurrency?: {
- embeddings?: number;
- };
- };
-}
-
-const defaultOptions = {
- clientShutdownDelay: 5 * 60 * 1000,
- grpcClientOptions: {
- // This is needed, otherwise communication to DIPC cluster fails with "Dropped connection" error after +- 50 secs
- "grpc.keepalive_time_ms": 25000,
- "grpc.max_receive_message_length": 32 * 1024 * 1024, // 32MiB
- },
-};
-
-const grpcConfig: Options = {
- longs: Number,
- enums: String,
- arrays: true,
- objects: true,
- oneofs: true,
- keepCase: true,
- defaults: true,
-};
-
-const generationPackage = grpc.loadPackageDefinition(
- protoLoader.loadSync([GENERATION_PROTO_PATH.pathname], grpcConfig),
-) as unknown as GenerationProtoGentypes;
-
-const embeddingsPackage = grpc.loadPackageDefinition(
- protoLoader.loadSync([NLP_PROTO_PATH.pathname], grpcConfig),
-) as unknown as CaikitProtoGentypes;
-
-const GRPC_CLIENT_TTL = 15 * 60 * 1000;
-
-type CallOptions = GRPCCallOptions & { signal?: AbortSignal };
-type RequiredModel = T & { model_id: string };
-
-export class Client extends Serializable {
- public readonly options: ClientOptions;
- private usedDefaultCredentials = false;
-
- @Cache({ ttl: GRPC_CLIENT_TTL })
- protected getClient void }>(
- modelId: string,
- factory: SubtypeConstructor,
- ): T {
- const modelSpecificUrl = this.options.url.replace(/{model_id}/, modelId.replaceAll("/", "--"));
- const client = new factory(
- modelSpecificUrl,
- grpc.credentials.createSsl(
- Buffer.from(this.options.credentials.rootCert),
- Buffer.from(this.options.credentials.privateKey),
- Buffer.from(this.options.credentials.certChain),
- ),
- this.options.grpcClientOptions,
- );
- setTimeout(() => {
- try {
- client.close();
- } catch {
- /* empty */
- }
- }, GRPC_CLIENT_TTL + this.options.clientShutdownDelay).unref();
- return client;
- }
-
- protected getDefaultCredentials() {
- this.usedDefaultCredentials = true;
- return {
- rootCert: parseEnv("IBM_VLLM_ROOT_CERT", z.string()),
- privateKey: parseEnv("IBM_VLLM_PRIVATE_KEY", z.string()),
- certChain: parseEnv("IBM_VLLM_CERT_CHAIN", z.string()),
- };
- }
-
- constructor(options?: Partial) {
- super();
- this.options = {
- ...defaultOptions,
- ...options,
- url: options?.url ?? parseEnv("IBM_VLLM_URL", z.string()),
- credentials: options?.credentials ?? this.getDefaultCredentials(),
- };
- }
-
- async modelInfo(request: RequiredModel, options?: CallOptions) {
- const client = this.getClient(request.model_id, generationPackage.fmaas.GenerationService);
- return this.wrapGrpcCall(
- client.modelInfo.bind(client),
- )(request, options);
- }
-
- async generate(request: RequiredModel, options?: CallOptions) {
- const client = this.getClient(request.model_id, generationPackage.fmaas.GenerationService);
- return this.wrapGrpcCall(
- client.generate.bind(client),
- )(request, options);
- }
-
- async generateStream(request: RequiredModel, options?: CallOptions) {
- const client = this.getClient(request.model_id, generationPackage.fmaas.GenerationService);
- return this.wrapGrpcStream(
- client.generateStream.bind(client),
- )(request, options);
- }
-
- async tokenize(request: RequiredModel, options?: CallOptions) {
- const client = this.getClient(request.model_id, generationPackage.fmaas.GenerationService);
- return this.wrapGrpcCall(
- client.tokenize.bind(client),
- )(request, options);
- }
-
- async embed(request: RequiredModel, options?: CallOptions) {
- const client = this.getClient(
- request.model_id,
- embeddingsPackage.caikit.runtime.Nlp.NlpService,
- );
- return this.queues.embeddings.add(
- () =>
- this.wrapGrpcCall(
- client.embeddingTasksPredict.bind(client),
- )(request, options),
- { throwOnTimeout: true },
- );
- }
-
- protected wrapGrpcCall(
- fn: (
- request: TRequest,
- metadata: Metadata,
- options: CallOptions,
- callback: UnaryCallback,
- ) => ClientUnaryCall,
- ) {
- return (request: TRequest, { signal, ...options }: CallOptions = {}): Promise => {
- const metadata = new Metadata();
- const modelId = getProp(request, ["model_id"]);
- if (modelId) {
- metadata.add("mm-model-id", modelId);
- }
-
- return new Promise((resolve, reject) => {
- const call = fn(request, metadata, options, (err, response) => {
- signal?.removeEventListener("abort", abortHandler);
- if (err) {
- reject(err);
- } else {
- if (response === undefined) {
- reject(new FrameworkError("Invalid response from GRPC server"));
- } else {
- resolve(response);
- }
- }
- });
- const abortHandler = () => call.cancel();
- signal?.addEventListener("abort", abortHandler, { once: true });
- });
- };
- }
-
- protected wrapGrpcStream(
- fn: (request: TRequest, options: CallOptions) => ClientReadableStream,
- ) {
- return async (
- request: TRequest,
- { signal, ...options }: CallOptions = {},
- ): Promise> => {
- const stream = fn(request, options);
- const abortHandler = () => stream.cancel();
- signal?.addEventListener("abort", abortHandler, { once: true });
- stream.addListener("close", () => signal?.removeEventListener("abort", abortHandler));
- return stream;
- };
- }
-
- createSnapshot() {
- if (!this.usedDefaultCredentials) {
- throw new ValueError(
- "Cannot serialize a client with credentials passed directly. Use environment variables.",
- );
- }
- return {
- options: R.omit(this.options, ["credentials"]),
- };
- }
-
- loadSnapshot(snapshot: ReturnType) {
- Object.assign(this, snapshot);
- this.options.credentials = this.getDefaultCredentials();
- }
-
- @Cache({ enumerable: false })
- protected get queues() {
- return {
- embeddings: new PQueue({
- concurrency: this.options.limits?.concurrency?.embeddings ?? 5,
- throwOnTimeout: true,
- }),
- };
- }
-}
diff --git a/src/adapters/ibm-vllm/llm.ts b/src/adapters/ibm-vllm/llm.ts
deleted file mode 100644
index c42f53d3..00000000
--- a/src/adapters/ibm-vllm/llm.ts
+++ /dev/null
@@ -1,299 +0,0 @@
-/**
- * Copyright 2025 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import {
- AsyncStream,
- BaseLLMOutput,
- BaseLLMTokenizeOutput,
- EmbeddingOptions,
- EmbeddingOutput,
- ExecutionOptions,
- GenerateOptions,
- GuidedOptions,
- LLMCache,
- LLMError,
- LLMMeta,
-} from "@/llms/base.js";
-import { chunk, isEmpty, isString } from "remeda";
-import type {
- DecodingParameters,
- SingleGenerationRequest,
- EmbeddingTasksRequest,
-} from "@/adapters/ibm-vllm/types.js";
-import { LLM, LLMEvents, LLMInput } from "@/llms/llm.js";
-import { Emitter } from "@/emitter/emitter.js";
-import { GenerationResponse__Output } from "@/adapters/ibm-vllm/types.js";
-import { shallowCopy } from "@/serializer/utils.js";
-import { FrameworkError, NotImplementedError } from "@/errors.js";
-import { assign, omitUndefined } from "@/internals/helpers/object.js";
-import { ServiceError } from "@grpc/grpc-js";
-import { Client } from "@/adapters/ibm-vllm/client.js";
-import { GetRunContext } from "@/context.js";
-import { BatchedGenerationRequest } from "./types.js";
-import { OmitPrivateKeys } from "@/internals/types.js";
-
-function isGrpcServiceError(err: unknown): err is ServiceError {
- return (
- err instanceof Error &&
- err.constructor.name === "Error" &&
- "code" in err &&
- typeof err.code === "number"
- );
-}
-
-export class IBMvLLMOutput extends BaseLLMOutput {
- constructor(
- public text: string,
-    public readonly meta: Record<string, any>,
- ) {
- super();
- }
-
- static {
- this.register();
- }
-
- merge(other: IBMvLLMOutput): void {
- this.text += other.text;
- assign(this.meta, other.meta);
- }
-
- getTextContent(): string {
- return this.text;
- }
-
- toString(): string {
- return this.getTextContent();
- }
-
- createSnapshot() {
- return {
- text: this.text,
- meta: shallowCopy(this.meta),
- };
- }
-
-  loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>) {
- Object.assign(this, snapshot);
- }
-}
-
-export interface IBMvLLMInput {
- client?: Client;
- modelId: string;
- parameters?: IBMvLLMParameters;
- executionOptions?: ExecutionOptions;
- cache?: LLMCache;
-}
-
-export type IBMvLLMParameters = NonNullable<
- BatchedGenerationRequest["params"] & SingleGenerationRequest["params"]
->;
-
-export interface IBMvLLMGenerateOptions extends GenerateOptions {}
-
-export interface IBMvLLMEmbeddingOptions
- extends EmbeddingOptions,
- Omit, "texts"> {
- chunkSize?: number;
-}
-
-export type IBMvLLMEvents = LLMEvents<IBMvLLMOutput>;
-
-export class IBMvLLM extends LLM<IBMvLLMOutput, IBMvLLMGenerateOptions> {
-  public readonly emitter = new Emitter<IBMvLLMEvents>({
- namespace: ["ibm_vllm", "llm"],
- creator: this,
- });
-
- public readonly client: Client;
-  public readonly parameters: Partial<IBMvLLMParameters>;
-
- constructor({ client, modelId, parameters = {}, executionOptions, cache }: IBMvLLMInput) {
- super(modelId, executionOptions, cache);
- this.client = client ?? new Client();
- this.parameters = parameters ?? {};
- }
-
- static {
- this.register();
- }
-
-  async meta(): Promise<LLMMeta> {
- const response = await this.client.modelInfo({ model_id: this.modelId });
- return {
- tokenLimit: response.max_sequence_length,
- };
- }
-
- async embed(
- input: LLMInput[],
- { chunkSize, signal, ...options }: IBMvLLMEmbeddingOptions = {},
-  ): Promise<EmbeddingOutput> {
- const results = await Promise.all(
- chunk(input, chunkSize ?? 100).map(async (texts) => {
- const response = await this.client.embed(
- {
- model_id: this.modelId,
- truncate_input_tokens: options?.truncate_input_tokens ?? 512,
- texts,
- },
- {
- signal,
- },
- );
- const embeddings = response.results?.vectors.map((vector) => {
- const embedding = vector[vector.data]?.values;
- if (!embedding) {
- throw new LLMError("Missing embedding");
- }
- return embedding;
- });
- if (embeddings?.length !== texts.length) {
- throw new LLMError("Missing embedding");
- }
- return embeddings;
- }),
- );
- return { embeddings: results.flat() };
- }
-
-  async tokenize(input: LLMInput): Promise<BaseLLMTokenizeOutput> {
- try {
- const response = await this.client.tokenize({
- model_id: this.modelId,
- requests: [{ text: input }],
- });
- const output = response.responses.at(0);
- if (!output) {
- throw new LLMError("Missing output", [], { context: { response } });
- }
- return {
- tokens: output.tokens,
- tokensCount: output.token_count,
- };
- } catch (err) {
- throw this._transformError(err);
- }
- }
-
- protected async _generate(
- input: LLMInput,
- options: IBMvLLMGenerateOptions | undefined,
-    run: GetRunContext<typeof this>,
-  ): Promise<IBMvLLMOutput> {
- try {
- const response = await this.client.generate(
- {
- model_id: this.modelId,
- requests: [{ text: input }],
- params: this._prepareParameters(options),
- },
- { signal: run.signal },
- );
- const output = response.responses.at(0);
- if (!output) {
- throw new LLMError("Missing output", [], { context: { response } });
- }
-
- const { text, ...rest } = output;
- return new IBMvLLMOutput(text, rest);
- } catch (err) {
- throw this._transformError(err);
- }
- }
-
- protected async *_stream(
- input: string,
- options: IBMvLLMGenerateOptions | undefined,
-    run: GetRunContext<typeof this>,
-  ): AsyncStream<IBMvLLMOutput> {
- try {
- const stream = await this.client.generateStream(
- {
- model_id: this.modelId,
- request: { text: input },
- params: this._prepareParameters(options),
- },
- { signal: run.signal },
- );
- for await (const chunk of stream) {
- const typedChunk = chunk as GenerationResponse__Output;
- const { text, ...rest } = typedChunk;
- if (text.length > 0) {
- yield new IBMvLLMOutput(text, rest);
- }
- }
- } catch (err) {
- throw this._transformError(err);
- }
- }
-
- createSnapshot() {
- return {
- ...super.createSnapshot(),
- client: this.client,
- modelId: this.modelId,
- parameters: shallowCopy(this.parameters),
- executionOptions: shallowCopy(this.executionOptions),
- };
- }
-
- loadSnapshot(snapshot: ReturnType) {
- super.loadSnapshot(snapshot);
- Object.assign(this, snapshot);
- }
-
- protected _transformError(error: Error): Error {
- if (error instanceof FrameworkError) {
- throw error;
- }
- if (isGrpcServiceError(error)) {
- throw new LLMError("LLM has occurred an error!", [error], {
- isRetryable: [8, 4, 14].includes(error.code),
- });
- }
- return new LLMError("LLM has occurred an error!", [error]);
- }
-
- protected _prepareParameters(overrides?: GenerateOptions): typeof this.parameters {
- const guided: DecodingParameters = omitUndefined(
- overrides?.guided ? {} : (this.parameters.decoding ?? {}),
- );
- const guidedOverride: GuidedOptions = omitUndefined(overrides?.guided ?? {});
-
- if (guidedOverride?.choice) {
- guided.choice = { ...guided.choice, choices: guidedOverride.choice };
- } else if (guidedOverride?.grammar) {
- guided.grammar = guidedOverride.grammar;
- } else if (guidedOverride?.json) {
- guided.json_schema = isString(guidedOverride.json)
- ? guidedOverride.json
- : JSON.stringify(guidedOverride.json);
- } else if (guidedOverride?.regex) {
- guided.regex = guidedOverride.regex;
- } else if (!isEmpty(guidedOverride ?? {})) {
- throw new NotImplementedError(
- `Following types ${Object.keys(overrides!.guided!).join(",")}" for the constraint decoding are not supported!`,
- );
- }
-
- return {
- ...this.parameters,
- decoding: guided,
- };
- }
-}
diff --git a/src/adapters/ibm-vllm/proto/caikit_data_model_caikit_nlp.proto b/src/adapters/ibm-vllm/proto/caikit_data_model_caikit_nlp.proto
deleted file mode 100644
index ca253f57..00000000
--- a/src/adapters/ibm-vllm/proto/caikit_data_model_caikit_nlp.proto
+++ /dev/null
@@ -1,121 +0,0 @@
-// Copyright 2025 IBM Corp.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Source: https://github.com/IBM/vllm/blob/main/proto/caikit_data_model_caikit_nlp.proto
-
-/*------------------------------------------------------------------------------
- * AUTO GENERATED
- *----------------------------------------------------------------------------*/
-
-syntax = "proto3";
-package caikit_data_model.caikit_nlp;
-import "google/protobuf/struct.proto";
-import "caikit_data_model_common.proto";
-
-
-/*-- MESSAGES ----------------------------------------------------------------*/
-
-message EmbeddingResult {
-
- /*-- fields --*/
- caikit_data_model.common.Vector1D result = 1;
- caikit_data_model.common.ProducerId producer_id = 2;
- int64 input_token_count = 3;
-}
-
-message EmbeddingResults {
-
- /*-- fields --*/
- caikit_data_model.common.ListOfVector1D results = 1;
- caikit_data_model.common.ProducerId producer_id = 2;
- int64 input_token_count = 3;
-}
-
-message ExponentialDecayLengthPenalty {
-
- /*-- fields --*/
- int64 start_index = 1;
- double decay_factor = 2;
-}
-
-message GenerationTrainRecord {
-
- /*-- fields --*/
- string input = 1;
- string output = 2;
-}
-
-message RerankResult {
-
- /*-- fields --*/
- caikit_data_model.caikit_nlp.RerankScores result = 1;
- caikit_data_model.common.ProducerId producer_id = 2;
- int64 input_token_count = 3;
-}
-
-message RerankResults {
-
- /*-- fields --*/
- repeated caikit_data_model.caikit_nlp.RerankScores results = 1;
- caikit_data_model.common.ProducerId producer_id = 2;
- int64 input_token_count = 3;
-}
-
-message RerankScore {
-
- /*-- fields --*/
- google.protobuf.Struct document = 1;
- int64 index = 2;
- double score = 3;
- string text = 4;
-}
-
-message RerankScores {
-
- /*-- fields --*/
- string query = 1;
- repeated caikit_data_model.caikit_nlp.RerankScore scores = 2;
-}
-
-message SentenceSimilarityResult {
-
- /*-- fields --*/
- caikit_data_model.caikit_nlp.SentenceSimilarityScores result = 1;
- caikit_data_model.common.ProducerId producer_id = 2;
- int64 input_token_count = 3;
-}
-
-message SentenceSimilarityResults {
-
- /*-- fields --*/
- repeated caikit_data_model.caikit_nlp.SentenceSimilarityScores results = 1;
- caikit_data_model.common.ProducerId producer_id = 2;
- int64 input_token_count = 3;
-}
-
-message SentenceSimilarityScores {
-
- /*-- fields --*/
- repeated double scores = 1;
-}
-
-message TuningConfig {
-
- /*-- fields --*/
- int64 num_virtual_tokens = 1;
- string prompt_tuning_init_text = 2;
- string prompt_tuning_init_method = 3;
- string prompt_tuning_init_source_model = 4;
- repeated string output_model_types = 5;
-}
diff --git a/src/adapters/ibm-vllm/proto/caikit_data_model_common.proto b/src/adapters/ibm-vllm/proto/caikit_data_model_common.proto
deleted file mode 100644
index 6265b622..00000000
--- a/src/adapters/ibm-vllm/proto/caikit_data_model_common.proto
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright 2025 IBM Corp.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Source: https://github.com/IBM/vllm/blob/main/proto/caikit_data_model_common.proto
-
-/*------------------------------------------------------------------------------
- * AUTO GENERATED
- *----------------------------------------------------------------------------*/
-
-syntax = "proto3";
-package caikit_data_model.common;
-
-
-/*-- ENUMS -------------------------------------------------------------------*/
-
-enum TrainingStatus {
- PLACEHOLDER_UNSET = 0;
- QUEUED = 1;
- RUNNING = 2;
- COMPLETED = 3;
- CANCELED = 4;
- ERRORED = 5;
-}
-
-
-/*-- MESSAGES ----------------------------------------------------------------*/
-
-message BoolSequence {
-
- /*-- fields --*/
- repeated bool values = 1;
-}
-
-message ConnectionInfo {
-
- /*-- nested messages --*/
-
- /*-- fields --*/
- string hostname = 1;
- optional int64 port = 2;
- optional caikit_data_model.common.ConnectionTlsInfo tls = 3;
- optional int64 timeout = 4;
- map options = 5;
-}
-
-message ConnectionTlsInfo {
-
- /*-- fields --*/
- optional bool enabled = 1;
- optional bool insecure_verify = 2;
- optional string ca_file = 3;
- optional string cert_file = 4;
- optional string key_file = 5;
-}
-
-message Directory {
-
- /*-- fields --*/
- string dirname = 1;
- string extension = 2;
-}
-
-message File {
-
- /*-- fields --*/
- bytes data = 1;
- string filename = 2;
- string type = 3;
-}
-
-message FileReference {
-
- /*-- fields --*/
- string filename = 1;
-}
-
-message FloatSequence {
-
- /*-- fields --*/
- repeated double values = 1;
-}
-
-message IntSequence {
-
- /*-- fields --*/
- repeated int64 values = 1;
-}
-
-message ListOfFileReferences {
-
- /*-- fields --*/
- repeated string files = 1;
-}
-
-message ListOfVector1D {
-
- /*-- fields --*/
- repeated caikit_data_model.common.Vector1D vectors = 1;
-}
-
-message NpFloat32Sequence {
-
- /*-- fields --*/
- repeated float values = 1;
-}
-
-message NpFloat64Sequence {
-
- /*-- fields --*/
- repeated double values = 1;
-}
-
-message ProducerId {
-
- /*-- fields --*/
- string name = 1;
- string version = 2;
-}
-
-message ProducerPriority {
-
- /*-- fields --*/
- repeated caikit_data_model.common.ProducerId producers = 1;
-}
-
-message PyFloatSequence {
-
- /*-- fields --*/
- repeated double values = 1;
-}
-
-message S3Base {
-
- /*-- fields --*/
- string endpoint = 2;
- string region = 3;
- string bucket = 4;
- string accessKey = 5;
- string secretKey = 6;
- string IAM_id = 7;
- string IAM_api_key = 8;
-}
-
-message S3Files {
-
- /*-- fields --*/
- string endpoint = 2;
- string region = 3;
- string bucket = 4;
- string accessKey = 5;
- string secretKey = 6;
- string IAM_id = 7;
- string IAM_api_key = 8;
- repeated string files = 1;
-}
-
-message S3Path {
-
- /*-- fields --*/
- string endpoint = 2;
- string region = 3;
- string bucket = 4;
- string accessKey = 5;
- string secretKey = 6;
- string IAM_id = 7;
- string IAM_api_key = 8;
- string path = 1;
-}
-
-message StrSequence {
-
- /*-- fields --*/
- repeated string values = 1;
-}
-
-message Vector1D {
-
- /*-- fields --*/
-
- /*-- oneofs --*/
- oneof data {
- caikit_data_model.common.PyFloatSequence data_pyfloatsequence = 1;
- caikit_data_model.common.NpFloat32Sequence data_npfloat32sequence = 2;
- caikit_data_model.common.NpFloat64Sequence data_npfloat64sequence = 3;
- }
-}
diff --git a/src/adapters/ibm-vllm/proto/caikit_data_model_nlp.proto b/src/adapters/ibm-vllm/proto/caikit_data_model_nlp.proto
deleted file mode 100644
index 0396927b..00000000
--- a/src/adapters/ibm-vllm/proto/caikit_data_model_nlp.proto
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright 2025 IBM Corp.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Source: https://github.com/IBM/vllm/blob/main/proto/caikit_data_model_nlp.proto
-
-/*------------------------------------------------------------------------------
- * AUTO GENERATED
- *----------------------------------------------------------------------------*/
-
-syntax = "proto3";
-package caikit_data_model.nlp;
-import "caikit_data_model_common.proto";
-
-
-/*-- ENUMS -------------------------------------------------------------------*/
-
-enum FinishReason {
- NOT_FINISHED = 0;
- MAX_TOKENS = 1;
- EOS_TOKEN = 2;
- CANCELLED = 3;
- TIME_LIMIT = 4;
- STOP_SEQUENCE = 5;
- TOKEN_LIMIT = 6;
- ERROR = 7;
-}
-
-enum InputWarningReason {
- UNSUITABLE_INPUT = 0;
-}
-
-
-/*-- MESSAGES ----------------------------------------------------------------*/
-
-message ClassificationResult {
-
- /*-- fields --*/
- string label = 1;
- double score = 2;
-}
-
-message ClassificationResults {
-
- /*-- fields --*/
- repeated caikit_data_model.nlp.ClassificationResult results = 1;
-}
-
-message ClassificationTrainRecord {
-
- /*-- fields --*/
- string text = 1;
- repeated string labels = 2;
-}
-
-message ClassifiedGeneratedTextResult {
-
- /*-- fields --*/
- string generated_text = 1;
- caikit_data_model.nlp.TextGenTokenClassificationResults token_classification_results = 2;
- caikit_data_model.nlp.FinishReason finish_reason = 3;
- int64 generated_token_count = 4;
- uint64 seed = 5;
- int64 input_token_count = 6;
- repeated caikit_data_model.nlp.InputWarning warnings = 9;
- repeated caikit_data_model.nlp.GeneratedToken tokens = 10;
- repeated caikit_data_model.nlp.GeneratedToken input_tokens = 11;
-}
-
-message ClassifiedGeneratedTextStreamResult {
-
- /*-- fields --*/
- string generated_text = 1;
- caikit_data_model.nlp.TextGenTokenClassificationResults token_classification_results = 2;
- caikit_data_model.nlp.FinishReason finish_reason = 3;
- int64 generated_token_count = 4;
- uint64 seed = 5;
- int64 input_token_count = 6;
- repeated caikit_data_model.nlp.InputWarning warnings = 9;
- repeated caikit_data_model.nlp.GeneratedToken tokens = 10;
- repeated caikit_data_model.nlp.GeneratedToken input_tokens = 11;
- int64 processed_index = 7;
- int64 start_index = 8;
-}
-
-message GeneratedTextResult {
-
- /*-- fields --*/
- string generated_text = 1;
- int64 generated_tokens = 2;
- caikit_data_model.nlp.FinishReason finish_reason = 3;
- caikit_data_model.common.ProducerId producer_id = 4;
- int64 input_token_count = 5;
- uint64 seed = 6;
- repeated caikit_data_model.nlp.GeneratedToken tokens = 7;
- repeated caikit_data_model.nlp.GeneratedToken input_tokens = 8;
-}
-
-message GeneratedTextStreamResult {
-
- /*-- fields --*/
- string generated_text = 1;
- repeated caikit_data_model.nlp.GeneratedToken tokens = 2;
- caikit_data_model.nlp.TokenStreamDetails details = 3;
- caikit_data_model.common.ProducerId producer_id = 4;
- repeated caikit_data_model.nlp.GeneratedToken input_tokens = 5;
-}
-
-message GeneratedToken {
-
- /*-- fields --*/
- string text = 1;
- double logprob = 3;
-}
-
-message InputWarning {
-
- /*-- fields --*/
- caikit_data_model.nlp.InputWarningReason id = 1;
- string message = 2;
-}
-
-message TextGenTokenClassificationResults {
-
- /*-- fields --*/
- repeated caikit_data_model.nlp.TokenClassificationResult input = 10;
- repeated caikit_data_model.nlp.TokenClassificationResult output = 20;
-}
-
-message Token {
-
- /*-- fields --*/
- int64 start = 1;
- int64 end = 2;
- string text = 3;
-}
-
-message TokenClassificationResult {
-
- /*-- fields --*/
- int64 start = 1;
- int64 end = 2;
- string word = 3;
- string entity = 4;
- string entity_group = 5;
- double score = 6;
- int64 token_count = 7;
-}
-
-message TokenClassificationResults {
-
- /*-- fields --*/
- repeated caikit_data_model.nlp.TokenClassificationResult results = 1;
-}
-
-message TokenClassificationStreamResult {
-
- /*-- fields --*/
- repeated caikit_data_model.nlp.TokenClassificationResult results = 1;
- int64 processed_index = 2;
- int64 start_index = 3;
-}
-
-message TokenStreamDetails {
-
- /*-- fields --*/
- caikit_data_model.nlp.FinishReason finish_reason = 1;
- uint32 generated_tokens = 2;
- uint64 seed = 3;
- int64 input_token_count = 4;
-}
-
-message TokenizationResults {
-
- /*-- fields --*/
- repeated caikit_data_model.nlp.Token results = 1;
- int64 token_count = 4;
-}
-
-message TokenizationStreamResult {
-
- /*-- fields --*/
- repeated caikit_data_model.nlp.Token results = 1;
- int64 token_count = 4;
- int64 processed_index = 2;
- int64 start_index = 3;
-}
diff --git a/src/adapters/ibm-vllm/proto/caikit_data_model_runtime.proto b/src/adapters/ibm-vllm/proto/caikit_data_model_runtime.proto
deleted file mode 100644
index 569057f6..00000000
--- a/src/adapters/ibm-vllm/proto/caikit_data_model_runtime.proto
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright 2025 IBM Corp.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Source: https://github.com/IBM/vllm/blob/main/proto/caikit_data_model_runtime.proto
-
-/*------------------------------------------------------------------------------
- * AUTO GENERATED
- *----------------------------------------------------------------------------*/
-
-syntax = "proto3";
-package caikit_data_model.runtime;
-import "google/protobuf/timestamp.proto";
-import "caikit_data_model_common.proto";
-
-
-/*-- MESSAGES ----------------------------------------------------------------*/
-
-message ModelPointer {
-
- /*-- fields --*/
- string model_id = 1;
-}
-
-message TrainingInfoRequest {
-
- /*-- fields --*/
- string training_id = 1;
-}
-
-message TrainingJob {
-
- /*-- fields --*/
- string training_id = 1;
- string model_name = 2;
-}
-
-message TrainingStatusResponse {
-
- /*-- fields --*/
- string training_id = 1;
- caikit_data_model.common.TrainingStatus state = 2;
- google.protobuf.Timestamp submission_timestamp = 3;
- google.protobuf.Timestamp completion_timestamp = 4;
- repeated string reasons = 5;
-}
diff --git a/src/adapters/ibm-vllm/proto/caikit_runtime_Nlp.proto b/src/adapters/ibm-vllm/proto/caikit_runtime_Nlp.proto
deleted file mode 100644
index 413e7bbb..00000000
--- a/src/adapters/ibm-vllm/proto/caikit_runtime_Nlp.proto
+++ /dev/null
@@ -1,246 +0,0 @@
-// Copyright 2025 IBM Corp.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Source: https://github.com/IBM/vllm/blob/main/proto/caikit_runtime_Nlp.proto
-
-/*------------------------------------------------------------------------------
- * AUTO GENERATED
- *----------------------------------------------------------------------------*/
-
-syntax = "proto3";
-package caikit.runtime.Nlp;
-import "google/protobuf/struct.proto";
-import "caikit_data_model_caikit_nlp.proto";
-import "caikit_data_model_common.proto";
-import "caikit_data_model_nlp.proto";
-import "caikit_data_model_runtime.proto";
-
-
-/*-- MESSAGES ----------------------------------------------------------------*/
-
-message BidiStreamingTokenClassificationTaskRequest {
-
- /*-- fields --*/
- string text_stream = 1;
- optional double threshold = 2;
-}
-
-message DataStreamSourceGenerationTrainRecord {
-
- /*-- fields --*/
-
- /*-- oneofs --*/
- oneof data_stream {
- caikit.runtime.Nlp.DataStreamSourceGenerationTrainRecordJsonData jsondata = 1;
- caikit_data_model.common.FileReference file = 2;
- caikit_data_model.common.ListOfFileReferences list_of_files = 3;
- caikit_data_model.common.Directory directory = 4;
- caikit_data_model.common.S3Files s3files = 5;
- }
-}
-
-message DataStreamSourceGenerationTrainRecordJsonData {
-
- /*-- fields --*/
- repeated caikit_data_model.caikit_nlp.GenerationTrainRecord data = 1;
-}
-
-message EmbeddingTaskRequest {
-
- /*-- fields --*/
- string text = 1;
- optional int64 truncate_input_tokens = 2;
-}
-
-message EmbeddingTasksRequest {
-
- /*-- fields --*/
- repeated string texts = 1;
- optional int64 truncate_input_tokens = 2;
-}
-
-message RerankTaskRequest {
-
- /*-- fields --*/
- string query = 1;
- repeated google.protobuf.Struct documents = 2;
- optional int64 top_n = 3;
- optional int64 truncate_input_tokens = 4;
- optional bool return_documents = 5;
- optional bool return_query = 6;
- optional bool return_text = 7;
-}
-
-message RerankTasksRequest {
-
- /*-- fields --*/
- repeated string queries = 1;
- repeated google.protobuf.Struct documents = 2;
- optional int64 top_n = 3;
- optional int64 truncate_input_tokens = 4;
- optional bool return_documents = 5;
- optional bool return_queries = 6;
- optional bool return_text = 7;
-}
-
-message SentenceSimilarityTaskRequest {
-
- /*-- fields --*/
- string source_sentence = 1;
- repeated string sentences = 2;
- optional int64 truncate_input_tokens = 3;
-}
-
-message SentenceSimilarityTasksRequest {
-
- /*-- fields --*/
- repeated string source_sentences = 1;
- repeated string sentences = 2;
- optional int64 truncate_input_tokens = 3;
-}
-
-message ServerStreamingTextGenerationTaskRequest {
-
- /*-- fields --*/
- string text = 1;
- optional int64 max_new_tokens = 2;
- optional int64 min_new_tokens = 3;
- optional int64 truncate_input_tokens = 4;
- optional string decoding_method = 5;
- optional int64 top_k = 6;
- optional double top_p = 7;
- optional double typical_p = 8;
- optional double temperature = 9;
- optional double repetition_penalty = 10;
- optional double max_time = 11;
- optional caikit_data_model.caikit_nlp.ExponentialDecayLengthPenalty exponential_decay_length_penalty = 12;
- repeated string stop_sequences = 13;
- optional uint64 seed = 14;
- optional bool preserve_input_text = 15;
-}
-
-message TextClassificationTaskRequest {
-
- /*-- fields --*/
- string text = 1;
-}
-
-message TextGenerationTaskPeftPromptTuningTrainParameters {
-
- /*-- fields --*/
- string base_model = 1;
- caikit.runtime.Nlp.DataStreamSourceGenerationTrainRecord train_stream = 2;
- caikit_data_model.caikit_nlp.TuningConfig tuning_config = 3;
- optional caikit.runtime.Nlp.DataStreamSourceGenerationTrainRecord val_stream = 4;
- optional string device = 5;
- optional string tuning_type = 6;
- optional int64 num_epochs = 7;
- optional double learning_rate = 8;
- optional string verbalizer = 9;
- optional int64 batch_size = 10;
- optional int64 max_source_length = 11;
- optional int64 max_target_length = 12;
- optional int64 accumulate_steps = 13;
- optional string torch_dtype = 14;
- optional bool silence_progress_bars = 15;
- optional int64 seed = 16;
-}
-
-message TextGenerationTaskPeftPromptTuningTrainRequest {
-
- /*-- fields --*/
- string model_name = 1;
- caikit_data_model.common.S3Path output_path = 2;
- caikit.runtime.Nlp.TextGenerationTaskPeftPromptTuningTrainParameters parameters = 3;
-}
-
-message TextGenerationTaskRequest {
-
- /*-- fields --*/
- string text = 1;
- optional int64 max_new_tokens = 2;
- optional int64 min_new_tokens = 3;
- optional int64 truncate_input_tokens = 4;
- optional string decoding_method = 5;
- optional int64 top_k = 6;
- optional double top_p = 7;
- optional double typical_p = 8;
- optional double temperature = 9;
- optional double repetition_penalty = 10;
- optional double max_time = 11;
- optional caikit_data_model.caikit_nlp.ExponentialDecayLengthPenalty exponential_decay_length_penalty = 12;
- repeated string stop_sequences = 13;
- optional uint64 seed = 14;
- optional bool preserve_input_text = 15;
-}
-
-message TextGenerationTaskTextGenerationTrainParameters {
-
- /*-- fields --*/
- string base_model = 1;
- caikit.runtime.Nlp.DataStreamSourceGenerationTrainRecord train_stream = 2;
- optional string torch_dtype = 3;
- optional int64 max_source_length = 4;
- optional int64 max_target_length = 5;
- optional int64 batch_size = 6;
- optional int64 num_epochs = 7;
- optional int64 accumulate_steps = 8;
- optional int64 random_seed = 9;
- optional double lr = 10;
- optional bool use_iterable_dataset = 11;
-}
-
-message TextGenerationTaskTextGenerationTrainRequest {
-
- /*-- fields --*/
- string model_name = 1;
- caikit_data_model.common.S3Path output_path = 2;
- caikit.runtime.Nlp.TextGenerationTaskTextGenerationTrainParameters parameters = 3;
-}
-
-message TokenClassificationTaskRequest {
-
- /*-- fields --*/
- string text = 1;
- optional double threshold = 2;
-}
-
-message TokenizationTaskRequest {
-
- /*-- fields --*/
- string text = 1;
-}
-
-
-/*-- SERVICES ----------------------------------------------------------------*/
-
-service NlpService {
- rpc BidiStreamingTokenClassificationTaskPredict(stream caikit.runtime.Nlp.BidiStreamingTokenClassificationTaskRequest) returns (stream caikit_data_model.nlp.TokenClassificationStreamResult);
- rpc EmbeddingTaskPredict(caikit.runtime.Nlp.EmbeddingTaskRequest) returns (caikit_data_model.caikit_nlp.EmbeddingResult);
- rpc EmbeddingTasksPredict(caikit.runtime.Nlp.EmbeddingTasksRequest) returns (caikit_data_model.caikit_nlp.EmbeddingResults);
- rpc RerankTaskPredict(caikit.runtime.Nlp.RerankTaskRequest) returns (caikit_data_model.caikit_nlp.RerankResult);
- rpc RerankTasksPredict(caikit.runtime.Nlp.RerankTasksRequest) returns (caikit_data_model.caikit_nlp.RerankResults);
- rpc SentenceSimilarityTaskPredict(caikit.runtime.Nlp.SentenceSimilarityTaskRequest) returns (caikit_data_model.caikit_nlp.SentenceSimilarityResult);
- rpc SentenceSimilarityTasksPredict(caikit.runtime.Nlp.SentenceSimilarityTasksRequest) returns (caikit_data_model.caikit_nlp.SentenceSimilarityResults);
- rpc ServerStreamingTextGenerationTaskPredict(caikit.runtime.Nlp.ServerStreamingTextGenerationTaskRequest) returns (stream caikit_data_model.nlp.GeneratedTextStreamResult);
- rpc TextClassificationTaskPredict(caikit.runtime.Nlp.TextClassificationTaskRequest) returns (caikit_data_model.nlp.ClassificationResults);
- rpc TextGenerationTaskPredict(caikit.runtime.Nlp.TextGenerationTaskRequest) returns (caikit_data_model.nlp.GeneratedTextResult);
- rpc TokenClassificationTaskPredict(caikit.runtime.Nlp.TokenClassificationTaskRequest) returns (caikit_data_model.nlp.TokenClassificationResults);
- rpc TokenizationTaskPredict(caikit.runtime.Nlp.TokenizationTaskRequest) returns (caikit_data_model.nlp.TokenizationResults);
-}
-
-service NlpTrainingService {
- rpc TextGenerationTaskPeftPromptTuningTrain(caikit.runtime.Nlp.TextGenerationTaskPeftPromptTuningTrainRequest) returns (caikit_data_model.runtime.TrainingJob);
- rpc TextGenerationTaskTextGenerationTrain(caikit.runtime.Nlp.TextGenerationTaskTextGenerationTrainRequest) returns (caikit_data_model.runtime.TrainingJob);
-}
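For context on the interface this PR removes: below is a minimal, hypothetical sketch of how a client for the NlpService defined above could be built with @grpc/proto-loader and @grpc/grpc-js. The endpoint, model id, and the "mm-model-id" routing header are illustrative assumptions, not values taken from the deleted adapter (which, per the removed env vars, authenticated with TLS certificates instead of insecure credentials).

import * as grpc from "@grpc/grpc-js";
import * as protoLoader from "@grpc/proto-loader";

// Load the (now removed) caikit_runtime_Nlp.proto together with its imports.
const definition = protoLoader.loadSync("caikit_runtime_Nlp.proto", {
  includeDirs: ["./proto"], // assumed location of the .proto files
  longs: Number,
  enums: String,
  defaults: true,
  oneofs: true,
});
const pkg = grpc.loadPackageDefinition(definition) as any;

// Plain-text credentials against a hypothetical local endpoint.
const client = new pkg.caikit.runtime.Nlp.NlpService(
  "localhost:8085",
  grpc.credentials.createInsecure(),
);

// caikit runtimes typically route requests by a model-id metadata header (assumption).
const metadata = new grpc.Metadata();
metadata.set("mm-model-id", "my-model");

client.TextGenerationTaskPredict(
  { text: "Hello!", max_new_tokens: 64 },
  metadata,
  (err: grpc.ServiceError | null, result: any) => {
    if (err) throw err;
    console.log(result?.generated_text);
  },
);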
diff --git a/src/adapters/ibm-vllm/proto/generation.proto b/src/adapters/ibm-vllm/proto/generation.proto
deleted file mode 100644
index 14ac3b1b..00000000
--- a/src/adapters/ibm-vllm/proto/generation.proto
+++ /dev/null
@@ -1,287 +0,0 @@
-// Copyright 2025 IBM Corp.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Source: https://github.com/IBM/vllm/blob/main/proto/generation.proto
-
-syntax = "proto3";
-package fmaas;
-
-
-service GenerationService {
- // Generates text given a text prompt, for one or more inputs
- rpc Generate (BatchedGenerationRequest) returns (BatchedGenerationResponse) {}
- // Generates text given a single input prompt, streaming the response
- rpc GenerateStream (SingleGenerationRequest) returns (stream GenerationResponse) {}
- // Tokenize text
- rpc Tokenize (BatchedTokenizeRequest) returns (BatchedTokenizeResponse) {}
- // Model info
- rpc ModelInfo (ModelInfoRequest) returns (ModelInfoResponse) {}
-}
-
-// ============================================================================================================
-// Generation API
-
-enum DecodingMethod {
- GREEDY = 0;
- SAMPLE = 1;
-}
-
-message BatchedGenerationRequest {
- string model_id = 1;
- // Deprecated in favor of adapter_id
- optional string prefix_id = 2;
- optional string adapter_id = 4;
- repeated GenerationRequest requests = 3;
-
- Parameters params = 10;
-}
-
-message SingleGenerationRequest {
- string model_id = 1;
- // Deprecated in favor of adapter_id
- optional string prefix_id = 2;
- optional string adapter_id = 4;
- GenerationRequest request = 3;
-
- Parameters params = 10;
-}
-
-message BatchedGenerationResponse {
- repeated GenerationResponse responses = 1;
-}
-
-message GenerationRequest {
- string text = 2;
-}
-
-message GenerationResponse {
- uint32 input_token_count = 6;
- uint32 generated_token_count = 2;
- string text = 4;
- StopReason stop_reason = 7;
- // The stop sequence encountered, iff stop_reason == STOP_SEQUENCE
- string stop_sequence = 11;
- // Random seed used, not applicable for greedy requests
- uint64 seed = 10;
-
- // Individual generated tokens and associated details, if requested
- repeated TokenInfo tokens = 8;
-
- // Input tokens and associated details, if requested
- repeated TokenInfo input_tokens = 9;
-}
-
-message Parameters {
- // The high level decoding approach
- DecodingMethod method = 1;
-  // Parameters related to sampling, applicable only when method == SAMPLE
- SamplingParameters sampling = 2;
- // Parameters controlling when generation should stop
- StoppingCriteria stopping = 3;
- // Flags to control what is returned in the response
- ResponseOptions response = 4;
- // Parameters for conditionally penalizing/boosting
- // candidate tokens during decoding
- DecodingParameters decoding = 5;
- // Truncate to this many input tokens. Can be used to avoid requests
- // failing due to input being longer than configured limits.
- // Zero means don't truncate.
- uint32 truncate_input_tokens = 6;
-}
-
-message DecodingParameters {
- message LengthPenalty {
- // Start the decay after this number of tokens have been generated
- uint32 start_index = 1;
- // Factor of exponential decay
- float decay_factor = 2;
- }
-
- // Default (0.0) means no penalty (equivalent to 1.0)
- // 1.2 is a recommended value
- float repetition_penalty = 1;
-
- // Exponentially increases the score of the EOS token
- // once start_index tokens have been generated
- optional LengthPenalty length_penalty = 2;
-
- enum ResponseFormat {
- // Plain text, no constraints
- TEXT = 0;
- // Valid json
- JSON = 1;
- }
-
- message StringChoices {
- repeated string choices = 1;
- }
-
- // Mutually-exclusive guided decoding options
- oneof guided {
- // Output will be in the specified format
- ResponseFormat format = 3;
- // Output will follow the provided JSON schema
- string json_schema = 4;
- // Output will follow the provided regex pattern
- string regex = 5;
- // Output will be exactly one of the specified choices
- StringChoices choice = 6;
- // Output will follow the provided context free grammar
- string grammar = 7;
- }
-}
-
-
-message SamplingParameters {
- // Default (0.0) means disabled (equivalent to 1.0)
- float temperature = 1;
- // Default (0) means disabled
- uint32 top_k = 2;
- // Default (0) means disabled (equivalent to 1.0)
- float top_p = 3;
- // Default (0) means disabled (equivalent to 1.0)
- float typical_p = 4;
-
- optional uint64 seed = 5;
-}
-
-message StoppingCriteria {
- // Default (0) is currently 20
- uint32 max_new_tokens = 1;
- // Default (0) means no minimum
- uint32 min_new_tokens = 2;
- // Default (0) means no time limit
- uint32 time_limit_millis = 3;
- repeated string stop_sequences = 4;
- // If not specified, default behavior depends on server setting
- optional bool include_stop_sequence = 5;
-
- //more to come
-}
-
-message ResponseOptions {
- // Include input text
- bool input_text = 1;
- // Include list of individual generated tokens
- // "Extra" token information is included based on the other flags below
- bool generated_tokens = 2;
- // Include list of input tokens
- // "Extra" token information is included based on the other flags here,
- // but only for decoder-only models
- bool input_tokens = 3;
- // Include logprob for each returned token
- // Applicable only if generated_tokens == true and/or input_tokens == true
- bool token_logprobs = 4;
- // Include rank of each returned token
- // Applicable only if generated_tokens == true and/or input_tokens == true
- bool token_ranks = 5;
- // Include top n candidate tokens at the position of each returned token
- // The maximum value permitted is 5, but more may be returned if there is a tie
- // for nth place.
- // Applicable only if generated_tokens == true and/or input_tokens == true
- uint32 top_n_tokens = 6;
-}
-
-enum StopReason {
- // Possibly more tokens to be streamed
- NOT_FINISHED = 0;
- // Maximum requested tokens reached
- MAX_TOKENS = 1;
- // End-of-sequence token encountered
- EOS_TOKEN = 2;
- // Request cancelled by client
- CANCELLED = 3;
- // Time limit reached
- TIME_LIMIT = 4;
- // Stop sequence encountered
- STOP_SEQUENCE = 5;
- // Total token limit reached
- TOKEN_LIMIT = 6;
- // Decoding error
- ERROR = 7;
-}
-
-message TokenInfo {
- // uint32 id = 1; // TBD
- string text = 2;
- // The logprob (log of normalized probability), if requested
- float logprob = 3;
- // One-based rank relative to other tokens, if requested
- uint32 rank = 4;
-
- message TopToken {
- // uint32 id = 1; // TBD
- string text = 2;
- float logprob = 3;
- }
-
- // Top N candidate tokens at this position, if requested
- // May or may not include this token
- repeated TopToken top_tokens = 5;
-}
-
-
-// ============================================================================================================
-// Tokenization API
-
-message BatchedTokenizeRequest {
- string model_id = 1;
- repeated TokenizeRequest requests = 2;
- bool return_tokens = 3;
- bool return_offsets = 4;
-
- // Zero means don't truncate.
- uint32 truncate_input_tokens = 5;
-}
-
-message BatchedTokenizeResponse {
- repeated TokenizeResponse responses = 1;
-}
-
-message TokenizeRequest {
- string text = 1;
-}
-
-message TokenizeResponse {
- message Offset {
- uint32 start = 1;
- uint32 end = 2;
- }
-
- uint32 token_count = 1;
-
- // if return_tokens = true
- repeated string tokens = 2;
-  // if return_offsets = true
- repeated Offset offsets = 3;
-}
-
-
-// ============================================================================================================
-// Model Info API
-
-message ModelInfoRequest {
- string model_id = 1;
-}
-
-message ModelInfoResponse {
- enum ModelKind {
- DECODER_ONLY = 0;
- ENCODER_DECODER = 1;
- }
-
- ModelKind model_kind = 1;
- uint32 max_sequence_length = 2;
- uint32 max_new_tokens = 3;
-}
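Likewise, a hedged sketch of issuing a Generate call against the fmaas GenerationService above, showing how Parameters, SamplingParameters, StoppingCriteria, and ResponseOptions compose. The address, model id, and parameter values are illustrative only.

import * as grpc from "@grpc/grpc-js";
import * as protoLoader from "@grpc/proto-loader";

const definition = protoLoader.loadSync("generation.proto", {
  longs: Number,
  enums: String,
  defaults: true,
  oneofs: true,
});
const fmaas = (grpc.loadPackageDefinition(definition) as any).fmaas;

const client = new fmaas.GenerationService(
  "localhost:8033", // hypothetical endpoint
  grpc.credentials.createInsecure(),
);

// method must be SAMPLE for the sampling block to take effect;
// truncate_input_tokens = 0 means "do not truncate" (see the comments above).
const request = {
  model_id: "my-model", // hypothetical model id
  requests: [{ text: "Write a haiku about the sea." }],
  params: {
    method: "SAMPLE",
    sampling: { temperature: 0.7, top_p: 0.9, seed: 42 },
    stopping: { max_new_tokens: 80, stop_sequences: ["\n\n"] },
    response: { generated_tokens: true, token_logprobs: true },
    truncate_input_tokens: 0,
  },
};

client.Generate(request, (err: grpc.ServiceError | null, reply: any) => {
  if (err) throw err;
  for (const r of reply.responses ?? []) {
    console.log(r.stop_reason, r.text);
  }
});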
diff --git a/src/adapters/ibm-vllm/types.ts b/src/adapters/ibm-vllm/types.ts
deleted file mode 100644
index 3e2b3579..00000000
--- a/src/adapters/ibm-vllm/types.ts
+++ /dev/null
@@ -1,2133 +0,0 @@
-/**
- * Copyright 2025 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import * as grpc from "@grpc/grpc-js";
-import {
- Long,
- MethodDefinition,
- MessageTypeDefinition,
- EnumTypeDefinition,
-} from "@grpc/proto-loader";
-
-export interface BidiStreamingTokenClassificationTaskRequest {
- text_stream?: string;
- threshold?: number | string;
- _threshold?: "threshold";
-}
-export interface BidiStreamingTokenClassificationTaskRequest__Output {
- text_stream: string;
- threshold?: number;
- _threshold: "threshold";
-}
-
-export interface ClassificationResult {
- label?: string;
- score?: number | string;
-}
-export interface ClassificationResult__Output {
- label: string;
- score: number;
-}
-
-export interface ClassificationResults {
- results?: ClassificationResult[];
-}
-export interface ClassificationResults__Output {
- results: ClassificationResult__Output[];
-}
-
-export interface PyFloatSequence {
- values?: (number | string)[];
-}
-export interface PyFloatSequence__Output {
- values: number[];
-}
-
-export interface NpFloat32Sequence {
- values?: (number | string)[];
-}
-export interface NpFloat32Sequence__Output {
- values: number[];
-}
-
-export interface NpFloat64Sequence {
- values?: (number | string)[];
-}
-export interface NpFloat64Sequence__Output {
- values: number[];
-}
-
-export interface Vector1D {
- data_pyfloatsequence?: PyFloatSequence | null;
- data_npfloat32sequence?: NpFloat32Sequence | null;
- data_npfloat64sequence?: NpFloat64Sequence | null;
- data?: "data_pyfloatsequence" | "data_npfloat32sequence" | "data_npfloat64sequence";
-}
-export interface Vector1D__Output {
- data_pyfloatsequence?: PyFloatSequence__Output | null;
- data_npfloat32sequence?: NpFloat32Sequence__Output | null;
- data_npfloat64sequence?: NpFloat64Sequence__Output | null;
- data: "data_pyfloatsequence" | "data_npfloat32sequence" | "data_npfloat64sequence";
-}
-
-export interface ProducerId {
- name?: string;
- version?: string;
-}
-export interface ProducerId__Output {
- name: string;
- version: string;
-}
-
-export interface EmbeddingResult {
- result?: Vector1D | null;
- producer_id?: ProducerId | null;
- input_token_count?: number | string | Long;
-}
-export interface EmbeddingResult__Output {
- result: Vector1D__Output | null;
- producer_id: ProducerId__Output | null;
- input_token_count: number;
-}
-
-export interface ListOfVector1D {
- vectors?: Vector1D[];
-}
-export interface ListOfVector1D__Output {
- vectors: Vector1D__Output[];
-}
-
-export interface EmbeddingResults {
- results?: ListOfVector1D | null;
- producer_id?: ProducerId | null;
- input_token_count?: number | string | Long;
-}
-export interface EmbeddingResults__Output {
- results: ListOfVector1D__Output | null;
- producer_id: ProducerId__Output | null;
- input_token_count: number;
-}
-
-export interface EmbeddingTaskRequest {
- text?: string;
- truncate_input_tokens?: number | string | Long;
- _truncate_input_tokens?: "truncate_input_tokens";
-}
-export interface EmbeddingTaskRequest__Output {
- text: string;
- truncate_input_tokens?: number;
- _truncate_input_tokens: "truncate_input_tokens";
-}
-
-export interface EmbeddingTasksRequest {
- texts?: string[];
- truncate_input_tokens?: number | string | Long;
- _truncate_input_tokens?: "truncate_input_tokens";
-}
-export interface EmbeddingTasksRequest__Output {
- texts: string[];
- truncate_input_tokens?: number;
- _truncate_input_tokens: "truncate_input_tokens";
-}
-
-declare const FinishReason: {
- readonly NOT_FINISHED: "NOT_FINISHED";
- readonly MAX_TOKENS: "MAX_TOKENS";
- readonly EOS_TOKEN: "EOS_TOKEN";
- readonly CANCELLED: "CANCELLED";
- readonly TIME_LIMIT: "TIME_LIMIT";
- readonly STOP_SEQUENCE: "STOP_SEQUENCE";
- readonly TOKEN_LIMIT: "TOKEN_LIMIT";
- readonly ERROR: "ERROR";
-};
-export type FinishReason =
- | "NOT_FINISHED"
- | 0
- | "MAX_TOKENS"
- | 1
- | "EOS_TOKEN"
- | 2
- | "CANCELLED"
- | 3
- | "TIME_LIMIT"
- | 4
- | "STOP_SEQUENCE"
- | 5
- | "TOKEN_LIMIT"
- | 6
- | "ERROR"
- | 7;
-export type FinishReason__Output = (typeof FinishReason)[keyof typeof FinishReason];
-
-export interface GeneratedToken {
- text?: string;
- logprob?: number | string;
-}
-export interface GeneratedToken__Output {
- text: string;
- logprob: number;
-}
-
-export interface GeneratedTextResult {
- generated_text?: string;
- generated_tokens?: number | string | Long;
- finish_reason?: FinishReason;
- producer_id?: ProducerId | null;
- input_token_count?: number | string | Long;
- seed?: number | string | Long;
- tokens?: GeneratedToken[];
- input_tokens?: GeneratedToken[];
-}
-export interface GeneratedTextResult__Output {
- generated_text: string;
- generated_tokens: number;
- finish_reason: FinishReason__Output;
- producer_id: ProducerId__Output | null;
- input_token_count: number;
- seed: number;
- tokens: GeneratedToken__Output[];
- input_tokens: GeneratedToken__Output[];
-}
-
-export interface TokenStreamDetails {
- finish_reason?: FinishReason;
- generated_tokens?: number;
- seed?: number | string | Long;
- input_token_count?: number | string | Long;
-}
-export interface TokenStreamDetails__Output {
- finish_reason: FinishReason__Output;
- generated_tokens: number;
- seed: number;
- input_token_count: number;
-}
-
-export interface GeneratedTextStreamResult {
- generated_text?: string;
- tokens?: GeneratedToken[];
- details?: TokenStreamDetails | null;
- producer_id?: ProducerId | null;
- input_tokens?: GeneratedToken[];
-}
-export interface GeneratedTextStreamResult__Output {
- generated_text: string;
- tokens: GeneratedToken__Output[];
- details: TokenStreamDetails__Output | null;
- producer_id: ProducerId__Output | null;
- input_tokens: GeneratedToken__Output[];
-}
-
-declare const NullValue: {
- readonly NULL_VALUE: "NULL_VALUE";
-};
-export type NullValue = "NULL_VALUE" | 0;
-export type NullValue__Output = (typeof NullValue)[keyof typeof NullValue];
-
-export interface ListValue {
- values?: Value[];
-}
-export interface ListValue__Output {
- values: Value__Output[];
-}
-
-export interface Value {
- nullValue?: NullValue;
- numberValue?: number | string;
- stringValue?: string;
- boolValue?: boolean;
- structValue?: Struct | null;
- listValue?: ListValue | null;
- kind?: "nullValue" | "numberValue" | "stringValue" | "boolValue" | "structValue" | "listValue";
-}
-export interface Value__Output {
- nullValue?: NullValue__Output;
- numberValue?: number;
- stringValue?: string;
- boolValue?: boolean;
- structValue?: Struct__Output | null;
- listValue?: ListValue__Output | null;
- kind: "nullValue" | "numberValue" | "stringValue" | "boolValue" | "structValue" | "listValue";
-}
-
-export interface Struct {
-  fields?: Record<string, Value>;
-}
-export interface Struct__Output {
-  fields: Record<string, Value__Output>;
-}
-
-export interface RerankScore {
- document?: Struct | null;
- index?: number | string | Long;
- score?: number | string;
- text?: string;
-}
-export interface RerankScore__Output {
- document: Struct__Output | null;
- index: number;
- score: number;
- text: string;
-}
-
-export interface RerankScores {
- query?: string;
- scores?: RerankScore[];
-}
-export interface RerankScores__Output {
- query: string;
- scores: RerankScore__Output[];
-}
-
-export interface RerankResult {
- result?: RerankScores | null;
- producer_id?: ProducerId | null;
- input_token_count?: number | string | Long;
-}
-export interface RerankResult__Output {
- result: RerankScores__Output | null;
- producer_id: ProducerId__Output | null;
- input_token_count: number;
-}
-
-export interface RerankResults {
- results?: RerankScores[];
- producer_id?: ProducerId | null;
- input_token_count?: number | string | Long;
-}
-export interface RerankResults__Output {
- results: RerankScores__Output[];
- producer_id: ProducerId__Output | null;
- input_token_count: number;
-}
-
-export interface RerankTaskRequest {
- query?: string;
- documents?: Struct[];
- top_n?: number | string | Long;
- truncate_input_tokens?: number | string | Long;
- return_documents?: boolean;
- return_query?: boolean;
- return_text?: boolean;
- _top_n?: "top_n";
- _truncate_input_tokens?: "truncate_input_tokens";
- _return_documents?: "return_documents";
- _return_query?: "return_query";
- _return_text?: "return_text";
-}
-export interface RerankTaskRequest__Output {
- query: string;
- documents: Struct__Output[];
- top_n?: number;
- truncate_input_tokens?: number;
- return_documents?: boolean;
- return_query?: boolean;
- return_text?: boolean;
- _top_n: "top_n";
- _truncate_input_tokens: "truncate_input_tokens";
- _return_documents: "return_documents";
- _return_query: "return_query";
- _return_text: "return_text";
-}
-
-export interface RerankTasksRequest {
- queries?: string[];
- documents?: Struct[];
- top_n?: number | string | Long;
- truncate_input_tokens?: number | string | Long;
- return_documents?: boolean;
- return_queries?: boolean;
- return_text?: boolean;
- _top_n?: "top_n";
- _truncate_input_tokens?: "truncate_input_tokens";
- _return_documents?: "return_documents";
- _return_queries?: "return_queries";
- _return_text?: "return_text";
-}
-export interface RerankTasksRequest__Output {
- queries: string[];
- documents: Struct__Output[];
- top_n?: number;
- truncate_input_tokens?: number;
- return_documents?: boolean;
- return_queries?: boolean;
- return_text?: boolean;
- _top_n: "top_n";
- _truncate_input_tokens: "truncate_input_tokens";
- _return_documents: "return_documents";
- _return_queries: "return_queries";
- _return_text: "return_text";
-}
-
-export interface SentenceSimilarityScores {
- scores?: (number | string)[];
-}
-export interface SentenceSimilarityScores__Output {
- scores: number[];
-}
-
-export interface SentenceSimilarityResult {
- result?: SentenceSimilarityScores | null;
- producer_id?: ProducerId | null;
- input_token_count?: number | string | Long;
-}
-export interface SentenceSimilarityResult__Output {
- result: SentenceSimilarityScores__Output | null;
- producer_id: ProducerId__Output | null;
- input_token_count: number;
-}
-
-export interface SentenceSimilarityResults {
- results?: SentenceSimilarityScores[];
- producer_id?: ProducerId | null;
- input_token_count?: number | string | Long;
-}
-export interface SentenceSimilarityResults__Output {
- results: SentenceSimilarityScores__Output[];
- producer_id: ProducerId__Output | null;
- input_token_count: number;
-}
-
-export interface SentenceSimilarityTaskRequest {
- source_sentence?: string;
- sentences?: string[];
- truncate_input_tokens?: number | string | Long;
- _truncate_input_tokens?: "truncate_input_tokens";
-}
-export interface SentenceSimilarityTaskRequest__Output {
- source_sentence: string;
- sentences: string[];
- truncate_input_tokens?: number;
- _truncate_input_tokens: "truncate_input_tokens";
-}
-
-export interface SentenceSimilarityTasksRequest {
- source_sentences?: string[];
- sentences?: string[];
- truncate_input_tokens?: number | string | Long;
- _truncate_input_tokens?: "truncate_input_tokens";
-}
-export interface SentenceSimilarityTasksRequest__Output {
- source_sentences: string[];
- sentences: string[];
- truncate_input_tokens?: number;
- _truncate_input_tokens: "truncate_input_tokens";
-}
-
-export interface ExponentialDecayLengthPenalty {
- start_index?: number | string | Long;
- decay_factor?: number | string;
-}
-export interface ExponentialDecayLengthPenalty__Output {
- start_index: number;
- decay_factor: number;
-}
-
-export interface ServerStreamingTextGenerationTaskRequest {
- text?: string;
- max_new_tokens?: number | string | Long;
- min_new_tokens?: number | string | Long;
- truncate_input_tokens?: number | string | Long;
- decoding_method?: string;
- top_k?: number | string | Long;
- top_p?: number | string;
- typical_p?: number | string;
- temperature?: number | string;
- repetition_penalty?: number | string;
- max_time?: number | string;
- exponential_decay_length_penalty?: ExponentialDecayLengthPenalty | null;
- stop_sequences?: string[];
- seed?: number | string | Long;
- preserve_input_text?: boolean;
- _max_new_tokens?: "max_new_tokens";
- _min_new_tokens?: "min_new_tokens";
- _truncate_input_tokens?: "truncate_input_tokens";
- _decoding_method?: "decoding_method";
- _top_k?: "top_k";
- _top_p?: "top_p";
- _typical_p?: "typical_p";
- _temperature?: "temperature";
- _repetition_penalty?: "repetition_penalty";
- _max_time?: "max_time";
- _exponential_decay_length_penalty?: "exponential_decay_length_penalty";
- _seed?: "seed";
- _preserve_input_text?: "preserve_input_text";
-}
-export interface ServerStreamingTextGenerationTaskRequest__Output {
- text: string;
- max_new_tokens?: number;
- min_new_tokens?: number;
- truncate_input_tokens?: number;
- decoding_method?: string;
- top_k?: number;
- top_p?: number;
- typical_p?: number;
- temperature?: number;
- repetition_penalty?: number;
- max_time?: number;
- exponential_decay_length_penalty?: ExponentialDecayLengthPenalty__Output | null;
- stop_sequences: string[];
- seed?: number;
- preserve_input_text?: boolean;
- _max_new_tokens: "max_new_tokens";
- _min_new_tokens: "min_new_tokens";
- _truncate_input_tokens: "truncate_input_tokens";
- _decoding_method: "decoding_method";
- _top_k: "top_k";
- _top_p: "top_p";
- _typical_p: "typical_p";
- _temperature: "temperature";
- _repetition_penalty: "repetition_penalty";
- _max_time: "max_time";
- _exponential_decay_length_penalty: "exponential_decay_length_penalty";
- _seed: "seed";
- _preserve_input_text: "preserve_input_text";
-}
-
-export interface TextClassificationTaskRequest {
- text?: string;
-}
-export interface TextClassificationTaskRequest__Output {
- text: string;
-}
-
-export interface TextGenerationTaskRequest {
- text?: string;
- max_new_tokens?: number | string | Long;
- min_new_tokens?: number | string | Long;
- truncate_input_tokens?: number | string | Long;
- decoding_method?: string;
- top_k?: number | string | Long;
- top_p?: number | string;
- typical_p?: number | string;
- temperature?: number | string;
- repetition_penalty?: number | string;
- max_time?: number | string;
- exponential_decay_length_penalty?: ExponentialDecayLengthPenalty | null;
- stop_sequences?: string[];
- seed?: number | string | Long;
- preserve_input_text?: boolean;
- _max_new_tokens?: "max_new_tokens";
- _min_new_tokens?: "min_new_tokens";
- _truncate_input_tokens?: "truncate_input_tokens";
- _decoding_method?: "decoding_method";
- _top_k?: "top_k";
- _top_p?: "top_p";
- _typical_p?: "typical_p";
- _temperature?: "temperature";
- _repetition_penalty?: "repetition_penalty";
- _max_time?: "max_time";
- _exponential_decay_length_penalty?: "exponential_decay_length_penalty";
- _seed?: "seed";
- _preserve_input_text?: "preserve_input_text";
-}
-export interface TextGenerationTaskRequest__Output {
- text: string;
- max_new_tokens?: number;
- min_new_tokens?: number;
- truncate_input_tokens?: number;
- decoding_method?: string;
- top_k?: number;
- top_p?: number;
- typical_p?: number;
- temperature?: number;
- repetition_penalty?: number;
- max_time?: number;
- exponential_decay_length_penalty?: ExponentialDecayLengthPenalty__Output | null;
- stop_sequences: string[];
- seed?: number;
- preserve_input_text?: boolean;
- _max_new_tokens: "max_new_tokens";
- _min_new_tokens: "min_new_tokens";
- _truncate_input_tokens: "truncate_input_tokens";
- _decoding_method: "decoding_method";
- _top_k: "top_k";
- _top_p: "top_p";
- _typical_p: "typical_p";
- _temperature: "temperature";
- _repetition_penalty: "repetition_penalty";
- _max_time: "max_time";
- _exponential_decay_length_penalty: "exponential_decay_length_penalty";
- _seed: "seed";
- _preserve_input_text: "preserve_input_text";
-}
-
-export interface TokenClassificationResult {
- start?: number | string | Long;
- end?: number | string | Long;
- word?: string;
- entity?: string;
- entity_group?: string;
- score?: number | string;
- token_count?: number | string | Long;
-}
-export interface TokenClassificationResult__Output {
- start: number;
- end: number;
- word: string;
- entity: string;
- entity_group: string;
- score: number;
- token_count: number;
-}
-
-export interface TokenClassificationResults {
- results?: TokenClassificationResult[];
-}
-export interface TokenClassificationResults__Output {
- results: TokenClassificationResult__Output[];
-}
-
-export interface TokenClassificationStreamResult {
- results?: TokenClassificationResult[];
- processed_index?: number | string | Long;
- start_index?: number | string | Long;
-}
-export interface TokenClassificationStreamResult__Output {
- results: TokenClassificationResult__Output[];
- processed_index: number;
- start_index: number;
-}
-
-export interface TokenClassificationTaskRequest {
- text?: string;
- threshold?: number | string;
- _threshold?: "threshold";
-}
-export interface TokenClassificationTaskRequest__Output {
- text: string;
- threshold?: number;
- _threshold: "threshold";
-}
-
-export interface Token {
- start?: number | string | Long;
- end?: number | string | Long;
- text?: string;
-}
-export interface Token__Output {
- start: number;
- end: number;
- text: string;
-}
-
-export interface TokenizationResults {
- results?: Token[];
- token_count?: number | string | Long;
-}
-export interface TokenizationResults__Output {
- results: Token__Output[];
- token_count: number;
-}
-
-export interface TokenizationTaskRequest {
- text?: string;
-}
-export interface TokenizationTaskRequest__Output {
- text: string;
-}
-
-export interface NlpServiceClient extends grpc.Client {
- BidiStreamingTokenClassificationTaskPredict(
- metadata: grpc.Metadata,
- options?: grpc.CallOptions,
- ): grpc.ClientDuplexStream<
- BidiStreamingTokenClassificationTaskRequest,
- TokenClassificationStreamResult__Output
- >;
- BidiStreamingTokenClassificationTaskPredict(
- options?: grpc.CallOptions,
- ): grpc.ClientDuplexStream<
- BidiStreamingTokenClassificationTaskRequest,
- TokenClassificationStreamResult__Output
- >;
- bidiStreamingTokenClassificationTaskPredict(
- metadata: grpc.Metadata,
- options?: grpc.CallOptions,
- ): grpc.ClientDuplexStream<
- BidiStreamingTokenClassificationTaskRequest,
- TokenClassificationStreamResult__Output
- >;
- bidiStreamingTokenClassificationTaskPredict(
- options?: grpc.CallOptions,
- ): grpc.ClientDuplexStream<
- BidiStreamingTokenClassificationTaskRequest,
- TokenClassificationStreamResult__Output
- >;
- EmbeddingTaskPredict(
- argument: EmbeddingTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<EmbeddingResult__Output>,
- ): grpc.ClientUnaryCall;
- EmbeddingTaskPredict(
- argument: EmbeddingTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<EmbeddingResult__Output>,
- ): grpc.ClientUnaryCall;
- EmbeddingTaskPredict(
- argument: EmbeddingTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<EmbeddingResult__Output>,
- ): grpc.ClientUnaryCall;
- EmbeddingTaskPredict(
- argument: EmbeddingTaskRequest,
-    callback: grpc.requestCallback<EmbeddingResult__Output>,
- ): grpc.ClientUnaryCall;
- embeddingTaskPredict(
- argument: EmbeddingTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<EmbeddingResult__Output>,
- ): grpc.ClientUnaryCall;
- embeddingTaskPredict(
- argument: EmbeddingTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<EmbeddingResult__Output>,
- ): grpc.ClientUnaryCall;
- embeddingTaskPredict(
- argument: EmbeddingTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<EmbeddingResult__Output>,
- ): grpc.ClientUnaryCall;
- embeddingTaskPredict(
- argument: EmbeddingTaskRequest,
-    callback: grpc.requestCallback<EmbeddingResult__Output>,
- ): grpc.ClientUnaryCall;
- EmbeddingTasksPredict(
- argument: EmbeddingTasksRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<EmbeddingResults__Output>,
- ): grpc.ClientUnaryCall;
- EmbeddingTasksPredict(
- argument: EmbeddingTasksRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<EmbeddingResults__Output>,
- ): grpc.ClientUnaryCall;
- EmbeddingTasksPredict(
- argument: EmbeddingTasksRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<EmbeddingResults__Output>,
- ): grpc.ClientUnaryCall;
- EmbeddingTasksPredict(
- argument: EmbeddingTasksRequest,
-    callback: grpc.requestCallback<EmbeddingResults__Output>,
- ): grpc.ClientUnaryCall;
- embeddingTasksPredict(
- argument: EmbeddingTasksRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<EmbeddingResults__Output>,
- ): grpc.ClientUnaryCall;
- embeddingTasksPredict(
- argument: EmbeddingTasksRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<EmbeddingResults__Output>,
- ): grpc.ClientUnaryCall;
- embeddingTasksPredict(
- argument: EmbeddingTasksRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<EmbeddingResults__Output>,
- ): grpc.ClientUnaryCall;
- embeddingTasksPredict(
- argument: EmbeddingTasksRequest,
-    callback: grpc.requestCallback<EmbeddingResults__Output>,
- ): grpc.ClientUnaryCall;
- RerankTaskPredict(
- argument: RerankTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<RerankResult__Output>,
- ): grpc.ClientUnaryCall;
- RerankTaskPredict(
- argument: RerankTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<RerankResult__Output>,
- ): grpc.ClientUnaryCall;
- RerankTaskPredict(
- argument: RerankTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<RerankResult__Output>,
- ): grpc.ClientUnaryCall;
- RerankTaskPredict(
- argument: RerankTaskRequest,
-    callback: grpc.requestCallback<RerankResult__Output>,
- ): grpc.ClientUnaryCall;
- rerankTaskPredict(
- argument: RerankTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<RerankResult__Output>,
- ): grpc.ClientUnaryCall;
- rerankTaskPredict(
- argument: RerankTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<RerankResult__Output>,
- ): grpc.ClientUnaryCall;
- rerankTaskPredict(
- argument: RerankTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<RerankResult__Output>,
- ): grpc.ClientUnaryCall;
- rerankTaskPredict(
- argument: RerankTaskRequest,
-    callback: grpc.requestCallback<RerankResult__Output>,
- ): grpc.ClientUnaryCall;
- RerankTasksPredict(
- argument: RerankTasksRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<RerankResults__Output>,
- ): grpc.ClientUnaryCall;
- RerankTasksPredict(
- argument: RerankTasksRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<RerankResults__Output>,
- ): grpc.ClientUnaryCall;
- RerankTasksPredict(
- argument: RerankTasksRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<RerankResults__Output>,
- ): grpc.ClientUnaryCall;
- RerankTasksPredict(
- argument: RerankTasksRequest,
-    callback: grpc.requestCallback<RerankResults__Output>,
- ): grpc.ClientUnaryCall;
- rerankTasksPredict(
- argument: RerankTasksRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<RerankResults__Output>,
- ): grpc.ClientUnaryCall;
- rerankTasksPredict(
- argument: RerankTasksRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<RerankResults__Output>,
- ): grpc.ClientUnaryCall;
- rerankTasksPredict(
- argument: RerankTasksRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<RerankResults__Output>,
- ): grpc.ClientUnaryCall;
- rerankTasksPredict(
- argument: RerankTasksRequest,
-    callback: grpc.requestCallback<RerankResults__Output>,
- ): grpc.ClientUnaryCall;
- SentenceSimilarityTaskPredict(
- argument: SentenceSimilarityTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<SentenceSimilarityResult__Output>,
- ): grpc.ClientUnaryCall;
- SentenceSimilarityTaskPredict(
- argument: SentenceSimilarityTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<SentenceSimilarityResult__Output>,
- ): grpc.ClientUnaryCall;
- SentenceSimilarityTaskPredict(
- argument: SentenceSimilarityTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<SentenceSimilarityResult__Output>,
- ): grpc.ClientUnaryCall;
- SentenceSimilarityTaskPredict(
- argument: SentenceSimilarityTaskRequest,
-    callback: grpc.requestCallback<SentenceSimilarityResult__Output>,
- ): grpc.ClientUnaryCall;
- sentenceSimilarityTaskPredict(
- argument: SentenceSimilarityTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<SentenceSimilarityResult__Output>,
- ): grpc.ClientUnaryCall;
- sentenceSimilarityTaskPredict(
- argument: SentenceSimilarityTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<SentenceSimilarityResult__Output>,
- ): grpc.ClientUnaryCall;
- sentenceSimilarityTaskPredict(
- argument: SentenceSimilarityTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<SentenceSimilarityResult__Output>,
- ): grpc.ClientUnaryCall;
- sentenceSimilarityTaskPredict(
- argument: SentenceSimilarityTaskRequest,
-    callback: grpc.requestCallback<SentenceSimilarityResult__Output>,
- ): grpc.ClientUnaryCall;
- SentenceSimilarityTasksPredict(
- argument: SentenceSimilarityTasksRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<SentenceSimilarityResults__Output>,
- ): grpc.ClientUnaryCall;
- SentenceSimilarityTasksPredict(
- argument: SentenceSimilarityTasksRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<SentenceSimilarityResults__Output>,
- ): grpc.ClientUnaryCall;
- SentenceSimilarityTasksPredict(
- argument: SentenceSimilarityTasksRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<SentenceSimilarityResults__Output>,
- ): grpc.ClientUnaryCall;
- SentenceSimilarityTasksPredict(
- argument: SentenceSimilarityTasksRequest,
-    callback: grpc.requestCallback<SentenceSimilarityResults__Output>,
- ): grpc.ClientUnaryCall;
- sentenceSimilarityTasksPredict(
- argument: SentenceSimilarityTasksRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<SentenceSimilarityResults__Output>,
- ): grpc.ClientUnaryCall;
- sentenceSimilarityTasksPredict(
- argument: SentenceSimilarityTasksRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<SentenceSimilarityResults__Output>,
- ): grpc.ClientUnaryCall;
- sentenceSimilarityTasksPredict(
- argument: SentenceSimilarityTasksRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<SentenceSimilarityResults__Output>,
- ): grpc.ClientUnaryCall;
- sentenceSimilarityTasksPredict(
- argument: SentenceSimilarityTasksRequest,
-    callback: grpc.requestCallback<SentenceSimilarityResults__Output>,
- ): grpc.ClientUnaryCall;
- ServerStreamingTextGenerationTaskPredict(
- argument: ServerStreamingTextGenerationTaskRequest,
- metadata: grpc.Metadata,
- options?: grpc.CallOptions,
-  ): grpc.ClientReadableStream<GeneratedTextStreamResult__Output>;
- ServerStreamingTextGenerationTaskPredict(
- argument: ServerStreamingTextGenerationTaskRequest,
- options?: grpc.CallOptions,
-  ): grpc.ClientReadableStream<GeneratedTextStreamResult__Output>;
- serverStreamingTextGenerationTaskPredict(
- argument: ServerStreamingTextGenerationTaskRequest,
- metadata: grpc.Metadata,
- options?: grpc.CallOptions,
-  ): grpc.ClientReadableStream<GeneratedTextStreamResult__Output>;
- serverStreamingTextGenerationTaskPredict(
- argument: ServerStreamingTextGenerationTaskRequest,
- options?: grpc.CallOptions,
-  ): grpc.ClientReadableStream<GeneratedTextStreamResult__Output>;
- TextClassificationTaskPredict(
- argument: TextClassificationTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<ClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- TextClassificationTaskPredict(
- argument: TextClassificationTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<ClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- TextClassificationTaskPredict(
- argument: TextClassificationTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<ClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- TextClassificationTaskPredict(
- argument: TextClassificationTaskRequest,
-    callback: grpc.requestCallback<ClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- textClassificationTaskPredict(
- argument: TextClassificationTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<ClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- textClassificationTaskPredict(
- argument: TextClassificationTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<ClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- textClassificationTaskPredict(
- argument: TextClassificationTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<ClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- textClassificationTaskPredict(
- argument: TextClassificationTaskRequest,
-    callback: grpc.requestCallback<ClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- TextGenerationTaskPredict(
- argument: TextGenerationTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<GeneratedTextResult__Output>,
- ): grpc.ClientUnaryCall;
- TextGenerationTaskPredict(
- argument: TextGenerationTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<GeneratedTextResult__Output>,
- ): grpc.ClientUnaryCall;
- TextGenerationTaskPredict(
- argument: TextGenerationTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<GeneratedTextResult__Output>,
- ): grpc.ClientUnaryCall;
- TextGenerationTaskPredict(
- argument: TextGenerationTaskRequest,
-    callback: grpc.requestCallback<GeneratedTextResult__Output>,
- ): grpc.ClientUnaryCall;
- textGenerationTaskPredict(
- argument: TextGenerationTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<GeneratedTextResult__Output>,
- ): grpc.ClientUnaryCall;
- textGenerationTaskPredict(
- argument: TextGenerationTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<GeneratedTextResult__Output>,
- ): grpc.ClientUnaryCall;
- textGenerationTaskPredict(
- argument: TextGenerationTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<GeneratedTextResult__Output>,
- ): grpc.ClientUnaryCall;
- textGenerationTaskPredict(
- argument: TextGenerationTaskRequest,
-    callback: grpc.requestCallback<GeneratedTextResult__Output>,
- ): grpc.ClientUnaryCall;
- TokenClassificationTaskPredict(
- argument: TokenClassificationTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<TokenClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- TokenClassificationTaskPredict(
- argument: TokenClassificationTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<TokenClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- TokenClassificationTaskPredict(
- argument: TokenClassificationTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<TokenClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- TokenClassificationTaskPredict(
- argument: TokenClassificationTaskRequest,
-    callback: grpc.requestCallback<TokenClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- tokenClassificationTaskPredict(
- argument: TokenClassificationTaskRequest,
- metadata: grpc.Metadata,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<TokenClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- tokenClassificationTaskPredict(
- argument: TokenClassificationTaskRequest,
- metadata: grpc.Metadata,
-    callback: grpc.requestCallback<TokenClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- tokenClassificationTaskPredict(
- argument: TokenClassificationTaskRequest,
- options: grpc.CallOptions,
-    callback: grpc.requestCallback<TokenClassificationResults__Output>,
- ): grpc.ClientUnaryCall;
- tokenClassificationTaskPredict(
- argument: TokenClassificationTaskRequest,
-    callback: grpc.requestCallback<TokenClassificationResults__Output>,