From 41c800e73f612119a6930276e388c2efbfe1e6f2 Mon Sep 17 00:00:00 2001 From: Dmitri Nasonov Date: Tue, 10 Dec 2024 22:22:05 +0000 Subject: [PATCH 1/3] added gpt-4o-2024-11-20 and other models --- public/models.json | 1230 +++++++++++++++++++++++++++----------------- 1 file changed, 759 insertions(+), 471 deletions(-) diff --git a/public/models.json b/public/models.json index 69f5b1d66..21b49ce2a 100644 --- a/public/models.json +++ b/public/models.json @@ -1,5 +1,29 @@ { "data": [ + { + "architecture": { + "instruct_type": null, + "modality": "text->text", + "tokenizer": "Yi" + }, + "context_length": 32768, + "created": 1719273600, + "description": "The Yi Large model was designed by 01.AI with the following use cases in mind: knowledge search, data classification, human-like chat bots, and customer service.\n\nIt stands out for its multilingual proficiency, particularly in Spanish, Chinese, Japanese, German, and French.\n\nCheck out the [launch announcement](https://01-ai.github.io/blog/01.ai-yi-large-llm-launch) to learn more.", + "id": "01-ai/yi-large", + "name": "01.AI: Yi Large", + "per_request_limits": null, + "pricing": { + "completion": "0.000003", + "image": "0", + "prompt": "0.000003", + "request": "0" + }, + "top_provider": { + "context_length": 32768, + "is_moderated": false, + "max_completion_tokens": 4096 + } + }, { "architecture": { "instruct_type": null, @@ -96,6 +120,78 @@ "max_completion_tokens": null } }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", + "tokenizer": "Nova" + }, + "context_length": 300000, + "created": 1733437363, + "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon focused on fast processing of image, video, and text inputs to generate text output. 
Amazon Nova Lite can handle real-time customer interactions, document analysis, and visual question-answering tasks with high accuracy.\n\nWith an input context of 300K tokens, it can analyze multiple images or up to 30 minutes of video in a single input.", + "id": "amazon/nova-lite-v1", + "name": "Amazon: Nova Lite 1.0", + "per_request_limits": null, + "pricing": { + "completion": "0.00000024", + "image": "0.00009", + "prompt": "0.00000006", + "request": "0" + }, + "top_provider": { + "context_length": 300000, + "is_moderated": true, + "max_completion_tokens": 5120 + } + }, + { + "architecture": { + "instruct_type": null, + "modality": "text->text", + "tokenizer": "Nova" + }, + "context_length": 128000, + "created": 1733437237, + "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length of 128K tokens and optimized for speed and cost, Amazon Nova Micro excels at tasks such as text summarization, translation, content classification, interactive chat, and brainstorming. It has simple mathematical reasoning and coding abilities.", + "id": "amazon/nova-micro-v1", + "name": "Amazon: Nova Micro 1.0", + "per_request_limits": null, + "pricing": { + "completion": "0.00000014", + "image": "0", + "prompt": "0.000000035", + "request": "0" + }, + "top_provider": { + "context_length": 128000, + "is_moderated": true, + "max_completion_tokens": 5120 + } + }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", + "tokenizer": "Nova" + }, + "context_length": 300000, + "created": 1733436303, + "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. 
As of December 2024, it achieves state-of-the-art performance on key benchmarks including visual question answering (TextVQA) and video understanding (VATEX).\n\nAmazon Nova Pro demonstrates strong capabilities in processing both visual and textual information and at analyzing financial documents.\n\n**NOTE**: Video input is not supported at this time.", + "id": "amazon/nova-pro-v1", + "name": "Amazon: Nova Pro 1.0", + "per_request_limits": null, + "pricing": { + "completion": "0.0000032", + "image": "0.0012", + "prompt": "0.0000008", + "request": "0" + }, + "top_provider": { + "context_length": 300000, + "is_moderated": true, + "max_completion_tokens": 5120 + } + }, { "architecture": { "instruct_type": null, @@ -128,7 +224,7 @@ }, "context_length": 200000, "created": 1710288000, - "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3-haiku) variant._", + "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal", "id": "anthropic/claude-3-haiku:beta", "name": "Anthropic: Claude 3 Haiku (self-moderated)", "per_request_limits": null, @@ -176,7 +272,7 @@ }, "context_length": 200000, "created": 1709596800, - "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. 
It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3-opus) variant._", + "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal", "id": "anthropic/claude-3-opus:beta", "name": "Anthropic: Claude 3 Opus (self-moderated)", "per_request_limits": null, @@ -224,7 +320,7 @@ }, "context_length": 200000, "created": 1709596800, - "description": "Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3-sonnet) variant._", + "description": "Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. 
Maximum utility at a lower price, dependable, balanced for scaled deployments.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal", "id": "anthropic/claude-3-sonnet:beta", "name": "Anthropic: Claude 3 Sonnet (self-moderated)", "per_request_limits": null, @@ -243,19 +339,19 @@ { "architecture": { "instruct_type": null, - "modality": "text+image->text", + "modality": "text->text", "tokenizer": "Claude" }, "context_length": 200000, - "created": 1729555200, - "description": "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal", - "id": "anthropic/claude-3.5-sonnet", - "name": "Anthropic: Claude 3.5 Sonnet", + "created": 1730678400, + "description": "Claude 3.5 Haiku offers enhanced capabilities in speed, coding accuracy, and tool use. 
Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.\n\nThis makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.\n\nThis model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).", + "id": "anthropic/claude-3.5-haiku", + "name": "Anthropic: Claude 3.5 Haiku", "per_request_limits": null, "pricing": { - "completion": "0.000015", - "image": "0.0048", - "prompt": "0.000003", + "completion": "0.000004", + "image": "0", + "prompt": "0.0000008", "request": "0" }, "top_provider": { @@ -267,19 +363,19 @@ { "architecture": { "instruct_type": null, - "modality": "text+image->text", + "modality": "text->text", "tokenizer": "Claude" }, "context_length": 200000, - "created": 1718841600, - "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. 
complex, multi-step problem solving tasks that require engaging with other systems)\n\nFor the latest version (2024-10-23), check out [Claude 3.5 Sonnet](/anthropic/claude-3.5-sonnet).\n\n#multimodal", - "id": "anthropic/claude-3.5-sonnet-20240620", - "name": "Anthropic: Claude 3.5 Sonnet (2024-06-20)", + "created": 1730678400, + "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", + "id": "anthropic/claude-3.5-haiku-20241022", + "name": "Anthropic: Claude 3.5 Haiku (2024-10-22)", "per_request_limits": null, "pricing": { - "completion": "0.000015", - "image": "0.0048", - "prompt": "0.000003", + "completion": "0.000004", + "image": "0", + "prompt": "0.0000008", "request": "0" }, "top_provider": { @@ -291,19 +387,19 @@ { "architecture": { "instruct_type": null, - "modality": "text+image->text", + "modality": "text->text", "tokenizer": "Claude" }, "context_length": 200000, - "created": 1718841600, - "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. 
Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\nFor the latest version (2024-10-23), check out [Claude 3.5 Sonnet](/anthropic/claude-3.5-sonnet).\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet-20240620) variant._", - "id": "anthropic/claude-3.5-sonnet-20240620:beta", - "name": "Anthropic: Claude 3.5 Sonnet (2024-06-20) (self-moderated)", + "created": 1730678400, + "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. 
It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", + "id": "anthropic/claude-3.5-haiku-20241022:beta", + "name": "Anthropic: Claude 3.5 Haiku (2024-10-22) (self-moderated)", "per_request_limits": null, "pricing": { - "completion": "0.000015", - "image": "0.0048", - "prompt": "0.000003", + "completion": "0.000004", + "image": "0", + "prompt": "0.0000008", "request": "0" }, "top_provider": { @@ -315,19 +411,19 @@ { "architecture": { "instruct_type": null, - "modality": "text+image->text", + "modality": "text->text", "tokenizer": "Claude" }, "context_length": 200000, - "created": 1729555200, - "description": "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. 
For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._", - "id": "anthropic/claude-3.5-sonnet:beta", - "name": "Anthropic: Claude 3.5 Sonnet (self-moderated)", + "created": 1730678400, + "description": "Claude 3.5 Haiku offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.\n\nThis makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.\n\nThis model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).", + "id": "anthropic/claude-3.5-haiku:beta", + "name": "Anthropic: Claude 3.5 Haiku (self-moderated)", "per_request_limits": null, "pricing": { - "completion": "0.000015", - "image": "0.0048", - "prompt": "0.000003", + "completion": "0.000004", + "image": "0", + "prompt": "0.0000008", "request": "0" }, "top_provider": { @@ -339,145 +435,97 @@ { "architecture": { "instruct_type": null, - "modality": "text->text", + "modality": "text+image->text", "tokenizer": "Claude" }, - "context_length": 100000, - "created": 1690502400, - "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.", - "id": "anthropic/claude-instant-1", - "name": "Anthropic: Claude Instant v1", + "context_length": 200000, + "created": 1729555200, + "description": "New Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. 
Sonnet is particularly good at:\n\n- Coding: Scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal", + "id": "anthropic/claude-3.5-sonnet", + "name": "Anthropic: Claude 3.5 Sonnet", "per_request_limits": null, "pricing": { - "completion": "0.0000024", - "image": "0", - "prompt": "0.0000008", + "completion": "0.000015", + "image": "0.0048", + "prompt": "0.000003", "request": "0" }, "top_provider": { - "context_length": 100000, + "context_length": 200000, "is_moderated": true, - "max_completion_tokens": 4096 + "max_completion_tokens": 8192 } }, { "architecture": { "instruct_type": null, - "modality": "text->text", - "tokenizer": "Claude" - }, - "context_length": 100000, - "created": 1690502400, - "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. 
For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-instant-1) variant._", - "id": "anthropic/claude-instant-1:beta", - "name": "Anthropic: Claude Instant v1 (self-moderated)", - "per_request_limits": null, - "pricing": { - "completion": "0.0000024", - "image": "0", - "prompt": "0.0000008", - "request": "0" - }, - "top_provider": { - "context_length": 100000, - "is_moderated": false, - "max_completion_tokens": 4096 - } - }, - { - "architecture": { - "instruct_type": "claude", - "modality": "text->text", - "tokenizer": "Claude" - }, - "context_length": 100000, - "created": 1690502400, - "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.", - "id": "anthropic/claude-instant-1.0", - "name": "Anthropic: Claude Instant v1.0", - "per_request_limits": null, - "pricing": { - "completion": "0.0000024", - "image": "0", - "prompt": "0.0000008", - "request": "0" - }, - "top_provider": { - "context_length": 100000, - "is_moderated": true, - "max_completion_tokens": 4096 - } - }, - { - "architecture": { - "instruct_type": "claude", - "modality": "text->text", + "modality": "text+image->text", "tokenizer": "Claude" }, - "context_length": 100000, - "created": 1700611200, - "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.", - "id": "anthropic/claude-instant-1.1", - "name": "Anthropic: Claude Instant v1.1", + "context_length": 200000, + "created": 1718841600, + "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. 
Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\nFor the latest version (2024-10-23), check out [Claude 3.5 Sonnet](/anthropic/claude-3.5-sonnet).\n\n#multimodal", + "id": "anthropic/claude-3.5-sonnet-20240620", + "name": "Anthropic: Claude 3.5 Sonnet (2024-06-20)", "per_request_limits": null, "pricing": { - "completion": "0.0000024", - "image": "0", - "prompt": "0.0000008", + "completion": "0.000015", + "image": "0.0048", + "prompt": "0.000003", "request": "0" }, "top_provider": { - "context_length": 100000, + "context_length": 200000, "is_moderated": true, - "max_completion_tokens": 2048 + "max_completion_tokens": 8192 } }, { "architecture": { - "instruct_type": "claude", - "modality": "text->text", + "instruct_type": null, + "modality": "text+image->text", "tokenizer": "Claude" }, - "context_length": 100000, - "created": 1690502400, - "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.", - "id": "anthropic/claude-1", - "name": "Anthropic: Claude v1", + "context_length": 200000, + "created": 1718841600, + "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. 
Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\nFor the latest version (2024-10-23), check out [Claude 3.5 Sonnet](/anthropic/claude-3.5-sonnet).\n\n#multimodal", + "id": "anthropic/claude-3.5-sonnet-20240620:beta", + "name": "Anthropic: Claude 3.5 Sonnet (2024-06-20) (self-moderated)", "per_request_limits": null, "pricing": { - "completion": "0.000024", - "image": "0", - "prompt": "0.000008", + "completion": "0.000015", + "image": "0.0048", + "prompt": "0.000003", "request": "0" }, "top_provider": { - "context_length": 100000, - "is_moderated": true, - "max_completion_tokens": 4096 + "context_length": 200000, + "is_moderated": false, + "max_completion_tokens": 8192 } }, { "architecture": { - "instruct_type": "claude", - "modality": "text->text", + "instruct_type": null, + "modality": "text+image->text", "tokenizer": "Claude" }, - "context_length": 100000, - "created": 1690502400, - "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.", - "id": "anthropic/claude-1.2", - "name": "Anthropic: Claude v1.2", + "context_length": 200000, + "created": 1729555200, + "description": "New Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. 
Sonnet is particularly good at:\n\n- Coding: Scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal", + "id": "anthropic/claude-3.5-sonnet:beta", + "name": "Anthropic: Claude 3.5 Sonnet (self-moderated)", "per_request_limits": null, "pricing": { - "completion": "0.000024", - "image": "0", - "prompt": "0.000008", + "completion": "0.000015", + "image": "0.0048", + "prompt": "0.000003", "request": "0" }, "top_provider": { - "context_length": 100000, - "is_moderated": true, - "max_completion_tokens": 4096 + "context_length": 200000, + "is_moderated": false, + "max_completion_tokens": 8192 } }, { @@ -512,7 +560,7 @@ }, "context_length": 200000, "created": 1700611200, - "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. 
For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-2) variant._", + "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.", "id": "anthropic/claude-2:beta", "name": "Anthropic: Claude v2 (self-moderated)", "per_request_limits": null, @@ -560,7 +608,7 @@ }, "context_length": 100000, "created": 1690502400, - "description": "Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-2.0) variant._", + "description": "Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.", "id": "anthropic/claude-2.0:beta", "name": "Anthropic: Claude v2.0 (self-moderated)", "per_request_limits": null, @@ -608,7 +656,7 @@ }, "context_length": 200000, "created": 1700611200, - "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. 
For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-2.1) variant._", + "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.", "id": "anthropic/claude-2.1:beta", "name": "Anthropic: Claude v2.1 (self-moderated)", "per_request_limits": null, @@ -728,7 +776,7 @@ }, "context_length": 128000, "created": 1724976000, - "description": "command-r-08-2024 is an update of the [Command R](/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", + "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. 
More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", "id": "cohere/command-r-08-2024", "name": "Cohere: Command R (08-2024)", "per_request_limits": null, @@ -800,7 +848,7 @@ }, "context_length": 128000, "created": 1724976000, - "description": "command-r-plus-08-2024 is an update of the [Command R+](/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", + "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", "id": "cohere/command-r-plus-08-2024", "name": "Cohere: Command R+ (08-2024)", "per_request_limits": null, @@ -824,7 +872,7 @@ }, "context_length": 32768, "created": 1711670400, - "description": "DBRX is a new open source large language model developed by Databricks. 
At 132B, it outperforms existing open source LLMs like Llama 2 70B and [Mixtral-8x7b](/mistralai/mixtral-8x7b) on standard industry benchmarks for language understanding, programming, math, and logic.\n\nIt uses a fine-grained mixture-of-experts (MoE) architecture. 36B parameters are active on any input. It was pre-trained on 12T tokens of text and code data. Compared to other open MoE models like Mixtral-8x7B and Grok-1, DBRX is fine-grained, meaning it uses a larger number of smaller experts.\n\nSee the launch announcement and benchmark results [here](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm).\n\n#moe", + "description": "DBRX is a new open source large language model developed by Databricks. At 132B, it outperforms existing open source LLMs like Llama 2 70B and [Mixtral-8x7b](/models/mistralai/mixtral-8x7b) on standard industry benchmarks for language understanding, programming, math, and logic.\n\nIt uses a fine-grained mixture-of-experts (MoE) architecture. 36B parameters are active on any input. It was pre-trained on 12T tokens of text and code data. Compared to other open MoE models like Mixtral-8x7B and Grok-1, DBRX is fine-grained, meaning it uses a larger number of smaller experts.\n\nSee the launch announcement and benchmark results [here](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm).\n\n#moe", "id": "databricks/dbrx-instruct", "name": "Databricks: DBRX 132B Instruct", "per_request_limits": null, @@ -846,9 +894,9 @@ "modality": "text->text", "tokenizer": "Other" }, - "context_length": 128000, + "context_length": 65536, "created": 1715644800, - "description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.\n\nDeepSeek-V2 Chat is a conversational finetune of DeepSeek-V2, a Mixture-of-Experts (MoE) language model. 
It comprises 236B total parameters, of which 21B are activated for each token.\n\nCompared with DeepSeek 67B, DeepSeek-V2 achieves stronger performance, and meanwhile saves 42.5% of training costs, reduces the KV cache by 93.3%, and boosts the maximum generation throughput to 5.76 times.\n\nDeepSeek-V2 achieves remarkable performance on both standard benchmarks and open-ended generation evaluations.", + "description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions. For model details, please visit [DeepSeek-V2 page](https://github.com/deepseek-ai/DeepSeek-V2) for more information.", "id": "deepseek/deepseek-chat", "name": "DeepSeek V2.5", "per_request_limits": null, @@ -859,9 +907,9 @@ "request": "0" }, "top_provider": { - "context_length": 128000, + "context_length": 65536, "is_moderated": false, - "max_completion_tokens": 4096 + "max_completion_tokens": 8192 } }, { @@ -872,7 +920,7 @@ }, "context_length": 32768, "created": 1703116800, - "description": "This is a 16k context fine-tune of [Mixtral-8x7b](/mistralai/mixtral-8x7b). It excels in coding tasks due to extensive training with coding data and is known for its obedience, although it lacks DPO tuning.\n\nThe model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored", + "description": "This is a 16k context fine-tune of [Mixtral-8x7b](/models/mistralai/mixtral-8x7b). It excels in coding tasks due to extensive training with coding data and is known for its obedience, although it lacks DPO tuning.\n\nThe model is uncensored and is stripped of alignment and bias. 
It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored", "id": "cognitivecomputations/dolphin-mixtral-8x7b", "name": "Dolphin 2.6 Mixtral 8x7B 🐬", "per_request_limits": null, @@ -894,9 +942,9 @@ "modality": "text->text", "tokenizer": "Mistral" }, - "context_length": 65536, + "context_length": 16000, "created": 1717804800, - "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.\n\nThis model is a successor to [Dolphin Mixtral 8x7B](/cognitivecomputations/dolphin-mixtral-8x7b).\n\nThe model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored", + "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/models/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.\n\nThis model is a successor to [Dolphin Mixtral 8x7B](/models/cognitivecomputations/dolphin-mixtral-8x7b).\n\nThe model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. 
Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored", "id": "cognitivecomputations/dolphin-mixtral-8x22b", "name": "Dolphin 2.9.2 Mixtral 8x22B 🐬", "per_request_limits": null, @@ -918,46 +966,46 @@ "modality": "text->text", "tokenizer": "Qwen" }, - "context_length": 32768, - "created": 1727654400, - "description": "A model specializing in RP and creative writing, this model is based on Qwen2.5-14B, fine-tuned with a mixture of synthetic and natural data.\n\nIt is trained on 1.5M tokens of role-play data, and fine-tuned on 1.5M tokens of synthetic data.", - "id": "eva-unit-01/eva-qwen-2.5-14b", - "name": "EVA Qwen2.5 14B", + "context_length": 16384, + "created": 1731104847, + "description": "A roleplaying/storywriting specialist model, full-parameter finetune of Qwen2.5-32B on mixture of synthetic and natural data.\n\nIt uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and \"flavor\" of the resulting model.", + "id": "eva-unit-01/eva-qwen-2.5-32b", + "name": "EVA Qwen2.5 32B", "per_request_limits": null, "pricing": { - "completion": "0.0000005", + "completion": "0.0000034", "image": "0", - "prompt": "0.00000025", + "prompt": "0.0000026", "request": "0" }, "top_provider": { - "context_length": 32768, + "context_length": 16384, "is_moderated": false, - "max_completion_tokens": null + "max_completion_tokens": 4096 } }, { "architecture": { - "instruct_type": "alpaca", + "instruct_type": "chatml", "modality": "text->text", - "tokenizer": "Llama2" + "tokenizer": "Qwen" }, - "context_length": 8192, - "created": 1713657600, - "description": "Creative writing model, routed with permission. 
It's fast, it keeps the conversation going, and it stays in character.\n\nIf you submit a raw prompt, you can use Alpaca or Vicuna formats.", - "id": "sao10k/fimbulvetr-11b-v2", - "name": "Fimbulvetr 11B v2", + "context_length": 16384, + "created": 1732210606, + "description": "A roleplay and storywriting specialist model, full-parameter finetune of Qwen2.5-72B on mixture of synthetic and natural data.\n\nIt uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and \"flavor\" of the resulting model.", + "id": "eva-unit-01/eva-qwen-2.5-72b", + "name": "EVA Qwen2.5 72B", "per_request_limits": null, "pricing": { - "completion": "0.0000015", + "completion": "0.000006", "image": "0", - "prompt": "0.000000375", + "prompt": "0.000004", "request": "0" }, "top_provider": { - "context_length": 8192, + "context_length": 16384, "is_moderated": false, - "max_completion_tokens": 2048 + "max_completion_tokens": 4096 } }, { @@ -981,7 +1029,7 @@ "top_provider": { "context_length": 6144, "is_moderated": false, - "max_completion_tokens": 400 + "max_completion_tokens": 512 } }, { @@ -997,15 +1045,87 @@ "name": "Google: Gemini 1.5 Flash-8B", "per_request_limits": null, "pricing": { - "completion": "0.00000015", + "completion": "0.00000015", + "image": "0", + "prompt": "0.0000000375", + "request": "0" + }, + "top_provider": { + "context_length": 1000000, + "is_moderated": false, + "max_completion_tokens": 8192 + } + }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", + "tokenizer": "Gemini" + }, + "context_length": 8192, + "created": 1731714740, + "description": "Gemini 11-14 (2024) experimental model features \"quality\" improvements.", + "id": "google/gemini-exp-1114:free", + "name": "Google: Gemini Experimental 1114 (free)", + "per_request_limits": null, + "pricing": { + "completion": "0", + "image": "0", + "prompt": "0", + "request": "0" + }, + "top_provider": { + "context_length": 8192, + "is_moderated": false, 
+ "max_completion_tokens": 4096 + } + }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", + "tokenizer": "Gemini" + }, + "context_length": 8192, + "created": 1732216725, + "description": "Experimental release (November 21st, 2024) of Gemini.", + "id": "google/gemini-exp-1121:free", + "name": "Google: Gemini Experimental 1121 (free)", + "per_request_limits": null, + "pricing": { + "completion": "0", + "image": "0", + "prompt": "0", + "request": "0" + }, + "top_provider": { + "context_length": 8192, + "is_moderated": false, + "max_completion_tokens": 4096 + } + }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", + "tokenizer": "Gemini" + }, + "context_length": 8192, + "created": 1733507713, + "description": "Experimental release (December 6, 2024) of Gemini.", + "id": "google/gemini-exp-1206:free", + "name": "Google: Gemini Experimental 1206 (free)", + "per_request_limits": null, + "pricing": { + "completion": "0", "image": "0", - "prompt": "0.0000000375", + "prompt": "0", "request": "0" }, "top_provider": { - "context_length": 1000000, + "context_length": 8192, "is_moderated": false, - "max_completion_tokens": 8192 + "max_completion_tokens": 4096 } }, { @@ -1040,7 +1160,7 @@ }, "context_length": 1000000, "created": 1724803200, - "description": "Gemini 1.5 Flash Experimental is an experimental version of the [Gemini 1.5 Flash](/google/gemini-flash-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is currently experimental and not suitable for production use-cases, and may be heavily rate-limited.", + "description": "Gemini 1.5 Flash Experimental is an experimental version of the [Gemini 1.5 Flash](/models/google/gemini-flash-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is experimental and not suited for production 
use-cases. It may be removed or redirected to another model in the future.", "id": "google/gemini-flash-1.5-exp", "name": "Google: Gemini Flash 1.5 Experimental", "per_request_limits": null, @@ -1064,7 +1184,7 @@ }, "context_length": 1000000, "created": 1724803200, - "description": "Gemini 1.5 Flash 8B Experimental is an experimental, 8B parameter version of the [Gemini 1.5 Flash](/google/gemini-flash-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is currently experimental and not suitable for production use-cases, and may be heavily rate-limited.", + "description": "Gemini 1.5 Flash 8B Experimental is an experimental, 8B parameter version of the [Gemini 1.5 Flash](/models/google/gemini-flash-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is currently experimental and not suitable for production use-cases, and may be heavily rate-limited.", "id": "google/gemini-flash-1.5-8b-exp", "name": "Google: Gemini Flash 8B 1.5 Experimental", "per_request_limits": null, @@ -1112,7 +1232,7 @@ }, "context_length": 2000000, "created": 1712620800, - "description": "Google's latest multimodal model, supporting image and video in text or chat prompts.\n\nOptimized for language tasks including:\n\n- Code generation\n- Text generation\n- Text editing\n- Problem solving\n- Recommendations\n- Information extraction\n- Data extraction or generation\n- AI agents\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal", + "description": "Google's latest multimodal model, supports image and video[0] in text or chat prompts.\n\nOptimized for language tasks including:\n\n- Code generation\n- Text generation\n- Text editing\n- Problem solving\n- Recommendations\n- Information extraction\n- Data extraction or generation\n- AI agents\n\nUsage of Gemini is 
subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n* [0]: Video input is not available through OpenRouter at this time.", "id": "google/gemini-pro-1.5", "name": "Google: Gemini Pro 1.5", "per_request_limits": null, @@ -1136,7 +1256,7 @@ }, "context_length": 1000000, "created": 1722470400, - "description": "Gemini 1.5 Pro (0827) is an experimental version of the [Gemini 1.5 Pro](/google/gemini-pro-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is currently experimental and not suitable for production use-cases, and may be heavily rate-limited.", + "description": "Gemini 1.5 Pro Experimental is a bleeding-edge version of the [Gemini 1.5 Pro](/models/google/gemini-pro-1.5) model. Because it's currently experimental, it will be **heavily rate-limited** by Google.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal", "id": "google/gemini-pro-1.5-exp", "name": "Google: Gemini Pro 1.5 Experimental", "per_request_limits": null, @@ -1206,7 +1326,7 @@ "modality": "text->text", "tokenizer": "Gemini" }, - "context_length": 8192, + "context_length": 4096, "created": 1719532800, "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. 
Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", "id": "google/gemma-2-9b-it", @@ -1215,7 +1335,7 @@ "pricing": { "completion": "0.00000006", "image": "0", - "prompt": "0.00000006", + "prompt": "0.00000003", "request": "0" }, "top_provider": { @@ -1232,7 +1352,7 @@ }, "context_length": 4096, "created": 1719532800, - "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).\n\n_These are free, rate-limited endpoints for [Gemma 2 9B](/google/gemma-2-9b-it). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. 
Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", "id": "google/gemma-2-9b-it:free", "name": "Google: Gemma 2 9B (free)", "per_request_limits": null, @@ -1248,6 +1368,30 @@ "max_completion_tokens": 2048 } }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", + "tokenizer": "Gemini" + }, + "context_length": 8192, + "created": 1732216551, + "description": "An experimental version of [Gemini 1.5 Pro](/google/gemini-pro-1.5) from Google.", + "id": "google/learnlm-1.5-pro-experimental:free", + "name": "Google: LearnLM 1.5 Pro Experimental (free)", + "per_request_limits": null, + "pricing": { + "completion": "0", + "image": "0", + "prompt": "0", + "request": "0" + }, + "top_provider": { + "context_length": 8192, + "is_moderated": false, + "max_completion_tokens": 4096 + } + }, { "architecture": { "instruct_type": null, @@ -1278,7 +1422,7 @@ "modality": "text->text", "tokenizer": "PaLM" }, - "context_length": 32760, + "context_length": 32768, "created": 1698969600, "description": "PaLM 2 is a language model by Google with improved multilingual, reasoning and coding capabilities.", "id": "google/palm-2-chat-bison-32k", @@ -1326,7 +1470,7 @@ "modality": "text->text", "tokenizer": "PaLM" }, - "context_length": 32760, + "context_length": 32768, "created": 1698969600, "description": "PaLM 2 fine-tuned for chatbot conversations that help with code-related questions.", "id": "google/palm-2-codechat-bison-32k", @@ -1352,7 +1496,7 @@ }, "context_length": 4096, "created": 1690934400, - "description": "Zephyr is a series of language models that are trained to act as helpful assistants. 
Zephyr-7B-β is the second model in the series, and is a fine-tuned version of [mistralai/Mistral-7B-v0.1](/mistralai/mistral-7b-instruct-v0.1) that was trained on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO).\n\n_These are free, rate-limited endpoints for [Zephyr 7B](/huggingfaceh4/zephyr-7b-beta). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Zephyr is a series of language models that are trained to act as helpful assistants. Zephyr-7B-β is the second model in the series, and is a fine-tuned version of [mistralai/Mistral-7B-v0.1](/models/mistralai/mistral-7b-instruct-v0.1) that was trained on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO).", "id": "huggingfaceh4/zephyr-7b-beta:free", "name": "Hugging Face: Zephyr 7B (free)", "per_request_limits": null, @@ -1422,46 +1566,22 @@ "modality": "text->text", "tokenizer": "Other" }, - "context_length": 32768, + "context_length": 66000, "created": 1727654400, "description": "Liquid's 40.3B Mixture of Experts (MoE) model. 
Liquid Foundation Models (LFMs) are large neural networks built with computational units rooted in dynamic systems.\n\nLFMs are general-purpose AI models that can be used to model any kind of sequential data, including video, audio, text, time series, and signals.\n\nSee the [launch announcement](https://www.liquid.ai/liquid-foundation-models) for benchmarks and more info.", "id": "liquid/lfm-40b", "name": "Liquid: LFM 40B MoE", "per_request_limits": null, "pricing": { - "completion": "0.000002", - "image": "0", - "prompt": "0.000001", - "request": "0" - }, - "top_provider": { - "context_length": 32768, - "is_moderated": false, - "max_completion_tokens": null - } - }, - { - "architecture": { - "instruct_type": "vicuna", - "modality": "text->text", - "tokenizer": "Other" - }, - "context_length": 8192, - "created": 1727654400, - "description": "Liquid's 40.3B Mixture of Experts (MoE) model. Liquid Foundation Models (LFMs) are large neural networks built with computational units rooted in dynamic systems.\n\nLFMs are general-purpose AI models that can be used to model any kind of sequential data, including video, audio, text, time series, and signals.\n\nSee the [launch announcement](https://www.liquid.ai/liquid-foundation-models) for benchmarks and more info.\n\n_These are free, rate-limited endpoints for [LFM 40B MoE](/liquid/lfm-40b). Outputs may be cached. 
Read about rate limits [here](/docs/limits)._", - "id": "liquid/lfm-40b:free", - "name": "Liquid: LFM 40B MoE (free)", - "per_request_limits": null, - "pricing": { - "completion": "0", + "completion": "0.00000015", "image": "0", - "prompt": "0", + "prompt": "0.00000015", "request": "0" }, "top_provider": { - "context_length": 8192, + "context_length": 66000, "is_moderated": false, - "max_completion_tokens": 4096 + "max_completion_tokens": 66000 } }, { @@ -1477,9 +1597,9 @@ "name": "Llama 3 8B Lunaris", "per_request_limits": null, "pricing": { - "completion": "0.000002", + "completion": "0.00000006", "image": "0", - "prompt": "0.000002", + "prompt": "0.00000003", "request": "0" }, "top_provider": { @@ -1555,9 +1675,9 @@ "request": "0" }, "top_provider": { - "context_length": 8192, + "context_length": 24576, "is_moderated": false, - "max_completion_tokens": null + "max_completion_tokens": 2048 } }, { @@ -1568,7 +1688,7 @@ }, "context_length": 24576, "created": 1714780800, - "description": "The NeverSleep team is back, with a Llama 3 8B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.\n\nTo enhance it's overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).\n\n_These are extended-context endpoints for [Llama 3 Lumimaid v0.1 8B](/neversleep/llama-3-lumimaid-8b). They may have higher prices._", + "description": "The NeverSleep team is back, with a Llama 3 8B finetune trained on their curated roleplay data. 
Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.\n\nTo enhance it's overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "neversleep/llama-3-lumimaid-8b:extended", "name": "Llama 3 Lumimaid 8B (extended)", "per_request_limits": null, @@ -1590,9 +1710,9 @@ "modality": "text->text", "tokenizer": "Llama3" }, - "context_length": 8192, + "context_length": 128000, "created": 1724803200, - "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/sao10k/l3-euryale-70b).", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", "id": "sao10k/l3.1-euryale-70b", "name": "Llama 3.1 Euryale 70B v2.2", "per_request_limits": null, @@ -1614,7 +1734,7 @@ "modality": "text->text", "tokenizer": "Llama3" }, - "context_length": 131072, + "context_length": 16384, "created": 1729555200, "description": "Lumimaid v0.2 70B is a finetune of [Llama 3.1 70B](/meta-llama/llama-3.1-70b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. 
Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "neversleep/llama-3.1-lumimaid-70b", @@ -1638,9 +1758,9 @@ "modality": "text->text", "tokenizer": "Llama3" }, - "context_length": 131072, + "context_length": 32768, "created": 1726358400, - "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "neversleep/llama-3.1-lumimaid-8b", "name": "Lumimaid v0.2 8B", "per_request_limits": null, @@ -1688,7 +1808,7 @@ }, "context_length": 16384, "created": 1720656000, - "description": "From the maker of [Goliath](https://openrouter.ai/alpindale/goliath-120b), Magnum 72B is the first in a new family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.\n\nThe model is based on [Qwen2 72B](https://openrouter.ai/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.", + "description": "From the maker of [Goliath](https://openrouter.ai/models/alpindale/goliath-120b), Magnum 72B is the first in a new family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.\n\nThe model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.", "id": "alpindale/magnum-72b", "name": "Magnum 72B", 
"per_request_limits": null, @@ -1712,14 +1832,14 @@ }, "context_length": 32768, "created": 1727654400, - "description": "From the maker of [Goliath](https://openrouter.ai/alpindale/goliath-120b), Magnum 72B is the seventh in a family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.\n\nThe model is based on [Qwen2 72B](https://openrouter.ai/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.", + "description": "From the maker of [Goliath](https://openrouter.ai/models/alpindale/goliath-120b), Magnum 72B is the seventh in a family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.\n\nThe model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.", "id": "anthracite-org/magnum-v2-72b", "name": "Magnum v2 72B", "per_request_limits": null, "pricing": { - "completion": "0.0000045", + "completion": "0.000003", "image": "0", - "prompt": "0.00000375", + "prompt": "0.000003", "request": "0" }, "top_provider": { @@ -1734,7 +1854,7 @@ "modality": "text->text", "tokenizer": "Qwen" }, - "context_length": 32768, + "context_length": 16384, "created": 1729555200, "description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and Opus(https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).", "id": "anthracite-org/magnum-v4-72b", @@ -1767,7 +1887,7 @@ "pricing": { "completion": "0.00000225", "image": "0", - "prompt": "0.000001875", + "prompt": "0.0000015", "request": "0" }, "top_provider": { @@ -1784,14 +1904,14 @@ }, "context_length": 8192, "created": 1713398400, - "description": "Meta's latest class of model (Llama 3) launched with a variety of 
sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3-70b-instruct", "name": "Meta: Llama 3 70B Instruct", "per_request_limits": null, "pricing": { "completion": "0.0000004", "image": "0", - "prompt": "0.00000035", + "prompt": "0.00000023", "request": "0" }, "top_provider": { @@ -1808,7 +1928,7 @@ }, "context_length": 8192, "created": 1713398400, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are higher-throughput endpoints for [Llama 3 70B Instruct](/meta-llama/llama-3-70b-instruct). They may have higher prices._", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. 
This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3-70b-instruct:nitro", "name": "Meta: Llama 3 70B Instruct (nitro)", "per_request_limits": null, @@ -1832,20 +1952,20 @@ }, "context_length": 8192, "created": 1713398400, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). 
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3-8b-instruct", "name": "Meta: Llama 3 8B Instruct", "per_request_limits": null, "pricing": { - "completion": "0.00000004", + "completion": "0.00000006", "image": "0", - "prompt": "0.00000004", + "prompt": "0.00000003", "request": "0" }, "top_provider": { "context_length": 8192, "is_moderated": false, - "max_completion_tokens": 8192 + "max_completion_tokens": 4096 } }, { @@ -1856,7 +1976,7 @@ }, "context_length": 16384, "created": 1713398400, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are extended-context endpoints for [Llama 3 8B Instruct](/meta-llama/llama-3-8b-instruct). They may have higher prices._", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3-8b-instruct:extended", "name": "Meta: Llama 3 8B Instruct (extended)", "per_request_limits": null, @@ -1880,7 +2000,7 @@ }, "context_length": 8192, "created": 1713398400, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. 
This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3 8B Instruct](/meta-llama/llama-3-8b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3-8b-instruct:free", "name": "Meta: Llama 3 8B Instruct (free)", "per_request_limits": null, @@ -1904,14 +2024,14 @@ }, "context_length": 8192, "created": 1713398400, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are higher-throughput endpoints for [Llama 3 8B Instruct](/meta-llama/llama-3-8b-instruct). They may have higher prices._", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. 
This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3-8b-instruct:nitro", "name": "Meta: Llama 3 8B Instruct (nitro)", "per_request_limits": null, "pricing": { - "completion": "0.000000162", + "completion": "0.0000002", "image": "0", - "prompt": "0.000000162", + "prompt": "0.0000002", "request": "0" }, "top_provider": { @@ -1926,9 +2046,9 @@ "modality": "text->text", "tokenizer": "Llama3" }, - "context_length": 131072, + "context_length": 32768, "created": 1722556800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3.1-405b", "name": "Meta: Llama 3.1 405B (base)", "per_request_limits": null, @@ -1950,16 +2070,16 @@ "modality": "text->text", "tokenizer": "Llama3" }, - "context_length": 131072, + "context_length": 32000, "created": 1721692800, - "description": "The highly anticipated 400B class of Llama3 is here! 
Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3.1-405b-instruct", "name": "Meta: Llama 3.1 405B Instruct", "per_request_limits": null, "pricing": { - "completion": "0.00000179", + "completion": "0.0000009", "image": "0", - "prompt": "0.00000179", + "prompt": "0.0000009", "request": "0" }, "top_provider": { @@ -1976,7 +2096,7 @@ }, "context_length": 8000, "created": 1721692800, - "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. 
This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3.1 405B Instruct](/meta-llama/llama-3.1-405b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3.1-405b-instruct:free", "name": "Meta: Llama 3.1 405B Instruct (free)", "per_request_limits": null, @@ -2000,7 +2120,7 @@ }, "context_length": 8000, "created": 1721692800, - "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. 
This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are higher-throughput endpoints for [Llama 3.1 405B Instruct](/meta-llama/llama-3.1-405b-instruct). They may have higher prices._", + "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3.1-405b-instruct:nitro", "name": "Meta: Llama 3.1 405B Instruct (nitro)", "per_request_limits": null, @@ -2024,18 +2144,18 @@ }, "context_length": 131072, "created": 1721692800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. 
This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3.1-70b-instruct", "name": "Meta: Llama 3.1 70B Instruct", "per_request_limits": null, "pricing": { "completion": "0.0000004", "image": "0", - "prompt": "0.00000035", + "prompt": "0.00000013", "request": "0" }, "top_provider": { - "context_length": 100000, + "context_length": 131072, "is_moderated": false, "max_completion_tokens": 4096 } @@ -2048,7 +2168,7 @@ }, "context_length": 8192, "created": 1721692800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3.1 70B Instruct](/meta-llama/llama-3.1-70b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). 
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3.1-70b-instruct:free", "name": "Meta: Llama 3.1 70B Instruct (free)", "per_request_limits": null, @@ -2072,7 +2192,7 @@ }, "context_length": 64000, "created": 1721692800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are higher-throughput endpoints for [Llama 3.1 70B Instruct](/meta-llama/llama-3.1-70b-instruct). They may have higher prices._", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3.1-70b-instruct:nitro", "name": "Meta: Llama 3.1 70B Instruct (nitro)", "per_request_limits": null, @@ -2096,20 +2216,20 @@ }, "context_length": 131072, "created": 1721692800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. 
This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3.1-8b-instruct", "name": "Meta: Llama 3.1 8B Instruct", "per_request_limits": null, "pricing": { "completion": "0.00000005", "image": "0", - "prompt": "0.00000005", + "prompt": "0.00000002", "request": "0" }, "top_provider": { - "context_length": 8192, + "context_length": 131072, "is_moderated": false, - "max_completion_tokens": 8192 + "max_completion_tokens": 4096 } }, { @@ -2120,7 +2240,7 @@ }, "context_length": 8192, "created": 1721692800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3.1 8B Instruct](/meta-llama/llama-3.1-8b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. 
This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-3.1-8b-instruct:free", "name": "Meta: Llama 3.1 8B Instruct (free)", "per_request_limits": null, @@ -2150,7 +2270,7 @@ "per_request_limits": null, "pricing": { "completion": "0.000000055", - "image": "0.000079475", + "image": "0.00007948", "prompt": "0.000000055", "request": "0" }, @@ -2166,9 +2286,9 @@ "modality": "text+image->text", "tokenizer": "Llama3" }, - "context_length": 4096, + "context_length": 8192, "created": 1727222400, - "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3.2 11B Vision Instruct](/meta-llama/llama-3.2-11b-vision-instruct). Outputs may be cached. 
Read about rate limits [here](/docs/limits)._", + "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "id": "meta-llama/llama-3.2-11b-vision-instruct:free", "name": "Meta: Llama 3.2 11B Vision Instruct (free)", "per_request_limits": null, @@ -2179,9 +2299,9 @@ "request": "0" }, "top_provider": { - "context_length": 4096, + "context_length": 8192, "is_moderated": false, - "max_completion_tokens": 2048 + "max_completion_tokens": 4096 } }, { @@ -2216,7 +2336,7 @@ }, "context_length": 4096, "created": 1727222400, - "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. 
Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\n\nSupporting eight core languages and fine-tunable for more, Llama 1.3B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3.2 1B Instruct](/meta-llama/llama-3.2-1b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\n\nSupporting eight core languages and fine-tunable for more, Llama 1.3B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "id": "meta-llama/llama-3.2-1b-instruct:free", "name": "Meta: Llama 3.2 1B Instruct (free)", "per_request_limits": null, @@ -2240,14 +2360,14 @@ }, "context_length": 131072, "created": 1727222400, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, 
reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. 
Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "id": "meta-llama/llama-3.2-3b-instruct", "name": "Meta: Llama 3.2 3B Instruct", "per_request_limits": null, "pricing": { - "completion": "0.00000005", + "completion": "0.00000003", "image": "0", - "prompt": "0.00000003", + "prompt": "0.000000018", "request": "0" }, "top_provider": { @@ -2264,7 +2384,7 @@ }, "context_length": 4096, "created": 1727222400, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3.2 3B Instruct](/meta-llama/llama-3.2-3b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. 
Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "id": "meta-llama/llama-3.2-3b-instruct:free", "name": "Meta: Llama 3.2 3B Instruct (free)", "per_request_limits": null, @@ -2293,15 +2413,15 @@ "name": "Meta: Llama 3.2 90B Vision Instruct", "per_request_limits": null, "pricing": { - "completion": "0.0000004", - "image": "0.00050575", - "prompt": "0.00000035", + "completion": "0.0000009", + "image": "0.001301", + "prompt": "0.0000009", "request": "0" }, "top_provider": { - "context_length": 8192, + "context_length": 131072, "is_moderated": false, - "max_completion_tokens": 4096 + "max_completion_tokens": null } }, { @@ -2312,7 +2432,7 @@ }, "context_length": 4096, "created": 1727222400, - "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. 
Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\n\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3.2 90B Vision Instruct](/meta-llama/llama-3.2-90b-vision-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. 
Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\n\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "id": "meta-llama/llama-3.2-90b-vision-instruct:free", "name": "Meta: Llama 3.2 90B Vision Instruct (free)", "per_request_limits": null, @@ -2328,6 +2448,30 @@ "max_completion_tokens": 2048 } }, + { + "architecture": { + "instruct_type": "llama3", + "modality": "text->text", + "tokenizer": "Llama3" + }, + "context_length": 131072, + "created": 1733506137, + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). 
The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "id": "meta-llama/llama-3.3-70b-instruct", + "name": "Meta: Llama 3.3 70B Instruct", + "per_request_limits": null, + "pricing": { + "completion": "0.0000004", + "image": "0", + "prompt": "0.00000013", + "request": "0" + }, + "top_provider": { + "context_length": 131072, + "is_moderated": false, + "max_completion_tokens": null + } + }, { "architecture": { "instruct_type": "llama2", @@ -2360,7 +2504,7 @@ }, "context_length": 8192, "created": 1715558400, - "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. 
If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "id": "meta-llama/llama-guard-2-8b", "name": "Meta: LlamaGuard 2 8B", "per_request_limits": null, @@ -2456,7 +2600,7 @@ }, "context_length": 128000, "created": 1708905600, - "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt is fluent in English, French, Spanish, German, and Italian, with high grammatical accuracy, and its long context window allows precise information recall from large documents.", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. 
Its long context window allows precise information recall from large documents.", "id": "mistralai/mistral-large", "name": "Mistral Large", "per_request_limits": null, @@ -2472,6 +2616,54 @@ "max_completion_tokens": null } }, + { + "architecture": { + "instruct_type": null, + "modality": "text->text", + "tokenizer": "Mistral" + }, + "context_length": 128000, + "created": 1731978415, + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n", + "id": "mistralai/mistral-large-2407", + "name": "Mistral Large 2407", + "per_request_limits": null, + "pricing": { + "completion": "0.000006", + "image": "0", + "prompt": "0.000002", + "request": "0" + }, + "top_provider": { + "context_length": 128000, + "is_moderated": false, + "max_completion_tokens": null + } + }, + { + "architecture": { + "instruct_type": null, + "modality": "text->text", + "tokenizer": "Mistral" + }, + "context_length": 128000, + "created": 1731978685, + "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.", + "id": "mistralai/mistral-large-2411", + "name": "Mistral Large 2411", + "per_request_limits": null, + "pricing": { + "completion": 
"0.000006", + "image": "0", + "prompt": "0.000002", + "request": "0" + }, + "top_provider": { + "context_length": 128000, + "is_moderated": false, + "max_completion_tokens": null + } + }, { "architecture": { "instruct_type": null, @@ -2502,22 +2694,22 @@ "modality": "text->text", "tokenizer": "Mistral" }, - "context_length": 32000, + "context_length": 16384, "created": 1722556800, "description": "A specialized story writing and roleplaying model based on Mistral's NeMo 12B Instruct. Fine-tuned on curated datasets including Reddit Writing Prompts and Opus Instruct 25K.\n\nThis model excels at creative writing, offering improved NSFW capabilities, with smarter and more active narration. It demonstrates remarkable versatility in both SFW and NSFW scenarios, with strong Out of Character (OOC) steering capabilities, allowing fine-tuned control over narrative direction and character behavior.\n\nCheck out the model's [HuggingFace page](https://huggingface.co/nothingiisreal/MN-12B-Celeste-V1.9) for details on what parameters and prompts work best!", "id": "nothingiisreal/mn-celeste-12b", "name": "Mistral Nemo 12B Celeste", "per_request_limits": null, "pricing": { - "completion": "0.0000015", + "completion": "0.0000012", "image": "0", - "prompt": "0.0000015", + "prompt": "0.0000008", "request": "0" }, "top_provider": { - "context_length": 32000, + "context_length": 16384, "is_moderated": false, - "max_completion_tokens": null + "max_completion_tokens": 4096 } }, { @@ -2526,20 +2718,44 @@ "modality": "text->text", "tokenizer": "Mistral" }, - "context_length": 12000, + "context_length": 16384, "created": 1723507200, - "description": "Starcannon 12B is a creative roleplay and story writing model, using [nothingiisreal/mn-celeste-12b](https://openrouter.ai/nothingiisreal/mn-celeste-12b) as a base and [intervitens/mini-magnum-12b-v1.1](https://huggingface.co/intervitens/mini-magnum-12b-v1.1) merged in using the [TIES](https://arxiv.org/abs/2306.01708) method.\n\nAlthough more 
similar to Magnum overall, the model remains very creative, with a pleasant writing style. It is recommended for people wanting more variety than Magnum, and yet more verbose prose than Celeste.", + "description": "Starcannon 12B is a creative roleplay and story writing model, using [nothingiisreal/mn-celeste-12b](https://openrouter.ai/models/nothingiisreal/mn-celeste-12b) as a base and [intervitens/mini-magnum-12b-v1.1](https://huggingface.co/intervitens/mini-magnum-12b-v1.1) merged in using the [TIES](https://arxiv.org/abs/2306.01708) method.\n\nAlthough more similar to Magnum overall, the model remains very creative, with a pleasant writing style. It is recommended for people wanting more variety than Magnum, and yet more verbose prose than Celeste.", "id": "aetherwiing/mn-starcannon-12b", "name": "Mistral Nemo 12B Starcannon", "per_request_limits": null, "pricing": { - "completion": "0.000002", + "completion": "0.0000012", "image": "0", - "prompt": "0.000002", + "prompt": "0.0000008", + "request": "0" + }, + "top_provider": { + "context_length": 16384, + "is_moderated": false, + "max_completion_tokens": 4096 + } + }, + { + "architecture": { + "instruct_type": "mistral", + "modality": "text->text", + "tokenizer": "Mistral" + }, + "context_length": 32000, + "created": 1731464428, + "description": "Inferor is a merge of top roleplay models, expert on immersive narratives and storytelling.\n\nThis model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method using [anthracite-org/magnum-v4-12b](https://openrouter.ai/anthracite-org/magnum-v4-72b) as a base.\n", + "id": "infermatic/mn-inferor-12b", + "name": "Mistral Nemo Inferor 12B", + "per_request_limits": null, + "pricing": { + "completion": "0.0000005", + "image": "0", + "prompt": "0.00000025", "request": "0" }, "top_provider": { - "context_length": 12000, + "context_length": 32000, "is_moderated": false, "max_completion_tokens": null } @@ -2552,7 +2768,7 @@ }, "context_length": 32000, 
"created": 1704844800, - "description": "Cost-efficient, fast, and reliable option for use cases such as translation, summarization, and sentiment analysis.", + "description": "With 22 billion parameters, Mistral Small v24.09 offers a convenient mid-point between [Mistral NeMo 12B](/mistralai/mistral-nemo) and [Mistral Large 2](/mistralai/mistral-large), providing a cost-effective solution that can be deployed across various platforms and environments. It has better reasoning, exhibits more capabilities, can produce and reason about code, and is multilingual, supporting English, French, German, Italian, and Spanish.", "id": "mistralai/mistral-small", "name": "Mistral Small", "per_request_limits": null, @@ -2576,7 +2792,7 @@ }, "context_length": 32000, "created": 1704844800, - "description": "This model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.", + "description": "This model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. 
It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.", "id": "mistralai/mistral-tiny", "name": "Mistral Tiny", "per_request_limits": null, @@ -2594,7 +2810,7 @@ }, { "architecture": { - "instruct_type": "mistral", + "instruct_type": null, "modality": "text->text", "tokenizer": "Mistral" }, @@ -2631,7 +2847,7 @@ "pricing": { "completion": "0.000000055", "image": "0", - "prompt": "0.000000055", + "prompt": "0.00000003", "request": "0" }, "top_provider": { @@ -2648,7 +2864,7 @@ }, "context_length": 8192, "created": 1716768000, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*\n\n_These are free, rate-limited endpoints for [Mistral 7B Instruct](/mistralai/mistral-7b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", "id": "mistralai/mistral-7b-instruct:free", "name": "Mistral: Mistral 7B Instruct (free)", "per_request_limits": null, @@ -2672,7 +2888,7 @@ }, "context_length": 32768, "created": 1716768000, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*\n\n_These are higher-throughput endpoints for [Mistral 7B Instruct](/mistralai/mistral-7b-instruct). 
They may have higher prices._", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", "id": "mistralai/mistral-7b-instruct:nitro", "name": "Mistral: Mistral 7B Instruct (nitro)", "per_request_limits": null, @@ -2744,14 +2960,14 @@ }, "context_length": 32768, "created": 1716768000, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", "id": "mistralai/mistral-7b-instruct-v0.3", "name": "Mistral: Mistral 7B Instruct v0.3", "per_request_limits": null, "pricing": { "completion": "0.000000055", "image": "0", - "prompt": "0.000000055", + "prompt": "0.00000003", "request": "0" }, "top_provider": { @@ -2773,9 +2989,9 @@ "name": "Mistral: Mistral Nemo", "per_request_limits": null, "pricing": { - "completion": "0.00000013", + "completion": "0.0000001", "image": "0", - "prompt": "0.00000013", + "prompt": "0.00000004", "request": "0" }, "top_provider": { @@ -2792,7 +3008,7 @@ }, "context_length": 65536, "created": 1713312000, - "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/mistralai/mixtral-8x22b). 
It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe", + "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe", "id": "mistralai/mixtral-8x22b-instruct", "name": "Mistral: Mixtral 8x22B Instruct", "per_request_limits": null, @@ -2810,13 +3026,13 @@ }, { "architecture": { - "instruct_type": "mistral", + "instruct_type": null, "modality": "text+image->text", "tokenizer": "Mistral" }, "context_length": 4096, "created": 1725926400, - "description": "The first image to text model from Mistral AI. Its weight was launched via torrent per their tradition: https://x.com/mistralai/status/1833758285167722836", + "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", "id": "mistralai/pixtral-12b", "name": "Mistral: Pixtral 12B", "per_request_limits": null, @@ -2832,6 +3048,30 @@ "max_completion_tokens": null } }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", + "tokenizer": "Mistral" + }, + "context_length": 128000, + "created": 1731977388, + "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). 
The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", + "id": "mistralai/pixtral-large-2411", + "name": "Mistral: Pixtral Large 2411", + "per_request_limits": null, + "pricing": { + "completion": "0.000006", + "image": "0.002888", + "prompt": "0.000002", + "request": "0" + }, + "top_provider": { + "context_length": 128000, + "is_moderated": false, + "max_completion_tokens": null + } + }, { "architecture": { "instruct_type": "none", @@ -2840,7 +3080,7 @@ }, "context_length": 32768, "created": 1702166400, - "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI. Incorporates 8 experts (feed-forward networks) for a total of 47B parameters. Base model (not fine-tuned for instructions) - see [Mixtral 8x7B Instruct](/mistralai/mixtral-8x7b-instruct) for an instruct-tuned model.\n\n#moe", + "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI. Incorporates 8 experts (feed-forward networks) for a total of 47B parameters. Base model (not fine-tuned for instructions) - see [Mixtral 8x7B Instruct](/models/mistralai/mixtral-8x7b-instruct) for an instruct-tuned model.\n\n#moe", "id": "mistralai/mixtral-8x7b", "name": "Mixtral 8x7B (base)", "per_request_limits": null, @@ -2888,7 +3128,7 @@ }, "context_length": 32768, "created": 1702166400, - "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe\n\n_These are higher-throughput endpoints for [Mixtral 8x7B Instruct](/mistralai/mixtral-8x7b-instruct). 
They may have higher prices._", + "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", "id": "mistralai/mixtral-8x7b-instruct:nitro", "name": "Mixtral 8x7B Instruct (nitro)", "per_request_limits": null, @@ -2917,15 +3157,15 @@ "name": "MythoMax 13B", "per_request_limits": null, "pricing": { - "completion": "0.00000009", + "completion": "0.00000008", "image": "0", - "prompt": "0.00000009", + "prompt": "0.00000008", "request": "0" }, "top_provider": { "context_length": 4096, "is_moderated": false, - "max_completion_tokens": null + "max_completion_tokens": 4096 } }, { @@ -2936,7 +3176,7 @@ }, "context_length": 8192, "created": 1688256000, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge\n\n_These are extended-context endpoints for [MythoMax 13B](/gryphe/mythomax-l2-13b). They may have higher prices._", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", "id": "gryphe/mythomax-l2-13b:extended", "name": "MythoMax 13B (extended)", "per_request_limits": null, @@ -2949,7 +3189,7 @@ "top_provider": { "context_length": 8192, "is_moderated": false, - "max_completion_tokens": 400 + "max_completion_tokens": 512 } }, { @@ -2960,7 +3200,7 @@ }, "context_length": 4096, "created": 1688256000, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge\n\n_These are free, rate-limited endpoints for [MythoMax 13B](/gryphe/mythomax-l2-13b). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. 
#merge", "id": "gryphe/mythomax-l2-13b:free", "name": "MythoMax 13B (free)", "per_request_limits": null, @@ -2984,7 +3224,7 @@ }, "context_length": 4096, "created": 1688256000, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge\n\n_These are higher-throughput endpoints for [MythoMax 13B](/gryphe/mythomax-l2-13b). They may have higher prices._", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", "id": "gryphe/mythomax-l2-13b:nitro", "name": "MythoMax 13B (nitro)", "per_request_limits": null, @@ -3000,54 +3240,6 @@ "max_completion_tokens": null } }, - { - "architecture": { - "instruct_type": "alpaca", - "modality": "text->text", - "tokenizer": "Mistral" - }, - "context_length": 32768, - "created": 1701907200, - "description": "From the creator of [MythoMax](/gryphe/mythomax-l2-13b), merges a suite of models to reduce word anticipation, ministrations, and other undesirable words in ChatGPT roleplaying data.\n\nIt combines [Neural Chat 7B](/intel/neural-chat-7b), Airoboros 7b, [Toppy M 7B](/undi95/toppy-m-7b), [Zepher 7b beta](/huggingfaceh4/zephyr-7b-beta), [Nous Capybara 34B](/nousresearch/nous-capybara-34b), [OpenHeremes 2.5](/teknium/openhermes-2.5-mistral-7b), and many others.\n\n#merge", - "id": "gryphe/mythomist-7b", - "name": "MythoMist 7B", - "per_request_limits": null, - "pricing": { - "completion": "0.000000375", - "image": "0", - "prompt": "0.000000375", - "request": "0" - }, - "top_provider": { - "context_length": 32768, - "is_moderated": false, - "max_completion_tokens": 2048 - } - }, - { - "architecture": { - "instruct_type": "alpaca", - "modality": "text->text", - "tokenizer": "Mistral" - }, - "context_length": 8192, - "created": 1701907200, - "description": "From the creator of [MythoMax](/gryphe/mythomax-l2-13b), merges a suite of models to reduce word anticipation, ministrations, 
and other undesirable words in ChatGPT roleplaying data.\n\nIt combines [Neural Chat 7B](/intel/neural-chat-7b), Airoboros 7b, [Toppy M 7B](/undi95/toppy-m-7b), [Zepher 7b beta](/huggingfaceh4/zephyr-7b-beta), [Nous Capybara 34B](/nousresearch/nous-capybara-34b), [OpenHeremes 2.5](/teknium/openhermes-2.5-mistral-7b), and many others.\n\n#merge\n\n_These are free, rate-limited endpoints for [MythoMist 7B](/gryphe/mythomist-7b). Outputs may be cached. Read about rate limits [here](/docs/limits)._", - "id": "gryphe/mythomist-7b:free", - "name": "MythoMist 7B (free)", - "per_request_limits": null, - "pricing": { - "completion": "0", - "image": "0", - "prompt": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8192, - "is_moderated": false, - "max_completion_tokens": 4096 - } - }, { "architecture": { "instruct_type": "alpaca", @@ -3104,7 +3296,7 @@ }, "context_length": 32768, "created": 1705363200, - "description": "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the [Mixtral 8x7B MoE LLM](/mistralai/mixtral-8x7b).\n\nThe model was trained on over 1,000,000 entries of primarily [GPT-4](/openai/gpt-4) generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.\n\n#moe", + "description": "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the [Mixtral 8x7B MoE LLM](/models/mistralai/mixtral-8x7b).\n\nThe model was trained on over 1,000,000 entries of primarily [GPT-4](/models/openai/gpt-4) generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.\n\n#moe", "id": "nousresearch/nous-hermes-2-mixtral-8x7b-dpo", "name": "Nous: Hermes 2 Mixtral 8x7B DPO", "per_request_limits": null, @@ -3120,30 +3312,6 @@ "max_completion_tokens": null } }, - { - "architecture": { - "instruct_type": "chatml", - 
"modality": "text->text", - "tokenizer": "Llama3" - }, - "context_length": 16384, - "created": 1720656000, - "description": "An experimental merge model based on Llama 3, exhibiting a very distinctive style of writing. It combines the the best of [Meta's Llama 3 8B](https://openrouter.ai/meta-llama/llama-3-8b-instruct) and Nous Research's [Hermes 2 Pro](https://openrouter.ai/nousresearch/hermes-2-pro-llama-3-8b).\n\nHermes-2 Θ (theta) was specifically designed with a few capabilities in mind: executing function calls, generating JSON output, and most remarkably, demonstrating metacognitive abilities (contemplating the nature of thought and recognizing the diversity of cognitive processes among individuals).", - "id": "nousresearch/hermes-2-theta-llama-3-8b", - "name": "Nous: Hermes 2 Theta 8B", - "per_request_limits": null, - "pricing": { - "completion": "0.000001125", - "image": "0", - "prompt": "0.0000001875", - "request": "0" - }, - "top_provider": { - "context_length": 16384, - "is_moderated": false, - "max_completion_tokens": 2048 - } - }, { "architecture": { "instruct_type": "chatml", @@ -3157,9 +3325,9 @@ "name": "Nous: Hermes 3 405B Instruct", "per_request_limits": null, "pricing": { - "completion": "0.00000249", + "completion": "0.0000009", "image": "0", - "prompt": "0.00000179", + "prompt": "0.0000009", "request": "0" }, "top_provider": { @@ -3174,46 +3342,22 @@ "modality": "text->text", "tokenizer": "Llama3" }, - "context_length": 8192, - "created": 1723766400, - "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and 
expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.\n\n_These are free, rate-limited endpoints for [Hermes 3 405B Instruct](/nousresearch/hermes-3-llama-3.1-405b). Outputs may be cached. Read about rate limits [here](/docs/limits)._", - "id": "nousresearch/hermes-3-llama-3.1-405b:free", - "name": "Nous: Hermes 3 405B Instruct (free)", - "per_request_limits": null, - "pricing": { - "completion": "0", - "image": "0", - "prompt": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8192, - "is_moderated": false, - "max_completion_tokens": 4096 - } - }, - { - "architecture": { - "instruct_type": "chatml", - "modality": "text->text", - "tokenizer": "Llama3" - }, - "context_length": 131072, + "context_length": 131000, "created": 1723939200, - "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", + "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 
2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", "id": "nousresearch/hermes-3-llama-3.1-70b", "name": "Nous: Hermes 3 70B Instruct", "per_request_limits": null, "pricing": { - "completion": "0.0000004", + "completion": "0.0000002", "image": "0", - "prompt": "0.0000004", + "prompt": "0.0000002", "request": "0" }, "top_provider": { - "context_length": 12288, + "context_length": 131000, "is_moderated": false, - "max_completion_tokens": null + "max_completion_tokens": 131000 } }, { @@ -3222,22 +3366,22 @@ "modality": "text->text", "tokenizer": "Llama3" }, - "context_length": 8192, + "context_length": 131000, "created": 1716768000, "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", "id": "nousresearch/hermes-2-pro-llama-3-8b", "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", "per_request_limits": null, "pricing": { - "completion": "0.00000014", + "completion": "0.00000003", "image": "0", - "prompt": "0.00000014", + "prompt": "0.00000003", "request": "0" }, "top_provider": { - "context_length": 8192, + "context_length": 131000, "is_moderated": false, - "max_completion_tokens": null + "max_completion_tokens": 131000 } }, { @@ 
-3246,22 +3390,22 @@ "modality": "text->text", "tokenizer": "Llama3" }, - "context_length": 131072, + "context_length": 131000, "created": 1728950400, "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "id": "nvidia/llama-3.1-nemotron-70b-instruct", "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", "per_request_limits": null, "pricing": { - "completion": "0.0000004", + "completion": "0.0000002", "image": "0", - "prompt": "0.00000035", + "prompt": "0.0000002", "request": "0" }, "top_provider": { - "context_length": 131072, + "context_length": 131000, "is_moderated": false, - "max_completion_tokens": 4096 + "max_completion_tokens": 131000 } }, { @@ -3272,7 +3416,7 @@ }, "context_length": 128000, "created": 1723593600, - "description": "Dynamic model continuously updated to the current version of [GPT-4o](/openai/gpt-4o) in ChatGPT. Intended for research and evaluation.\n\nNote: This model is currently experimental and not suitable for production use-cases, and may be heavily rate-limited.", + "description": "OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT. It therefore differs slightly from the API version of [GPT-4o](/models/openai/gpt-4o) in that it has additional RLHF. 
It is intended for research and evaluation.\n\nOpenAI notes that this model is not suited for production use-cases as it may be removed or redirected to another model in the future.", "id": "openai/chatgpt-4o-latest", "name": "OpenAI: ChatGPT-4o", "per_request_limits": null, @@ -3332,7 +3476,7 @@ }, "top_provider": { "context_length": 4095, - "is_moderated": true, + "is_moderated": false, "max_completion_tokens": 4096 } }, @@ -3608,7 +3752,7 @@ }, "context_length": 128000, "created": 1699833600, - "description": "Ability to understand images, in addition to all other [GPT-4 Turbo capabilties](/openai/gpt-4-turbo). Training data: up to Apr 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.\n\n#multimodal", + "description": "Ability to understand images, in addition to all other [GPT-4 Turbo capabilities](/models/openai/gpt-4-turbo). Training data: up to Apr 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.\n\n#multimodal", "id": "openai/gpt-4-vision-preview", "name": "OpenAI: GPT-4 Vision", "per_request_limits": null, @@ -3632,13 +3776,13 @@ }, "context_length": 128000, "created": 1715558400, - "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. 
GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "id": "openai/gpt-4o", "name": "OpenAI: GPT-4o", "per_request_limits": null, "pricing": { "completion": "0.00001", - "image": "0.0036125", + "image": "0.003613", "prompt": "0.0000025", "request": "0" }, @@ -3656,7 +3800,7 @@ }, "context_length": 128000, "created": 1715558400, - "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "id": "openai/gpt-4o-2024-05-13", "name": "OpenAI: GPT-4o (2024-05-13)", "per_request_limits": null, @@ -3680,13 +3824,13 @@ }, "context_length": 128000, "created": 1722902400, - "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. 
Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)", + "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the response_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)", "id": "openai/gpt-4o-2024-08-06", "name": "OpenAI: GPT-4o (2024-08-06)", "per_request_limits": null, "pricing": { "completion": "0.00001", - "image": "0.0036125", + "image": "0.003613", "prompt": "0.0000025", "request": "0" }, @@ -3699,12 +3843,36 @@ { "architecture": { "instruct_type": null, - "modality": "text->text", + "modality": "text+image->text", + "tokenizer": "GPT" + }, + "context_length": 128000, + "created": 1732127594, + "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. 
It’s also better at working with uploaded files, providing deeper insights & more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.", + "id": "openai/gpt-4o-2024-11-20", + "name": "OpenAI: GPT-4o (2024-11-20)", + "per_request_limits": null, + "pricing": { + "completion": "0.00001", + "image": "0.003613", + "prompt": "0.0000025", + "request": "0" + }, + "top_provider": { + "context_length": 128000, + "is_moderated": true, + "max_completion_tokens": 16384 + } + }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", "tokenizer": "GPT" }, "context_length": 128000, "created": 1715558400, - "description": "GPT-4o Extended is an experimental variant of GPT-4o with an extended max output tokens. This model supports only text input to text output.\n\n_These are extended-context endpoints for [GPT-4o](/openai/gpt-4o). They may have higher prices._", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. 
GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "id": "openai/gpt-4o:extended", "name": "OpenAI: GPT-4o (extended)", "per_request_limits": null, @@ -3728,7 +3896,7 @@ }, "context_length": 128000, "created": 1721260800, - "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). 
It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "id": "openai/gpt-4o-mini", "name": "OpenAI: GPT-4o-mini", "per_request_limits": null, @@ -3752,7 +3920,7 @@ }, "context_length": 128000, "created": 1721260800, - "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). 
It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "id": "openai/gpt-4o-mini-2024-07-18", "name": "OpenAI: GPT-4o-mini (2024-07-18)", "per_request_limits": null, @@ -3872,7 +4040,7 @@ }, "context_length": 8192, "created": 1701129600, - "description": "OpenChat 7B is a library of open-source language models, fine-tuned with \"C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)\" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.\n\n- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/openchat/openchat-7b).\n- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/openchat/openchat-8b).\n\n#open-source", + "description": "OpenChat 7B is a library of open-source language models, fine-tuned with \"C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)\" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.\n\n- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).\n- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/models/openchat/openchat-8b).\n\n#open-source", "id": "openchat/openchat-7b", "name": "OpenChat 3.5 7B", "per_request_limits": null, @@ -3896,7 +4064,7 @@ }, "context_length": 8192, "created": 1701129600, - "description": "OpenChat 7B is a library of open-source language models, fine-tuned with \"C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)\" - a strategy inspired by offline reinforcement learning. 
It has been trained on mixed-quality data without preference labels.\n\n- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/openchat/openchat-7b).\n- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/openchat/openchat-8b).\n\n#open-source\n\n_These are free, rate-limited endpoints for [OpenChat 3.5 7B](/openchat/openchat-7b). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "OpenChat 7B is a library of open-source language models, fine-tuned with \"C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)\" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.\n\n- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).\n- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/models/openchat/openchat-8b).\n\n#open-source", "id": "openchat/openchat-7b:free", "name": "OpenChat 3.5 7B (free)", "per_request_limits": null, @@ -3920,7 +4088,7 @@ }, "context_length": 4096, "created": 1700438400, - "description": "A continuation of [OpenHermes 2 model](/teknium/openhermes-2-mistral-7b), trained on additional code datasets.\nPotentially the most interesting finding from training on a good ratio (est. of around 7-14% of the total dataset) of code instruction was that it has boosted several non-code benchmarks, including TruthfulQA, AGIEval, and GPT4All suite. It did however reduce BigBench benchmark score, but the net gain overall is significant.", + "description": "A continuation of [OpenHermes 2 model](/models/teknium/openhermes-2-mistral-7b), trained on additional code datasets.\nPotentially the most interesting finding from training on a good ratio (est. of around 7-14% of the total dataset) of code instruction was that it has boosted several non-code benchmarks, including TruthfulQA, AGIEval, and GPT4All suite. 
It did however reduce BigBench benchmark score, but the net gain overall is significant.", "id": "teknium/openhermes-2.5-mistral-7b", "name": "OpenHermes 2.5 Mistral 7B", "per_request_limits": null, @@ -3968,7 +4136,7 @@ }, "context_length": 131072, "created": 1722470400, - "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/perplexity/llama-3.1-sonar-large-128k-online) of this model has Internet access.", + "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-large-128k-online) of this model has Internet access.", "id": "perplexity/llama-3.1-sonar-large-128k-chat", "name": "Perplexity: Llama 3.1 Sonar 70B", "per_request_limits": null, @@ -3992,7 +4160,7 @@ }, "context_length": 127072, "created": 1722470400, - "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/perplexity/llama-3.1-sonar-large-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online", + "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/models/perplexity/llama-3.1-sonar-large-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. 
#online", "id": "perplexity/llama-3.1-sonar-large-128k-online", "name": "Perplexity: Llama 3.1 Sonar 70B Online", "per_request_limits": null, @@ -4016,7 +4184,7 @@ }, "context_length": 131072, "created": 1722470400, - "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/perplexity/llama-3.1-sonar-small-128k-online) of this model has Internet access.", + "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-small-128k-online) of this model has Internet access.", "id": "perplexity/llama-3.1-sonar-small-128k-chat", "name": "Perplexity: Llama 3.1 Sonar 8B", "per_request_limits": null, @@ -4040,7 +4208,7 @@ }, "context_length": 127072, "created": 1722470400, - "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/perplexity/llama-3.1-sonar-small-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online", + "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/models/perplexity/llama-3.1-sonar-small-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online", "id": "perplexity/llama-3.1-sonar-small-128k-online", "name": "Perplexity: Llama 3.1 Sonar 8B Online", "per_request_limits": null, @@ -4064,7 +4232,7 @@ }, "context_length": 32768, "created": 1715644800, - "description": "Llama3 Sonar is Perplexity's latest model family. 
It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/perplexity/llama-3-sonar-large-32k-online) of this model has Internet access.", + "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3-sonar-large-32k-online) of this model has Internet access.", "id": "perplexity/llama-3-sonar-large-32k-chat", "name": "Perplexity: Llama3 Sonar 70B", "per_request_limits": null, @@ -4088,7 +4256,7 @@ }, "context_length": 28000, "created": 1715644800, - "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/perplexity/llama-3-sonar-large-32k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online", + "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/models/perplexity/llama-3-sonar-large-32k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online", "id": "perplexity/llama-3-sonar-large-32k-online", "name": "Perplexity: Llama3 Sonar 70B Online", "per_request_limits": null, @@ -4112,7 +4280,7 @@ }, "context_length": 32768, "created": 1715644800, - "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/perplexity/llama-3-sonar-small-32k-online) of this model has Internet access.", + "description": "Llama3 Sonar is Perplexity's latest model family. 
It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3-sonar-small-32k-online) of this model has Internet access.", "id": "perplexity/llama-3-sonar-small-32k-chat", "name": "Perplexity: Llama3 Sonar 8B", "per_request_limits": null, @@ -4136,7 +4304,7 @@ }, "context_length": 128000, "created": 1716508800, - "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.\n\nFor 4k context length, try [Phi-3 Medium 4K](/microsoft/phi-3-medium-4k-instruct).", + "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.\n\nFor 4k context length, try [Phi-3 Medium 4K](/models/microsoft/phi-3-medium-4k-instruct).", "id": "microsoft/phi-3-medium-128k-instruct", "name": "Phi-3 Medium 128K Instruct", "per_request_limits": null, @@ -4160,7 +4328,7 @@ }, "context_length": 8192, "created": 1716508800, - "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. 
Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.\n\nFor 4k context length, try [Phi-3 Medium 4K](/microsoft/phi-3-medium-4k-instruct).\n\n_These are free, rate-limited endpoints for [Phi-3 Medium 128K Instruct](/microsoft/phi-3-medium-128k-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.\n\nFor 4k context length, try [Phi-3 Medium 4K](/models/microsoft/phi-3-medium-4k-instruct).", "id": "microsoft/phi-3-medium-128k-instruct:free", "name": "Phi-3 Medium 128K Instruct (free)", "per_request_limits": null, @@ -4208,7 +4376,7 @@ }, "context_length": 8192, "created": 1716681600, - "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. 
This model is static, trained on an offline dataset with an October 2023 cutoff date.\n\n_These are free, rate-limited endpoints for [Phi-3 Mini 128K Instruct](/microsoft/phi-3-mini-128k-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.", "id": "microsoft/phi-3-mini-128k-instruct:free", "name": "Phi-3 Mini 128K Instruct (free)", "per_request_limits": null, @@ -4232,7 +4400,7 @@ }, "context_length": 128000, "created": 1724198400, - "description": "Phi-3.5 models are lightweight, state-of-the-art open models. These models were trained with Phi-3 datasets that include both synthetic data and the filtered, publicly available websites data, with a focus on high quality and reasoning-dense properties. Phi-3.5 Mini uses 3.8B parameters, and is a dense decoder-only transformer model using the same tokenizer as [Phi-3 Mini](/microsoft/phi-3-mini-128k-instruct).\n\nThe models underwent a rigorous enhancement process, incorporating both supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks that test common sense, language understanding, math, code, long context and logical reasoning, Phi-3.5 models showcased robust and state-of-the-art performance among models with less than 13 billion parameters.", + "description": "Phi-3.5 models are lightweight, state-of-the-art open models. 
These models were trained with Phi-3 datasets that include both synthetic data and the filtered, publicly available websites data, with a focus on high quality and reasoning-dense properties. Phi-3.5 Mini uses 3.8B parameters, and is a dense decoder-only transformer model using the same tokenizer as [Phi-3 Mini](/models/microsoft/phi-3-mini-128k-instruct).\n\nThe models underwent a rigorous enhancement process, incorporating both supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks that test common sense, language understanding, math, code, long context and logical reasoning, Phi-3.5 models showcased robust and state-of-the-art performance among models with less than 13 billion parameters.", "id": "microsoft/phi-3.5-mini-128k-instruct", "name": "Phi-3.5 Mini 128K Instruct", "per_request_limits": null, @@ -4254,22 +4422,22 @@ "modality": "text->text", "tokenizer": "Llama2" }, - "context_length": 8192, + "context_length": 4096, "created": 1693612800, "description": "A blend of the new Pygmalion-13b and MythoMax. #merge", "id": "pygmalionai/mythalion-13b", "name": "Pygmalion: Mythalion 13B", "per_request_limits": null, "pricing": { - "completion": "0.000001125", + "completion": "0.0000012", "image": "0", - "prompt": "0.000001125", + "prompt": "0.0000008", "request": "0" }, "top_provider": { - "context_length": 8192, + "context_length": 4096, "is_moderated": false, - "max_completion_tokens": 400 + "max_completion_tokens": 4096 } }, { @@ -4328,7 +4496,7 @@ }, "context_length": 8192, "created": 1721088000, - "description": "Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.\n\nIt features SwiGLU activation, attention QKV bias, and group query attention. 
It is pretrained on extensive data with supervised finetuning and direct preference optimization.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2/) and [GitHub repo](https://github.com/QwenLM/Qwen2).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).\n\n_These are free, rate-limited endpoints for [Qwen 2 7B Instruct](/qwen/qwen-2-7b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.\n\nIt features SwiGLU activation, attention QKV bias, and group query attention. It is pretrained on extensive data with supervised finetuning and direct preference optimization.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2/) and [GitHub repo](https://github.com/QwenLM/Qwen2).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "id": "qwen/qwen-2-7b-instruct:free", "name": "Qwen 2 7B Instruct (free)", "per_request_limits": null, @@ -4346,11 +4514,35 @@ }, { "architecture": { - "instruct_type": "chatml", - "modality": "text+image->text", + "instruct_type": null, + "modality": "text->text", "tokenizer": "Qwen" }, "context_length": 32768, + "created": 1732754541, + "description": "QwQ-32B-Preview is an experimental research model focused on AI reasoning capabilities developed by the Qwen Team. As a preview release, it demonstrates promising analytical abilities while having several important limitations:\n\n1. **Language Mixing and Code-Switching**: The model may mix languages or switch between them unexpectedly, affecting response clarity.\n2. 
**Recursive Reasoning Loops**: The model may enter circular reasoning patterns, leading to lengthy responses without a conclusive answer.\n3. **Safety and Ethical Considerations**: The model requires enhanced safety measures to ensure reliable and secure performance, and users should exercise caution when deploying it.\n4. **Performance and Benchmark Limitations**: The model excels in math and coding but has room for improvement in other areas, such as common sense reasoning and nuanced language understanding.\n\n", + "id": "qwen/qwq-32b-preview", + "name": "Qwen: QwQ 32B Preview", + "per_request_limits": null, + "pricing": { + "completion": "0.0000006", + "image": "0", + "prompt": "0.00000015", + "request": "0" + }, + "top_provider": { + "context_length": 32768, + "is_moderated": false, + "max_completion_tokens": null + } + }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", + "tokenizer": "Qwen" + }, + "context_length": 4096, "created": 1726617600, "description": "Qwen2 VL 72B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more 
details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "id": "qwen/qwen-2-vl-72b-instruct", @@ -4370,11 +4562,11 @@ }, { "architecture": { - "instruct_type": "chatml", + "instruct_type": null, "modality": "text+image->text", "tokenizer": "Qwen" }, - "context_length": 32768, + "context_length": 4096, "created": 1724803200, "description": "Qwen2 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "id": "qwen/qwen-2-vl-7b-instruct", @@ -4398,7 +4590,7 @@ "modality": "text->text", "tokenizer": "Qwen" }, - "context_length": 131072, + "context_length": 32000, "created": 1726704000, "description": 
"Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "id": "qwen/qwen-2.5-72b-instruct", @@ -4407,7 +4599,7 @@ "pricing": { "completion": "0.0000004", "image": "0", - "prompt": "0.00000035", + "prompt": "0.00000023", "request": "0" }, "top_provider": { @@ -4422,7 +4614,7 @@ "modality": "text->text", "tokenizer": "Qwen" }, - "context_length": 131072, + "context_length": 32768, "created": 1729036800, "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. 
More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "id": "qwen/qwen-2.5-7b-instruct", @@ -4440,6 +4632,30 @@ "max_completion_tokens": null } }, + { + "architecture": { + "instruct_type": "chatml", + "modality": "text->text", + "tokenizer": "Qwen" + }, + "context_length": 32768, + "created": 1731368400, + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significant improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**. 
Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", + "id": "qwen/qwen-2.5-coder-32b-instruct", + "name": "Qwen2.5 Coder 32B Instruct", + "per_request_limits": null, + "pricing": { + "completion": "0.00000018", + "image": "0", + "prompt": "0.00000008", + "request": "0" + }, + "top_provider": { + "context_length": 32768, + "is_moderated": false, + "max_completion_tokens": 4096 + } + }, { "architecture": { "instruct_type": "alpaca", @@ -4453,15 +4669,15 @@ "name": "ReMM SLERP 13B", "per_request_limits": null, "pricing": { - "completion": "0.000001125", + "completion": "0.0000012", "image": "0", - "prompt": "0.000001125", + "prompt": "0.0000008", "request": "0" }, "top_provider": { - "context_length": 6144, + "context_length": 4096, "is_moderated": false, - "max_completion_tokens": 400 + "max_completion_tokens": 4096 } }, { @@ -4472,7 +4688,7 @@ }, "context_length": 6144, "created": 1689984000, - "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge\n\n_These are extended-context endpoints for [ReMM SLERP 13B](/undi95/remm-slerp-l2-13b). They may have higher prices._", + "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. 
#merge", "id": "undi95/remm-slerp-l2-13b:extended", "name": "ReMM SLERP 13B (extended)", "per_request_limits": null, @@ -4485,7 +4701,7 @@ "top_provider": { "context_length": 6144, "is_moderated": false, - "max_completion_tokens": 400 + "max_completion_tokens": 512 } }, { @@ -4512,6 +4728,30 @@ "max_completion_tokens": null } }, + { + "architecture": { + "instruct_type": "vicuna", + "modality": "text->text", + "tokenizer": "Mistral" + }, + "context_length": 16000, + "created": 1731105083, + "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling", + "id": "raifle/sorcererlm-8x22b", + "name": "SorcererLM 8x22B", + "per_request_limits": null, + "pricing": { + "completion": "0.0000045", + "image": "0", + "prompt": "0.0000045", + "request": "0" + }, + "top_provider": { + "context_length": 16000, + "is_moderated": false, + "max_completion_tokens": null + } + }, { "architecture": { "instruct_type": "alpaca", @@ -4520,7 +4760,7 @@ }, "context_length": 4096, "created": 1699574400, - "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- [HuggingFaceH4/zephyr-7b-beta](/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored", + "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- 
[HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored", "id": "undi95/toppy-m-7b", "name": "Toppy M 7B", "per_request_limits": null, @@ -4544,7 +4784,7 @@ }, "context_length": 4096, "created": 1699574400, - "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- [HuggingFaceH4/zephyr-7b-beta](/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored\n\n_These are free, rate-limited endpoints for [Toppy M 7B](/undi95/toppy-m-7b). Outputs may be cached. Read about rate limits [here](/docs/limits)._", + "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- [HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored", "id": "undi95/toppy-m-7b:free", "name": "Toppy M 7B (free)", "per_request_limits": null, @@ -4568,7 +4808,7 @@ }, "context_length": 4096, "created": 1699574400, - "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- [HuggingFaceH4/zephyr-7b-beta](/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored\n\n_These are higher-throughput endpoints for [Toppy M 7B](/undi95/toppy-m-7b). 
They may have higher prices._", + "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- [HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored", "id": "undi95/toppy-m-7b:nitro", "name": "Toppy M 7B (nitro)", "per_request_limits": null, @@ -4584,6 +4824,30 @@ "max_completion_tokens": null } }, + { + "architecture": { + "instruct_type": "mistral", + "modality": "text->text", + "tokenizer": "Mistral" + }, + "context_length": 32000, + "created": 1731103448, + "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", + "id": "thedrummer/unslopnemo-12b", + "name": "Unslopnemo 12b", + "per_request_limits": null, + "pricing": { + "completion": "0.0000005", + "image": "0", + "prompt": "0.0000005", + "request": "0" + }, + "top_provider": { + "context_length": 32000, + "is_moderated": false, + "max_completion_tokens": null + } + }, { "architecture": { "instruct_type": "vicuna", @@ -4592,7 +4856,7 @@ }, "context_length": 32000, "created": 1713225600, - "description": "WizardLM-2 7B is the smaller variant of Microsoft AI's latest Wizard model. It is the fastest and achieves comparable performance with existing 10x larger opensource leading models\n\nIt is a finetune of [Mistral 7B Instruct](/mistralai/mistral-7b-instruct), using the same technique as [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", + "description": "WizardLM-2 7B is the smaller variant of Microsoft AI's latest Wizard model. 
It is the fastest and achieves comparable performance with existing 10x larger opensource leading models\n\nIt is a finetune of [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct), using the same technique as [WizardLM-2 8x22B](/models/microsoft/wizardlm-2-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", "id": "microsoft/wizardlm-2-7b", "name": "WizardLM-2 7B", "per_request_limits": null, @@ -4616,7 +4880,7 @@ }, "context_length": 65536, "created": 1713225600, - "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", + "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. 
It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", "id": "microsoft/wizardlm-2-8x22b", "name": "WizardLM-2 8x22B", "per_request_limits": null, @@ -4656,6 +4920,30 @@ "max_completion_tokens": null } }, + { + "architecture": { + "instruct_type": null, + "modality": "text+image->text", + "tokenizer": "Grok" + }, + "context_length": 8192, + "created": 1731976624, + "description": "Grok Vision Beta is xAI's experimental language model with vision capability.\n\n", + "id": "x-ai/grok-vision-beta", + "name": "xAI: Grok Vision Beta", + "per_request_limits": null, + "pricing": { + "completion": "0.000015", + "image": "0.0018", + "prompt": "0.000005", + "request": "0" + }, + "top_provider": { + "context_length": 8192, + "is_moderated": false, + "max_completion_tokens": null + } + }, { "architecture": { "instruct_type": "airoboros", @@ -4664,7 +4952,7 @@ }, "context_length": 8192, "created": 1697328000, - "description": "Xwin-LM aims to develop and open-source alignment tech for LLMs. Our first release, built-upon on the [Llama2](/${Model.Llama_2_13B_Chat}) base models, ranked TOP-1 on AlpacaEval. Notably, it's the first to surpass [GPT-4](/${Model.GPT_4}) on this benchmark. The project will be continuously updated.", + "description": "Xwin-LM aims to develop and open-source alignment tech for LLMs. Our first release, built-upon on the [Llama2](/models/${Model.Llama_2_13B_Chat}) base models, ranked TOP-1 on AlpacaEval. Notably, it's the first to surpass [GPT-4](/models/${Model.GPT_4}) on this benchmark. 
The project will be continuously updated.", "id": "xwin-lm/xwin-lm-70b", "name": "Xwin 70B", "per_request_limits": null, @@ -4677,7 +4965,7 @@ "top_provider": { "context_length": 8192, "is_moderated": false, - "max_completion_tokens": 400 + "max_completion_tokens": 512 } } ] From b72978d92df5c42c33d8ec3a0922dd4a02d0a21a Mon Sep 17 00:00:00 2001 From: Dmitri Nasonov Date: Wed, 11 Dec 2024 00:24:31 +0000 Subject: [PATCH 2/3] A: Added version to be displayed at the bottom right R: QoL A2: Updated export image process to correctly position the sticky chat menu R: Previously it was rendered at the bottom instead of the top --- index.html | 15 +++++++++++++++ src/utils/chat.ts | 22 +++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/index.html b/index.html index a698b3675..347ba9149 100644 --- a/index.html +++ b/index.html @@ -23,6 +23,21 @@
+
+ diff --git a/src/utils/chat.ts b/src/utils/chat.ts index 0436aff72..90fc1993c 100644 --- a/src/utils/chat.ts +++ b/src/utils/chat.ts @@ -15,11 +15,31 @@ export const formatNumber = (num: number): string => { export const htmlToImg = async (html: HTMLDivElement) => { const needResize = window.innerWidth >= 1024; const initialWidth = html.style.width; + const initialHeight = html.style.height; + const initialPosition = html.style.position; + if (needResize) { html.style.width = '1023px'; } + + // Apply print styles temporarily + html.classList.add('print-mode'); + + // Force layout reflow + html.offsetHeight; + + // Temporarily set position to absolute + html.style.position = 'absolute'; + const canvas = await html2canvas(html); - if (needResize) html.style.width = initialWidth; + + // Revert styles + if (needResize) { + html.style.width = initialWidth; + html.style.height = initialHeight; + } + html.style.position = initialPosition; + html.classList.remove('print-mode'); const dataURL = canvas.toDataURL('image/png'); return dataURL; }; From b6156478e661cf3c1fe3037eff03be8a281fefde Mon Sep 17 00:00:00 2001 From: Dmitri Nasonov Date: Wed, 11 Dec 2024 00:27:12 +0000 Subject: [PATCH 3/3] chore: bump version to 1.16.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index a534b6ff3..5135cea85 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "better-chatgpt", "private": true, - "version": "1.15.0", + "version": "1.16.0", "type": "module", "homepage": "./", "main": "electron/index.cjs",