-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(deps): update dependency tiktoken to v1.0.20 #239
Conversation
anthropic debug - [puLL-Merge] - dqbd/tiktoken@@dqbd/tiktoken@1.0.18..@dqbd/tiktoken@1.0.20
Diff
diff --git js/CHANGELOG.md js/CHANGELOG.md
index e654ab7..0a4ce5c 100644
--- js/CHANGELOG.md
+++ js/CHANGELOG.md
@@ -1,5 +1,17 @@
# js-tiktoken
+## 1.0.18
+
+### Patch Changes
+
+- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models
+
+## 1.0.17
+
+### Patch Changes
+
+- 5982def: feat: add o3-mini, missing o1 model
+
## 1.0.16
### Patch Changes
diff --git js/package.json js/package.json
index b0fab15..42c78c4 100644
--- js/package.json
+++ js/package.json
@@ -1,6 +1,6 @@
{
"name": "js-tiktoken",
- "version": "1.0.16",
+ "version": "1.0.18",
"description": "JavaScript port of tiktoken",
"license": "MIT",
"scripts": {
diff --git js/src/core.ts js/src/core.ts
index 6688b32..90bde7a 100644
--- js/src/core.ts
+++ js/src/core.ts
@@ -279,13 +279,17 @@ export function getEncodingNameForModel(model: TiktokenModel) {
case "gpt-4o":
case "gpt-4o-2024-05-13":
case "gpt-4o-2024-08-06":
+ case "gpt-4o-2024-11-20":
case "gpt-4o-mini-2024-07-18":
case "gpt-4o-mini":
+ case "o1":
case "o1-2024-12-17":
case "o1-mini":
case "o1-preview":
case "o1-preview-2024-09-12":
case "o1-mini-2024-09-12":
+ case "o3-mini":
+ case "o3-mini-2025-01-31":
case "chatgpt-4o-latest":
case "gpt-4o-realtime":
case "gpt-4o-realtime-preview-2024-10-01":
diff --git tiktoken/model_to_encoding.json tiktoken/model_to_encoding.json
index 445f32a..6b55f31 100644
--- tiktoken/model_to_encoding.json
+++ tiktoken/model_to_encoding.json
@@ -57,14 +57,18 @@
"gpt-4-vision-preview": "cl100k_base",
"gpt-4o": "o200k_base",
"gpt-4o-2024-05-13": "o200k_base",
- "gpt-4o-2024-08-06":"o200k_base",
+ "gpt-4o-2024-08-06": "o200k_base",
+ "gpt-4o-2024-11-20": "o200k_base",
"gpt-4o-mini-2024-07-18": "o200k_base",
"gpt-4o-mini": "o200k_base",
+ "o1": "o200k_base",
"o1-2024-12-17": "o200k_base",
"o1-mini": "o200k_base",
"o1-preview": "o200k_base",
"o1-preview-2024-09-12": "o200k_base",
"o1-mini-2024-09-12": "o200k_base",
+ "o3-mini": "o200k_base",
+ "o3-mini-2025-01-31": "o200k_base",
"chatgpt-4o-latest": "o200k_base",
"gpt-4o-realtime": "o200k_base",
"gpt-4o-realtime-preview-2024-10-01": "o200k_base"
diff --git wasm/CHANGELOG.md wasm/CHANGELOG.md
index f457b73..8d9bf5b 100644
--- wasm/CHANGELOG.md
+++ wasm/CHANGELOG.md
@@ -1,5 +1,17 @@
# tiktoken
+## 1.0.20
+
+### Patch Changes
+
+- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models
+
+## 1.0.19
+
+### Patch Changes
+
+- 5982def: feat: add o3-mini, missing o1 model
+
## 1.0.18
### Patch Changes
diff --git wasm/alias/CHANGELOG.md wasm/alias/CHANGELOG.md
index 41dfd2d..0379d0c 100644
--- wasm/alias/CHANGELOG.md
+++ wasm/alias/CHANGELOG.md
@@ -1,5 +1,17 @@
# @dqbd/tiktoken
+## 1.0.20
+
+### Patch Changes
+
+- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models
+
+## 1.0.19
+
+### Patch Changes
+
+- 5982def: feat: add o3-mini, missing o1 model
+
## 1.0.18
### Patch Changes
diff --git wasm/alias/package.json wasm/alias/package.json
index 2e73252..ad1b291 100644
--- wasm/alias/package.json
+++ wasm/alias/package.json
@@ -1,6 +1,6 @@
{
"name": "@dqbd/tiktoken",
- "version": "1.0.18",
+ "version": "1.0.20",
"repository": {
"type": "git",
"url": "https://github.com/dqbd/tiktoken"
diff --git wasm/package.json wasm/package.json
index 59b446b..7dec0d5 100644
--- wasm/package.json
+++ wasm/package.json
@@ -1,6 +1,6 @@
{
"name": "tiktoken",
- "version": "1.0.18",
+ "version": "1.0.20",
"description": "JS/WASM bindings for tiktoken",
"license": "MIT",
"scripts": {
diff --git wasm/src/lib.rs wasm/src/lib.rs
index 7da08e8..65b29a0 100644
--- wasm/src/lib.rs
+++ wasm/src/lib.rs
@@ -385,6 +385,8 @@ export type TiktokenModel =
| "text-davinci-edit-001"
| "code-davinci-edit-001"
| "text-embedding-ada-002"
+ | "text-embedding-3-small"
+ | "text-embedding-3-large"
| "text-similarity-davinci-001"
| "text-similarity-curie-001"
| "text-similarity-babbage-001"
@@ -421,13 +423,17 @@ export type TiktokenModel =
| "gpt-4o"
| "gpt-4o-2024-05-13"
| "gpt-4o-2024-08-06"
+ | "gpt-4o-2024-11-20"
| "gpt-4o-mini-2024-07-18"
| "gpt-4o-mini"
+ | "o1"
| "o1-2024-12-17"
| "o1-mini"
| "o1-preview"
| "o1-preview-2024-09-12"
| "o1-mini-2024-09-12"
+ | "o3-mini"
+ | "o3-mini-2025-01-31"
| "chatgpt-4o-latest"
| "gpt-4o-realtime"
| "gpt-4o-realtime-preview-2024-10-01"
@@ -454,9 +460,10 @@ pub fn encoding_for_model(
"text-babbage-001" => Ok("r50k_base"),
"text-ada-001" => Ok("r50k_base"),
"davinci" => Ok("r50k_base"),
+ "davinci-002" => Ok("cl100k_base"),
"curie" => Ok("r50k_base"),
"babbage" => Ok("r50k_base"),
- "babbage-002" => Ok("r50k_base"),
+ "babbage-002" => Ok("cl100k_base"),
"ada" => Ok("r50k_base"),
"code-davinci-002" => Ok("p50k_base"),
"code-davinci-001" => Ok("p50k_base"),
@@ -485,7 +492,7 @@ pub fn encoding_for_model(
"gpt-3.5-turbo-0613" => Ok("cl100k_base"),
"gpt-3.5-turbo-16k" => Ok("cl100k_base"),
"gpt-3.5-turbo-16k-0613" => Ok("cl100k_base"),
- "gpt-3.5-turbo-instruct" => Ok("clk100k_base"),
+ "gpt-3.5-turbo-instruct" => Ok("cl100k_base"),
"gpt-3.5-turbo-instruct-0914" => Ok("cl100k_base"),
"gpt-4" => Ok("cl100k_base"),
"gpt-4-0314" => Ok("cl100k_base"),
@@ -505,8 +512,10 @@ pub fn encoding_for_model(
"gpt-4o" => Ok("o200k_base"),
"gpt-4o-2024-05-13" => Ok("o200k_base"),
"gpt-4o-2024-08-06" => Ok("o200k_base"),
+ "gpt-4o-2024-11-20" => Ok("o200k_base"),
"gpt-4o-mini-2024-07-18" => Ok("o200k_base"),
"gpt-4o-mini" => Ok("o200k_base"),
+ "o1" => Ok("o200k_base"),
"o1-2024-12-17" => Ok("o200k_base"),
"o1-mini" => Ok("o200k_base"),
"o1-preview" => Ok("o200k_base"),
@@ -515,6 +524,8 @@ pub fn encoding_for_model(
"chatgpt-4o-latest" => Ok("o200k_base"),
"gpt-4o-realtime" => Ok("o200k_base"),
"gpt-4o-realtime-preview-2024-10-01" => Ok("o200k_base"),
+ "o3-mini" => Ok("o200k_base"),
+ "o3-mini-2025-01-31" => Ok("o200k_base"),
model => Err(JsError::new(
format!("Invalid model: {}", model.to_string()).as_str(),
)),
Description
This PR updates the tiktoken library to add support for new AI models and fix incorrect tokenizer mappings for some existing models. It includes version bumps across multiple packages and adds several new model identifiers to the supported list.
Changes
sequenceDiagram
participant Client
participant Tiktoken
participant Tokenizer
Client->>Tiktoken: Request tokenization for model
Tiktoken->>Tiktoken: getEncodingNameForModel()
alt New Model (o3-mini, gpt-4o-2024-11-20, etc)
Tiktoken->>Tokenizer: Use o200k_base
else Fixed Model (gpt-3.5-turbo-instruct)
Tiktoken->>Tokenizer: Use cl100k_base
end
Tokenizer->>Client: Return tokenized result
|
bedrock debug - [puLL-Merge] - dqbd/tiktoken@@dqbd/tiktoken@1.0.18..@dqbd/tiktoken@1.0.20
Diff
diff --git js/CHANGELOG.md js/CHANGELOG.md
index e654ab7..0a4ce5c 100644
--- js/CHANGELOG.md
+++ js/CHANGELOG.md
@@ -1,5 +1,17 @@
# js-tiktoken
+## 1.0.18
+
+### Patch Changes
+
+- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models
+
+## 1.0.17
+
+### Patch Changes
+
+- 5982def: feat: add o3-mini, missing o1 model
+
## 1.0.16
### Patch Changes
diff --git js/package.json js/package.json
index b0fab15..42c78c4 100644
--- js/package.json
+++ js/package.json
@@ -1,6 +1,6 @@
{
"name": "js-tiktoken",
- "version": "1.0.16",
+ "version": "1.0.18",
"description": "JavaScript port of tiktoken",
"license": "MIT",
"scripts": {
diff --git js/src/core.ts js/src/core.ts
index 6688b32..90bde7a 100644
--- js/src/core.ts
+++ js/src/core.ts
@@ -279,13 +279,17 @@ export function getEncodingNameForModel(model: TiktokenModel) {
case "gpt-4o":
case "gpt-4o-2024-05-13":
case "gpt-4o-2024-08-06":
+ case "gpt-4o-2024-11-20":
case "gpt-4o-mini-2024-07-18":
case "gpt-4o-mini":
+ case "o1":
case "o1-2024-12-17":
case "o1-mini":
case "o1-preview":
case "o1-preview-2024-09-12":
case "o1-mini-2024-09-12":
+ case "o3-mini":
+ case "o3-mini-2025-01-31":
case "chatgpt-4o-latest":
case "gpt-4o-realtime":
case "gpt-4o-realtime-preview-2024-10-01":
diff --git tiktoken/model_to_encoding.json tiktoken/model_to_encoding.json
index 445f32a..6b55f31 100644
--- tiktoken/model_to_encoding.json
+++ tiktoken/model_to_encoding.json
@@ -57,14 +57,18 @@
"gpt-4-vision-preview": "cl100k_base",
"gpt-4o": "o200k_base",
"gpt-4o-2024-05-13": "o200k_base",
- "gpt-4o-2024-08-06":"o200k_base",
+ "gpt-4o-2024-08-06": "o200k_base",
+ "gpt-4o-2024-11-20": "o200k_base",
"gpt-4o-mini-2024-07-18": "o200k_base",
"gpt-4o-mini": "o200k_base",
+ "o1": "o200k_base",
"o1-2024-12-17": "o200k_base",
"o1-mini": "o200k_base",
"o1-preview": "o200k_base",
"o1-preview-2024-09-12": "o200k_base",
"o1-mini-2024-09-12": "o200k_base",
+ "o3-mini": "o200k_base",
+ "o3-mini-2025-01-31": "o200k_base",
"chatgpt-4o-latest": "o200k_base",
"gpt-4o-realtime": "o200k_base",
"gpt-4o-realtime-preview-2024-10-01": "o200k_base"
diff --git wasm/CHANGELOG.md wasm/CHANGELOG.md
index f457b73..8d9bf5b 100644
--- wasm/CHANGELOG.md
+++ wasm/CHANGELOG.md
@@ -1,5 +1,17 @@
# tiktoken
+## 1.0.20
+
+### Patch Changes
+
+- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models
+
+## 1.0.19
+
+### Patch Changes
+
+- 5982def: feat: add o3-mini, missing o1 model
+
## 1.0.18
### Patch Changes
diff --git wasm/alias/CHANGELOG.md wasm/alias/CHANGELOG.md
index 41dfd2d..0379d0c 100644
--- wasm/alias/CHANGELOG.md
+++ wasm/alias/CHANGELOG.md
@@ -1,5 +1,17 @@
# @dqbd/tiktoken
+## 1.0.20
+
+### Patch Changes
+
+- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models
+
+## 1.0.19
+
+### Patch Changes
+
+- 5982def: feat: add o3-mini, missing o1 model
+
## 1.0.18
### Patch Changes
diff --git wasm/alias/package.json wasm/alias/package.json
index 2e73252..ad1b291 100644
--- wasm/alias/package.json
+++ wasm/alias/package.json
@@ -1,6 +1,6 @@
{
"name": "@dqbd/tiktoken",
- "version": "1.0.18",
+ "version": "1.0.20",
"repository": {
"type": "git",
"url": "https://github.com/dqbd/tiktoken"
diff --git wasm/package.json wasm/package.json
index 59b446b..7dec0d5 100644
--- wasm/package.json
+++ wasm/package.json
@@ -1,6 +1,6 @@
{
"name": "tiktoken",
- "version": "1.0.18",
+ "version": "1.0.20",
"description": "JS/WASM bindings for tiktoken",
"license": "MIT",
"scripts": {
diff --git wasm/src/lib.rs wasm/src/lib.rs
index 7da08e8..65b29a0 100644
--- wasm/src/lib.rs
+++ wasm/src/lib.rs
@@ -385,6 +385,8 @@ export type TiktokenModel =
| "text-davinci-edit-001"
| "code-davinci-edit-001"
| "text-embedding-ada-002"
+ | "text-embedding-3-small"
+ | "text-embedding-3-large"
| "text-similarity-davinci-001"
| "text-similarity-curie-001"
| "text-similarity-babbage-001"
@@ -421,13 +423,17 @@ export type TiktokenModel =
| "gpt-4o"
| "gpt-4o-2024-05-13"
| "gpt-4o-2024-08-06"
+ | "gpt-4o-2024-11-20"
| "gpt-4o-mini-2024-07-18"
| "gpt-4o-mini"
+ | "o1"
| "o1-2024-12-17"
| "o1-mini"
| "o1-preview"
| "o1-preview-2024-09-12"
| "o1-mini-2024-09-12"
+ | "o3-mini"
+ | "o3-mini-2025-01-31"
| "chatgpt-4o-latest"
| "gpt-4o-realtime"
| "gpt-4o-realtime-preview-2024-10-01"
@@ -454,9 +460,10 @@ pub fn encoding_for_model(
"text-babbage-001" => Ok("r50k_base"),
"text-ada-001" => Ok("r50k_base"),
"davinci" => Ok("r50k_base"),
+ "davinci-002" => Ok("cl100k_base"),
"curie" => Ok("r50k_base"),
"babbage" => Ok("r50k_base"),
- "babbage-002" => Ok("r50k_base"),
+ "babbage-002" => Ok("cl100k_base"),
"ada" => Ok("r50k_base"),
"code-davinci-002" => Ok("p50k_base"),
"code-davinci-001" => Ok("p50k_base"),
@@ -485,7 +492,7 @@ pub fn encoding_for_model(
"gpt-3.5-turbo-0613" => Ok("cl100k_base"),
"gpt-3.5-turbo-16k" => Ok("cl100k_base"),
"gpt-3.5-turbo-16k-0613" => Ok("cl100k_base"),
- "gpt-3.5-turbo-instruct" => Ok("clk100k_base"),
+ "gpt-3.5-turbo-instruct" => Ok("cl100k_base"),
"gpt-3.5-turbo-instruct-0914" => Ok("cl100k_base"),
"gpt-4" => Ok("cl100k_base"),
"gpt-4-0314" => Ok("cl100k_base"),
@@ -505,8 +512,10 @@ pub fn encoding_for_model(
"gpt-4o" => Ok("o200k_base"),
"gpt-4o-2024-05-13" => Ok("o200k_base"),
"gpt-4o-2024-08-06" => Ok("o200k_base"),
+ "gpt-4o-2024-11-20" => Ok("o200k_base"),
"gpt-4o-mini-2024-07-18" => Ok("o200k_base"),
"gpt-4o-mini" => Ok("o200k_base"),
+ "o1" => Ok("o200k_base"),
"o1-2024-12-17" => Ok("o200k_base"),
"o1-mini" => Ok("o200k_base"),
"o1-preview" => Ok("o200k_base"),
@@ -515,6 +524,8 @@ pub fn encoding_for_model(
"chatgpt-4o-latest" => Ok("o200k_base"),
"gpt-4o-realtime" => Ok("o200k_base"),
"gpt-4o-realtime-preview-2024-10-01" => Ok("o200k_base"),
+ "o3-mini" => Ok("o200k_base"),
+ "o3-mini-2025-01-31" => Ok("o200k_base"),
model => Err(JsError::new(
format!("Invalid model: {}", model.to_string()).as_str(),
)),
Description
This PR updates the model mappings in the tiktoken library by adding support for new models and fixing incorrect tokenizer assignments for some existing models. The changes include version bumps across packages and the addition of several new OpenAI models, including o3-mini and a new GPT-4o snapshot.
Changes
By file: js/src/core.ts & tiktoken/model_to_encoding.json
wasm/src/lib.rs
Version Updates
sequenceDiagram
participant Client
participant Tokenizer
participant ModelMapping
Client->>ModelMapping: Request tokenizer for model
ModelMapping->>ModelMapping: Check model name
alt New Model (o3-mini, gpt-4o-2024-11-20, etc)
ModelMapping->>Tokenizer: Use o200k_base
else Updated Model (babbage-002, davinci-002)
ModelMapping->>Tokenizer: Use cl100k_base
else GPT-3.5-turbo-instruct
ModelMapping->>Tokenizer: Use cl100k_base (fixed)
end
Tokenizer->>Client: Return appropriate tokenizer
|
openai debug - [puLL-Merge] - dqbd/tiktoken@@dqbd/tiktoken@1.0.18..@dqbd/tiktoken@1.0.20
Diff
diff --git js/CHANGELOG.md js/CHANGELOG.md
index e654ab7..0a4ce5c 100644
--- js/CHANGELOG.md
+++ js/CHANGELOG.md
@@ -1,5 +1,17 @@
# js-tiktoken
+## 1.0.18
+
+### Patch Changes
+
+- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models
+
+## 1.0.17
+
+### Patch Changes
+
+- 5982def: feat: add o3-mini, missing o1 model
+
## 1.0.16
### Patch Changes
diff --git js/package.json js/package.json
index b0fab15..42c78c4 100644
--- js/package.json
+++ js/package.json
@@ -1,6 +1,6 @@
{
"name": "js-tiktoken",
- "version": "1.0.16",
+ "version": "1.0.18",
"description": "JavaScript port of tiktoken",
"license": "MIT",
"scripts": {
diff --git js/src/core.ts js/src/core.ts
index 6688b32..90bde7a 100644
--- js/src/core.ts
+++ js/src/core.ts
@@ -279,13 +279,17 @@ export function getEncodingNameForModel(model: TiktokenModel) {
case "gpt-4o":
case "gpt-4o-2024-05-13":
case "gpt-4o-2024-08-06":
+ case "gpt-4o-2024-11-20":
case "gpt-4o-mini-2024-07-18":
case "gpt-4o-mini":
+ case "o1":
case "o1-2024-12-17":
case "o1-mini":
case "o1-preview":
case "o1-preview-2024-09-12":
case "o1-mini-2024-09-12":
+ case "o3-mini":
+ case "o3-mini-2025-01-31":
case "chatgpt-4o-latest":
case "gpt-4o-realtime":
case "gpt-4o-realtime-preview-2024-10-01":
diff --git tiktoken/model_to_encoding.json tiktoken/model_to_encoding.json
index 445f32a..6b55f31 100644
--- tiktoken/model_to_encoding.json
+++ tiktoken/model_to_encoding.json
@@ -57,14 +57,18 @@
"gpt-4-vision-preview": "cl100k_base",
"gpt-4o": "o200k_base",
"gpt-4o-2024-05-13": "o200k_base",
- "gpt-4o-2024-08-06":"o200k_base",
+ "gpt-4o-2024-08-06": "o200k_base",
+ "gpt-4o-2024-11-20": "o200k_base",
"gpt-4o-mini-2024-07-18": "o200k_base",
"gpt-4o-mini": "o200k_base",
+ "o1": "o200k_base",
"o1-2024-12-17": "o200k_base",
"o1-mini": "o200k_base",
"o1-preview": "o200k_base",
"o1-preview-2024-09-12": "o200k_base",
"o1-mini-2024-09-12": "o200k_base",
+ "o3-mini": "o200k_base",
+ "o3-mini-2025-01-31": "o200k_base",
"chatgpt-4o-latest": "o200k_base",
"gpt-4o-realtime": "o200k_base",
"gpt-4o-realtime-preview-2024-10-01": "o200k_base"
diff --git wasm/CHANGELOG.md wasm/CHANGELOG.md
index f457b73..8d9bf5b 100644
--- wasm/CHANGELOG.md
+++ wasm/CHANGELOG.md
@@ -1,5 +1,17 @@
# tiktoken
+## 1.0.20
+
+### Patch Changes
+
+- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models
+
+## 1.0.19
+
+### Patch Changes
+
+- 5982def: feat: add o3-mini, missing o1 model
+
## 1.0.18
### Patch Changes
diff --git wasm/alias/CHANGELOG.md wasm/alias/CHANGELOG.md
index 41dfd2d..0379d0c 100644
--- wasm/alias/CHANGELOG.md
+++ wasm/alias/CHANGELOG.md
@@ -1,5 +1,17 @@
# @dqbd/tiktoken
+## 1.0.20
+
+### Patch Changes
+
+- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models
+
+## 1.0.19
+
+### Patch Changes
+
+- 5982def: feat: add o3-mini, missing o1 model
+
## 1.0.18
### Patch Changes
diff --git wasm/alias/package.json wasm/alias/package.json
index 2e73252..ad1b291 100644
--- wasm/alias/package.json
+++ wasm/alias/package.json
@@ -1,6 +1,6 @@
{
"name": "@dqbd/tiktoken",
- "version": "1.0.18",
+ "version": "1.0.20",
"repository": {
"type": "git",
"url": "https://github.com/dqbd/tiktoken"
diff --git wasm/package.json wasm/package.json
index 59b446b..7dec0d5 100644
--- wasm/package.json
+++ wasm/package.json
@@ -1,6 +1,6 @@
{
"name": "tiktoken",
- "version": "1.0.18",
+ "version": "1.0.20",
"description": "JS/WASM bindings for tiktoken",
"license": "MIT",
"scripts": {
diff --git wasm/src/lib.rs wasm/src/lib.rs
index 7da08e8..65b29a0 100644
--- wasm/src/lib.rs
+++ wasm/src/lib.rs
@@ -385,6 +385,8 @@ export type TiktokenModel =
| "text-davinci-edit-001"
| "code-davinci-edit-001"
| "text-embedding-ada-002"
+ | "text-embedding-3-small"
+ | "text-embedding-3-large"
| "text-similarity-davinci-001"
| "text-similarity-curie-001"
| "text-similarity-babbage-001"
@@ -421,13 +423,17 @@ export type TiktokenModel =
| "gpt-4o"
| "gpt-4o-2024-05-13"
| "gpt-4o-2024-08-06"
+ | "gpt-4o-2024-11-20"
| "gpt-4o-mini-2024-07-18"
| "gpt-4o-mini"
+ | "o1"
| "o1-2024-12-17"
| "o1-mini"
| "o1-preview"
| "o1-preview-2024-09-12"
| "o1-mini-2024-09-12"
+ | "o3-mini"
+ | "o3-mini-2025-01-31"
| "chatgpt-4o-latest"
| "gpt-4o-realtime"
| "gpt-4o-realtime-preview-2024-10-01"
@@ -454,9 +460,10 @@ pub fn encoding_for_model(
"text-babbage-001" => Ok("r50k_base"),
"text-ada-001" => Ok("r50k_base"),
"davinci" => Ok("r50k_base"),
+ "davinci-002" => Ok("cl100k_base"),
"curie" => Ok("r50k_base"),
"babbage" => Ok("r50k_base"),
- "babbage-002" => Ok("r50k_base"),
+ "babbage-002" => Ok("cl100k_base"),
"ada" => Ok("r50k_base"),
"code-davinci-002" => Ok("p50k_base"),
"code-davinci-001" => Ok("p50k_base"),
@@ -485,7 +492,7 @@ pub fn encoding_for_model(
"gpt-3.5-turbo-0613" => Ok("cl100k_base"),
"gpt-3.5-turbo-16k" => Ok("cl100k_base"),
"gpt-3.5-turbo-16k-0613" => Ok("cl100k_base"),
- "gpt-3.5-turbo-instruct" => Ok("clk100k_base"),
+ "gpt-3.5-turbo-instruct" => Ok("cl100k_base"),
"gpt-3.5-turbo-instruct-0914" => Ok("cl100k_base"),
"gpt-4" => Ok("cl100k_base"),
"gpt-4-0314" => Ok("cl100k_base"),
@@ -505,8 +512,10 @@ pub fn encoding_for_model(
"gpt-4o" => Ok("o200k_base"),
"gpt-4o-2024-05-13" => Ok("o200k_base"),
"gpt-4o-2024-08-06" => Ok("o200k_base"),
+ "gpt-4o-2024-11-20" => Ok("o200k_base"),
"gpt-4o-mini-2024-07-18" => Ok("o200k_base"),
"gpt-4o-mini" => Ok("o200k_base"),
+ "o1" => Ok("o200k_base"),
"o1-2024-12-17" => Ok("o200k_base"),
"o1-mini" => Ok("o200k_base"),
"o1-preview" => Ok("o200k_base"),
@@ -515,6 +524,8 @@ pub fn encoding_for_model(
"chatgpt-4o-latest" => Ok("o200k_base"),
"gpt-4o-realtime" => Ok("o200k_base"),
"gpt-4o-realtime-preview-2024-10-01" => Ok("o200k_base"),
+ "o3-mini" => Ok("o200k_base"),
+ "o3-mini-2025-01-31" => Ok("o200k_base"),
model => Err(JsError::new(
format!("Invalid model: {}", model.to_string()).as_str(),
)),
Description
This PR introduces several updates and fixes to multiple parts of the codebase.
These changes enhance the functionality of the package by adding support for new models and correcting previously incorrect tokenizer configurations.
Possible Issues
Security Hotspots
None identified in this PR.
Changes
|
This PR contains the following updates:
1.0.18
->1.0.20
Release Notes
dqbd/tiktoken (tiktoken)
v1.0.20
Compare Source
Patch Changes
5f92348
: Add missing historic models, fix incorrect tokenizers for old instruct models
v1.0.19
Compare Source
Patch Changes
5982def
: feat: add o3-mini, missing o1 model
Configuration
📅 Schedule: Branch creation - "* 0-12 * * 3" (UTC), Automerge - At any time (no schedule defined).
🚦 Automerge: Disabled by config. Please merge this manually once you are satisfied.
♻ Rebasing: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.
🔕 Ignore: Close this PR and you won't be reminded about this update again.
This PR was generated by Mend Renovate. View the repository job log.