diff --git a/app/client/platforms/alibaba.ts b/app/client/platforms/alibaba.ts
index 6fe69e87ae2..88511768cd3 100644
--- a/app/client/platforms/alibaba.ts
+++ b/app/client/platforms/alibaba.ts
@@ -1,12 +1,13 @@
"use client";
+import { ApiPath, Alibaba, ALIBABA_BASE_URL } from "@/app/constant";
import {
- ApiPath,
- Alibaba,
- ALIBABA_BASE_URL,
- REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
-import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
-
+ useAccessStore,
+ useAppConfig,
+ useChatStore,
+ ChatMessageTool,
+ usePluginStore,
+} from "@/app/store";
+import { streamWithThink } from "@/app/utils/chat";
import {
ChatOptions,
getHeaders,
@@ -15,14 +16,12 @@ import {
SpeechOptions,
MultimodalContent,
} from "../api";
-import Locale from "../../locales";
-import {
- EventStreamContentType,
- fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent } from "@/app/utils";
+import {
+ getMessageTextContent,
+ getMessageTextContentWithoutThinking,
+ getTimeoutMSByModel,
+} from "@/app/utils";
import { fetch } from "@/app/utils/stream";
export interface OpenAIListModelResponse {
@@ -92,7 +91,10 @@ export class QwenApi implements LLMApi {
async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({
role: v.role,
- content: getMessageTextContent(v),
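+      // strip earlier "thinking" output from assistant messages so it is not sent back to the model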
+ content:
+ v.role === "assistant"
+ ? getMessageTextContentWithoutThinking(v)
+ : getMessageTextContent(v),
}));
const modelConfig = {
@@ -122,134 +124,118 @@ export class QwenApi implements LLMApi {
options.onController?.(controller);
try {
+ const headers = {
+ ...getHeaders(),
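+        // DashScope toggles server-sent events (SSE) on or off via this header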
+ "X-DashScope-SSE": shouldStream ? "enable" : "disable",
+ };
+
const chatPath = this.path(Alibaba.ChatPath);
const chatPayload = {
method: "POST",
body: JSON.stringify(requestPayload),
signal: controller.signal,
- headers: {
- ...getHeaders(),
- "X-DashScope-SSE": shouldStream ? "enable" : "disable",
- },
+      headers,
};
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
- let responseText = "";
- let remainText = "";
- let finished = false;
- let responseRes: Response;
-
- // animate response to make it looks smooth
- function animateResponseText() {
- if (finished || controller.signal.aborted) {
- responseText += remainText;
- console.log("[Response Animation] finished");
- if (responseText?.length === 0) {
- options.onError?.(new Error("empty response from server"));
+ const [tools, funcs] = usePluginStore
+ .getState()
+ .getAsTools(
+ useChatStore.getState().currentSession().mask?.plugin || [],
+ );
+ return streamWithThink(
+ chatPath,
+ requestPayload,
+ headers,
+ tools as any,
+ funcs,
+ controller,
+ // parseSSE
+ (text: string, runTools: ChatMessageTool[]) => {
+ // console.log("parseSSE", text, runTools);
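+          // DashScope nests streaming choices under "output", unlike the OpenAI-style top-level "choices"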
+ const json = JSON.parse(text);
+ const choices = json.output.choices as Array<{
+ message: {
+ content: string | null;
+ tool_calls: ChatMessageTool[];
+ reasoning_content: string | null;
+ };
+ }>;
+
+ if (!choices?.length) return { isThinking: false, content: "" };
+
+ const tool_calls = choices[0]?.message?.tool_calls;
+ if (tool_calls?.length > 0) {
+ const index = tool_calls[0]?.index;
+ const id = tool_calls[0]?.id;
+ const args = tool_calls[0]?.function?.arguments;
+ if (id) {
+ runTools.push({
+ id,
+ type: tool_calls[0]?.type,
+ function: {
+ name: tool_calls[0]?.function?.name as string,
+ arguments: args,
+ },
+ });
+ } else {
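+            // no id on this chunk: it carries more argument text for the tool call at "index"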
+ // @ts-ignore
+ runTools[index]["function"]["arguments"] += args;
+ }
}
- return;
- }
-
- if (remainText.length > 0) {
- const fetchCount = Math.max(1, Math.round(remainText.length / 60));
- const fetchText = remainText.slice(0, fetchCount);
- responseText += fetchText;
- remainText = remainText.slice(fetchCount);
- options.onUpdate?.(responseText, fetchText);
- }
-
- requestAnimationFrame(animateResponseText);
- }
-
- // start animaion
- animateResponseText();
-
- const finish = () => {
- if (!finished) {
- finished = true;
- options.onFinish(responseText + remainText, responseRes);
- }
- };
-
- controller.signal.onabort = finish;
-
- fetchEventSource(chatPath, {
- fetch: fetch as any,
- ...chatPayload,
- async onopen(res) {
- clearTimeout(requestTimeoutId);
- const contentType = res.headers.get("content-type");
- console.log(
- "[Alibaba] request response content type: ",
- contentType,
- );
- responseRes = res;
- if (contentType?.startsWith("text/plain")) {
- responseText = await res.clone().text();
- return finish();
- }
+ const reasoning = choices[0]?.message?.reasoning_content;
+ const content = choices[0]?.message?.content;
+ // Skip if both content and reasoning_content are empty or null
if (
- !res.ok ||
- !res.headers
- .get("content-type")
- ?.startsWith(EventStreamContentType) ||
- res.status !== 200
+ (!reasoning || reasoning.length === 0) &&
+ (!content || content.length === 0)
) {
- const responseTexts = [responseText];
- let extraInfo = await res.clone().text();
- try {
- const resJson = await res.clone().json();
- extraInfo = prettyObject(resJson);
- } catch {}
-
- if (res.status === 401) {
- responseTexts.push(Locale.Error.Unauthorized);
- }
-
- if (extraInfo) {
- responseTexts.push(extraInfo);
- }
-
- responseText = responseTexts.join("\n\n");
-
- return finish();
+ return {
+ isThinking: false,
+ content: "",
+ };
}
- },
- onmessage(msg) {
- if (msg.data === "[DONE]" || finished) {
- return finish();
- }
- const text = msg.data;
- try {
- const json = JSON.parse(text);
- const choices = json.output.choices as Array<{
- message: { content: string };
- }>;
- const delta = choices[0]?.message?.content;
- if (delta) {
- remainText += delta;
- }
- } catch (e) {
- console.error("[Request] parse error", text, msg);
+
+ if (reasoning && reasoning.length > 0) {
+ return {
+ isThinking: true,
+ content: reasoning,
+ };
+ } else if (content && content.length > 0) {
+ return {
+ isThinking: false,
+ content: content,
+ };
}
+
+ return {
+ isThinking: false,
+ content: "",
+ };
},
- onclose() {
- finish();
- },
- onerror(e) {
- options.onError?.(e);
- throw e;
+ // processToolMessage, include tool_calls message and tool call results
+ (
+ requestPayload: RequestPayload,
+ toolCallMessage: any,
+ toolCallResult: any[],
+ ) => {
+ requestPayload?.input?.messages?.splice(
+ requestPayload?.input?.messages?.length,
+ 0,
+ toolCallMessage,
+ ...toolCallResult,
+ );
},
- openWhenHidden: true,
- });
+ options,
+ );
} else {
const res = await fetch(chatPath, chatPayload);
clearTimeout(requestTimeoutId);
diff --git a/app/client/platforms/baidu.ts b/app/client/platforms/baidu.ts
index 9e8c2f139b6..dc990db4103 100644
--- a/app/client/platforms/baidu.ts
+++ b/app/client/platforms/baidu.ts
@@ -1,10 +1,5 @@
"use client";
-import {
- ApiPath,
- Baidu,
- BAIDU_BASE_URL,
- REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
+import { ApiPath, Baidu, BAIDU_BASE_URL } from "@/app/constant";
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
import { getAccessToken } from "@/app/utils/baidu";
@@ -23,7 +18,7 @@ import {
} from "@fortaine/fetch-event-source";
import { prettyObject } from "@/app/utils/format";
import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent } from "@/app/utils";
+import { getMessageTextContent, getTimeoutMSByModel } from "@/app/utils";
import { fetch } from "@/app/utils/stream";
export interface OpenAIListModelResponse {
@@ -155,7 +150,7 @@ export class ErnieApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
diff --git a/app/client/platforms/bytedance.ts b/app/client/platforms/bytedance.ts
index c2f128128fe..f9524cba28d 100644
--- a/app/client/platforms/bytedance.ts
+++ b/app/client/platforms/bytedance.ts
@@ -1,11 +1,12 @@
"use client";
+import { ApiPath, ByteDance, BYTEDANCE_BASE_URL } from "@/app/constant";
import {
- ApiPath,
- ByteDance,
- BYTEDANCE_BASE_URL,
- REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
-import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
+ useAccessStore,
+ useAppConfig,
+ useChatStore,
+ ChatMessageTool,
+ usePluginStore,
+} from "@/app/store";
import {
ChatOptions,
@@ -15,14 +16,14 @@ import {
MultimodalContent,
SpeechOptions,
} from "../api";
-import Locale from "../../locales";
-import {
- EventStreamContentType,
- fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
+
+import { streamWithThink } from "@/app/utils/chat";
import { getClientConfig } from "@/app/config/client";
import { preProcessImageContent } from "@/app/utils/chat";
+import {
+ getMessageTextContentWithoutThinking,
+ getTimeoutMSByModel,
+} from "@/app/utils";
import { fetch } from "@/app/utils/stream";
export interface OpenAIListModelResponse {
@@ -34,7 +35,7 @@ export interface OpenAIListModelResponse {
}>;
}
-interface RequestPayload {
+interface RequestPayloadForByteDance {
messages: {
role: "system" | "user" | "assistant";
content: string | MultimodalContent[];
@@ -86,7 +87,10 @@ export class DoubaoApi implements LLMApi {
async chat(options: ChatOptions) {
const messages: ChatOptions["messages"] = [];
for (const v of options.messages) {
- const content = await preProcessImageContent(v.content);
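+      // assistant history: drop "thinking" segments; other roles may carry multimodal (image) content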
+ const content =
+ v.role === "assistant"
+ ? getMessageTextContentWithoutThinking(v)
+ : await preProcessImageContent(v.content);
messages.push({ role: v.role, content });
}
@@ -99,7 +103,7 @@ export class DoubaoApi implements LLMApi {
};
const shouldStream = !!options.config.stream;
- const requestPayload: RequestPayload = {
+ const requestPayload: RequestPayloadForByteDance = {
messages,
stream: shouldStream,
model: modelConfig.model,
@@ -124,119 +128,101 @@ export class DoubaoApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
- let responseText = "";
- let remainText = "";
- let finished = false;
- let responseRes: Response;
-
- // animate response to make it looks smooth
- function animateResponseText() {
- if (finished || controller.signal.aborted) {
- responseText += remainText;
- console.log("[Response Animation] finished");
- if (responseText?.length === 0) {
- options.onError?.(new Error("empty response from server"));
- }
- return;
- }
-
- if (remainText.length > 0) {
- const fetchCount = Math.max(1, Math.round(remainText.length / 60));
- const fetchText = remainText.slice(0, fetchCount);
- responseText += fetchText;
- remainText = remainText.slice(fetchCount);
- options.onUpdate?.(responseText, fetchText);
- }
-
- requestAnimationFrame(animateResponseText);
- }
-
- // start animaion
- animateResponseText();
-
- const finish = () => {
- if (!finished) {
- finished = true;
- options.onFinish(responseText + remainText, responseRes);
- }
- };
-
- controller.signal.onabort = finish;
-
- fetchEventSource(chatPath, {
- fetch: fetch as any,
- ...chatPayload,
- async onopen(res) {
- clearTimeout(requestTimeoutId);
- const contentType = res.headers.get("content-type");
- console.log(
- "[ByteDance] request response content type: ",
- contentType,
- );
- responseRes = res;
- if (contentType?.startsWith("text/plain")) {
- responseText = await res.clone().text();
- return finish();
+ const [tools, funcs] = usePluginStore
+ .getState()
+ .getAsTools(
+ useChatStore.getState().currentSession().mask?.plugin || [],
+ );
+ return streamWithThink(
+ chatPath,
+ requestPayload,
+ getHeaders(),
+ tools as any,
+ funcs,
+ controller,
+ // parseSSE
+ (text: string, runTools: ChatMessageTool[]) => {
+ // console.log("parseSSE", text, runTools);
+ const json = JSON.parse(text);
+ const choices = json.choices as Array<{
+ delta: {
+ content: string | null;
+ tool_calls: ChatMessageTool[];
+ reasoning_content: string | null;
+ };
+ }>;
+
+ if (!choices?.length) return { isThinking: false, content: "" };
+
+ const tool_calls = choices[0]?.delta?.tool_calls;
+ if (tool_calls?.length > 0) {
+ const index = tool_calls[0]?.index;
+ const id = tool_calls[0]?.id;
+ const args = tool_calls[0]?.function?.arguments;
+ if (id) {
+ runTools.push({
+ id,
+ type: tool_calls[0]?.type,
+ function: {
+ name: tool_calls[0]?.function?.name as string,
+ arguments: args,
+ },
+ });
+ } else {
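+            // no id on this chunk: it carries more argument text for the tool call at "index"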
+ // @ts-ignore
+ runTools[index]["function"]["arguments"] += args;
+ }
}
+ const reasoning = choices[0]?.delta?.reasoning_content;
+ const content = choices[0]?.delta?.content;
+ // Skip if both content and reasoning_content are empty or null
if (
- !res.ok ||
- !res.headers
- .get("content-type")
- ?.startsWith(EventStreamContentType) ||
- res.status !== 200
+ (!reasoning || reasoning.length === 0) &&
+ (!content || content.length === 0)
) {
- const responseTexts = [responseText];
- let extraInfo = await res.clone().text();
- try {
- const resJson = await res.clone().json();
- extraInfo = prettyObject(resJson);
- } catch {}
-
- if (res.status === 401) {
- responseTexts.push(Locale.Error.Unauthorized);
- }
-
- if (extraInfo) {
- responseTexts.push(extraInfo);
- }
-
- responseText = responseTexts.join("\n\n");
-
- return finish();
+ return {
+ isThinking: false,
+ content: "",
+ };
}
- },
- onmessage(msg) {
- if (msg.data === "[DONE]" || finished) {
- return finish();
- }
- const text = msg.data;
- try {
- const json = JSON.parse(text);
- const choices = json.choices as Array<{
- delta: { content: string };
- }>;
- const delta = choices[0]?.delta?.content;
- if (delta) {
- remainText += delta;
- }
- } catch (e) {
- console.error("[Request] parse error", text, msg);
+
+ if (reasoning && reasoning.length > 0) {
+ return {
+ isThinking: true,
+ content: reasoning,
+ };
+ } else if (content && content.length > 0) {
+ return {
+ isThinking: false,
+ content: content,
+ };
}
+
+ return {
+ isThinking: false,
+ content: "",
+ };
},
- onclose() {
- finish();
- },
- onerror(e) {
- options.onError?.(e);
- throw e;
+ // processToolMessage, include tool_calls message and tool call results
+ (
+ requestPayload: RequestPayloadForByteDance,
+ toolCallMessage: any,
+ toolCallResult: any[],
+ ) => {
+ requestPayload?.messages?.splice(
+ requestPayload?.messages?.length,
+ 0,
+ toolCallMessage,
+ ...toolCallResult,
+ );
},
- openWhenHidden: true,
- });
+ options,
+ );
} else {
const res = await fetch(chatPath, chatPayload);
clearTimeout(requestTimeoutId);
diff --git a/app/client/platforms/deepseek.ts b/app/client/platforms/deepseek.ts
index c436ae61d01..b21d24cefc4 100644
--- a/app/client/platforms/deepseek.ts
+++ b/app/client/platforms/deepseek.ts
@@ -1,12 +1,6 @@
"use client";
-// azure and openai, using same models. so using same LLMApi.
+// Azure and OpenAI use the same models, so they share the same LLMApi.
-import {
- ApiPath,
- DEEPSEEK_BASE_URL,
- DeepSeek,
- REQUEST_TIMEOUT_MS,
- REQUEST_TIMEOUT_MS_FOR_THINKING,
-} from "@/app/constant";
+import { ApiPath, DEEPSEEK_BASE_URL, DeepSeek } from "@/app/constant";
import {
useAccessStore,
useAppConfig,
@@ -26,6 +20,7 @@ import { getClientConfig } from "@/app/config/client";
import {
getMessageTextContent,
getMessageTextContentWithoutThinking,
+ getTimeoutMSByModel,
} from "@/app/utils";
import { RequestPayload } from "./openai";
import { fetch } from "@/app/utils/stream";
@@ -116,16 +111,10 @@ export class DeepSeekApi implements LLMApi {
headers: getHeaders(),
};
- // console.log(chatPayload);
-
- const isR1 =
- options.config.model.endsWith("-reasoner") ||
- options.config.model.endsWith("-r1");
-
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- isR1 ? REQUEST_TIMEOUT_MS_FOR_THINKING : REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
@@ -176,8 +165,8 @@ export class DeepSeekApi implements LLMApi {
// Skip if both content and reasoning_content are empty or null
if (
- (!reasoning || reasoning.trim().length === 0) &&
- (!content || content.trim().length === 0)
+ (!reasoning || reasoning.length === 0) &&
+ (!content || content.length === 0)
) {
return {
isThinking: false,
@@ -185,12 +174,12 @@ export class DeepSeekApi implements LLMApi {
};
}
- if (reasoning && reasoning.trim().length > 0) {
+ if (reasoning && reasoning.length > 0) {
return {
isThinking: true,
content: reasoning,
};
- } else if (content && content.trim().length > 0) {
+ } else if (content && content.length > 0) {
return {
isThinking: false,
content: content,
diff --git a/app/client/platforms/glm.ts b/app/client/platforms/glm.ts
index a8d1869e30e..98b10277de9 100644
--- a/app/client/platforms/glm.ts
+++ b/app/client/platforms/glm.ts
@@ -1,10 +1,5 @@
"use client";
-import {
- ApiPath,
- CHATGLM_BASE_URL,
- ChatGLM,
- REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
+import { ApiPath, CHATGLM_BASE_URL, ChatGLM } from "@/app/constant";
import {
useAccessStore,
useAppConfig,
@@ -21,7 +16,11 @@ import {
SpeechOptions,
} from "../api";
import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent, isVisionModel } from "@/app/utils";
+import {
+ getMessageTextContent,
+ isVisionModel,
+ getTimeoutMSByModel,
+} from "@/app/utils";
import { RequestPayload } from "./openai";
import { fetch } from "@/app/utils/stream";
import { preProcessImageContent } from "@/app/utils/chat";
@@ -191,7 +190,7 @@ export class ChatGLMApi implements LLMApi {
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (modelType === "image" || modelType === "video") {
diff --git a/app/client/platforms/google.ts b/app/client/platforms/google.ts
index 1e593dd4257..654f0e3e4c7 100644
--- a/app/client/platforms/google.ts
+++ b/app/client/platforms/google.ts
@@ -1,9 +1,4 @@
-import {
- ApiPath,
- Google,
- REQUEST_TIMEOUT_MS,
- REQUEST_TIMEOUT_MS_FOR_THINKING,
-} from "@/app/constant";
+import { ApiPath, Google } from "@/app/constant";
import {
ChatOptions,
getHeaders,
@@ -27,6 +22,7 @@ import {
getMessageTextContent,
getMessageImages,
isVisionModel,
+ getTimeoutMSByModel,
} from "@/app/utils";
import { preProcessImageContent } from "@/app/utils/chat";
import { nanoid } from "nanoid";
@@ -206,7 +202,7 @@ export class GeminiProApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- isThinking ? REQUEST_TIMEOUT_MS_FOR_THINKING : REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index fbe533cadab..c6f3fc4253f 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -8,7 +8,6 @@ import {
Azure,
REQUEST_TIMEOUT_MS,
ServiceProvider,
- REQUEST_TIMEOUT_MS_FOR_THINKING,
} from "@/app/constant";
import {
ChatMessageTool,
@@ -22,7 +21,7 @@ import {
preProcessImageContent,
uploadImage,
base64Image2Blob,
- stream,
+ streamWithThink,
} from "@/app/utils/chat";
import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
import { ModelSize, DalleQuality, DalleStyle } from "@/app/typing";
@@ -42,6 +41,7 @@ import {
getMessageTextContent,
isVisionModel,
isDalle3 as _isDalle3,
+ getTimeoutMSByModel,
} from "@/app/utils";
import { fetch } from "@/app/utils/stream";
@@ -294,7 +294,7 @@ export class ChatGPTApi implements LLMApi {
useChatStore.getState().currentSession().mask?.plugin || [],
);
// console.log("getAsTools", tools, funcs);
- stream(
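+    // streamWithThink handles both regular content and "thinking" (reasoning_content) chunks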
+ streamWithThink(
chatPath,
requestPayload,
getHeaders(),
@@ -309,8 +309,12 @@ export class ChatGPTApi implements LLMApi {
delta: {
content: string;
tool_calls: ChatMessageTool[];
+ reasoning_content: string | null;
};
}>;
+
+ if (!choices?.length) return { isThinking: false, content: "" };
+
const tool_calls = choices[0]?.delta?.tool_calls;
if (tool_calls?.length > 0) {
const id = tool_calls[0]?.id;
@@ -330,7 +334,37 @@ export class ChatGPTApi implements LLMApi {
runTools[index]["function"]["arguments"] += args;
}
}
- return choices[0]?.delta?.content;
+
+ const reasoning = choices[0]?.delta?.reasoning_content;
+ const content = choices[0]?.delta?.content;
+
+ // Skip if both content and reasoning_content are empty or null
+ if (
+ (!reasoning || reasoning.length === 0) &&
+ (!content || content.length === 0)
+ ) {
+ return {
+ isThinking: false,
+ content: "",
+ };
+ }
+
+ if (reasoning && reasoning.length > 0) {
+ return {
+ isThinking: true,
+ content: reasoning,
+ };
+ } else if (content && content.length > 0) {
+ return {
+ isThinking: false,
+ content: content,
+ };
+ }
+
+ return {
+ isThinking: false,
+ content: "",
+ };
},
// processToolMessage, include tool_calls message and tool call results
(
@@ -362,9 +396,7 @@ export class ChatGPTApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- isDalle3 || isO1OrO3
- ? REQUEST_TIMEOUT_MS_FOR_THINKING
- : REQUEST_TIMEOUT_MS, // dalle3 using b64_json is slow.
+ getTimeoutMSByModel(options.config.model),
);
const res = await fetch(chatPath, chatPayload);
diff --git a/app/client/platforms/siliconflow.ts b/app/client/platforms/siliconflow.ts
index 029e5245a8e..34f0844c328 100644
--- a/app/client/platforms/siliconflow.ts
+++ b/app/client/platforms/siliconflow.ts
@@ -4,7 +4,6 @@ import {
ApiPath,
SILICONFLOW_BASE_URL,
SiliconFlow,
- REQUEST_TIMEOUT_MS_FOR_THINKING,
DEFAULT_MODELS,
} from "@/app/constant";
import {
@@ -27,6 +26,7 @@ import {
getMessageTextContent,
getMessageTextContentWithoutThinking,
isVisionModel,
+ getTimeoutMSByModel,
} from "@/app/utils";
import { RequestPayload } from "./openai";
@@ -137,7 +137,7 @@ export class SiliconflowApi implements LLMApi {
-    // Use extended timeout for thinking models as they typically require more processing time
+    // thinking models typically need more processing time, so getTimeoutMSByModel gives them an extended timeout
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS_FOR_THINKING,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
diff --git a/app/client/platforms/tencent.ts b/app/client/platforms/tencent.ts
index 580844a5b31..8adeb1b3ea7 100644
--- a/app/client/platforms/tencent.ts
+++ b/app/client/platforms/tencent.ts
@@ -1,5 +1,5 @@
"use client";
-import { ApiPath, TENCENT_BASE_URL, REQUEST_TIMEOUT_MS } from "@/app/constant";
+import { ApiPath, TENCENT_BASE_URL } from "@/app/constant";
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
import {
@@ -17,7 +17,11 @@ import {
} from "@fortaine/fetch-event-source";
import { prettyObject } from "@/app/utils/format";
import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent, isVisionModel } from "@/app/utils";
+import {
+ getMessageTextContent,
+ isVisionModel,
+ getTimeoutMSByModel,
+} from "@/app/utils";
import mapKeys from "lodash-es/mapKeys";
import mapValues from "lodash-es/mapValues";
import isArray from "lodash-es/isArray";
@@ -135,7 +139,7 @@ export class HunyuanApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
diff --git a/app/client/platforms/xai.ts b/app/client/platforms/xai.ts
index 8c41c2d988f..830ad4778ac 100644
--- a/app/client/platforms/xai.ts
+++ b/app/client/platforms/xai.ts
@@ -1,6 +1,6 @@
"use client";
-// azure and openai, using same models. so using same LLMApi.
+// Azure and OpenAI use the same models, so they share the same LLMApi.
-import { ApiPath, XAI_BASE_URL, XAI, REQUEST_TIMEOUT_MS } from "@/app/constant";
+import { ApiPath, XAI_BASE_URL, XAI } from "@/app/constant";
import {
useAccessStore,
useAppConfig,
@@ -17,6 +17,7 @@ import {
SpeechOptions,
} from "../api";
import { getClientConfig } from "@/app/config/client";
+import { getTimeoutMSByModel } from "@/app/utils";
import { preProcessImageContent } from "@/app/utils/chat";
import { RequestPayload } from "./openai";
import { fetch } from "@/app/utils/stream";
@@ -103,7 +104,7 @@ export class XAIApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
diff --git a/app/utils.ts b/app/utils.ts
index f2337801976..6183e03b057 100644
--- a/app/utils.ts
+++ b/app/utils.ts
@@ -2,7 +2,11 @@ import { useEffect, useState } from "react";
import { showToast } from "./components/ui-lib";
import Locale from "./locales";
import { RequestMessage } from "./client/api";
-import { ServiceProvider } from "./constant";
+import {
+ REQUEST_TIMEOUT_MS,
+ REQUEST_TIMEOUT_MS_FOR_THINKING,
+ ServiceProvider,
+} from "./constant";
// import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";
import { fetch as tauriStreamFetch } from "./utils/stream";
import { VISION_MODEL_REGEXES, EXCLUDE_VISION_MODEL_REGEXES } from "./constant";
@@ -292,6 +296,20 @@ export function isDalle3(model: string) {
return "dall-e-3" === model;
}
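+// Pick a per-request timeout by model name: image generation (DALL·E) and
+// reasoning models (o1/o3, DeepSeek-R*, "-thinking" variants) get the longer
+// REQUEST_TIMEOUT_MS_FOR_THINKING; everything else uses REQUEST_TIMEOUT_MS.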
+export function getTimeoutMSByModel(model: string) {
+ model = model.toLowerCase();
+ if (
+ model.startsWith("dall-e") ||
+ model.startsWith("dalle") ||
+ model.startsWith("o1") ||
+ model.startsWith("o3") ||
+ model.includes("deepseek-r") ||
+ model.includes("-thinking")
+ )
+ return REQUEST_TIMEOUT_MS_FOR_THINKING;
+ return REQUEST_TIMEOUT_MS;
+}
+
export function getModelSizes(model: string): ModelSize[] {
if (isDalle3(model)) {
return ["1024x1024", "1792x1024", "1024x1792"];
diff --git a/app/utils/chat.ts b/app/utils/chat.ts
index b77955e6ecf..efc496f2c32 100644
--- a/app/utils/chat.ts
+++ b/app/utils/chat.ts
@@ -400,6 +400,7 @@ export function streamWithThink(
let responseRes: Response;
let isInThinkingMode = false;
let lastIsThinking = false;
+  let lastIsThinkingTagged = false; // between <think> and </think> tags
-  // animate response to make it looks smooth
+  // animate the response so it looks smooth
function animateResponseText() {
@@ -579,6 +580,23 @@ export function streamWithThink(
if (!chunk?.content || chunk.content.length === 0) {
return;
}
+
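+      // some providers emit "<think>...</think>" inline in the content stream instead of a separate reasoning_content field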
+      // deal with <think> and </think> tags start
+      if (!chunk.isThinking) {
+        if (chunk.content.startsWith("<think>")) {
+          chunk.isThinking = true;
+          chunk.content = chunk.content.slice(7).trim(); // 7 = "<think>".length
+          lastIsThinkingTagged = true;
+        } else if (chunk.content.endsWith("</think>")) {
+          chunk.isThinking = false;
+          chunk.content = chunk.content.slice(0, -8).trim(); // 8 = "</think>".length
+          lastIsThinkingTagged = false;
+        } else if (lastIsThinkingTagged) {
+          // still inside a tagged thinking block from a previous chunk
+          chunk.isThinking = true;
+        }
+      }
+      // deal with <think> and </think> tags end
+
// Check if thinking mode changed
const isThinkingChanged = lastIsThinking !== chunk.isThinking;
lastIsThinking = chunk.isThinking;