diff --git a/app/client/platforms/alibaba.ts b/app/client/platforms/alibaba.ts
index 6fe69e87ae2..88511768cd3 100644
--- a/app/client/platforms/alibaba.ts
+++ b/app/client/platforms/alibaba.ts
@@ -1,12 +1,13 @@
 "use client";
+import { ApiPath, Alibaba, ALIBABA_BASE_URL } from "@/app/constant";
 import {
-  ApiPath,
-  Alibaba,
-  ALIBABA_BASE_URL,
-  REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
-import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
-
+  useAccessStore,
+  useAppConfig,
+  useChatStore,
+  ChatMessageTool,
+  usePluginStore,
+} from "@/app/store";
+import { streamWithThink } from "@/app/utils/chat";
 import {
   ChatOptions,
   getHeaders,
@@ -15,14 +16,12 @@ import {
   SpeechOptions,
   MultimodalContent,
 } from "../api";
-import Locale from "../../locales";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent } from "@/app/utils";
+import {
+  getMessageTextContent,
+  getMessageTextContentWithoutThinking,
+  getTimeoutMSByModel,
+} from "@/app/utils";
 import { fetch } from "@/app/utils/stream";

 export interface OpenAIListModelResponse {
@@ -92,7 +91,10 @@ export class QwenApi implements LLMApi {
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       role: v.role,
-      content: getMessageTextContent(v),
+      content:
+        v.role === "assistant"
+          ? getMessageTextContentWithoutThinking(v)
+          : getMessageTextContent(v),
     }));

     const modelConfig = {
@@ -122,134 +124,118 @@ export class QwenApi implements LLMApi {
     options.onController?.(controller);

     try {
+      const headers = {
+        ...getHeaders(),
+        "X-DashScope-SSE": shouldStream ? "enable" : "disable",
+      };
+
       const chatPath = this.path(Alibaba.ChatPath);
       const chatPayload = {
         method: "POST",
         body: JSON.stringify(requestPayload),
         signal: controller.signal,
-        headers: {
-          ...getHeaders(),
-          "X-DashScope-SSE": shouldStream ? "enable" : "disable",
-        },
+        headers: headers,
       };

       // make a fetch request
       const requestTimeoutId = setTimeout(
         () => controller.abort(),
-        REQUEST_TIMEOUT_MS,
+        getTimeoutMSByModel(options.config.model),
       );

       if (shouldStream) {
-        let responseText = "";
-        let remainText = "";
-        let finished = false;
-        let responseRes: Response;
-
-        // animate response to make it looks smooth
-        function animateResponseText() {
-          if (finished || controller.signal.aborted) {
-            responseText += remainText;
-            console.log("[Response Animation] finished");
-            if (responseText?.length === 0) {
-              options.onError?.(new Error("empty response from server"));
+        const [tools, funcs] = usePluginStore
+          .getState()
+          .getAsTools(
+            useChatStore.getState().currentSession().mask?.plugin || [],
+          );
+        return streamWithThink(
+          chatPath,
+          requestPayload,
+          headers,
+          tools as any,
+          funcs,
+          controller,
+          // parseSSE
+          (text: string, runTools: ChatMessageTool[]) => {
+            // console.log("parseSSE", text, runTools);
+            const json = JSON.parse(text);
+            const choices = json.output.choices as Array<{
+              message: {
+                content: string | null;
+                tool_calls: ChatMessageTool[];
+                reasoning_content: string | null;
+              };
+            }>;
+
+            if (!choices?.length) return { isThinking: false, content: "" };
+
+            const tool_calls = choices[0]?.message?.tool_calls;
+            if (tool_calls?.length > 0) {
+              const index = tool_calls[0]?.index;
+              const id = tool_calls[0]?.id;
+              const args = tool_calls[0]?.function?.arguments;
+              if (id) {
+                runTools.push({
+                  id,
+                  type: tool_calls[0]?.type,
+                  function: {
+                    name: tool_calls[0]?.function?.name as string,
+                    arguments: args,
+                  },
+                });
+              } else {
+                // @ts-ignore
+                runTools[index]["function"]["arguments"] += args;
+              }
             }
-            return;
-          }
-
-          if (remainText.length > 0) {
-            const fetchCount = Math.max(1, Math.round(remainText.length / 60));
-            const fetchText = remainText.slice(0, fetchCount);
-            responseText += fetchText;
-            remainText = remainText.slice(fetchCount);
-            options.onUpdate?.(responseText, fetchText);
-          }
-
-          requestAnimationFrame(animateResponseText);
-        }
-
-        // start animaion
-        animateResponseText();
-
-        const finish = () => {
-          if (!finished) {
-            finished = true;
-            options.onFinish(responseText + remainText, responseRes);
-          }
-        };
-
-        controller.signal.onabort = finish;
-
-        fetchEventSource(chatPath, {
-          fetch: fetch as any,
-          ...chatPayload,
-          async onopen(res) {
-            clearTimeout(requestTimeoutId);
-            const contentType = res.headers.get("content-type");
-            console.log(
-              "[Alibaba] request response content type: ",
-              contentType,
-            );
-            responseRes = res;
-            if (contentType?.startsWith("text/plain")) {
-              responseText = await res.clone().text();
-              return finish();
-            }
+            const reasoning = choices[0]?.message?.reasoning_content;
+            const content = choices[0]?.message?.content;
+            // Skip if both content and reasoning_content are empty or null
             if (
-              !res.ok ||
-              !res.headers
-                .get("content-type")
-                ?.startsWith(EventStreamContentType) ||
-              res.status !== 200
+              (!reasoning || reasoning.length === 0) &&
+              (!content || content.length === 0)
             ) {
-              const responseTexts = [responseText];
-              let extraInfo = await res.clone().text();
-              try {
-                const resJson = await res.clone().json();
-                extraInfo = prettyObject(resJson);
-              } catch {}
-
-              if (res.status === 401) {
-                responseTexts.push(Locale.Error.Unauthorized);
-              }
-
-              if (extraInfo) {
-                responseTexts.push(extraInfo);
-              }
-
-              responseText = responseTexts.join("\n\n");
-
-              return finish();
+              return {
+                isThinking: false,
+                content: "",
+              };
             }
-          },
-          onmessage(msg) {
-            if (msg.data === "[DONE]" || finished) {
-              return finish();
-            }
-            const text = msg.data;
-            try {
-              const json = JSON.parse(text);
-              const choices = json.output.choices as Array<{
-                message: { content: string };
-              }>;
-              const delta = choices[0]?.message?.content;
-              if (delta) {
-                remainText += delta;
-              }
-            } catch (e) {
-              console.error("[Request] parse error", text, msg);
+
+            if (reasoning && reasoning.length > 0) {
+              return {
+                isThinking: true,
+                content: reasoning,
+              };
+            } else if (content && content.length > 0) {
+              return {
+                isThinking: false,
+                content: content,
+              };
             }
+
+            return {
+              isThinking: false,
+              content: "",
+            };
           },
-          onclose() {
-            finish();
-          },
-          onerror(e) {
-            options.onError?.(e);
-            throw e;
+          // processToolMessage, include tool_calls message and tool call results
+          (
+            requestPayload: RequestPayload,
+            toolCallMessage: any,
+            toolCallResult: any[],
+          ) => {
+            requestPayload?.input?.messages?.splice(
+              requestPayload?.input?.messages?.length,
+              0,
+              toolCallMessage,
+              ...toolCallResult,
+            );
          },
-          openWhenHidden: true,
-        });
+          options,
+        );
       } else {
         const res = await fetch(chatPath, chatPayload);
         clearTimeout(requestTimeoutId);
"[DONE]" || finished) { - return finish(); - } - const text = msg.data; - try { - const json = JSON.parse(text); - const choices = json.output.choices as Array<{ - message: { content: string }; - }>; - const delta = choices[0]?.message?.content; - if (delta) { - remainText += delta; - } - } catch (e) { - console.error("[Request] parse error", text, msg); + + if (reasoning && reasoning.length > 0) { + return { + isThinking: true, + content: reasoning, + }; + } else if (content && content.length > 0) { + return { + isThinking: false, + content: content, + }; } + + return { + isThinking: false, + content: "", + }; }, - onclose() { - finish(); - }, - onerror(e) { - options.onError?.(e); - throw e; + // processToolMessage, include tool_calls message and tool call results + ( + requestPayload: RequestPayload, + toolCallMessage: any, + toolCallResult: any[], + ) => { + requestPayload?.input?.messages?.splice( + requestPayload?.input?.messages?.length, + 0, + toolCallMessage, + ...toolCallResult, + ); }, - openWhenHidden: true, - }); + options, + ); } else { const res = await fetch(chatPath, chatPayload); clearTimeout(requestTimeoutId); diff --git a/app/client/platforms/baidu.ts b/app/client/platforms/baidu.ts index 9e8c2f139b6..dc990db4103 100644 --- a/app/client/platforms/baidu.ts +++ b/app/client/platforms/baidu.ts @@ -1,10 +1,5 @@ "use client"; -import { - ApiPath, - Baidu, - BAIDU_BASE_URL, - REQUEST_TIMEOUT_MS, -} from "@/app/constant"; +import { ApiPath, Baidu, BAIDU_BASE_URL } from "@/app/constant"; import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; import { getAccessToken } from "@/app/utils/baidu"; @@ -23,7 +18,7 @@ import { } from "@fortaine/fetch-event-source"; import { prettyObject } from "@/app/utils/format"; import { getClientConfig } from "@/app/config/client"; -import { getMessageTextContent } from "@/app/utils"; +import { getMessageTextContent, getTimeoutMSByModel } from "@/app/utils"; import { fetch } from "@/app/utils/stream"; export interface OpenAIListModelResponse { @@ -155,7 +150,7 @@ export class ErnieApi implements LLMApi { // make a fetch request const requestTimeoutId = setTimeout( () => controller.abort(), - REQUEST_TIMEOUT_MS, + getTimeoutMSByModel(options.config.model), ); if (shouldStream) { diff --git a/app/client/platforms/bytedance.ts b/app/client/platforms/bytedance.ts index c2f128128fe..f9524cba28d 100644 --- a/app/client/platforms/bytedance.ts +++ b/app/client/platforms/bytedance.ts @@ -1,11 +1,12 @@ "use client"; +import { ApiPath, ByteDance, BYTEDANCE_BASE_URL } from "@/app/constant"; import { - ApiPath, - ByteDance, - BYTEDANCE_BASE_URL, - REQUEST_TIMEOUT_MS, -} from "@/app/constant"; -import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; + useAccessStore, + useAppConfig, + useChatStore, + ChatMessageTool, + usePluginStore, +} from "@/app/store"; import { ChatOptions, @@ -15,14 +16,14 @@ import { MultimodalContent, SpeechOptions, } from "../api"; -import Locale from "../../locales"; -import { - EventStreamContentType, - fetchEventSource, -} from "@fortaine/fetch-event-source"; -import { prettyObject } from "@/app/utils/format"; + +import { streamWithThink } from "@/app/utils/chat"; import { getClientConfig } from "@/app/config/client"; import { preProcessImageContent } from "@/app/utils/chat"; +import { + getMessageTextContentWithoutThinking, + getTimeoutMSByModel, +} from "@/app/utils"; import { fetch } from "@/app/utils/stream"; export interface OpenAIListModelResponse { @@ -34,7 +35,7 @@ export interface 
OpenAIListModelResponse { }>; } -interface RequestPayload { +interface RequestPayloadForByteDance { messages: { role: "system" | "user" | "assistant"; content: string | MultimodalContent[]; @@ -86,7 +87,10 @@ export class DoubaoApi implements LLMApi { async chat(options: ChatOptions) { const messages: ChatOptions["messages"] = []; for (const v of options.messages) { - const content = await preProcessImageContent(v.content); + const content = + v.role === "assistant" + ? getMessageTextContentWithoutThinking(v) + : await preProcessImageContent(v.content); messages.push({ role: v.role, content }); } @@ -99,7 +103,7 @@ export class DoubaoApi implements LLMApi { }; const shouldStream = !!options.config.stream; - const requestPayload: RequestPayload = { + const requestPayload: RequestPayloadForByteDance = { messages, stream: shouldStream, model: modelConfig.model, @@ -124,119 +128,101 @@ export class DoubaoApi implements LLMApi { // make a fetch request const requestTimeoutId = setTimeout( () => controller.abort(), - REQUEST_TIMEOUT_MS, + getTimeoutMSByModel(options.config.model), ); if (shouldStream) { - let responseText = ""; - let remainText = ""; - let finished = false; - let responseRes: Response; - - // animate response to make it looks smooth - function animateResponseText() { - if (finished || controller.signal.aborted) { - responseText += remainText; - console.log("[Response Animation] finished"); - if (responseText?.length === 0) { - options.onError?.(new Error("empty response from server")); - } - return; - } - - if (remainText.length > 0) { - const fetchCount = Math.max(1, Math.round(remainText.length / 60)); - const fetchText = remainText.slice(0, fetchCount); - responseText += fetchText; - remainText = remainText.slice(fetchCount); - options.onUpdate?.(responseText, fetchText); - } - - requestAnimationFrame(animateResponseText); - } - - // start animaion - animateResponseText(); - - const finish = () => { - if (!finished) { - finished = true; - options.onFinish(responseText + remainText, responseRes); - } - }; - - controller.signal.onabort = finish; - - fetchEventSource(chatPath, { - fetch: fetch as any, - ...chatPayload, - async onopen(res) { - clearTimeout(requestTimeoutId); - const contentType = res.headers.get("content-type"); - console.log( - "[ByteDance] request response content type: ", - contentType, - ); - responseRes = res; - if (contentType?.startsWith("text/plain")) { - responseText = await res.clone().text(); - return finish(); + const [tools, funcs] = usePluginStore + .getState() + .getAsTools( + useChatStore.getState().currentSession().mask?.plugin || [], + ); + return streamWithThink( + chatPath, + requestPayload, + getHeaders(), + tools as any, + funcs, + controller, + // parseSSE + (text: string, runTools: ChatMessageTool[]) => { + // console.log("parseSSE", text, runTools); + const json = JSON.parse(text); + const choices = json.choices as Array<{ + delta: { + content: string | null; + tool_calls: ChatMessageTool[]; + reasoning_content: string | null; + }; + }>; + + if (!choices?.length) return { isThinking: false, content: "" }; + + const tool_calls = choices[0]?.delta?.tool_calls; + if (tool_calls?.length > 0) { + const index = tool_calls[0]?.index; + const id = tool_calls[0]?.id; + const args = tool_calls[0]?.function?.arguments; + if (id) { + runTools.push({ + id, + type: tool_calls[0]?.type, + function: { + name: tool_calls[0]?.function?.name as string, + arguments: args, + }, + }); + } else { + // @ts-ignore + runTools[index]["function"]["arguments"] += 
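Note: both `processToolMessage` callbacks so far append the assistant's tool-call message and the tool results with `splice(messages.length, 0, ...)`. Splicing at the end of an array is equivalent to a plain `push`; a runnable illustration:

```ts
const messages: string[] = ["system", "user"];
const toolCallMessage = "assistant(tool_calls)";
const toolCallResult = ["tool: result-1", "tool: result-2"];

// What the patch does:
messages.splice(messages.length, 0, toolCallMessage, ...toolCallResult);

// Equivalent and arguably clearer:
// messages.push(toolCallMessage, ...toolCallResult);

console.log(messages);
// ["system", "user", "assistant(tool_calls)", "tool: result-1", "tool: result-2"]
```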
diff --git a/app/client/platforms/deepseek.ts b/app/client/platforms/deepseek.ts
index c436ae61d01..b21d24cefc4 100644
--- a/app/client/platforms/deepseek.ts
+++ b/app/client/platforms/deepseek.ts
@@ -1,12 +1,6 @@
 "use client";
 // azure and openai, using same models. so using same LLMApi.
-import {
-  ApiPath,
-  DEEPSEEK_BASE_URL,
-  DeepSeek,
-  REQUEST_TIMEOUT_MS,
-  REQUEST_TIMEOUT_MS_FOR_THINKING,
-} from "@/app/constant";
+import { ApiPath, DEEPSEEK_BASE_URL, DeepSeek } from "@/app/constant";
 import {
   useAccessStore,
   useAppConfig,
@@ -26,6 +20,7 @@ import { getClientConfig } from "@/app/config/client";
 import {
   getMessageTextContent,
   getMessageTextContentWithoutThinking,
+  getTimeoutMSByModel,
 } from "@/app/utils";
 import { RequestPayload } from "./openai";
 import { fetch } from "@/app/utils/stream";
@@ -116,16 +111,10 @@ export class DeepSeekApi implements LLMApi {
       headers: getHeaders(),
     };

-    // console.log(chatPayload);
-
-    const isR1 =
-      options.config.model.endsWith("-reasoner") ||
-      options.config.model.endsWith("-r1");
-
     // make a fetch request
     const requestTimeoutId = setTimeout(
       () => controller.abort(),
-      isR1 ? REQUEST_TIMEOUT_MS_FOR_THINKING : REQUEST_TIMEOUT_MS,
+      getTimeoutMSByModel(options.config.model),
     );

     if (shouldStream) {
@@ -176,8 +165,8 @@ export class DeepSeekApi implements LLMApi {

         // Skip if both content and reasoning_content are empty or null
         if (
-          (!reasoning || reasoning.trim().length === 0) &&
-          (!content || content.trim().length === 0)
+          (!reasoning || reasoning.length === 0) &&
+          (!content || content.length === 0)
         ) {
           return {
             isThinking: false,
@@ -185,12 +174,12 @@ export class DeepSeekApi implements LLMApi {
           };
         }

-        if (reasoning && reasoning.trim().length > 0) {
+        if (reasoning && reasoning.length > 0) {
           return {
             isThinking: true,
             content: reasoning,
           };
-        } else if (content && content.trim().length > 0) {
+        } else if (content && content.length > 0) {
           return {
             isThinking: false,
             content: content,
diff --git a/app/client/platforms/glm.ts b/app/client/platforms/glm.ts
index a8d1869e30e..98b10277de9 100644
--- a/app/client/platforms/glm.ts
+++ b/app/client/platforms/glm.ts
@@ -1,10 +1,5 @@
 "use client";
-import {
-  ApiPath,
-  CHATGLM_BASE_URL,
-  ChatGLM,
-  REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
+import { ApiPath, CHATGLM_BASE_URL, ChatGLM } from "@/app/constant";
 import {
   useAccessStore,
   useAppConfig,
@@ -21,7 +16,11 @@ import {
   SpeechOptions,
 } from "../api";
 import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent, isVisionModel } from "@/app/utils";
+import {
+  getMessageTextContent,
+  isVisionModel,
+  getTimeoutMSByModel,
+} from "@/app/utils";
 import { RequestPayload } from "./openai";
 import { fetch } from "@/app/utils/stream";
 import { preProcessImageContent } from "@/app/utils/chat";
@@ -191,7 +190,7 @@ export class ChatGLMApi implements LLMApi {

     const requestTimeoutId = setTimeout(
       () => controller.abort(),
-      REQUEST_TIMEOUT_MS,
+      getTimeoutMSByModel(options.config.model),
     );

     if (modelType === "image" || modelType === "video") {
diff --git a/app/client/platforms/google.ts b/app/client/platforms/google.ts
index 1e593dd4257..654f0e3e4c7 100644
--- a/app/client/platforms/google.ts
+++ b/app/client/platforms/google.ts
@@ -1,9 +1,4 @@
-import {
-  ApiPath,
-  Google,
-  REQUEST_TIMEOUT_MS,
-  REQUEST_TIMEOUT_MS_FOR_THINKING,
-} from "@/app/constant";
+import { ApiPath, Google } from "@/app/constant";
 import {
   ChatOptions,
   getHeaders,
@@ -27,6 +22,7 @@ import {
   getMessageTextContent,
   getMessageImages,
   isVisionModel,
+  getTimeoutMSByModel,
 } from "@/app/utils";
 import { preProcessImageContent } from "@/app/utils/chat";
 import { nanoid } from "nanoid";
@@ -206,7 +202,7 @@ export class GeminiProApi implements LLMApi {
     // make a fetch request
     const requestTimeoutId = setTimeout(
       () => controller.abort(),
-      isThinking ? REQUEST_TIMEOUT_MS_FOR_THINKING : REQUEST_TIMEOUT_MS,
+      getTimeoutMSByModel(options.config.model),
     );

     if (shouldStream) {
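Note: the reasoning/content branching is now copied verbatim into several adapters (Alibaba, ByteDance, DeepSeek, and OpenAI below). If it keeps spreading, a shared helper along these lines could collapse the duplication (hypothetical, not part of this patch):

```ts
// Hypothetical helper: collapses the repeated reasoning/content branching
// used by the parseSSE callbacks in this patch.
function pickDelta(
  reasoning: string | null | undefined,
  content: string | null | undefined,
): { isThinking: boolean; content: string } {
  if (reasoning && reasoning.length > 0) {
    return { isThinking: true, content: reasoning };
  }
  if (content && content.length > 0) {
    return { isThinking: false, content };
  }
  // Both empty or null: emit nothing for this event.
  return { isThinking: false, content: "" };
}
```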
diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index fbe533cadab..c6f3fc4253f 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -8,7 +8,6 @@ import {
   Azure,
   REQUEST_TIMEOUT_MS,
   ServiceProvider,
-  REQUEST_TIMEOUT_MS_FOR_THINKING,
 } from "@/app/constant";
 import {
   ChatMessageTool,
@@ -22,7 +21,7 @@ import {
   preProcessImageContent,
   uploadImage,
   base64Image2Blob,
-  stream,
+  streamWithThink,
 } from "@/app/utils/chat";
 import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
 import { ModelSize, DalleQuality, DalleStyle } from "@/app/typing";
@@ -42,6 +41,7 @@ import {
   getMessageTextContent,
   isVisionModel,
   isDalle3 as _isDalle3,
+  getTimeoutMSByModel,
 } from "@/app/utils";

 import { fetch } from "@/app/utils/stream";
@@ -294,7 +294,7 @@ export class ChatGPTApi implements LLMApi {
         useChatStore.getState().currentSession().mask?.plugin || [],
       );
       // console.log("getAsTools", tools, funcs);
-      stream(
+      streamWithThink(
         chatPath,
         requestPayload,
         getHeaders(),
@@ -309,8 +309,12 @@ export class ChatGPTApi implements LLMApi {
             delta: {
               content: string;
               tool_calls: ChatMessageTool[];
+              reasoning_content: string | null;
             };
           }>;
+
+          if (!choices?.length) return { isThinking: false, content: "" };
+
           const tool_calls = choices[0]?.delta?.tool_calls;
           if (tool_calls?.length > 0) {
             const id = tool_calls[0]?.id;
@@ -330,7 +334,37 @@ export class ChatGPTApi implements LLMApi {
               runTools[index]["function"]["arguments"] += args;
             }
           }
-          return choices[0]?.delta?.content;
+
+          const reasoning = choices[0]?.delta?.reasoning_content;
+          const content = choices[0]?.delta?.content;
+
+          // Skip if both content and reasoning_content are empty or null
+          if (
+            (!reasoning || reasoning.length === 0) &&
+            (!content || content.length === 0)
+          ) {
+            return {
+              isThinking: false,
+              content: "",
+            };
+          }
+
+          if (reasoning && reasoning.length > 0) {
+            return {
+              isThinking: true,
+              content: reasoning,
+            };
+          } else if (content && content.length > 0) {
+            return {
+              isThinking: false,
+              content: content,
+            };
+          }
+
+          return {
+            isThinking: false,
+            content: "",
+          };
         },
         // processToolMessage, include tool_calls message and tool call results
         (
@@ -362,9 +396,7 @@ export class ChatGPTApi implements LLMApi {
       // make a fetch request
       const requestTimeoutId = setTimeout(
         () => controller.abort(),
-        isDalle3 || isO1OrO3
-          ? REQUEST_TIMEOUT_MS_FOR_THINKING
-          : REQUEST_TIMEOUT_MS, // dalle3 using b64_json is slow.
+        getTimeoutMSByModel(options.config.model),
       );

       const res = await fetch(chatPath, chatPayload);
diff --git a/app/client/platforms/siliconflow.ts b/app/client/platforms/siliconflow.ts
index 029e5245a8e..34f0844c328 100644
--- a/app/client/platforms/siliconflow.ts
+++ b/app/client/platforms/siliconflow.ts
@@ -4,7 +4,6 @@ import {
   ApiPath,
   SILICONFLOW_BASE_URL,
   SiliconFlow,
-  REQUEST_TIMEOUT_MS_FOR_THINKING,
   DEFAULT_MODELS,
 } from "@/app/constant";
 import {
@@ -27,6 +26,7 @@ import {
   getMessageTextContent,
   getMessageTextContentWithoutThinking,
   isVisionModel,
+  getTimeoutMSByModel,
 } from "@/app/utils";
 import { RequestPayload } from "./openai";

@@ -137,7 +137,7 @@ export class SiliconflowApi implements LLMApi {
     // Use extended timeout for thinking models as they typically require more processing time
     const requestTimeoutId = setTimeout(
       () => controller.abort(),
-      REQUEST_TIMEOUT_MS_FOR_THINKING,
+      getTimeoutMSByModel(options.config.model),
     );

     if (shouldStream) {
diff --git a/app/client/platforms/tencent.ts b/app/client/platforms/tencent.ts
index 580844a5b31..8adeb1b3ea7 100644
--- a/app/client/platforms/tencent.ts
+++ b/app/client/platforms/tencent.ts
@@ -1,5 +1,5 @@
 "use client";
-import { ApiPath, TENCENT_BASE_URL, REQUEST_TIMEOUT_MS } from "@/app/constant";
+import { ApiPath, TENCENT_BASE_URL } from "@/app/constant";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";

 import {
@@ -17,7 +17,11 @@ import {
 } from "@fortaine/fetch-event-source";
 import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent, isVisionModel } from "@/app/utils";
+import {
+  getMessageTextContent,
+  isVisionModel,
+  getTimeoutMSByModel,
+} from "@/app/utils";
 import mapKeys from "lodash-es/mapKeys";
 import mapValues from "lodash-es/mapValues";
 import isArray from "lodash-es/isArray";
@@ -135,7 +139,7 @@ export class HunyuanApi implements LLMApi {
     // make a fetch request
     const requestTimeoutId = setTimeout(
       () => controller.abort(),
-      REQUEST_TIMEOUT_MS,
+      getTimeoutMSByModel(options.config.model),
     );

     if (shouldStream) {
diff --git a/app/client/platforms/xai.ts b/app/client/platforms/xai.ts
index 8c41c2d988f..830ad4778ac 100644
--- a/app/client/platforms/xai.ts
+++ b/app/client/platforms/xai.ts
@@ -1,6 +1,6 @@
 "use client";
 // azure and openai, using same models. so using same LLMApi.
-import { ApiPath, XAI_BASE_URL, XAI, REQUEST_TIMEOUT_MS } from "@/app/constant";
+import { ApiPath, XAI_BASE_URL, XAI } from "@/app/constant";
 import {
   useAccessStore,
   useAppConfig,
@@ -17,6 +17,7 @@ import {
   SpeechOptions,
 } from "../api";
 import { getClientConfig } from "@/app/config/client";
+import { getTimeoutMSByModel } from "@/app/utils";
 import { preProcessImageContent } from "@/app/utils/chat";
 import { RequestPayload } from "./openai";
 import { fetch } from "@/app/utils/stream";
@@ -103,7 +104,7 @@ export class XAIApi implements LLMApi {
     // make a fetch request
     const requestTimeoutId = setTimeout(
       () => controller.abort(),
-      REQUEST_TIMEOUT_MS,
+      getTimeoutMSByModel(options.config.model),
     );

     if (shouldStream) {
diff --git a/app/utils.ts b/app/utils.ts
index f2337801976..6183e03b057 100644
--- a/app/utils.ts
+++ b/app/utils.ts
@@ -2,7 +2,11 @@ import { useEffect, useState } from "react";
 import { showToast } from "./components/ui-lib";
 import Locale from "./locales";
 import { RequestMessage } from "./client/api";
-import { ServiceProvider } from "./constant";
+import {
+  REQUEST_TIMEOUT_MS,
+  REQUEST_TIMEOUT_MS_FOR_THINKING,
+  ServiceProvider,
+} from "./constant";
 // import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";
 import { fetch as tauriStreamFetch } from "./utils/stream";
 import { VISION_MODEL_REGEXES, EXCLUDE_VISION_MODEL_REGEXES } from "./constant";
@@ -292,6 +296,20 @@ export function isDalle3(model: string) {
   return "dall-e-3" === model;
 }

+export function getTimeoutMSByModel(model: string) {
+  model = model.toLowerCase();
+  if (
+    model.startsWith("dall-e") ||
+    model.startsWith("dalle") ||
+    model.startsWith("o1") ||
+    model.startsWith("o3") ||
+    model.includes("deepseek-r") ||
+    model.includes("-thinking")
+  )
+    return REQUEST_TIMEOUT_MS_FOR_THINKING;
+  return REQUEST_TIMEOUT_MS;
+}
+
 export function getModelSizes(model: string): ModelSize[] {
   if (isDalle3(model)) {
     return ["1024x1024", "1792x1024", "1024x1792"];
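Note: a quick check of how the new helper routes model names, based on the prefixes and substrings above (the model names are examples; the returned constants come from `app/constant.ts`):

```ts
import { getTimeoutMSByModel } from "@/app/utils";

// Extended timeout: image generation and reasoning/thinking models.
getTimeoutMSByModel("dall-e-3"); // REQUEST_TIMEOUT_MS_FOR_THINKING
getTimeoutMSByModel("o1-mini"); // REQUEST_TIMEOUT_MS_FOR_THINKING
getTimeoutMSByModel("deepseek-reasoner"); // matches "deepseek-r"
getTimeoutMSByModel("gemini-2.0-flash-thinking-exp"); // matches "-thinking"

// Default timeout: everything else.
getTimeoutMSByModel("gpt-4o-mini"); // REQUEST_TIMEOUT_MS
getTimeoutMSByModel("qwen-max"); // REQUEST_TIMEOUT_MS
```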
diff --git a/app/utils/chat.ts b/app/utils/chat.ts
index b77955e6ecf..efc496f2c32 100644
--- a/app/utils/chat.ts
+++ b/app/utils/chat.ts
@@ -400,6 +400,7 @@ export function streamWithThink(
   let responseRes: Response;
   let isInThinkingMode = false;
   let lastIsThinking = false;
+  let lastIsThinkingTagged = false; // between <think> and </think> tags

   // animate response to make it looks smooth
   function animateResponseText() {
@@ -579,6 +580,23 @@ export function streamWithThink(
         if (!chunk?.content || chunk.content.length === 0) {
           return;
         }
+
+        // deal with <think> and </think> tags start
+        if (!chunk.isThinking) {
+          if (chunk.content.startsWith("<think>")) {
+            chunk.isThinking = true;
+            chunk.content = chunk.content.slice(7).trim();
+            lastIsThinkingTagged = true;
+          } else if (chunk.content.endsWith("</think>")) {
+            chunk.isThinking = false;
+            chunk.content = chunk.content.slice(0, -8).trim();
+            lastIsThinkingTagged = false;
+          } else if (lastIsThinkingTagged) {
+            chunk.isThinking = true;
+          }
+        }
+        // deal with <think> and </think> tags end
+
         // Check if thinking mode changed
         const isThinkingChanged = lastIsThinking !== chunk.isThinking;
         lastIsThinking = chunk.isThinking;
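Note: the tag handling above only inspects chunk boundaries, so it assumes `<think>` arrives at the start of a chunk and `</think>` at the end of one; a tag split across two SSE events would slip through, and any text preceding `</think>` in the closing chunk is emitted as answer text rather than reasoning. A hand-written trace under those assumptions:

```ts
// Trace of the tag state machine above, assuming each tag arrives whole
// within a single chunk.
const chunks = [
  "<think>Check the docs first.",
  "Still reasoning here.",
  "Done.</think>",
  "The answer is 42.",
];

let lastIsThinkingTagged = false;
for (let content of chunks) {
  let isThinking = false;
  if (content.startsWith("<think>")) {
    isThinking = true;
    content = content.slice(7).trim(); // drop "<think>" (7 chars)
    lastIsThinkingTagged = true;
  } else if (content.endsWith("</think>")) {
    isThinking = false;
    content = content.slice(0, -8).trim(); // drop "</think>" (8 chars)
    lastIsThinkingTagged = false;
  } else if (lastIsThinkingTagged) {
    isThinking = true;
  }
  console.log(isThinking, content);
}
// true  "Check the docs first."
// true  "Still reasoning here."
// false "Done."            <- trailing text of the closing chunk
// false "The answer is 42."
```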