diff --git a/.gitignore b/.gitignore index b2434b249..66eefacc9 100644 --- a/.gitignore +++ b/.gitignore @@ -98,7 +98,7 @@ playwright/.cache/ # Midscene.js dump files __ai_responses__/ -midscene_run +midscene_run/ .nx/cache .nx/workspace-data diff --git a/biome.json b/biome.json index d9acb001a..b045a4864 100644 --- a/biome.json +++ b/biome.json @@ -7,6 +7,7 @@ "**/midscene_run", ".nx", "**/dist", + "dist", "**/doc_build", "*-dump.json", "script_get_all_texts.tmp.js", diff --git a/packages/cli/package.json b/packages/cli/package.json index b64183b41..e743a3a5c 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -7,10 +7,7 @@ "bin": { "midscene": "./bin/midscene" }, - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "scripts": { "dev": "modern dev", "build": "modern build", diff --git a/packages/midscene/package.json b/packages/midscene/package.json index f81cad17d..c3a3c3bd8 100644 --- a/packages/midscene/package.json +++ b/packages/midscene/package.json @@ -6,10 +6,7 @@ "main": "./dist/lib/index.js", "module": "./dist/es/index.js", "types": "./dist/types/index.d.ts", - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "exports": { ".": { "types": "./dist/types/index.d.ts", @@ -44,18 +41,10 @@ }, "typesVersions": { "*": { - ".": [ - "./dist/types/index.d.ts" - ], - "utils": [ - "./dist/types/utils.d.ts" - ], - "ai-model": [ - "./dist/types/ai-model.d.ts" - ], - "image": [ - "./dist/types/image.d.ts" - ] + ".": ["./dist/types/index.d.ts"], + "utils": ["./dist/types/utils.d.ts"], + "ai-model": ["./dist/types/ai-model.d.ts"], + "image": ["./dist/types/image.d.ts"] } }, "scripts": { @@ -70,9 +59,11 @@ }, "dependencies": { "openai": "4.47.1", - "sharp": "0.33.3" + "sharp": "0.33.3", + "node-fetch": "2.6.7" }, "devDependencies": { + "@types/node-fetch": "2.6.11", "@modern-js/module-tools": "^2.56.1", "@types/node": "^18.0.0", "langsmith": "0.1.36", diff --git a/packages/midscene/src/ai-model/automation/index.ts b/packages/midscene/src/ai-model/automation/index.ts new file mode 100644 index 000000000..bcfd2bcca --- /dev/null +++ b/packages/midscene/src/ai-model/automation/index.ts @@ -0,0 +1,82 @@ +import assert from 'node:assert'; +import type { PlanningAIResponse, PlanningAction, UIContext } from '@/types'; +import { AIActionType, type AIArgs, callAiFn } from '../common'; +import { describeUserPage } from '../prompt/util'; +import { systemPromptToTaskPlanning } from './planning'; + +export async function plan( + userPrompt: string, + opts: { + context: UIContext; + callAI?: typeof callAiFn; + }, + useModel?: 'coze' | 'openAI', +): Promise<{ plans: PlanningAction[] }> { + const { callAI, context } = opts || {}; + const { screenshotBase64 } = context; + const { description: pageDescription } = await describeUserPage(context); + let planFromAI: PlanningAIResponse | undefined; + + const systemPrompt = systemPromptToTaskPlanning(); + const msgs: AIArgs = [ + { role: 'system', content: systemPrompt }, + { + role: 'user', + content: [ + { + type: 'image_url', + image_url: { + url: screenshotBase64, + detail: 'high', + }, + }, + { + type: 'text', + text: ` + pageDescription: ${pageDescription} + `, + }, + { + type: 'text', + text: ` + Here is the description of the task. Just go ahead: + ===================================== + ${userPrompt} + ===================================== + `, + }, + ], + }, + ]; + + if (callAI) { + planFromAI = await callAI({ + msgs, + AIActionType: AIActionType.PLAN, + useModel, + }); + } else { + planFromAI = await callAiFn({ + msgs, + AIActionType: AIActionType.PLAN, + useModel, + }); + } + + const actions = planFromAI?.actions || []; + + assert(planFromAI, "can't get planFromAI"); + assert(actions && actions.length > 0, 'no actions in ai plan'); + + if (planFromAI.error) { + throw new Error(planFromAI.error); + } + + actions.forEach((task) => { + if (task.type === 'Error') { + throw new Error(task.thought); + } + }); + + return { plans: actions }; +} diff --git a/packages/midscene/src/automation/planning.ts b/packages/midscene/src/ai-model/automation/planning.ts similarity index 57% rename from packages/midscene/src/automation/planning.ts rename to packages/midscene/src/ai-model/automation/planning.ts index 7d20febee..428b4bc7a 100644 --- a/packages/midscene/src/automation/planning.ts +++ b/packages/midscene/src/ai-model/automation/planning.ts @@ -1,14 +1,6 @@ -import { describeUserPage } from '@/ai-model'; -import { callToGetJSONObject } from '@/ai-model/openai'; -import type { PlanningAIResponse, PlanningAction, UIContext } from '@/types'; -import type { ChatCompletionMessageParam } from 'openai/resources'; - -const characteristic = - 'You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.'; - -export function systemPromptToTaskPlanning(query: string) { +export function systemPromptToTaskPlanning() { return ` - ${characteristic} + You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users. Based on the page context information (screenshot and description) you get, decompose the task user asked into a series of actions. Actions are executed in the order listed in the list. After executing the actions, the task should be completed. @@ -40,7 +32,7 @@ export function systemPromptToTaskPlanning(query: string) { 1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context. 2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that. - If any error occurs during the task planning (like the page content and task are irrelevant, or the element mentioned does not exist at all), please return the error message with explanation in the errors field. The thoughts、prompts、error messages should all in the same language as the user query. + If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query. Return in the following JSON format: { @@ -55,57 +47,5 @@ export function systemPromptToTaskPlanning(query: string) { ], error?: string, // Overall error messages. If there is any error occurs during the task planning (i.e. error in previous 'actions' array), conclude the errors again, put error messages here } - - Here is the description of the task. Just go ahead: - ===================================== - ${query} - ===================================== `; } - -export async function plan( - userPrompt: string, - opts: { - context: UIContext; - callAI?: typeof callToGetJSONObject; - }, -): Promise<{ plans: PlanningAction[] }> { - const { callAI = callToGetJSONObject, context } = - opts || {}; - const { screenshotBase64 } = context; - const { description } = await describeUserPage(context); - const systemPrompt = systemPromptToTaskPlanning(userPrompt); - const msgs: ChatCompletionMessageParam[] = [ - { role: 'system', content: systemPrompt }, - { - role: 'user', - content: [ - { - type: 'image_url', - image_url: { - url: screenshotBase64, - detail: 'high', - }, - }, - { - type: 'text', - text: description, - }, - ], - }, - ]; - - const planFromAI = await callAI(msgs); - if (planFromAI.error) { - throw new Error(planFromAI.error); - } - - const { actions } = planFromAI; - actions.forEach((task) => { - if (task.type === 'Error') { - throw new Error(task.thought); - } - }); - - return { plans: actions }; -} diff --git a/packages/midscene/src/ai-model/common.ts b/packages/midscene/src/ai-model/common.ts new file mode 100644 index 000000000..209d7b119 --- /dev/null +++ b/packages/midscene/src/ai-model/common.ts @@ -0,0 +1,63 @@ +import type { + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, +} from 'openai/resources'; +import { + COZE_AI_ACTION_BOT_ID, + COZE_AI_ASSERT_BOT_ID, + COZE_EXTRACT_INFO_BOT_ID, + COZE_INSPECT_ELEMENT_BOT_ID, + callCozeAi, + transfromOpenAiArgsToCoze, + useCozeModel, +} from './coze'; +import { callToGetJSONObject, useOpenAIModel } from './openai'; + +export type AIArgs = [ + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, +]; + +export enum AIActionType { + ASSERT = 0, + INSPECT_ELEMENT = 1, + EXTRACT_DATA = 2, + PLAN = 3, +} + +export async function callAiFn(options: { + msgs: AIArgs; + AIActionType: AIActionType; + useModel?: 'openAI' | 'coze'; +}) { + const { useModel, msgs, AIActionType: AIActionTypeValue } = options; + if (useOpenAIModel(useModel)) { + const parseResult = await callToGetJSONObject(msgs); + return parseResult; + } + + if (useCozeModel(useModel)) { + let botId = ''; + switch (AIActionTypeValue) { + case AIActionType.ASSERT: + botId = COZE_AI_ASSERT_BOT_ID; + break; + case AIActionType.EXTRACT_DATA: + botId = COZE_EXTRACT_INFO_BOT_ID; + break; + case AIActionType.INSPECT_ELEMENT: + botId = COZE_INSPECT_ELEMENT_BOT_ID; + break; + default: + botId = COZE_AI_ACTION_BOT_ID; + } + const cozeMsg = transfromOpenAiArgsToCoze(msgs[1]); + const parseResult = await callCozeAi({ + ...cozeMsg, + botId, + }); + return parseResult; + } + + throw Error('Does not contain coze and openai environment variables'); +} diff --git a/packages/midscene/src/ai-model/coze/index.ts b/packages/midscene/src/ai-model/coze/index.ts new file mode 100644 index 000000000..a17e70646 --- /dev/null +++ b/packages/midscene/src/ai-model/coze/index.ts @@ -0,0 +1,108 @@ +import assert from 'node:assert'; +import fetch from 'node-fetch'; +import type { ChatCompletionUserMessageParam } from 'openai/resources'; + +export const COZE_INSPECT_ELEMENT_BOT_ID = + process.env.COZE_INSPECT_ELEMENT_BOT_ID || ''; +export const COZE_AI_ACTION_BOT_ID = process.env.COZE_AI_ACTION_BOT_ID || ''; +export const COZE_AI_ASSERT_BOT_ID = process.env.COZE_AI_ASSERT_BOT_ID || ''; +export const COZE_EXTRACT_INFO_BOT_ID = + process.env.COZE_EXTRACT_INFO_BOT_ID || ''; + +export const COZE_BOT_TOKEN = 'COZE_BOT_TOKEN'; + +export function useCozeModel(useModel?: 'coze' | 'openAI') { + if (useModel && useModel !== 'coze') return false; + return ( + process.env[COZE_BOT_TOKEN] && + process.env.COZE_INSPECT_ELEMENT_BOT_ID && + process.env.COZE_AI_ACTION_BOT_ID && + process.env.COZE_AI_ASSERT_BOT_ID && + process.env.COZE_EXTRACT_INFO_BOT_ID + ); +} + +export async function callCozeAi(options: { + query: string; + imgs: Array; + botId: string; +}): Promise { + const { query, imgs, botId } = options; + const completion = await fetch('https://api.coze.com/open_api/v2/chat', { + method: 'POST', + headers: { + Authorization: `Bearer ${process.env[COZE_BOT_TOKEN]}`, + 'Content-Type': 'application/json', + Accept: '*/*', + Host: 'api.coze.com', + Connection: 'keep-alive', + }, + body: JSON.stringify({ + conversation_id: '123', + bot_id: botId, + user: '29032201862555', + query, + meta_data: { + img: imgs.map((imgPath) => { + return { + url: imgPath, + }; + }), + }, + stream: false, + }), + }); + if (!completion.ok) { + console.error('CozeAI reponse error', completion); + throw new Error('Network response was not ok'); + } + + const aiResponse = await completion.json(); + if (aiResponse.code !== 0) { + console.error('CozeAI error response', aiResponse); + throw new Error('CozeAI error response', aiResponse); + } + + if (!aiResponse?.messages || !aiResponse?.messages[0]?.content) { + console.error('aiResponse', aiResponse); + throw new Error('aiResponse is undefined', aiResponse); + } + const parseContent = aiResponse?.messages[0]?.content; + assert(parseContent, 'empty content'); + try { + return JSON.parse(parseContent); + } catch (err) { + console.error("can't parse coze content", aiResponse, err); + throw Error("can't parse coze content"); + } +} + +export function transfromOpenAiArgsToCoze(msg: ChatCompletionUserMessageParam) { + if (msg.role !== 'user') throw Error(`can't transfrom ${msg} to coze args`); + // const query = ''; + // const imgs = msg.content + if (typeof msg.content === 'string') { + return { + query: msg.content, + imgs: [], + }; + } + + return { + query: msg.content.reduce((res, next) => { + if (next.type === 'text') { + res += `\n${next.text}`; + } + return res; + }, ''), + imgs: msg.content.reduce( + (res, next) => { + if (next.type === 'image_url') { + res.push(next.image_url.url); + } + return res; + }, + [] as Array, + ), + }; +} diff --git a/packages/midscene/src/ai-model/index.ts b/packages/midscene/src/ai-model/index.ts index a60c99d9b..3fb9fcbcf 100644 --- a/packages/midscene/src/ai-model/index.ts +++ b/packages/midscene/src/ai-model/index.ts @@ -4,4 +4,7 @@ export { describeUserPage } from './prompt/util'; export type { ChatCompletionMessageParam } from 'openai/resources'; -export { AiInspectElement, AiExtractElementInfo } from './inspect'; +export { AiInspectElement, AiExtractElementInfo, AiAssert } from './inspect'; + +export { plan } from './automation'; +export { callAiFn } from './common'; diff --git a/packages/midscene/src/ai-model/inspect.ts b/packages/midscene/src/ai-model/inspect.ts index 447c9c388..e003de2d9 100644 --- a/packages/midscene/src/ai-model/inspect.ts +++ b/packages/midscene/src/ai-model/inspect.ts @@ -6,35 +6,42 @@ import type { BaseElement, UIContext, } from '@/types'; -import type { ChatCompletionMessageParam } from 'openai/resources'; -import { callToGetJSONObject } from './openai'; -import { systemPromptToFindElement } from './prompt/element_inspector'; +import type { + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, +} from 'openai/resources'; +import { AIActionType, callAiFn } from './common'; +import { + multiDescription, + systemPromptToFindElement, +} from './prompt/element_inspector'; import { describeUserPage, systemPromptToAssert, systemPromptToExtract, } from './prompt/util'; +export type AIArgs = [ + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, +]; + export async function AiInspectElement< ElementType extends BaseElement = BaseElement, >(options: { context: UIContext; multi: boolean; findElementDescription: string; - callAI?: typeof callToGetJSONObject; + callAI?: typeof callAiFn; + useModel?: 'coze' | 'openAI'; }) { - const { - context, - multi, - findElementDescription, - callAI = callToGetJSONObject, - } = options; + const { context, multi, findElementDescription, callAI, useModel } = options; const { screenshotBase64 } = context; const { description, elementById } = await describeUserPage(context); - const systemPrompt = systemPromptToFindElement(findElementDescription, multi); + const systemPrompt = systemPromptToFindElement(); - const msgs: ChatCompletionMessageParam[] = [ + const msgs: AIArgs = [ { role: 'system', content: systemPrompt }, { role: 'user', @@ -48,14 +55,46 @@ export async function AiInspectElement< }, { type: 'text', - text: description, + text: ` + pageDescription: \n + ${description} + `, + }, + { + type: 'text', + text: ` + Here is the description of the findElement. Just go ahead: + ===================================== + ${JSON.stringify({ + description: findElementDescription, + multi: multiDescription(multi), + })} + ===================================== + `, }, ], }, ]; - const parseResult = await callAI(msgs); + + if (callAI) { + const parseResult = await callAI({ + msgs, + AIActionType: AIActionType.INSPECT_ELEMENT, + useModel, + }); + return { + parseResult, + elementById, + }; + } + const inspectElement = await callAiFn({ + msgs, + AIActionType: AIActionType.INSPECT_ELEMENT, + useModel, + }); + return { - parseResult, + parseResult: inspectElement, elementById, }; } @@ -65,25 +104,16 @@ export async function AiExtractElementInfo< ElementType extends BaseElement = BaseElement, >(options: { dataQuery: string | Record; - sectionConstraints: { - name: string; - description: string; - }[]; context: UIContext; - callAI?: typeof callToGetJSONObject; + useModel?: 'coze' | 'openAI'; }) { - const { - dataQuery, - sectionConstraints, - context, - callAI = callToGetJSONObject, - } = options; - const systemPrompt = systemPromptToExtract(dataQuery, sectionConstraints); + const { dataQuery, context, useModel } = options; + const systemPrompt = systemPromptToExtract(); const { screenshotBase64 } = context; const { description, elementById } = await describeUserPage(context); - const msgs: ChatCompletionMessageParam[] = [ + const msgs: AIArgs = [ { role: 'system', content: systemPrompt }, { role: 'user', @@ -96,13 +126,31 @@ export async function AiExtractElementInfo< }, { type: 'text', - text: description, + text: ` +pageDescription: ${description} + +Use your extract_data_from_UI skill to find the following data, placing it in the \`data\` field +DATA_DEMAND start: +===================================== +${ + typeof dataQuery === 'object' + ? `return in key-value style object, keys are ${Object.keys(dataQuery).join(',')}` + : '' +}; +${typeof dataQuery === 'string' ? dataQuery : JSON.stringify(dataQuery, null, 2)} +===================================== +DATA_DEMAND ends. + `, }, ], }, ]; - const parseResult = await callAI>(msgs); + const parseResult = await callAiFn>({ + msgs, + useModel, + AIActionType: AIActionType.EXTRACT_DATA, + }); return { parseResult, elementById, @@ -114,17 +162,17 @@ export async function AiAssert< >(options: { assertion: string; context: UIContext; - callAI?: typeof callToGetJSONObject; + useModel?: 'coze' | 'openAI'; }) { - const { assertion, context, callAI = callToGetJSONObject } = options; + const { assertion, context, useModel } = options; assert(assertion, 'assertion should be a string'); - const systemPrompt = systemPromptToAssert(assertion); const { screenshotBase64 } = context; const { description, elementById } = await describeUserPage(context); + const systemPrompt = systemPromptToAssert(); - const msgs: ChatCompletionMessageParam[] = [ + const msgs: AIArgs = [ { role: 'system', content: systemPrompt }, { role: 'user', @@ -137,12 +185,29 @@ export async function AiAssert< }, { type: 'text', - text: description, + text: ` + pageDescription: \n + ${description} + `, + }, + { + type: 'text', + text: ` + Here is the description of the assertion. Just go ahead: + ===================================== + ${assertion} + ===================================== + `, }, ], }, ]; - const assertResult = await callAI(msgs); + const assertResult = await callAiFn({ + msgs, + AIActionType: AIActionType.ASSERT, + useModel, + }); return assertResult; } +export { callAiFn }; diff --git a/packages/midscene/src/ai-model/openai.ts b/packages/midscene/src/ai-model/openai/index.ts similarity index 58% rename from packages/midscene/src/ai-model/openai.ts rename to packages/midscene/src/ai-model/openai/index.ts index e4e6f7d10..33a5bf8d6 100644 --- a/packages/midscene/src/ai-model/openai.ts +++ b/packages/midscene/src/ai-model/openai/index.ts @@ -4,29 +4,38 @@ import { wrapOpenAI } from 'langsmith/wrappers'; import OpenAI, { type ClientOptions } from 'openai'; import type { ChatCompletionMessageParam } from 'openai/resources'; -const envConfigKey = 'MIDSCENE_OPENAI_INIT_CONFIG_JSON'; -const envModelKey = 'MIDSCENE_MODEL_NAME'; -const envSmithDebug = 'MIDSCENE_LANGSMITH_DEBUG'; +export const MIDSCENE_OPENAI_INIT_CONFIG_JSON = + 'MIDSCENE_OPENAI_INIT_CONFIG_JSON'; +export const MIDSCENE_MODEL_NAME = 'MIDSCENE_MODEL_NAME'; +export const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG'; +export const OPENAI_API_KEY = 'OPENAI_API_KEY'; + +export function useOpenAIModel(useModel?: 'coze' | 'openAI') { + if (useModel && useModel !== 'openAI') return false; + if (process.env[OPENAI_API_KEY]) return true; + + return Boolean(process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON]); +} let extraConfig: ClientOptions = {}; if ( - typeof process.env[envConfigKey] === 'string' && - process.env[envConfigKey] + typeof process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] === 'string' && + process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] ) { console.log('config for openai loaded'); - extraConfig = JSON.parse(process.env[envConfigKey]); + extraConfig = JSON.parse(process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON]); } let model = 'gpt-4o'; -if (typeof process.env[envModelKey] === 'string') { - console.log(`model: ${process.env[envModelKey]}`); - model = process.env[envModelKey]; +if (typeof process.env[MIDSCENE_MODEL_NAME] === 'string') { + console.log(`model: ${process.env[MIDSCENE_MODEL_NAME]}`); + model = process.env[MIDSCENE_MODEL_NAME]; } async function createOpenAI() { const openai = new OpenAI(extraConfig); - if (process.env[envSmithDebug]) { + if (process.env[MIDSCENE_LANGSMITH_DEBUG]) { console.log('DEBUGGING MODE: langsmith wrapper enabled'); const openai = wrapOpenAI(new OpenAI()); return openai; diff --git a/packages/midscene/src/ai-model/prompt/element_inspector.ts b/packages/midscene/src/ai-model/prompt/element_inspector.ts index 056ff7c89..b762e5de7 100644 --- a/packages/midscene/src/ai-model/prompt/element_inspector.ts +++ b/packages/midscene/src/ai-model/prompt/element_inspector.ts @@ -1,7 +1,4 @@ -export function systemPromptToFindElement( - description: string, - multi?: boolean, -) { +export function systemPromptToFindElement() { return ` ## Role: You are an expert in software page image (2D) and page element text analysis. @@ -17,12 +14,8 @@ You are an expert in software page image (2D) and page element text analysis. ## Workflow: 1. Receive the user's element description, screenshot, and element description information. Note that the text may contain non-English characters (e.g., Chinese), indicating that the application may be non-English. -2. Based on the description (${description}), locate the target element ID in the list of element descriptions and the screenshot. -3. Return the number of elements: ${ - multi - ? 'multiple elements matching the description (two or more)' - : 'The element closest to the description (only one)' - }. +2. Based on the user's description, locate the target element ID in the list of element descriptions and the screenshot. +3. Found the required number of elements 4. Return JSON data containing the selection reason and element ID. ## Constraints: @@ -54,6 +47,8 @@ Input Example: \`\`\`json // Description: "Shopping cart icon in the upper right corner" { + "description": "PLACEHOLDER", // Description of the target element + "multi": "PLACEHOLDER", //Find the number of elements "screenshot": "path/screenshot.png", "text": '{ "pageSize": { @@ -134,3 +129,9 @@ Output Example: `; } + +export function multiDescription(multi: boolean) { + return multi + ? 'multiple elements matching the description (two or more)' + : 'The element closest to the description (only one)'; +} diff --git a/packages/midscene/src/ai-model/prompt/util.ts b/packages/midscene/src/ai-model/prompt/util.ts index 7dbd0fd6d..7f2463741 100644 --- a/packages/midscene/src/ai-model/prompt/util.ts +++ b/packages/midscene/src/ai-model/prompt/util.ts @@ -95,54 +95,40 @@ export function promptsOfSectionQuery( return `${instruction}\n${constraints.map(singleSection).join('\n')}`; } -export function systemPromptToExtract( - dataQuery: Record | string, - sections?: BasicSectionQuery[], -) { - const allSectionNames: string[] = - sections?.filter((c) => c.name).map((c) => c.name || '') || []; - const sectionFindingPrompt = promptsOfSectionQuery(sections || []); - const sectionReturnFormat = allSectionNames.length - ? ' sections: [], // detailed information of each section from segment_a_web_page skill' - : ''; - +export function systemPromptToExtract() { return ` -${characteristic} -${contextFormatIntro} +You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users. +The user will give you a screenshot and the texts on it. There may be some none-English characters (like Chinese) on it, indicating it's an non-English app. You have the following skills: -${allSectionNames.length ? skillSegment : ''} -${skillExtractData} - -Now, do the following jobs: -${sectionFindingPrompt} -Use your extract_data_from_UI skill to find the following data, placing it in the \`data\` field -DATA_DEMAND start: -${ - typeof dataQuery === 'object' - ? `return in key-value style object, keys are ${Object.keys(dataQuery).join(',')}` - : '' -}; -${typeof dataQuery === 'string' ? dataQuery : JSON.stringify(dataQuery, null, 2)} -DATA_DEMAND ends. + +skill name: extract_data_from_UI +related input: DATA_DEMAND +skill content: +* User will give you some data requirements in DATA_DEMAND. Consider the UI context, follow the user's instructions, and provide comprehensive data accordingly. +* There may be some special commands in DATA_DEMAND, please pay extra attention + - LOCATE_ONE_ELEMENT and LOCATE_ONE_OR_MORE_ELEMENTS: if you see a description that mentions the keyword LOCATE_ONE_ELEMENT + - LOCATE_ONE_OR_MORE_ELEMENTS(e.g. follow LOCATE_ONE_ELEMENT : i want to find ...), it means user wants to locate a specific element meets the description. + +Return in this way: prefix + the id / comma-separated ids, for example: LOCATE_ONE_ELEMENT/1 , LOCATE_ONE_OR_MORE_ELEMENTS/1,2,3 . If not found, keep the prefix and leave the suffix empty, like LOCATE_ONE_ELEMENT/ . + + Return in the following JSON format: { language: "en", // "en" or "zh", the language of the page. Use the same language to describe section name, description, and similar fields. - ${sectionReturnFormat} data: any, // the extracted data from extract_data_from_UI skill. Make sure both the value and scheme meet the DATA_DEMAND. - errors?: [], // string[], error message if any + errors: [], // string[], error message if any } `; } -export function systemPromptToAssert(assertion: string) { +export function systemPromptToAssert() { return ` ${characteristic} ${contextFormatIntro} -Based on the information you get, assert the following: -${assertion} +Based on the information you get, Return assertion judgment: Return in the following JSON format: { diff --git a/packages/midscene/src/automation/index.ts b/packages/midscene/src/automation/index.ts deleted file mode 100644 index fed4539f7..000000000 --- a/packages/midscene/src/automation/index.ts +++ /dev/null @@ -1 +0,0 @@ -export { plan } from './planning'; diff --git a/packages/midscene/src/automation/web/puppeteer/index.ts b/packages/midscene/src/automation/web/puppeteer/index.ts deleted file mode 100644 index e69de29bb..000000000 diff --git a/packages/midscene/src/index.ts b/packages/midscene/src/index.ts index a1d0da458..b6bc6ff69 100644 --- a/packages/midscene/src/index.ts +++ b/packages/midscene/src/index.ts @@ -3,7 +3,7 @@ import Insight from './insight'; import { getElement, getSection } from './query'; import { setDumpDir } from './utils'; -export { plan } from './automation'; +export { plan } from './ai-model'; export * from './types'; export default Insight; diff --git a/packages/midscene/src/insight/index.ts b/packages/midscene/src/insight/index.ts index 157ebe79e..29302bcc8 100644 --- a/packages/midscene/src/insight/index.ts +++ b/packages/midscene/src/insight/index.ts @@ -4,7 +4,7 @@ import { AiInspectElement, callToGetJSONObject as callAI, } from '@/ai-model/index'; -import { AiAssert } from '@/ai-model/inspect'; +import { AiAssert, callAiFn } from '@/ai-model/inspect'; import type { AIElementParseResponse, BaseElement, @@ -38,7 +38,7 @@ const sortByOrder = (a: UISection, b: UISection) => { export interface LocateOpts { multi?: boolean; - callAI?: typeof callAI; + callAI?: typeof callAiFn; } // export type UnwrapDataShape = T extends EnhancedQuery ? DataShape : {}; @@ -53,7 +53,7 @@ export default class Insight< > { contextRetrieverFn: () => Promise | ContextType; - aiVendorFn: typeof callAI = callAI; + aiVendorFn: (...args: Array) => Promise = callAiFn; onceDumpUpdatedFn?: DumpSubscriber; @@ -80,14 +80,14 @@ export default class Insight< async locate( queryPrompt: string, - opt?: { callAI: LocateOpts['callAI'] }, + opt?: { callAI?: typeof callAiFn }, ): Promise; async locate( queryPrompt: string, opt: { multi: true }, ): Promise; async locate(queryPrompt: string, opt?: LocateOpts) { - const { callAI = this.aiVendorFn, multi = false } = opt || {}; + const { callAI, multi = false } = opt || {}; assert(queryPrompt, 'query is required for located'); const dumpSubscriber = this.onceDumpUpdatedFn; this.onceDumpUpdatedFn = undefined; @@ -177,46 +177,19 @@ export default class Insight< async extract(input: Record): Promise; async extract(dataDemand: InsightExtractParam): Promise { - let dataQuery: Record | string = {}; - const sectionQueryMap: Record = {}; assert( typeof dataDemand === 'object' || typeof dataDemand === 'string', `dataDemand should be object or string, but get ${typeof dataDemand}`, ); const dumpSubscriber = this.onceDumpUpdatedFn; this.onceDumpUpdatedFn = undefined; - if (typeof dataDemand === 'string') { - dataQuery = dataDemand; - } else { - // filter all sectionQuery - for (const key in dataDemand) { - const query = dataDemand[key]; - const sectionQuery = extractSectionQuery(query); - if (sectionQuery) { - sectionQueryMap[key] = sectionQuery; - } else { - dataQuery[key] = query; - } - } - dataQuery = dataDemand; - } - - const sectionConstraints = Object.keys(sectionQueryMap).map((name) => { - const sectionQueryPrompt = sectionQueryMap[name]; - return { - name, - description: sectionQueryPrompt || '', - }; - }); const context = await this.contextRetrieverFn(); const startTime = Date.now(); const { parseResult, elementById } = await AiExtractElementInfo({ context, - dataQuery, - sectionConstraints, - callAI: this.aiVendorFn, + dataQuery: dataDemand, }); const timeCost = Date.now() - startTime; @@ -326,7 +299,6 @@ export default class Insight< const startTime = Date.now(); const assertResult = await AiAssert({ assertion, - callAI: this.aiVendorFn, context, }); diff --git a/packages/midscene/tests/ai-model/assert/assert-coze.test.ts b/packages/midscene/tests/ai-model/assert/assert-coze.test.ts new file mode 100644 index 000000000..666824c34 --- /dev/null +++ b/packages/midscene/tests/ai-model/assert/assert-coze.test.ts @@ -0,0 +1,34 @@ +import { AiAssert } from '@/ai-model'; +import { getPageDataOfTestName } from 'tests/ai-model/inspector/util'; +/* eslint-disable max-lines-per-function */ +import { describe, expect, it, vi } from 'vitest'; + +vi.setConfig({ + testTimeout: 180 * 1000, + hookTimeout: 30 * 1000, +}); + +const useModel = 'coze'; +describe('assert', () => { + it('todo pass', async () => { + const { context } = await getPageDataOfTestName('todo'); + + const { pass } = await AiAssert({ + assertion: 'Three tasks have been added', + context, + useModel, + }); + expect(pass).toBe(true); + }); + + it('todo error', async () => { + const { context } = await getPageDataOfTestName('todo'); + + const { pass, thought } = await AiAssert({ + assertion: 'There are four tasks in the task list', + context, + useModel, + }); + expect(pass).toBe(false); + }); +}); diff --git a/packages/midscene/tests/ai-model/assert/assert-openai.test.ts b/packages/midscene/tests/ai-model/assert/assert-openai.test.ts new file mode 100644 index 000000000..b7317c8d0 --- /dev/null +++ b/packages/midscene/tests/ai-model/assert/assert-openai.test.ts @@ -0,0 +1,34 @@ +import { AiAssert } from '@/ai-model'; +import { getPageDataOfTestName } from 'tests/ai-model/inspector/util'; +/* eslint-disable max-lines-per-function */ +import { describe, expect, it, vi } from 'vitest'; + +vi.setConfig({ + testTimeout: 180 * 1000, + hookTimeout: 30 * 1000, +}); + +const useModel = undefined; +describe('assert', () => { + it('todo pass', async () => { + const { context } = await getPageDataOfTestName('todo'); + + const { pass } = await AiAssert({ + assertion: 'Three tasks have been added', + context, + useModel, + }); + expect(pass).toBe(true); + }); + + it('todo error', async () => { + const { context } = await getPageDataOfTestName('todo'); + + const { pass, thought } = await AiAssert({ + assertion: 'There are four tasks in the task list', + context, + useModel, + }); + expect(pass).toBe(false); + }); +}); diff --git a/packages/midscene/tests/ai-model/automation/planning-coze.test.ts b/packages/midscene/tests/ai-model/automation/planning-coze.test.ts new file mode 100644 index 000000000..4589edc0a --- /dev/null +++ b/packages/midscene/tests/ai-model/automation/planning-coze.test.ts @@ -0,0 +1,81 @@ +import { plan } from '@/ai-model'; +import { getPageDataOfTestName } from 'tests/ai-model/inspector/util'; +/* eslint-disable max-lines-per-function */ +import { describe, expect, it, vi } from 'vitest'; + +vi.setConfig({ + testTimeout: 180 * 1000, + hookTimeout: 30 * 1000, +}); + +const userModel = 'coze'; + +describe('automation - planning', () => { + it('basic run', async () => { + const { context } = await getPageDataOfTestName('todo'); + + const { plans } = await plan( + 'type "Why is the earth a sphere?", hit Enter', + { + context, + }, + userModel, + ); + expect(plans.length).toBe(3); + expect(plans[0].thought).toBeTruthy(); + expect(plans[0].type).toBe('Locate'); + expect(plans[1].type).toBe('Input'); + expect(plans[2].type).toBe('KeyboardPress'); + }); + + it('should raise an error when prompt is irrelevant with page', async () => { + const { context } = await getPageDataOfTestName('todo'); + + expect(async () => { + await plan( + 'Tap the blue T-shirt in left top corner, and click the "add to cart" button', + { + context, + }, + userModel, + ); + }).rejects.toThrowError(); + }); + + it('Error message in Chinese', async () => { + const { context } = await getPageDataOfTestName('todo'); + let error: Error | undefined; + try { + await plan( + '在界面上点击“香蕉奶茶”,然后添加到购物车', + { + context, + }, + userModel, + ); + } catch (e: any) { + error = e; + } + + expect(error).toBeTruthy(); + expect(/a-z/i.test(error!.message)).toBeFalsy(); + }); + + it('instructions of to-do mvc', async () => { + const { context } = await getPageDataOfTestName('todo'); + const instructions = [ + '在任务框 input 输入 今天学习 JS,按回车键', + '在任务框 input 输入 明天学习 Rust,按回车键', + '在任务框 input 输入后天学习 AI,按回车键', + '将鼠标移动到任务列表中的第二项,点击第二项任务右边的删除按钮', + '点击第二条任务左边的勾选按钮', + '点击任务列表下面的 completed 状态按钮', + ]; + + for (const instruction of instructions) { + const { plans } = await plan(instruction, { context }, userModel); + expect(plans).toBeTruthy(); + // console.log(`instruction: ${instruction}\nplans: ${JSON.stringify(plans, undefined, 2)}`); + } + }); +}); diff --git a/packages/midscene/tests/automation/planning.test.ts b/packages/midscene/tests/ai-model/automation/planning.test.ts similarity index 87% rename from packages/midscene/tests/automation/planning.test.ts rename to packages/midscene/tests/ai-model/automation/planning.test.ts index 098cb129c..f592b1e0e 100644 --- a/packages/midscene/tests/automation/planning.test.ts +++ b/packages/midscene/tests/ai-model/automation/planning.test.ts @@ -1,4 +1,4 @@ -import { plan } from '@/automation/'; +import { plan } from '@/ai-model'; import { getPageDataOfTestName } from 'tests/ai-model/inspector/util'; /* eslint-disable max-lines-per-function */ import { describe, expect, it, vi } from 'vitest'; @@ -8,6 +8,8 @@ vi.setConfig({ hookTimeout: 30 * 1000, }); +const userModel = undefined; + describe('automation - planning', () => { it('basic run', async () => { const { context } = await getPageDataOfTestName('todo'); @@ -17,6 +19,7 @@ describe('automation - planning', () => { { context, }, + userModel, ); expect(plans.length).toBe(4); expect(plans[0].thought).toBeTruthy(); @@ -37,6 +40,7 @@ describe('automation - planning', () => { { context, }, + userModel, ); }).rejects.toThrowError(); }); @@ -45,9 +49,13 @@ describe('automation - planning', () => { const { context } = await getPageDataOfTestName('todo'); let error: Error | undefined; try { - await plan('在界面上点击“香蕉奶茶”,然后添加到购物车', { - context, - }); + await plan( + '在界面上点击“香蕉奶茶”,然后添加到购物车', + { + context, + }, + userModel, + ); } catch (e: any) { error = e; } @@ -68,7 +76,7 @@ describe('automation - planning', () => { ]; for (const instruction of instructions) { - const { plans } = await plan(instruction, { context }); + const { plans } = await plan(instruction, { context }, 'openAI'); expect(plans).toBeTruthy(); // console.log(`instruction: ${instruction}\nplans: ${JSON.stringify(plans, undefined, 2)}`); } diff --git a/packages/midscene/tests/ai-model/extract/__snapshots__/extract-coze.test.ts.snap b/packages/midscene/tests/ai-model/extract/__snapshots__/extract-coze.test.ts.snap new file mode 100644 index 000000000..0fd4b0924 --- /dev/null +++ b/packages/midscene/tests/ai-model/extract/__snapshots__/extract-coze.test.ts.snap @@ -0,0 +1,34 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`assert > todo 1`] = ` +{ + "data": [ + "Learn Python", + "Learn Rust", + "Learn AI", + ], + "errors": [], + "language": "en", +} +`; + +exports[`assert > todo obj 1`] = ` +{ + "data": [ + { + "checked": false, + "text": "Learn Python", + }, + { + "checked": false, + "text": "Learn Rust", + }, + { + "checked": false, + "text": "Learn AI", + }, + ], + "errors": [], + "language": "en", +} +`; diff --git a/packages/midscene/tests/ai-model/extract/__snapshots__/extract-openai.test.ts.snap b/packages/midscene/tests/ai-model/extract/__snapshots__/extract-openai.test.ts.snap new file mode 100644 index 000000000..e72d66d12 --- /dev/null +++ b/packages/midscene/tests/ai-model/extract/__snapshots__/extract-openai.test.ts.snap @@ -0,0 +1,51 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`assert > online order 1`] = ` +{ + "data": [ + { + "name": "多肉大橘(首创)", + "price": "6.82", + }, + { + "name": "轻芒芒甘露", + "price": "6.54", + }, + ], + "errors": [], + "language": "zh", +} +`; + +exports[`assert > todo 1`] = ` +{ + "data": [ + "Learn Python", + "Learn Rust", + "Learn AI", + ], + "errors": [], + "language": "en", +} +`; + +exports[`assert > todo obj 1`] = ` +{ + "data": [ + { + "checked": false, + "text": "Learn Python", + }, + { + "checked": false, + "text": "Learn Rust", + }, + { + "checked": false, + "text": "Learn AI", + }, + ], + "errors": [], + "language": "en", +} +`; diff --git a/packages/midscene/tests/ai-model/extract/extract-coze.test.ts b/packages/midscene/tests/ai-model/extract/extract-coze.test.ts new file mode 100644 index 000000000..482414c80 --- /dev/null +++ b/packages/midscene/tests/ai-model/extract/extract-coze.test.ts @@ -0,0 +1,46 @@ +import { AiExtractElementInfo } from '@/ai-model'; +import { getPageDataOfTestName } from 'tests/ai-model/inspector/util'; +import { describe, expect, it, vi } from 'vitest'; + +vi.setConfig({ + testTimeout: 180 * 1000, + hookTimeout: 30 * 1000, +}); + +const useModel = 'coze'; + +describe('assert', () => { + it('todo', async () => { + const { context } = await getPageDataOfTestName('todo'); + + const { parseResult } = await AiExtractElementInfo({ + dataQuery: 'Array, Complete task list, string is the task', + context, + useModel, + }); + expect(parseResult).toMatchSnapshot(); + }); + + // it('online order', async () => { + // const { context } = await getPageDataOfTestName('online_order'); + + // const { parseResult } = await AiExtractElementInfo({ + // dataQuery: '{name: string, price: string}[], 饮品名称和价格', + // context, + // useModel, + // }); + // expect(parseResult).toMatchSnapshot(); + // }); + + it('todo obj', async () => { + const { context } = await getPageDataOfTestName('todo'); + + const { parseResult } = await AiExtractElementInfo({ + dataQuery: + '{checked: boolean; text: string}[],Complete task list, string is the task', + context, + useModel, + }); + expect(parseResult).toMatchSnapshot(); + }); +}); diff --git a/packages/midscene/tests/ai-model/extract/extract-openai.test.ts b/packages/midscene/tests/ai-model/extract/extract-openai.test.ts new file mode 100644 index 000000000..8cf723c9c --- /dev/null +++ b/packages/midscene/tests/ai-model/extract/extract-openai.test.ts @@ -0,0 +1,46 @@ +import { AiExtractElementInfo } from '@/ai-model'; +import { getPageDataOfTestName } from 'tests/ai-model/inspector/util'; +import { describe, expect, it, vi } from 'vitest'; + +vi.setConfig({ + testTimeout: 180 * 1000, + hookTimeout: 30 * 1000, +}); + +const useModel = undefined; + +describe('assert', () => { + it('todo', async () => { + const { context } = await getPageDataOfTestName('todo'); + + const { parseResult } = await AiExtractElementInfo({ + dataQuery: 'Array, Complete task list, string is the task', + context, + useModel, + }); + expect(parseResult).toMatchSnapshot(); + }); + + it('online order', async () => { + const { context } = await getPageDataOfTestName('online_order'); + + const { parseResult } = await AiExtractElementInfo({ + dataQuery: '{name: string, price: string}[], 饮品名称和价格', + context, + useModel, + }); + expect(parseResult).toMatchSnapshot(); + }); + + it('todo obj', async () => { + const { context } = await getPageDataOfTestName('todo'); + + const { parseResult } = await AiExtractElementInfo({ + dataQuery: + '{checked: boolean; text: string}[],Complete task list, string is the task', + context, + useModel, + }); + expect(parseResult).toMatchSnapshot(); + }); +}); diff --git a/packages/midscene/tests/ai-model/inspector/__snapshots__/coze_todo_inspector.test.ts.snap b/packages/midscene/tests/ai-model/inspector/__snapshots__/coze_todo_inspector.test.ts.snap new file mode 100644 index 000000000..ea51c7170 --- /dev/null +++ b/packages/midscene/tests/ai-model/inspector/__snapshots__/coze_todo_inspector.test.ts.snap @@ -0,0 +1,52 @@ +[ + { + "elements": [ + { + "id": "3530a9c1eb", + "indexId": "2", + }, + ], + "error": [], + "prompt": "任务输入框", + }, + { + "elements": [ + { + "id": "b5bacc879a", + "indexId": "8", + }, + ], + "error": [], + "prompt": "任务列表中的第二项", + }, + { + "elements": [ + { + "id": "7ccd467339", + "indexId": "9", + }, + ], + "error": [], + "prompt": "第二项任务右边的删除按钮", + }, + { + "elements": [ + { + "id": "eb987bf616", + "indexId": "10", + }, + ], + "error": [], + "prompt": "任务列表中第三项左边的勾选按钮", + }, + { + "elements": [ + { + "id": "0f8f471e06", + "indexId": "15", + }, + ], + "error": [], + "prompt": "任务列表下面的 Completed 状态按钮", + }, +] \ No newline at end of file diff --git a/packages/midscene/tests/ai-model/inspector/__snapshots__/online_order_inspector.test.ts.snap b/packages/midscene/tests/ai-model/inspector/__snapshots__/online_order_inspector.test.ts.snap index ff8f47cdd..aa0619e06 100644 --- a/packages/midscene/tests/ai-model/inspector/__snapshots__/online_order_inspector.test.ts.snap +++ b/packages/midscene/tests/ai-model/inspector/__snapshots__/online_order_inspector.test.ts.snap @@ -3,6 +3,7 @@ "elements": [ { "id": "6ad26dfdca", + "indexId": "1", }, ], "error": [], @@ -12,6 +13,7 @@ "elements": [ { "id": "ba59909699", + "indexId": "2", }, ], "error": [], @@ -21,6 +23,7 @@ "elements": [ { "id": "f775c69cb4", + "indexId": "4", }, ], "error": [], @@ -30,9 +33,11 @@ "elements": [ { "id": "14103376fb", + "indexId": "22", }, { "id": "0250e12e67", + "indexId": "28", }, ], "error": [], @@ -42,9 +47,11 @@ "elements": [ { "id": "580cfae23c", + "indexId": "23", }, { "id": "925c254744", + "indexId": "29", }, ], "error": [], @@ -54,6 +61,7 @@ "elements": [ { "id": "cad3004a2d", + "indexId": "30", }, ], "error": [], diff --git a/packages/midscene/tests/ai-model/inspector/__snapshots__/todo_inspector.test.ts.snap b/packages/midscene/tests/ai-model/inspector/__snapshots__/todo_inspector.test.ts.snap index cb60e3e58..ea51c7170 100644 --- a/packages/midscene/tests/ai-model/inspector/__snapshots__/todo_inspector.test.ts.snap +++ b/packages/midscene/tests/ai-model/inspector/__snapshots__/todo_inspector.test.ts.snap @@ -3,6 +3,7 @@ "elements": [ { "id": "3530a9c1eb", + "indexId": "2", }, ], "error": [], @@ -12,6 +13,7 @@ "elements": [ { "id": "b5bacc879a", + "indexId": "8", }, ], "error": [], @@ -21,6 +23,7 @@ "elements": [ { "id": "7ccd467339", + "indexId": "9", }, ], "error": [], @@ -30,6 +33,7 @@ "elements": [ { "id": "eb987bf616", + "indexId": "10", }, ], "error": [], @@ -39,6 +43,7 @@ "elements": [ { "id": "0f8f471e06", + "indexId": "15", }, ], "error": [], diff --git a/packages/midscene/tests/ai-model/inspector/__snapshots__/xicha_inspector.test.ts.snap b/packages/midscene/tests/ai-model/inspector/__snapshots__/xicha_inspector.test.ts.snap deleted file mode 100644 index 090cc4101..000000000 --- a/packages/midscene/tests/ai-model/inspector/__snapshots__/xicha_inspector.test.ts.snap +++ /dev/null @@ -1,62 +0,0 @@ -[ - { - "elements": [ - { - "id": "922e98a196", - }, - ], - "error": [], - "prompt": "Top left menu bar icon", - }, - { - "elements": [ - { - "id": "83ffa89342", - }, - ], - "error": [], - "prompt": "Toggle language text button(Could be:中文、english text)", - }, - { - "elements": [ - { - "id": "a525985342", - }, - ], - "error": [], - "prompt": "Top right shopping cart", - }, - { - "elements": [ - { - "id": "3fb89d359f", - }, - { - "id": "c4300a7c45", - }, - ], - "error": [], - "prompt": "The price number on the right of the drink picture", - }, - { - "elements": [ - { - "id": "ae0ba24c99", - }, - { - "id": "a50d88f84c", - }, - ], - "error": [], - "prompt": "选择规格按钮", - }, - { - "elements": [ - { - "id": "df4f252aab", - }, - ], - "error": [], - "prompt": "Bottom right Customer service button", - }, -] \ No newline at end of file diff --git a/packages/midscene/tests/ai-model/inspector/coze_inspector.test.ts b/packages/midscene/tests/ai-model/inspector/coze_inspector.test.ts new file mode 100644 index 000000000..8fc6435c5 --- /dev/null +++ b/packages/midscene/tests/ai-model/inspector/coze_inspector.test.ts @@ -0,0 +1,69 @@ +import path from 'node:path'; +import { AiInspectElement } from '@/ai-model'; +import { expect, it } from 'vitest'; +import { + getPageTestData, + repeat, + runTestCases, + writeFileSyncWithDir, +} from './util'; + +const testTodoCases = [ + { + description: '任务输入框', + multi: false, + }, + { + description: '任务列表中的第二项', + multi: false, + }, + { + description: '第二项任务右边的删除按钮', + multi: false, + }, + { + description: '任务列表中第三项左边的勾选按钮', + multi: false, + }, + { + description: '任务列表下面的 Completed 状态按钮', + multi: false, + }, +]; + +it( + 'coze todo: inspect element', + async () => { + const { context } = await getPageTestData( + path.join(__dirname, './test-data/todo'), + ); + + const { aiResponse, filterUnstableResult } = await runTestCases( + testTodoCases, + context, + async (testCase) => { + const { parseResult } = await AiInspectElement({ + context, + multi: testCase.multi, + findElementDescription: testCase.description, + useModel: 'coze', + }); + return parseResult; + }, + ); + writeFileSyncWithDir( + path.join( + __dirname, + '__ai_responses__/coze_todo-inspector-element-.json', + ), + JSON.stringify(aiResponse, null, 2), + { encoding: 'utf-8' }, + ); + expect(filterUnstableResult).toMatchFileSnapshot( + './__snapshots__/coze_todo_inspector.test.ts.snap', + ); + }, + { + timeout: 90 * 1000, + }, +); diff --git a/packages/midscene/tests/ai-model/inspector/online_order_inspector.test.ts b/packages/midscene/tests/ai-model/inspector/online_order_inspector.test.ts index 0b08b0ba6..b497eb49c 100644 --- a/packages/midscene/tests/ai-model/inspector/online_order_inspector.test.ts +++ b/packages/midscene/tests/ai-model/inspector/online_order_inspector.test.ts @@ -31,7 +31,7 @@ const testCases = [ }, { description: 'Bottom right Customer service button', - multi: true, + multi: false, }, ]; @@ -45,6 +45,7 @@ repeat(5, (repeatIndex) => { const { aiResponse, filterUnstableResult } = await runTestCases( testCases, + context, async (testCase) => { const { parseResult } = await AiInspectElement({ context, diff --git a/packages/midscene/tests/ai-model/inspector/todo_inspector.test.ts b/packages/midscene/tests/ai-model/inspector/todo_inspector.test.ts index a9b1b6072..729a27722 100644 --- a/packages/midscene/tests/ai-model/inspector/todo_inspector.test.ts +++ b/packages/midscene/tests/ai-model/inspector/todo_inspector.test.ts @@ -42,6 +42,7 @@ repeat(2, (repeatIndex) => { const { aiResponse, filterUnstableResult } = await runTestCases( testTodoCases, + context, async (testCase) => { const { parseResult } = await AiInspectElement({ context, diff --git a/packages/midscene/tests/ai-model/inspector/util.ts b/packages/midscene/tests/ai-model/inspector/util.ts index 914e031bc..4b72e3b95 100644 --- a/packages/midscene/tests/ai-model/inspector/util.ts +++ b/packages/midscene/tests/ai-model/inspector/util.ts @@ -22,16 +22,19 @@ export interface TextAiElementResponse extends AiElementsResponse { prompt: string; error?: string; spendTime: string; + elementsSnapshot: Array; } export async function runTestCases( testCases: Array, + context: any, getAiResponse: (options: { description: string; multi: boolean; }) => Promise, ) { let aiResponse: Array = []; + const { content: elementSnapshot } = context; const aiReq = testCases.map(async (testCase, caseIndex) => { const startTime = Date.now(); @@ -44,6 +47,14 @@ export async function runTestCases( prompt: testCase.description, caseIndex, spendTime: `${spendTime}s`, + elementsSnapshot: msg.elements.map((element) => { + const index = elementSnapshot.findIndex((item: any) => { + if (item.nodeHashId === element.id) { + return true; + } + }); + return elementSnapshot[index]; + }), }); } else { aiResponse.push({ @@ -66,11 +77,12 @@ export async function runTestCases( }); const filterUnstableResult = aiResponse.map((aiInfo) => { - const { elements = [], prompt, error = [] } = aiInfo; + const { elements = [], prompt, error = [], elementsSnapshot } = aiInfo; return { - elements: elements.map((element) => { + elements: elements.map((element, index) => { return { id: element.id.toString(), + indexId: elementsSnapshot[index].indexId, }; }), prompt, @@ -114,11 +126,12 @@ export async function getPageTestData(targetDir: string) { const resizeOutputImgP = path.join(targetDir, 'input.png'); const snapshotJsonPath = path.join(targetDir, 'element-snapshot.json'); const snapshotJson = readFileSync(snapshotJsonPath, { encoding: 'utf-8' }); + const elementSnapshot = JSON.parse(snapshotJson); const screenshotBase64 = base64Encoded(resizeOutputImgP); const size = await imageInfoOfBase64(screenshotBase64); const baseContext = { size, - content: JSON.parse(snapshotJson), + content: elementSnapshot, screenshotBase64: base64Encoded(resizeOutputImgP), }; diff --git a/packages/midscene/vitest.config.ts b/packages/midscene/vitest.config.ts index dd6c5aefb..7c9352d30 100644 --- a/packages/midscene/vitest.config.ts +++ b/packages/midscene/vitest.config.ts @@ -20,10 +20,14 @@ const aiModelTest = export default defineConfig({ test: { - // include: ['tests/inspector/*.test.ts'], include: ['tests/**/*.test.ts'], + // include: ['tests/ai-model/**/*-coze.test.ts'], // Need to improve the corresponding testing - exclude: ['tests/insight/*.test.ts', ...aiModelTest], + exclude: [ + 'tests/insight/*.test.ts', + 'tests/executor/*.test.ts', + ...aiModelTest, + ], }, resolve: { alias: { diff --git a/packages/visualizer-report/package.json b/packages/visualizer-report/package.json index 41b9f61b5..73ebdbad7 100644 --- a/packages/visualizer-report/package.json +++ b/packages/visualizer-report/package.json @@ -11,10 +11,7 @@ "lint": "modern lint", "upgrade": "modern upgrade" }, - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "engines": { "node": ">=16.18.1" }, @@ -23,10 +20,7 @@ "node --max_old_space_size=8192 ./node_modules/eslint/bin/eslint.js --fix --color --cache --quiet" ] }, - "eslintIgnore": [ - "node_modules/", - "dist/" - ], + "eslintIgnore": ["node_modules/", "dist/"], "dependencies": { "@modern-js/runtime": "^2.56.2", "@midscene/visualizer": "workspace:*", diff --git a/packages/visualizer/package.json b/packages/visualizer/package.json index a617a5d91..58b391d09 100644 --- a/packages/visualizer/package.json +++ b/packages/visualizer/package.json @@ -5,10 +5,7 @@ "jsnext:source": "./src/index.ts", "main": "./dist/lib/index.js", "module": "./dist/es/index.js", - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "scripts": { "dev": "modern dev", "build": "modern build", @@ -39,12 +36,7 @@ "rimraf": "~3.0.2", "typescript": "~5.0.4" }, - "sideEffects": [ - "**/*.css", - "**/*.less", - "**/*.sass", - "**/*.scss" - ], + "sideEffects": ["**/*.css", "**/*.less", "**/*.sass", "**/*.scss"], "publishConfig": { "access": "public" } diff --git a/packages/web-integration/midscene_run/cache/ai-auto-todo.spec.ts(ai todo).json b/packages/web-integration/midscene_run/cache/ai-auto-todo.spec.ts(ai todo).json deleted file mode 100644 index 296bcc8fc..000000000 --- a/packages/web-integration/midscene_run/cache/ai-auto-todo.spec.ts(ai todo).json +++ /dev/null @@ -1,356 +0,0 @@ -{ - "pkgName": "@midscene/web", - "pkgVersion": "0.1.4", - "taskFile": "ai-auto-todo.spec.ts", - "taskTitle": "ai todo", - "aiTasks": [ - { - "type": "plan", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "Enter \"Learn JS today\" in the task box, then press Enter to create", - "response": { - "plans": [ - { - "thought": "To create a new task, I need to locate the input box labeled with 'What needs to be done?'", - "type": "Locate", - "param": { - "prompt": "The input box with the placeholder 'What needs to be done?' in the center of the page." - } - }, - { - "thought": "Input the task name 'Learn JS today' into the input box.", - "type": "Input", - "param": { - "value": "Learn JS today" - } - }, - { - "thought": "Press Enter to create the task.", - "type": "KeyboardPress", - "param": { - "value": "Enter" - } - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "The input box with the placeholder 'What needs to be done?' in the center of the page.", - "response": { - "elements": [ - { - "reason": "The element with the placeholder 'What needs to be done?' is an input node located centrally on the page at the specified coordinates.", - "text": "What needs to be done?", - "id": "3530a9c1eb" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "Enter \"Learn Rust tomorrow\" in the task box, then press Enter to create", - "response": { - "plans": [ - { - "thought": "The task is to enter 'Learn Rust tomorrow' in the task box and press Enter to create the task. Identifying the input field first.", - "type": "Locate", - "param": { - "prompt": "The input field with placeholder 'What needs to be done?'" - } - }, - { - "thought": "Input the provided text 'Learn Rust tomorrow' into the identified input field.", - "type": "Input", - "param": { - "value": "Learn Rust tomorrow" - } - }, - { - "thought": "Press Enter to create the task after inputting the text.", - "type": "KeyboardPress", - "param": { - "value": "Enter" - } - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "The input field with placeholder 'What needs to be done?'", - "response": { - "elements": [ - { - "reason": "The element has a placeholder attribute with the value 'What needs to be done?', and matches the 'INPUT Node' type.", - "text": "What needs to be done?", - "id": "3530a9c1eb" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "Enter \"Learning AI the day after tomorrow\" in the task box, then press Enter to create", - "response": { - "plans": [ - { - "thought": "The user wants to add a new task by entering the given text and pressing Enter. The input box is clearly labeled and occupies a large portion of the screen.", - "type": "Locate", - "param": { - "prompt": "The text input box with the placeholder 'What needs to be done?' in the center of the page." - } - }, - { - "thought": "After locating the input box, the next step is to enter the specified text in it.", - "type": "Input", - "param": { - "value": "Learning AI the day after tomorrow" - } - }, - { - "thought": "Finally, pressing Enter will create the new task.", - "type": "KeyboardPress", - "param": { - "value": "Enter" - } - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "The text input box with the placeholder 'What needs to be done?' in the center of the page.", - "response": { - "elements": [ - { - "reason": "The element described is the text input box with the placeholder 'What needs to be done?'. This matches the element with ID '3530a9c1eb' that has attributes including 'type': 'text' and 'placeholder': 'What needs to be done?'.", - "text": "What needs to be done?", - "id": "3530a9c1eb" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "Move your mouse over the second item in the task list and click the Delete button to the right of the second task", - "response": { - "plans": [ - { - "thought": "To perform the specified task, we need to first locate the second item in the task list.", - "type": "Locate", - "param": { - "prompt": "The second item in the task list labeled 'Learn Rust tomorrow'." - } - }, - { - "thought": "After locating the second item, hover over it to reveal the Delete button.", - "type": "Hover", - "param": null - }, - { - "thought": "After revealing the Delete button, locate it next to the second item.", - "type": "Locate", - "param": { - "prompt": "The Delete button next to the item labeled 'Learn Rust tomorrow'." - } - }, - { - "thought": "Tap the Delete button to remove the second task.", - "type": "Tap", - "param": null - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "The second item in the task list labeled 'Learn Rust tomorrow'.", - "response": { - "elements": [ - { - "reason": "The element has the text 'Learn Rust tomorrow' which exactly matches the description provided by the user. This text is located in the second item in the task list as verified by the screenshot.", - "text": "Learn Rust tomorrow", - "id": "10bc260820" - } - ], - "errors": [] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "The Delete button next to the item labeled 'Learn Rust tomorrow'.", - "response": { - "elements": [ - { - "reason": "The element with the ID '7ccd467339' matches the description 'Delete button next to the item labeled Learn Rust tomorrow'. It is a BUTTON node with the content '×' and located next to the item labeled Learn Rust tomorrow.", - "text": "×", - "id": "7ccd467339" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "Click the check button to the left of the second task", - "response": { - "plans": [ - { - "thought": "Clicking the checkbox to the left of the second task", - "type": "Locate", - "param": { - "prompt": "The second checkbox button from the top in the tasks list, with the label 'Learning AI the day after tomorrow'" - } - }, - { - "thought": "Clicking the located checkbox.", - "type": "Tap", - "param": null - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "The second checkbox button from the top in the tasks list, with the label 'Learning AI the day after tomorrow'", - "response": { - "elements": [ - { - "reason": "The second checkbox button from the top in the tasks list, which includes the label 'Learning AI the day after tomorrow' is identified with the node type 'INPUT Node' and is at position left: 372px and top: 269px.", - "text": "", - "id": "c0751f3b26" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "Click the completed Status button below the task list", - "response": { - "plans": [ - { - "thought": "The user wants to click the 'Completed' status button, which is visible just below the task list.", - "type": "Locate", - "param": { - "prompt": "The 'Completed' status button below the task list" - } - }, - { - "thought": "Tap on the 'Completed' status button to complete the task.", - "type": "Tap", - "param": null - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://todomvc.com/examples/react/dist/", - "size": { - "width": 1280, - "height": 720 - } - }, - "prompt": "The 'Completed' status button below the task list", - "response": { - "elements": [ - { - "reason": "The 'Completed' status button below the task list is the text node with content 'Completed' located beneath the listed tasks.", - "text": "Completed", - "id": "663a34de3b" - } - ], - "errors": [] - } - } - ] -} \ No newline at end of file diff --git a/packages/web-integration/midscene_run/cache/ai-online-order.spec.ts(ai online order).json b/packages/web-integration/midscene_run/cache/ai-online-order.spec.ts(ai online order).json deleted file mode 100644 index 4f0cf3a47..000000000 --- a/packages/web-integration/midscene_run/cache/ai-online-order.spec.ts(ai online order).json +++ /dev/null @@ -1,555 +0,0 @@ -{ - "pkgName": "@midscene/web", - "pkgVersion": "0.1.4", - "taskFile": "ai-online-order.spec.ts", - "taskTitle": "ai online order", - "aiTasks": [ - { - "type": "plan", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "点击左上角语言切换按钮(英文、中文),在弹出的下拉列表中点击中文", - "response": { - "plans": [ - { - "thought": "需要切换语言,需要找到明显的语言选择按钮。", - "type": "Locate", - "param": { - "prompt": "页面左上角显示语言的文字或按钮" - } - }, - { - "thought": "点击语言切换按钮", - "type": "Tap", - "param": null - }, - { - "thought": "需要选择中文,需要找到下拉列表中的中文选项", - "type": "Locate", - "param": { - "prompt": "下拉列表中显示'中文'的文字" - } - }, - { - "thought": "点击中文选项", - "type": "Tap", - "param": null - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "页面左上角显示语言的文字或按钮", - "response": { - "elements": [ - { - "reason": "The element containing the text 'English' is located in the upper left corner of the page, consistent with the description which asks for the language displayed in the top left corner.", - "text": "English", - "id": "57c026f0b8" - } - ], - "errors": [] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "下拉列表中显示'中文'的文字", - "response": { - "elements": [ - { - "reason": "The element with the text '中文' is a text node located within a dropdown list, as indicated in the screenshot.", - "text": "中文", - "id": "efc0b2ff38" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "向下滚动一屏", - "response": { - "plans": [ - { - "thought": "用户要求向下滚动一屏,页面包含了多个菜单项和商品信息,满足滚动需求。", - "type": "Scroll", - "param": { - "scrollType": "ScrollDown" - } - } - ] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "直接点击多肉葡萄的规格按钮", - "response": { - "plans": [ - { - "thought": "找到多肉葡萄的选规格按钮,这是用户所描述的任务。", - "type": "Locate", - "param": { - "prompt": "内容是“多肉葡萄(首创)”的下方黄色按钮" - } - }, - { - "thought": "点击找到的按钮。", - "type": "Tap", - "param": null - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "内容是“多肉葡萄(首创)”的下方黄色按钮", - "response": { - "elements": [ - { - "reason": "The button below the content '多肉葡萄(首创)' matches the user's description of a yellow button with the text '选规格'.", - "text": "选规格", - "id": "427d3179c2" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "点击不使用吸管、点击冰沙推荐、点击正常冰推荐", - "response": { - "plans": [ - { - "thought": "根据要求,首先点击选择\"不使用吸管\"。", - "type": "Locate", - "param": { - "prompt": "标签为“不使用吸管”的复选框,位于页面中部偏下方。" - } - }, - { - "thought": "点击\"不使用吸管\"。", - "type": "Tap", - "param": null - }, - { - "thought": "然后点击冰沙推荐选项。", - "type": "Locate", - "param": { - "prompt": "标签为\"冰沙(推荐)\"的复选框,位于“不使用吸管”选项的下方。" - } - }, - { - "thought": "点击冰沙推荐。", - "type": "Tap", - "param": null - }, - { - "thought": "最后点击正常冰推荐选项。", - "type": "Locate", - "param": { - "prompt": "标签为\"正常冰(推荐)\"的复选框,位于“冰沙(推荐)”的下方。" - } - }, - { - "thought": "点击正常冰推荐。", - "type": "Tap", - "param": null - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "标签为“不使用吸管”的复选框,位于页面中部偏下方。", - "response": { - "elements": [ - { - "reason": "The element '不使用吸管' corresponds to the checkbox label '不使用吸管'. It is located towards the middle-bottom part of the page as described.", - "text": "不使用吸管", - "id": "e83902c611" - } - ], - "errors": [] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "标签为\"冰沙(推荐)\"的复选框,位于“不使用吸管”选项的下方。", - "response": { - "elements": [ - { - "reason": "根据描述,“冰沙(推荐)”选项在“不使用吸管”选项的下方,并是一个复选框类型的元素。", - "text": "冰沙(推荐)", - "id": "04f0538285" - } - ], - "errors": [] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "标签为\"正常冰(推荐)\"的复选框,位于“冰沙(推荐)”的下方。", - "response": { - "elements": [ - { - "reason": "The checkbox labeled '正常冰(推荐)' is located below the option '冰沙(推荐)'. This matches the user's description.", - "text": "正常冰(推荐)", - "id": "0b385ab4dc" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "向下滚动一屏", - "response": { - "plans": [ - { - "thought": "在页面中找到用户需要的滚动位置。", - "type": "Locate", - "param": { - "prompt": "最后一项少冰复选框下面的空白区域" - } - }, - { - "thought": "向下滚动一屏来实现用户要求。", - "type": "Scroll", - "param": { - "scrollType": "ScrollDown" - } - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "最后一项少冰复选框下面的空白区域", - "response": { - "elements": [ - { - "reason": "The area described as \"the blank area under the 少冰 (Less Ice) checkbox\" should be an empty area immediately below the element with id 'a5060a97b5'. The closest matching element below it in the description list is the checkbox element with id '05408139ae'. This element is an INPUT Node situated directly below the 少冰 checkbox.", - "text": "", - "id": "05408139ae" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "点击标准甜、点击绿妍(推荐)、点击标准口味", - "response": { - "plans": [ - { - "thought": "根据用户的要求,首先需要点击“标准甜(推荐)”的选项。", - "type": "Locate", - "param": { - "prompt": "标有“标准甜(推荐)”文字旁边的复选框" - } - }, - { - "thought": "点击该复选框", - "type": "Tap", - "param": null - }, - { - "thought": "接下来需要点击“绿妍(推荐)”的选项。", - "type": "Locate", - "param": { - "prompt": "标有“绿妍(推荐)”文字旁边的复选框" - } - }, - { - "thought": "点击该复选框", - "type": "Tap", - "param": null - }, - { - "thought": "最后需要点击“标准口味(推荐)”的选项。", - "type": "Locate", - "param": { - "prompt": "标有“标准口味(推荐)”文字旁边的复选框" - } - }, - { - "thought": "点击该复选框", - "type": "Tap", - "param": null - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "标有“标准甜(推荐)”文字旁边的复选框", - "response": { - "elements": [ - { - "reason": "The checkbox closest to the text '标准甜(推荐)' matches the description provided, it has the appropriate 'nodeType' and 'class' attributes, and is located next to the specified text.", - "text": "", - "id": "5280c060a7" - } - ], - "errors": [] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "标有“绿妍(推荐)”文字旁边的复选框", - "response": { - "elements": [ - { - "reason": "The checkbox next to the text '绿妍(推荐)' (Green Yan (recommended)) is located close to the mention of this text in the description. This matches the user requirement for a checkbox.", - "text": "", - "id": "5570045013" - } - ], - "errors": [] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "标有“标准口味(推荐)”文字旁边的复选框", - "response": { - "elements": [ - { - "reason": "The element matches the description '标有“标准口味(推荐)”文字旁边的复选框'. It is a checkbox located next to the text node containing '标准口味(推荐)'", - "text": "标准口味(推荐)", - "id": "0d204274f0" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "滚动到最下面", - "response": { - "plans": [ - { - "thought": "用户想要滚动到页面的最底部,这可以通过页面的全局滚动动作实现。", - "type": "Scroll", - "param": { - "scrollType": "ScrollUntilBottom" - } - } - ] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "点击选好了按钮", - "response": { - "plans": [ - { - "thought": "用户希望点击页面上的'选好了'按钮,以确认选择好的物品。", - "type": "Locate", - "param": { - "prompt": "带有'选好了'字样的黄色按钮,位于页面底部中央。" - } - }, - { - "thought": "点击找到的'选好了'按钮以完成用户任务。", - "type": "Tap", - "param": null - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "带有'选好了'字样的黄色按钮,位于页面底部中央。", - "response": { - "elements": [ - { - "reason": "The button has the text '选好了' and is located at the bottom center of the page. Based on the description and the screenshot, the element is located at the bottom and has a yellow background.", - "text": "选好了", - "id": "c9de916ef0" - } - ], - "errors": [] - } - }, - { - "type": "plan", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "点击右上角商品图标按钮", - "response": { - "plans": [ - { - "thought": "用户希望点击右上角的商品图标按钮。从页面描述和截图中,可以看到右上角有一个红色带数字1的图标按钮。", - "type": "Locate", - "param": { - "prompt": "右上角带有红色数字1的商品图标按钮" - } - }, - { - "thought": "点击该按钮。", - "type": "Tap", - "param": null - } - ] - } - }, - { - "type": "locate", - "pageContext": { - "url": "https://heyteavivocity.meuu.online/home", - "size": { - "width": 400, - "height": 905 - } - }, - "prompt": "右上角带有红色数字1的商品图标按钮", - "response": { - "elements": [ - { - "reason": "The element is located in the upper right corner, is an image type, and appears to have a red notification badge with the number 1, consistent with a shopping cart icon button.", - "text": "", - "id": "f775c69cb4" - } - ], - "errors": [] - } - } - ] -} \ No newline at end of file diff --git a/packages/web-integration/package.json b/packages/web-integration/package.json index 7d5effc8a..8eaf02155 100644 --- a/packages/web-integration/package.json +++ b/packages/web-integration/package.json @@ -30,18 +30,10 @@ }, "typesVersions": { "*": { - ".": [ - "./dist/types/index.d.ts" - ], - "playwright-report": [ - "./dist/types/playwright-report.d.ts" - ], - "constants": [ - "./dist/types/constants.d.ts" - ], - "html-element": [ - "./dist/types/html-element/index.d.ts" - ] + ".": ["./dist/types/index.d.ts"], + "playwright-report": ["./dist/types/playwright-report.d.ts"], + "constants": ["./dist/types/constants.d.ts"], + "html-element": ["./dist/types/html-element/index.d.ts"] } }, "scripts": { @@ -60,10 +52,7 @@ "e2e:ui": "playwright test --config=playwright.config.ts --ui", "e2e:ui-cache": "MIDSCENE_CACHE=true playwright test --config=playwright.config.ts --ui" }, - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "dependencies": { "openai": "4.47.1", "sharp": "0.33.3", diff --git a/packages/web-integration/src/common/tasks.ts b/packages/web-integration/src/common/tasks.ts index 43555d01d..6204d931a 100644 --- a/packages/web-integration/src/common/tasks.ts +++ b/packages/web-integration/src/common/tasks.ts @@ -108,14 +108,15 @@ export class PageTaskExecutor { param.prompt, ); let locateResult: AIElementParseResponse | undefined; - const callAI = this.insight.aiVendorFn; + const callAI = this.insight.aiVendorFn; const element = await this.insight.locate(param.prompt, { - callAI: async (message: ChatCompletionMessageParam[]) => { + callAI: async (...message: any) => { if (locateCache) { locateResult = locateCache; return Promise.resolve(locateCache); } - locateResult = await callAI(message); + locateResult = await callAI(...message); + assert(locateResult); return locateResult; }, }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5548a7a84..a44d3ca9d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -118,6 +118,9 @@ importers: packages/midscene: dependencies: + node-fetch: + specifier: 2.6.7 + version: 2.6.7 openai: specifier: 4.47.1 version: 4.47.1 @@ -131,6 +134,9 @@ importers: '@types/node': specifier: ^18.0.0 version: 18.19.41 + '@types/node-fetch': + specifier: 2.6.11 + version: 2.6.11 dotenv: specifier: 16.4.5 version: 16.4.5 @@ -7905,8 +7911,8 @@ packages: resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} engines: {node: '>=10.5.0'} - node-fetch@2.7.0: - resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} + node-fetch@2.6.7: + resolution: {integrity: sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==} engines: {node: 4.x || >=6.0.0} peerDependencies: encoding: ^0.1.0 @@ -13164,7 +13170,7 @@ snapshots: detect-libc: 2.0.3 https-proxy-agent: 5.0.1 make-dir: 3.1.0 - node-fetch: 2.7.0 + node-fetch: 2.6.7 nopt: 5.0.0 npmlog: 5.0.1 rimraf: 3.0.2 @@ -21439,7 +21445,7 @@ snapshots: node-domexception@1.0.0: {} - node-fetch@2.7.0: + node-fetch@2.6.7: dependencies: whatwg-url: 5.0.0 @@ -21635,7 +21641,7 @@ snapshots: agentkeepalive: 4.5.0 form-data-encoder: 1.7.2 formdata-node: 4.4.1 - node-fetch: 2.7.0 + node-fetch: 2.6.7 web-streams-polyfill: 3.3.3 transitivePeerDependencies: - encoding