diff --git a/.env.example b/.env.example index acc193a63..213356e0f 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,13 @@ SUPABASE_KEY= AUTO_PAY_MODE= ANALYTICS_MODE= -# Log environment -LOG_ENVIRONMENT=production # development to see logs in console -LOG_LEVEL=debug # 0: error 1: warn 2: info 3: http 4: verbose 5: debug 6: silly -LOG_RETRY=0 # 0 for no retry, more than 0 for the number of retries \ No newline at end of file +# Use `trace` to get verbose logging or `info` to show less +LOG_LEVEL=debug +LOGDNA_INGESTION_KEY= +OPENAI_API_HOST=https://api.openai.com +OPENAI_API_KEY= +CHATGPT_USER_PROMPT_FOR_IMPORTANT_WORDS="I need your help to find important words (e.g. unique adjectives) from github issue below and I want to parse them easily so please separate them using #(No other contexts needed). Please separate the words by # so I can parse them easily. Please answer simply as I only need the important words. Here is the issue content.\n" +CHATGPT_USER_PROMPT_FOR_MEASURE_SIMILARITY='I have two github issues and I need to measure the possibility of the 2 issues are the same content (No other contents needed and give me only the number in %).\n Give me in number format and add % after the number.\nDo not tell other things since I only need the number (e.g. 85%). Here are two issues:\n 1. "%first%"\n2. 
"%second%"' +SIMILARITY_THRESHOLD=80 +MEASURE_SIMILARITY_AI_TEMPERATURE=0 +IMPORTANT_WORDS_AI_TEMPERATURE=0 diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index c790059a3..fa5fedab5 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -81,4 +81,4 @@ jobs: X25519_PRIVATE_KEY: 'QCDb30UHUkwJAGhLWC-R2N0PiEbd4vQY6qH2Wloybyo' FOLLOW_UP_TIME: '4 days' DISQUALIFY_TIME: '7 days' - run: yarn start:serverless \ No newline at end of file + run: yarn start:serverless diff --git a/README.md b/README.md index d6e9b64d7..9a31f394a 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,15 @@ yarn start:watch - `LOGDNA_INGESTION_KEY`: Get it from [Memzo](https://app.mezmo.com/) by creating an account, adding an organization, and copying the ingestion key on the next screen. - `FOLLOWUP_TIME`: (optional) Set a custom follow-up time (default: 4 days). - `DISQUALIFY_TIME`: (optional) Set a custom disqualify time (default: 7 days). +- `OPENAI_API_HOST`: (optional) Set the OpenAI API host URL (default: https://api.openai.com). +- `OPENAI_API_KEY`: Set the OpenAI API key. +- `CHATGPT_USER_PROMPT_FOR_IMPORTANT_WORDS`: (optional) Set a custom user prompt for finding important words +(default: "I need your help to find important words (e.g. unique adjectives) from github issue below and I want to parse them easily so please separate them using #(No other contexts needed). Please separate the words by # so I can parse them easily. Please answer simply as I only need the important words. Here is the issue content.\n"). +- `CHATGPT_USER_PROMPT_FOR_MEASURE_SIMILARITY`: (optional) Set a custom user prompt for measuring similarity +(default: 'I have two github issues and I need to measure the possibility of the 2 issues are the same content (I need to parse the % so other contents are not needed and give me only the number in %).\n Give me in number format and add % after the number.\nDo not tell other things since I only need the number (e.g. 85%). Here are two issues:\n 1. "%first%"\n2. "%second%"'). 
+- `SIMILARITY_THRESHOLD`: (optional) Set similarity threshold (default: 80). +- `MEASURE_SIMILARITY_AI_TEMPERATURE`: (optional) Set ChatGPT temperature for measuring similarity (default: 0). +- `IMPORTANT_WORDS_AI_TEMPERATURE`: (optional) Set ChatGPT temperature for finding important words (default: 0). `APP_ID` and `PRIVATE_KEY` are [here](https://t.me/c/1588400061/1627) for internal developers to use. If you are an external developer, `APP_ID`and `PRIVATE_KEY` are automatically generated when you install the app on your repository. diff --git a/package.json b/package.json index bf2367eb1..e64d64e99 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "decimal.js": "^10.4.3", "copyfiles": "^2.4.1", "ethers": "^5.7.2", + "exponential-backoff": "^3.1.1", "husky": "^8.0.2", "jimp": "^0.22.4", "js-yaml": "^4.1.0", diff --git a/src/handlers/issue/index.ts b/src/handlers/issue/index.ts new file mode 100644 index 000000000..12e1212ae --- /dev/null +++ b/src/handlers/issue/index.ts @@ -0,0 +1 @@ +export * from "./pre"; diff --git a/src/handlers/issue/pre.ts b/src/handlers/issue/pre.ts new file mode 100644 index 000000000..bebda5dce --- /dev/null +++ b/src/handlers/issue/pre.ts
@@ -0,0 +1,68 @@
+import { extractImportantWords, upsertCommentToIssue, measureSimilarity } from "../../helpers";
+import { getBotContext, getLogger } from "../../bindings";
+import { Issue, Payload } from "../../types";
+
+/**
+ * Checks whether the issue in the current webhook payload duplicates an existing issue of the
+ * same repository and, if so, posts a comment on the new issue that links the first match.
+ *
+ * Important words are extracted from the issue with `extractImportantWords`; each word is used
+ * as a GitHub search term and every hit is scored with `measureSimilarity`. The first hit that
+ * scores above `SIMILARITY_THRESHOLD` (default 80) ends the search.
+ */
+export const findDuplicateOne = async () => {
+  const logger = getLogger();
+  const context = getBotContext();
+  const payload = context.payload as Payload;
+  const issue = payload.issue;
+
+  if (!issue?.body) return;
+
+  // Parse the threshold once; fall back to the documented default when the value is not a number.
+  const parsedThreshold = parseInt(process.env.SIMILARITY_THRESHOLD || "80", 10);
+  const threshold = Number.isNaN(parsedThreshold) ? 80 : parsedThreshold;
+
+  // Blank or repeated words would only produce unscoped or redundant searches.
+  const extractedWords = await extractImportantWords(issue);
+  const importantWords = Array.from(new Set(extractedWords.map((word) => word.trim()).filter((word) => word !== "")));
+  const perPage = 10;
+  // Candidates already scored for an earlier word; avoids requesting the same comparison twice.
+  const comparedIssueIds = new Set<number>();
+
+  for (const importantWord of importantWords) {
+    try {
+      // Pagination restarts for every word so that no word's first pages are skipped.
+      let curPage = 1;
+      let fetchDone = false;
+      while (!fetchDone) {
+        const response = await context.octokit.rest.search.issuesAndPullRequests({
+          q: `${importantWord} repo:${payload.repository.owner.login}/${payload.repository.name} is:issue`,
+          sort: "created",
+          order: "desc",
+          per_page: perPage,
+          page: curPage,
+        });
+        for (const result of response.data.items) {
+          if (!result.body) continue;
+          if (result.id === issue.id) continue;
+          if (comparedIssueIds.has(result.id)) continue;
+          comparedIssueIds.add(result.id);
+          const similarity = await measureSimilarity(issue, result as Issue);
+          if (similarity > threshold) {
+            await upsertCommentToIssue(
+              issue.number,
+              `Similar issue (${result.title}) found at ${result.html_url}.\nSimilarity is about ${similarity}%`,
+              "created"
+            );
+            return;
+          }
+        }
+        if (response.data.items.length < perPage) fetchDone = true;
+        else curPage++;
+      }
+    } catch (e: unknown) {
+      // Best effort: a failed search for one word must not abort the remaining words.
+      logger.error(`Could not find any issues, reason: ${e}`);
+    }
+  }
+};
diff --git a/src/handlers/processors.ts b/src/handlers/processors.ts index d64e07eaa..16f89a535 100644 --- a/src/handlers/processors.ts +++ b/src/handlers/processors.ts @@ -8,11 +8,12 @@ import { checkPullRequests } from "./assign/auto"; import { createDevPoolPR } from "./pull-request"; import { runOnPush } from "./push"; import { incentivizeComments, incentivizeCreatorComment } from "./payout"; +import { findDuplicateOne } from "./issue"; export const processors: Record = { [GithubEvent.ISSUES_OPENED]: { pre: [nullHandler], - action: [nullHandler], // SHOULD not set `issueCreatedCallback` until the exploit issue resolved. https://github.com/ubiquity/ubiquibot/issues/535 + action: [findDuplicateOne], // SHOULD not set `issueCreatedCallback` until the exploit issue resolved. 
https://github.com/ubiquity/ubiquibot/issues/535 post: [nullHandler], }, [GithubEvent.ISSUES_REOPENED]: { diff --git a/src/helpers/index.ts b/src/helpers/index.ts index 1d447003d..ad8ee0cb9 100644 --- a/src/helpers/index.ts +++ b/src/helpers/index.ts @@ -8,3 +8,4 @@ export * from "./contracts"; export * from "./comment"; export * from "./payout"; export * from "./file"; +export * from "./similarity"; diff --git a/src/helpers/issue.ts b/src/helpers/issue.ts index 3dee46034..73ac46cb8 100644 --- a/src/helpers/issue.ts +++ b/src/helpers/issue.ts @@ -48,7 +48,13 @@ export const addLabelToIssue = async (labelName: string) => { } }; -export const listIssuesForRepo = async (state: "open" | "closed" | "all" = "open", per_page = 30, page = 1) => { +export const listIssuesForRepo = async ( + state: "open" | "closed" | "all" = "open", + per_page = 30, + page = 1, + sort: "created" | "updated" | "comments" = "created", + direction: "desc" | "asc" = "desc" +) => { const context = getBotContext(); const payload = context.payload as Payload; @@ -58,6 +64,8 @@ export const listIssuesForRepo = async (state: "open" | "closed" | "all" = "open state, per_page, page, + sort, + direction, }); await checkRateLimitGit(response.headers); diff --git a/src/helpers/similarity.ts b/src/helpers/similarity.ts new file mode 100644 index 000000000..a5743e562 --- /dev/null +++ b/src/helpers/similarity.ts
@@ -0,0 +1,133 @@
+import { getLogger } from "../bindings";
+import axios, { AxiosError } from "axios";
+import { ajv } from "../utils";
+import { Static, Type } from "@sinclair/typebox";
+import { backOff } from "exponential-backoff";
+import { Issue } from "../types";
+
+/** Parses a sampling temperature from an environment value; unset or non-numeric values yield 0. */
+const parseTemperature = (raw: string | undefined): number => {
+  const value = parseFloat(raw || "0");
+  return Number.isNaN(value) ? 0 : value;
+};
+
+/**
+ * Asks ChatGPT for the important words of an issue.
+ *
+ * @param issue - Issue whose title and body are appended to the prompt.
+ * @returns The non-empty tokens of the answer, split on commas, `#` and whitespace; empty when no answer was obtained.
+ */
+export const extractImportantWords = async (issue: Issue): Promise<string[]> => {
+  const res = await getAnswerFromChatGPT(
+    "",
+    `${
+      process.env.CHATGPT_USER_PROMPT_FOR_IMPORTANT_WORDS ||
+      "I need your help to find important words (e.g. unique adjectives) from github issue below and I want to parse them easily so please separate them using #(No other contexts needed). Please separate the words by # so I can parse them easily. Please answer simply as I only need the important words. Here is the issue content.\n"
+    } '${`Issue title: ${issue.title}\nIssue content: ${issue.body}`}'`,
+    parseTemperature(process.env.IMPORTANT_WORDS_AI_TEMPERATURE)
+  );
+  // Split on runs of separators and drop blanks so answers like "a, b" or "a#\nb" never yield empty words.
+  return res.split(/[,#\s]+/).filter((word) => word !== "");
+};
+
+/**
+ * Asks ChatGPT how likely it is that two issues have the same content.
+ *
+ * @param first - Issue substituted for `%first%` in the prompt template.
+ * @param second - Issue substituted for `%second%` in the prompt template.
+ * @returns The integer part of the percentage in the answer, or 0 when the answer holds no number.
+ */
+export const measureSimilarity = async (first: Issue, second: Issue): Promise<number> => {
+  const template =
+    process.env.CHATGPT_USER_PROMPT_FOR_MEASURE_SIMILARITY ||
+    'I have two github issues and I need to measure the possibility of the 2 issues are the same content (I need to parse the % so other contents are not needed and give me only the number in %).\n Give me in number format and add % after the number.\nDo not tell other things since I only need the number (e.g. 85%). Here are two issues:\n 1. "%first%"\n2. "%second%"';
+  const firstText = `Issue title: ${first.title}\nIssue content: ${first.body}`;
+  const secondText = `Issue title: ${second.title}\nIssue content: ${second.body}`;
+  // A single pass with a replacer function keeps `$&`-style sequences in issue text literal and
+  // never re-substitutes placeholder-looking text that came from an issue body.
+  const userPrompt = template.replace(/%(first|second)%/g, (_placeholder: string, key: string) =>
+    key === "first" ? firstText : secondText
+  );
+  const res = await getAnswerFromChatGPT("", userPrompt, parseTemperature(process.env.MEASURE_SIMILARITY_AI_TEMPERATURE));
+  // Prefer the number written directly before a `%`; fall back to the first number in the answer.
+  const match = res.match(/(\d+)(?:\.\d+)?\s*%/) ?? res.match(/(\d+)/);
+  return match ? parseInt(match[1], 10) : 0;
+};
+
+const ChatMessageSchema = Type.Object({
+  content: Type.String(),
+});
+
+const ChoiceSchema = Type.Object({
+  message: ChatMessageSchema,
+});
+
+const ChoicesSchema = Type.Object({
+  choices: Type.Array(ChoiceSchema),
+});
+
+type Choices = Static<typeof ChoicesSchema>;
+
+/**
+ * Sends one system/user prompt pair to the chat completions endpoint and returns the first answer.
+ *
+ * Requests answered with HTTP 429 are retried with exponential backoff. Every failure is logged
+ * and reported to the caller as an empty string instead of an exception.
+ *
+ * @param systemPrompt - Content of the `system` message.
+ * @param userPrompt - Content of the `user` message.
+ * @param temperature - Sampling temperature forwarded to the API.
+ * @param max_tokens - Completion token limit forwarded to the API.
+ * @returns The content of the first choice, or `""` on any failure.
+ */
+export const getAnswerFromChatGPT = async (systemPrompt: string, userPrompt: string, temperature = 0, max_tokens = 1500): Promise<string> => {
+  const logger = getLogger();
+  const body = JSON.stringify({
+    model: "gpt-3.5-turbo",
+    messages: [
+      {
+        role: "system",
+        content: systemPrompt,
+      },
+      {
+        role: "user",
+        content: userPrompt,
+      },
+    ],
+    max_tokens,
+    temperature,
+    stream: false,
+  });
+  const config = {
+    method: "post",
+    url: `${process.env.OPENAI_API_HOST || "https://api.openai.com"}/v1/chat/completions`,
+    headers: {
+      Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
+      "Content-Type": "application/json",
+    },
+    data: body,
+  };
+  try {
+    const response = await backOff(() => axios(config), {
+      startingDelay: 6000,
+      // Only rate limiting is worth waiting for; any other failure is reported immediately.
+      retry: (e: AxiosError) => e.response?.status === 429,
+    });
+    const data: Choices = response.data;
+    const validate = ajv.compile(ChoicesSchema);
+    const valid = validate(data);
+    if (!valid) {
+      logger.error(`Error occurred from OpenAI`);
+      return "";
+    }
+    const { choices } = data;
+    if (choices.length <= 0) {
+      logger.error(`No result from OpenAI`);
+      return "";
+    }
+    return choices[0].message.content;
+  } catch (error: unknown) {
+    logger.error(`Getting response from ChatGPT failed: ${error}`);
+    return "";
+  }
+};