ubiquity · 0xcodercrane · Aug 27, 2023 · Aug 11, 2023 · Aug 11, 2023 · Aug 11, 2023
diff --git a/.env.example b/.env.example
@@ -10,7 +10,13 @@ SUPABASE_KEY=
 AUTO_PAY_MODE=
 ANALYTICS_MODE=
 
-# Log environment
-LOG_ENVIRONMENT=production # development to see logs in console
-LOG_LEVEL=debug # 0: error 1: warn 2: info 3: http 4: verbose 5: debug 6: silly
-LOG_RETRY=0 # 0 for no retry, more than 0 for the number of retries
+# Use `trace` to get verbose logging or `info` to show less
+LOG_LEVEL=debug
+LOGDNA_INGESTION_KEY=
+OPENAI_API_HOST=https://api.openai.com
+OPENAI_API_KEY=
+CHATGPT_USER_PROMPT_FOR_IMPORTANT_WORDS="I need your help to find important words (e.g. unique adjectives) from github issue below and I want to parse them easily so please separate them using #(No other contexts needed). Please separate the words by # so I can parse them easily. Please answer simply as I only need the important words. Here is the issue content.\n"
+CHATGPT_USER_PROMPT_FOR_MEASURE_SIMILARITY='I have two github issues and I need to measure the possibility of the 2 issues are the same content (No other contents needed and give me only the number in %).\n Give me in number format and add % after the number.\nDo not tell other things since I only need the number (e.g. 85%). Here are two issues:\n 1. "%first%"\n2. "%second%"'
+SIMILARITY_THRESHOLD=80
+MEASURE_SIMILARITY_AI_TEMPERATURE=0
+IMPORTANT_WORDS_AI_TEMPERATURE=0
diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml
@@ -81,4 +81,4 @@ jobs:
           X25519_PRIVATE_KEY: 'QCDb30UHUkwJAGhLWC-R2N0PiEbd4vQY6qH2Wloybyo'
           FOLLOW_UP_TIME: '4 days'
           DISQUALIFY_TIME: '7 days'
-        run: yarn start:serverless
+        run: yarn start:serverless
diff --git a/README.md b/README.md
@@ -23,6 +23,15 @@ yarn start:watch
 - `LOGDNA_INGESTION_KEY`: Get it from [Memzo](https://app.mezmo.com/) by creating an account, adding an organization, and copying the ingestion key on the next screen.
 - `FOLLOWUP_TIME`: (optional) Set a custom follow-up time (default: 4 days).
 - `DISQUALIFY_TIME`: (optional) Set a custom disqualify time (default: 7 days).
+- `OPENAI_API_HOST`: (optional) Set OpenAI host url (default: https://api.openai.com).
+- `OPENAI_API_KEY`: Set OpenAI key.
+- `CHATGPT_USER_PROMPT_FOR_IMPORTANT_WORDS`: (optional) Set a custom user prompt for finding important words 
+(default: "I need your help to find important words (e.g. unique adjectives) from github issue below and I want to parse them easily so please separate them using #(No other contexts needed). Please separate the words by # so I can parse them easily. Please answer simply as I only need the important words. Here is the issue content.\n").
+- `CHATGPT_USER_PROMPT_FOR_MEASURE_SIMILARITY`: (optional) Set a custom user prompt for measuring similarity 
+(default: 'I have two github issues and I need to measure the possibility of the 2 issues are the same content (I need to parse the % so other contents are not needed and give me only the number in %).\n Give me in number format and add % after the number.\nDo not tell other things since I only need the number (e.g. 85%). Here are two issues:\n 1. "%first%"\n2. "%second%"').
+- `SIMILARITY_THRESHOLD`: (optional) Set similarity threshold (default: 80).
+- `MEASURE_SIMILARITY_AI_TEMPERATURE`: (optional) Set ChatGPT temperature for measuring similarity (default: 0).
+- `IMPORTANT_WORDS_AI_TEMPERATURE`: (optional) Set ChatGPT temperature for finding important words (default: 0).
 
 `APP_ID` and `PRIVATE_KEY` are [here](https://t.me/c/1588400061/1627) for internal developers to use.
 If you are an external developer, `APP_ID`and `PRIVATE_KEY` are automatically generated when you install the app on your repository.

diff --git a/package.json b/package.json
@@ -44,6 +44,7 @@
     "decimal.js": "^10.4.3",
     "copyfiles": "^2.4.1",
     "ethers": "^5.7.2",
+    "exponential-backoff": "^3.1.1",
     "husky": "^8.0.2",
     "jimp": "^0.22.4",
     "js-yaml": "^4.1.0",

diff --git a/src/handlers/issue/index.ts b/src/handlers/issue/index.ts
@@ -0,0 +1 @@
+export * from "./pre";
diff --git a/src/handlers/issue/pre.ts b/src/handlers/issue/pre.ts
@@ -0,0 +1,87 @@
+import { extractImportantWords, upsertCommentToIssue, measureSimilarity } from "../../helpers";
+import { getBotContext, getLogger } from "../../bindings";
+import { Issue, Payload } from "../../types";
+
+/** Similarity percentage used when `SIMILARITY_THRESHOLD` is unset or not a number. */
+const DEFAULT_SIMILARITY_THRESHOLD = 80;
+/** Number of search results requested per page. */
+const SEARCH_PAGE_SIZE = 10;
+
+/**
+ * Reads the similarity threshold (in percent) from `SIMILARITY_THRESHOLD`.
+ * Falls back to the default instead of NaN, because `similarity > NaN` is
+ * always false and would silently disable duplicate detection.
+ */
+const getSimilarityThreshold = (): number => {
+  const parsed = parseInt(process.env.SIMILARITY_THRESHOLD || "", 10);
+  return Number.isNaN(parsed) ? DEFAULT_SIMILARITY_THRESHOLD : parsed;
+};
+
+/**
+ * Searches the repository for an existing issue that duplicates the issue in
+ * the current payload and, on the first match, comments on the new issue.
+ *
+ * For every keyword returned by `extractImportantWords`, the repository's
+ * issues are searched page by page (newest first) and each candidate is scored
+ * with `measureSimilarity`. The first candidate scoring above the threshold
+ * is reported via `upsertCommentToIssue` and the handler returns.
+ *
+ * Does nothing when the payload has no issue or the issue has no body. Errors
+ * raised while processing one keyword are logged and the next keyword is tried.
+ */
+export const findDuplicateOne = async () => {
+  const logger = getLogger();
+  const context = getBotContext();
+  const payload = context.payload as Payload;
+  const issue = payload.issue;
+
+  if (!issue?.body) return;
+
+  const threshold = getSimilarityThreshold();
+  const repo = `${payload.repository.owner.login}/${payload.repository.name}`;
+
+  // Drop blank keywords (they would leave the query with only the repo filter) and repeats.
+  const extracted = await extractImportantWords(issue);
+  const importantWords = Array.from(new Set(extracted.map((word) => word.trim()).filter((word) => word !== "")));
+
+  // Candidates already scored, so an issue matching several keywords is only compared once.
+  const checkedIds = new Set<number>();
+
+  for (const importantWord of importantWords) {
+    try {
+      // The page counter restarts for every keyword; sharing it across keywords skipped results.
+      for (let curPage = 1; ; curPage++) {
+        const response = await context.octokit.rest.search.issuesAndPullRequests({
+          q: `${importantWord} repo:${repo} is:issue`,
+          sort: "created",
+          order: "desc",
+          per_page: SEARCH_PAGE_SIZE,
+          page: curPage,
+        });
+        const items = response.data.items;
+
+        for (const result of items) {
+          if (!result.body) continue;
+          if (result.id === issue.id) continue;
+          if (checkedIds.has(result.id)) continue;
+          checkedIds.add(result.id);
+
+          const similarity = await measureSimilarity(issue, result as Issue);
+          if (similarity > threshold) {
+            await upsertCommentToIssue(
+              issue.number,
+              `Similar issue (${result.title}) found at ${result.html_url}.\nSimilarity is about ${similarity}%`,
+              "created"
+            );
+            return;
+          }
+        }
+
+        // A short page means there are no further results for this keyword.
+        if (items.length < SEARCH_PAGE_SIZE) break;
+      }
+    } catch (e: unknown) {
+      logger.error(`Could not find any issues, reason: ${e}`);
+    }
+  }
+};
diff --git a/src/handlers/processors.ts b/src/handlers/processors.ts
@@ -8,10 +8,11 @@ import { checkPullRequests } from "./assign/auto";
 import { createDevPoolPR } from "./pull-request";
 import { runOnPush } from "./push";
 import { incentivizeComments, incentivizeCreatorComment } from "./payout";
+import { findDuplicateOne } from "./issue";
 
 export const processors: Record<string, Handler> = {
   [GithubEvent.ISSUES_OPENED]: {
-    pre: [nullHandler],
+    pre: [findDuplicateOne],
     action: [nullHandler], // SHOULD not set `issueCreatedCallback` until the exploit issue resolved.  https://github.com/ubiquity/ubiquibot/issues/535
     post: [nullHandler],
   },

diff --git a/src/helpers/index.ts b/src/helpers/index.ts
@@ -8,3 +8,4 @@ export * from "./contracts";
 export * from "./comment";
 export * from "./payout";
 export * from "./file";
+export * from "./similarity";
diff --git a/src/helpers/issue.ts b/src/helpers/issue.ts
@@ -48,7 +48,13 @@ export const addLabelToIssue = async (labelName: string) => {
   }
 };
 
-export const listIssuesForRepo = async (state: "open" | "closed" | "all" = "open", per_page = 30, page = 1) => {
+export const listIssuesForRepo = async (
+  state: "open" | "closed" | "all" = "open",
+  per_page = 30,
+  page = 1,
+  sort: "created" | "updated" | "comments" = "created",
+  direction: "desc" | "asc" = "desc"
+) => {
   const context = getBotContext();
   const payload = context.payload as Payload;
 
@@ -58,6 +64,8 @@ export const listIssuesForRepo = async (state: "open" | "closed" | "all" = "open
     state,
     per_page,
     page,
+    sort,
+    direction,
   });
 
   await checkRateLimitGit(response.headers);

diff --git a/src/helpers/similarity.ts b/src/helpers/similarity.ts
@@ -0,0 +1,168 @@
+import { getLogger } from "../bindings";
+import axios, { AxiosError } from "axios";
+import { ajv } from "../utils";
+import { Static, Type } from "@sinclair/typebox";
+import { backOff } from "exponential-backoff";
+import { Issue } from "../types";
+
+/** Renders an issue as the text block embedded in the prompts. */
+const describeIssue = (issue: Issue): string => `Issue title: ${issue.title}\nIssue content: ${issue.body}`;
+
+/**
+ * Parses a sampling temperature from an environment variable value.
+ * Returns 0 when the value is unset or not a number, because NaN would be
+ * serialized as `null` in the JSON request body.
+ */
+const parseTemperature = (raw: string | undefined): number => {
+  const parsed = parseFloat(raw || "0");
+  return Number.isNaN(parsed) ? 0 : parsed;
+};
+
+/**
+ * Extracts the similarity percentage from a model answer.
+ * Prefers a number directly followed by `%` (e.g. "85%") so that other digits
+ * in the answer are not mistaken for the score, and otherwise falls back to the
+ * first run of digits. Returns 0 when the answer contains no number.
+ */
+const parsePercent = (answer: string): number => {
+  const match = answer.match(/(\d+)(?:\.\d+)?\s*%/) ?? answer.match(/\d+/);
+  return match ? parseInt(match[1] ?? match[0], 10) : 0;
+};
+
+/**
+ * Asks ChatGPT for the important words of an issue.
+ *
+ * The prompt is `CHATGPT_USER_PROMPT_FOR_IMPORTANT_WORDS` (or the built-in
+ * default) followed by the issue title and body.
+ *
+ * @param issue - Issue whose title and body are sent to the model.
+ * @returns The answer split on `,`, `#` and whitespace with empty entries
+ * removed; an empty array when no answer was obtained.
+ */
+export const extractImportantWords = async (issue: Issue): Promise<string[]> => {
+  const instructions =
+    process.env.CHATGPT_USER_PROMPT_FOR_IMPORTANT_WORDS ||
+    "I need your help to find important words (e.g. unique adjectives) from github issue below and I want to parse them easily so please separate them using #(No other contexts needed). Please separate the words by # so I can parse them easily. Please answer simply as I only need the important words. Here is the issue content.\n";
+  const res = await getAnswerFromChatGPT(
+    "",
+    `${instructions} '${describeIssue(issue)}'`,
+    parseTemperature(process.env.IMPORTANT_WORDS_AI_TEMPERATURE)
+  );
+  // Runs of separators (", ", "# ", newlines) must not yield empty words.
+  return res.split(/[,#\s]+/).filter((word) => word !== "");
+};
+
+/**
+ * Asks ChatGPT how likely it is that two issues describe the same thing.
+ *
+ * The prompt is `CHATGPT_USER_PROMPT_FOR_MEASURE_SIMILARITY` (or the built-in
+ * default) with the `%first%` and `%second%` placeholders replaced by the
+ * title and body of the respective issue.
+ *
+ * @param first - Issue substituted for `%first%`.
+ * @param second - Issue substituted for `%second%`.
+ * @returns The percentage parsed from the answer, or 0 when none was found.
+ */
+export const measureSimilarity = async (first: Issue, second: Issue): Promise<number> => {
+  const template =
+    process.env.CHATGPT_USER_PROMPT_FOR_MEASURE_SIMILARITY ||
+    'I have two github issues and I need to measure the possibility of the 2 issues are the same content (I need to parse the % so other contents are not needed and give me only the number in %).\n Give me in number format and add % after the number.\nDo not tell other things since I only need the number (e.g. 85%). Here are two issues:\n 1. "%first%"\n2. "%second%"';
+  // Fill both placeholders in one pass with a replacer function, so that issue
+  // text containing `$&`-style patterns or a literal `%second%` is inserted verbatim.
+  const prompt = template.replace(/%(first|second)%/g, (_placeholder: string, which: string) =>
+    describeIssue(which === "first" ? first : second)
+  );
+  const res = await getAnswerFromChatGPT("", prompt, parseTemperature(process.env.MEASURE_SIMILARITY_AI_TEMPERATURE));
+  return parsePercent(res);
+};
+
+/** Shape of the fields read from an OpenAI chat completion response. */
+const ChatMessageSchema = Type.Object({
+  content: Type.String(),
+});
+
+const ChoiceSchema = Type.Object({
+  message: ChatMessageSchema,
+});
+
+const ChoicesSchema = Type.Object({
+  choices: Type.Array(ChoiceSchema),
+});
+
+type Choices = Static<typeof ChoicesSchema>;
+
+/**
+ * Sends a chat completion request to the OpenAI API and returns the first answer.
+ *
+ * The request is retried with exponential backoff (starting delay 6 seconds)
+ * only when the API responds with HTTP 429.
+ *
+ * @param systemPrompt - Content of the `system` message.
+ * @param userPrompt - Content of the `user` message.
+ * @param temperature - Sampling temperature sent with the request.
+ * @param max_tokens - `max_tokens` value sent with the request.
+ * @returns The content of the first choice, or an empty string when the API key
+ * is missing, the request fails or the response has an unexpected shape; these
+ * failures are logged instead of thrown.
+ */
+export const getAnswerFromChatGPT = async (systemPrompt: string, userPrompt: string, temperature = 0, max_tokens = 1500): Promise<string> => {
+  const logger = getLogger();
+  const apiKey = process.env.OPENAI_API_KEY;
+  if (!apiKey) {
+    logger.error(`OPENAI_API_KEY is not set, skipping the request to OpenAI`);
+    return "";
+  }
+
+  const body = JSON.stringify({
+    model: "gpt-3.5-turbo",
+    messages: [
+      {
+        role: "system",
+        content: systemPrompt,
+      },
+      {
+        role: "user",
+        content: userPrompt,
+      },
+    ],
+    max_tokens,
+    temperature,
+    stream: false,
+  });
+  // Tolerate trailing slashes in the configured host.
+  const host = (process.env.OPENAI_API_HOST || "https://api.openai.com").replace(/\/+$/, "");
+  const config = {
+    method: "post",
+    url: `${host}/v1/chat/completions`,
+    headers: {
+      Authorization: `Bearer ${apiKey}`,
+      "Content-Type": "application/json",
+    },
+    data: body,
+  };
+  try {
+    const response = await backOff(() => axios(config), {
+      startingDelay: 6000,
+      retry: (e: AxiosError) => {
+        if (e.response && e.response.status === 429) return true;
+        return false;
+      },
+    });
+    // Validate before reading, since the payload comes from an external service.
+    const data: unknown = response.data;
+    const validate = ajv.compile(ChoicesSchema);
+    if (!validate(data)) {
+      logger.error(`OpenAI response failed schema validation: ${JSON.stringify(validate.errors)}`);
+      return "";
+    }
+    const { choices } = data as Choices;
+    if (choices.length === 0) {
+      logger.error(`No result from OpenAI`);
+      return "";
+    }
+    return choices[0].message.content;
+  } catch (error) {
+    logger.error(`Getting response from ChatGPT failed: ${error}`);
+    return "";
+  }
+};