From 08dcade61fbf8d8300514647082cbcaa46b934d5 Mon Sep 17 00:00:00 2001 From: Zhang TingAn Date: Thu, 17 Oct 2024 14:55:18 +0800 Subject: [PATCH] fix: [codegeex] optimized efficiency and functionality 1. The speed of inserting vector data into the database has become faster. 2. fix some bugs related multithreads. 3. Ui and tip log: as title --- assets/translations/en_US.ts | 104 +++++++++-------- assets/translations/zh_CN.ts | 106 ++++++++++-------- src/plugins/codegeex/codegeex/askapi.cpp | 12 +- src/plugins/codegeex/codegeex/askapi.h | 1 + .../codegeex/widgets/inputeditwidget.cpp | 5 + src/plugins/codegeex/widgets/intropage.cpp | 3 +- src/scripts/rag/generate.py | 35 +++--- 7 files changed, 151 insertions(+), 115 deletions(-) diff --git a/assets/translations/en_US.ts b/assets/translations/en_US.ts index 0df0bea46..655ee8106 100644 --- a/assets/translations/en_US.ts +++ b/assets/translations/en_US.ts @@ -1183,7 +1183,7 @@ storage: %2 CodeGeeX::AskApi - + Install @@ -1221,13 +1221,13 @@ storage: %2 - + The file indexing feature is not available, which may cause functions such as @codebase to not work properly.Please install the required environment. the installation process may take several minutes. - + The indexing of project %1 has not been completed, which may cause the results to be inaccurate. @@ -1277,22 +1277,27 @@ storage: %2 CodeGeex - + + Quick Open CodeGeeX + + + + Login - + Please login to use CodeGeeX. - + Install - + Install a Python Conda virtual environment for using the file indexing feature. Without it, there may be abnormalities in the @codebase and some AI functionalities. @@ -1590,77 +1595,77 @@ storage: %2 Controller - + Open Document - + Hide ContextWidget - + Show docks in this view - + &File - + &Edit - + &Build - + &Debug - + &Tools - + &Help - + Open File - + Open Project - + Report Bug - + Help Documents - + Expand All - + Fold All @@ -2791,7 +2796,7 @@ need to manually copy the source code to this path InlineChatWidget - + Press %1 to inline chat @@ -2956,41 +2961,46 @@ need to manually copy the source code to this path - CodeGeeX provides code completion suggestions in editor, Press %1 Ctrl + T %2 to accept. + CodeGeeX provides code completion suggestions in editor, Press %1 Tab %2 to accept. - Select code and %1 right-click %2 to add comments or translate code. + CodeGeeX provides inline chat functionality in editor, Press %1 Ctrl + T %2 to use it. + Select code and %1 right-click %2 to add comments or translate code. + + + + Also, you can directly %1 ask CodeGeeX any questions %2. - + Try the following questions: - + How to iterate through a dictionary in Python? - + Write a quicksort function. - + What is the best way to start learning JavaScript? - + logout @@ -3335,7 +3345,7 @@ repos path: %0 MainWindow - + Hide Dock Widget @@ -4084,7 +4094,7 @@ repos path: %0 - + Select File @@ -4502,7 +4512,7 @@ not exists support files: %0 - + AI @@ -5849,8 +5859,8 @@ not exists support files: %0 ShortcutSettingWidget - - + + Type to search in keybindings @@ -5880,54 +5890,54 @@ not exists support files: %0 - - + + Add Shortcut - + Change %1 - + Remove %1 - + Remove All Shortcut - + Reset Shortcut - + Change Shortcut - + Export Keyboard Mapping Scheme - - + + Keyboard Mapping Scheme (*.kms) - + Import Keyboard Mapping Scheme - + Recording Keys. Press Escape to exit diff --git a/assets/translations/zh_CN.ts b/assets/translations/zh_CN.ts index 1c8cb8213..7fb90e047 100644 --- a/assets/translations/zh_CN.ts +++ b/assets/translations/zh_CN.ts @@ -1203,7 +1203,7 @@ storage: %2 CodeGeeX::AskApi - + Install 安装 @@ -1241,14 +1241,14 @@ storage: %2 在为工程%1建立文件索引时出现错误 - + The file indexing feature is not available, which may cause functions such as @codebase to not work properly.Please install the required environment. the installation process may take several minutes. 文件索引功能不可用,可能会导致@codebase等功能无法正常使用,请安装相应的环境。 安装过程可能会耗费若干分钟。 - + The indexing of project %1 has not been completed, which may cause the results to be inaccurate. 工程%1的文件索引尚未完全完成,可能会导致结果不准确. @@ -1298,22 +1298,27 @@ storage: %2 CodeGeex - + + Quick Open CodeGeeX + 快捷使用CodeGeeX + + + Login 登录 - + Please login to use CodeGeeX. 请登录后使用CodeGeex. - + Install 安装 - + Install a Python Conda virtual environment for using the file indexing feature. Without it, there may be abnormalities in the @codebase and some AI functionalities. 安装Python Conda虚拟环境,以用于文件索引功能。否则可能导致@codebase或部分Ai功能异常。 @@ -1617,77 +1622,77 @@ storage: %2 Controller - + Open Document 打开文件 - + Hide ContextWidget 隐藏内容区 - + Show docks in this view 当前视图中的窗口 - + &File 文件(&F) - + &Edit 编辑(&E) - + &Build 编译(&B) - + &Debug 调试(&D) - + &Tools 工具(&T) - + &Help 帮助(&H) - + Open File 打开文件 - + Open Project 打开工程 - + Report Bug 报告Bug - + Help Documents 帮助文档 - + Expand All 展开所有 - + Fold All 折叠所有 @@ -2826,7 +2831,7 @@ need to manually copy the source code to this path InlineChatWidget - + Press %1 to inline chat 按%1进行内联聊天 @@ -2991,41 +2996,46 @@ need to manually copy the source code to this path - CodeGeeX provides code completion suggestions in editor, Press %1 Ctrl + T %2 to accept. - CodeGeex在编辑器提供代码补全建议,按%1 Ctrl + T %2即可接受 + CodeGeeX provides code completion suggestions in editor, Press %1 Tab %2 to accept. + CodeGeeX在编辑器中提供了代码补全建议,按%1 Tab %2即可接受. + CodeGeeX provides inline chat functionality in editor, Press %1 Ctrl + T %2 to use it. + CodeGeeX在编辑器中提供了内联聊天功能,按%1 Ctrl+T %2即可使用. + + + Select code and %1 right-click %2 to add comments or translate code. 选择代码并%1点击右键%2以添加注释或翻译代码 - + Also, you can directly %1 ask CodeGeeX any questions %2. 在对话栏中直接%1向CodeGeeX提问%2 - + Try the following questions: 试试下面的问题: - + How to iterate through a dictionary in Python? 如何在 Python 中遍历字典? - + Write a quicksort function. 编写一个快速排序函数。 - + What is the best way to start learning JavaScript? 开始学习 JavaScript 的最佳方法是什么? - + logout 退出登录 @@ -3372,7 +3382,7 @@ repos path: %0 MainWindow - + Hide Dock Widget 隐藏驻留区 @@ -4121,7 +4131,7 @@ repos path: %0 用户行为分析 - + Select File 选择文件 @@ -4541,7 +4551,7 @@ not exists support files: %0 目标 - + AI @@ -5895,8 +5905,8 @@ not exists support files: %0 ShortcutSettingWidget - - + + Type to search in keybindings 在此键入搜索的快捷键 @@ -5926,54 +5936,54 @@ not exists support files: %0 导入 - - + + Add Shortcut 添加快捷键 - + Change %1 修改 %1 - + Remove %1 删除 %1 - + Remove All Shortcut 删除所有快捷键 - + Reset Shortcut 重置快捷键 - + Change Shortcut 修改快捷键 - + Export Keyboard Mapping Scheme 导出键盘映射方案 - - + + Keyboard Mapping Scheme (*.kms) 键盘映射方案 (*.kms) - + Import Keyboard Mapping Scheme 导入键盘映射方案 - + Recording Keys. Press Escape to exit 正在录制按键。按 Esc 退出 diff --git a/src/plugins/codegeex/codegeex/askapi.cpp b/src/plugins/codegeex/codegeex/askapi.cpp index d5ed77a30..e4f2b1596 100644 --- a/src/plugins/codegeex/codegeex/askapi.cpp +++ b/src/plugins/codegeex/codegeex/askapi.cpp @@ -53,6 +53,7 @@ class AskApiPrivate : public QObject QString locale = "zh"; bool codebaseEnabled = false; bool networkEnabled = false; + bool terminated = false; QStringList referenceFiles; }; @@ -60,10 +61,13 @@ AskApiPrivate::AskApiPrivate(AskApi *qq) : q(qq), manager(new QNetworkAccessManager(qq)) { + connect(q, &AskApi::stopReceive, this, [=](){ terminated = true; }); } QNetworkReply *AskApiPrivate::postMessage(const QString &url, const QString &token, const QByteArray &body) { + if (terminated) + return nullptr; QNetworkRequest request(url); request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json"); request.setRawHeader("code-token", token.toUtf8()); @@ -192,7 +196,7 @@ QByteArray AskApiPrivate::assembleSSEChatBody(const QString &prompt, const QStri if (!chunks.isEmpty()) { CodeGeeXManager::instance()->cleanHistoryMessage(); // incase history is too big if (result["Completed"].toBool() == false) - CodeGeeXManager::instance()->notify(0, CodeGeeXManager::tr("The indexing of project %1 has not been completed, which may cause the results to be inaccurate.").arg(currentProjectPath)); + emit q->notify(0, CodeGeeXManager::tr("The indexing of project %1 has not been completed, which may cause the results to be inaccurate.").arg(currentProjectPath)); jsonObject["history"] = QJsonArray(); QString context; context += prompt; @@ -283,6 +287,11 @@ AskApi::AskApi(QObject *parent) d(new AskApiPrivate(this)) { connect(this, &AskApi::syncSendMessage, this, &AskApi::slotSendMessage); + connect(this, &AskApi::notify, this, [](int type, const QString &message) { + using namespace dpfservice; + WindowService *windowService = dpfGetService(WindowService); + windowService->notify(type, "Ai", message, QStringList {}); + }); } AskApi::~AskApi() @@ -373,6 +382,7 @@ void AskApi::postSSEChat(const QString &url, const QMultiMap &history, const QString &talkId) { + d->terminated = false; QJsonArray jsonArray = convertHistoryToJSONArray(history); #ifdef SUPPORTMINIFORGE diff --git a/src/plugins/codegeex/codegeex/askapi.h b/src/plugins/codegeex/codegeex/askapi.h index 62ff7b6aa..ebef99db4 100644 --- a/src/plugins/codegeex/codegeex/askapi.h +++ b/src/plugins/codegeex/codegeex/askapi.h @@ -108,6 +108,7 @@ class AskApi : public QObject void stopReceive(); void syncSendMessage(const QString url, const QString &token, const QByteArray &body); void noChunksFounded(); + void notify(int type, const QString &message); public slots: void slotSendMessage(const QString url, const QString &token, const QByteArray &body); diff --git a/src/plugins/codegeex/widgets/inputeditwidget.cpp b/src/plugins/codegeex/widgets/inputeditwidget.cpp index 8a8a5b769..34b865761 100644 --- a/src/plugins/codegeex/widgets/inputeditwidget.cpp +++ b/src/plugins/codegeex/widgets/inputeditwidget.cpp @@ -227,6 +227,11 @@ void InputEdit::onTextChanged() QSet tagList; int last_pos = 0; + // bug: tag will removed when send message. and causes the tag to be reset before it is used. + // update tag when codegeex is not running + if (CodeGeeXManager::instance()->checkRunningState(true)) + return; + cursor.setPosition(0); formatList.clear(); diff --git a/src/plugins/codegeex/widgets/intropage.cpp b/src/plugins/codegeex/widgets/intropage.cpp index 8d200b7ab..623989c03 100644 --- a/src/plugins/codegeex/widgets/intropage.cpp +++ b/src/plugins/codegeex/widgets/intropage.cpp @@ -68,7 +68,8 @@ void IntroPage::initIntroContent() introLayout->setSpacing(10); qobject_cast(layout())->addLayout(introLayout); - appendDescLabel(introLayout, tr("CodeGeeX provides code completion suggestions in editor, Press %1 Ctrl + T %2 to accept.").arg("", "")); + appendDescLabel(introLayout, tr("CodeGeeX provides code completion suggestions in editor, Press %1 Tab %2 to accept.").arg("", "")); + appendDescLabel(introLayout, tr("CodeGeeX provides inline chat functionality in editor, Press %1 Ctrl + T %2 to use it.").arg("", "")); appendDescLabel(introLayout, tr("Select code and %1 right-click %2 to add comments or translate code.").arg("", "")); appendDescLabel(introLayout, tr("Also, you can directly %1 ask CodeGeeX any questions %2.").arg("", "")); } diff --git a/src/scripts/rag/generate.py b/src/scripts/rag/generate.py index 8cd704d77..821c2b884 100644 --- a/src/scripts/rag/generate.py +++ b/src/scripts/rag/generate.py @@ -258,7 +258,7 @@ def _insert_chunks_sync(db: sqlite3.Connection, tag_string: str, chunks: List[Di finally: cursor.close() -def _insert_embedding_sync(db: sqlite3.Connection, vector: bytes, chunk: Dict[str, Any]): +def _insert_embedding_sync(db: sqlite3.Connection, embedding_map: Dict[str, bytes], chunks: List[Dict[str, Any]]): cursor = db.db.cursor() try: cursor.execute("BEGIN") @@ -267,21 +267,20 @@ def _insert_embedding_sync(db: sqlite3.Connection, vector: bytes, chunk: Dict[st INSERT INTO lance_db_cache (uuid, cacheKey, path, artifact_id, vector, startLine, endLine, contents) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """ - - cursor.execute(embedding_sql, ( - uuid.uuid4().hex, - chunk["digest"], - chunk["filepath"], - #chunk["index"], - "all-MiniLM-L6-v2", - vector, - chunk["startLine"], - chunk["endLine"], - chunk["content"] - )) + for chunk in chunks: + cursor.execute(embedding_sql, ( + str(uuid.uuid4()), + chunk["digest"], + chunk["filepath"], + "lance", + embedding_map[chunk["digest"]], + chunk["startLine"], + chunk["endLine"], + chunk["content"] + )) if cursor.rowcount == 0: - raise Exception("Failed to insert into embeddings table") + raise Exception("Failed to insert any embeddings into the table") db.db.commit() except Exception as e: @@ -324,6 +323,7 @@ def initDb(db: sqlite3.Connection): def embeddingDirectory(dir: str, provider: ONNXEmbeddingsProvider, db: sqlite3.Connection): for entry in os.listdir(dir): + embedding_map = {} full_path = os.path.join(dir, entry) if "/3rdparty/" in full_path or "/.unioncode/" in full_path: continue @@ -335,14 +335,13 @@ def embeddingDirectory(dir: str, provider: ONNXEmbeddingsProvider, db: sqlite3.C code = file.read() max_chunk_size = 1024 chunks = list(chunk_document(full_path, code, max_chunk_size)) - _insert_chunks_sync(db, "test", chunks) - for chunk in chunks: content = chunk['content'] embedding = provider.embed_single(content) provider.code_blocks.append(content) - _insert_embedding_sync(db, embedding.tobytes(), chunk) - + embedding_map[chunk['digest']] = embedding.tobytes() + _insert_chunks_sync(db, "chunk", chunks) + _insert_embedding_sync(db, embedding_map, chunks) elif os.path.isdir(full_path): embeddingDirectory(full_path, provider, db)