Skip to content

Commit

Permalink
fix: [codegeex] optimized efficiency and functionality
Browse files Browse the repository at this point in the history
1. Inserting vector data into the database is now faster.
2. Fix some bugs related to multithreading.
3. Update UI and tips.

log: as title
  • Loading branch information
LiHua000 committed Oct 17, 2024
1 parent 46e97a9 commit 08dcade
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 115 deletions.
104 changes: 57 additions & 47 deletions assets/translations/en_US.ts

Large diffs are not rendered by default.

106 changes: 58 additions & 48 deletions assets/translations/zh_CN.ts

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion src/plugins/codegeex/codegeex/askapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,21 @@ class AskApiPrivate : public QObject
QString locale = "zh";
bool codebaseEnabled = false;
bool networkEnabled = false;
bool terminated = false;
QStringList referenceFiles;
};

// Builds the private half of AskApi: remembers the public object and creates
// the network access manager with that public object as its QObject parent.
AskApiPrivate::AskApiPrivate(AskApi *qq)
: q(qq),
manager(new QNetworkAccessManager(qq))
{
// When the public object emits stopReceive, raise the `terminated` flag;
// postMessage() checks this flag and refuses to issue new requests while it is set.
const auto markTerminated = [this]() { terminated = true; };
connect(q, &AskApi::stopReceive, this, markTerminated);
}

QNetworkReply *AskApiPrivate::postMessage(const QString &url, const QString &token, const QByteArray &body)
{
if (terminated)
return nullptr;
QNetworkRequest request(url);
request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
request.setRawHeader("code-token", token.toUtf8());
Expand Down Expand Up @@ -192,7 +196,7 @@ QByteArray AskApiPrivate::assembleSSEChatBody(const QString &prompt, const QStri
if (!chunks.isEmpty()) {
CodeGeeXManager::instance()->cleanHistoryMessage(); // incase history is too big
if (result["Completed"].toBool() == false)
CodeGeeXManager::instance()->notify(0, CodeGeeXManager::tr("The indexing of project %1 has not been completed, which may cause the results to be inaccurate.").arg(currentProjectPath));
emit q->notify(0, CodeGeeXManager::tr("The indexing of project %1 has not been completed, which may cause the results to be inaccurate.").arg(currentProjectPath));
jsonObject["history"] = QJsonArray();
QString context;
context += prompt;
Expand Down Expand Up @@ -283,6 +287,11 @@ AskApi::AskApi(QObject *parent)
d(new AskApiPrivate(this))
{
connect(this, &AskApi::syncSendMessage, this, &AskApi::slotSendMessage);
connect(this, &AskApi::notify, this, [](int type, const QString &message) {
using namespace dpfservice;
WindowService *windowService = dpfGetService(WindowService);
windowService->notify(type, "Ai", message, QStringList {});
});
}

AskApi::~AskApi()
Expand Down Expand Up @@ -373,6 +382,7 @@ void AskApi::postSSEChat(const QString &url,
const QMultiMap<QString, QString> &history,
const QString &talkId)
{
d->terminated = false;
QJsonArray jsonArray = convertHistoryToJSONArray(history);

#ifdef SUPPORTMINIFORGE
Expand Down
1 change: 1 addition & 0 deletions src/plugins/codegeex/codegeex/askapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class AskApi : public QObject
void stopReceive();
void syncSendMessage(const QString url, const QString &token, const QByteArray &body);

Check warning on line 109 in src/plugins/codegeex/codegeex/askapi.h

View workflow job for this annotation

GitHub Actions / cppcheck

Parameter 'url' is passed by value. It could be passed as a const reference which is usually faster and recommended in C++.

Check warning on line 109 in src/plugins/codegeex/codegeex/askapi.h

View workflow job for this annotation

GitHub Actions / static-check / static-check

Parameter 'url' is passed by value. It could be passed as a const reference which is usually faster and recommended in C++.
void noChunksFounded();
void notify(int type, const QString &message);

public slots:
void slotSendMessage(const QString url, const QString &token, const QByteArray &body);

Check warning on line 114 in src/plugins/codegeex/codegeex/askapi.h

View workflow job for this annotation

GitHub Actions / cppcheck

Parameter 'url' is passed by value. It could be passed as a const reference which is usually faster and recommended in C++.

Check warning on line 114 in src/plugins/codegeex/codegeex/askapi.h

View workflow job for this annotation

GitHub Actions / static-check / static-check

Parameter 'url' is passed by value. It could be passed as a const reference which is usually faster and recommended in C++.
Expand Down
5 changes: 5 additions & 0 deletions src/plugins/codegeex/widgets/inputeditwidget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,11 @@ void InputEdit::onTextChanged()
QSet<QString> tagList;
int last_pos = 0;

// Bug: the tags are removed when a message is sent, which resets them before they are used.
// So only update the tags when CodeGeeX is not running.
if (CodeGeeXManager::instance()->checkRunningState(true))
return;

cursor.setPosition(0);
formatList.clear();

Expand Down
3 changes: 2 additions & 1 deletion src/plugins/codegeex/widgets/intropage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ void IntroPage::initIntroContent()
introLayout->setSpacing(10);
qobject_cast<QVBoxLayout *>(layout())->addLayout(introLayout);

appendDescLabel(introLayout, tr("CodeGeeX provides code completion suggestions in editor, Press %1 Ctrl + T %2 to accept.").arg("<font style='color:dodgerblue;'>", "</font>"));
appendDescLabel(introLayout, tr("CodeGeeX provides code completion suggestions in editor, Press %1 Tab %2 to accept.").arg("<font style='color:dodgerblue;'>", "</font>"));
appendDescLabel(introLayout, tr("CodeGeeX provides inline chat functionality in editor, Press %1 Ctrl + T %2 to use it.").arg("<font style='color:dodgerblue;'>", "</font>"));
appendDescLabel(introLayout, tr("Select code and %1 right-click %2 to add comments or translate code.").arg("<font style='color:dodgerblue;'>", "</font>"));
appendDescLabel(introLayout, tr("Also, you can directly %1 ask CodeGeeX any questions %2.").arg("<font style='color:dodgerblue;'>", "</font>"));
}
Expand Down
35 changes: 17 additions & 18 deletions src/scripts/rag/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def _insert_chunks_sync(db: sqlite3.Connection, tag_string: str, chunks: List[Di
finally:
cursor.close()

def _insert_embedding_sync(db: sqlite3.Connection, vector: bytes, chunk: Dict[str, Any]):
def _insert_embedding_sync(db: sqlite3.Connection, embedding_map: Dict[str, bytes], chunks: List[Dict[str, Any]]):
cursor = db.db.cursor()
try:
cursor.execute("BEGIN")
Expand All @@ -267,21 +267,20 @@ def _insert_embedding_sync(db: sqlite3.Connection, vector: bytes, chunk: Dict[st
INSERT INTO lance_db_cache (uuid, cacheKey, path, artifact_id, vector, startLine, endLine, contents)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
"""

cursor.execute(embedding_sql, (
uuid.uuid4().hex,
chunk["digest"],
chunk["filepath"],
#chunk["index"],
"all-MiniLM-L6-v2",
vector,
chunk["startLine"],
chunk["endLine"],
chunk["content"]
))
for chunk in chunks:
cursor.execute(embedding_sql, (
str(uuid.uuid4()),
chunk["digest"],
chunk["filepath"],
"lance",
embedding_map[chunk["digest"]],
chunk["startLine"],
chunk["endLine"],
chunk["content"]
))

if cursor.rowcount == 0:
raise Exception("Failed to insert into embeddings table")
raise Exception("Failed to insert any embeddings into the table")

db.db.commit()
except Exception as e:
Expand Down Expand Up @@ -324,6 +323,7 @@ def initDb(db: sqlite3.Connection):

def embeddingDirectory(dir: str, provider: ONNXEmbeddingsProvider, db: sqlite3.Connection):
for entry in os.listdir(dir):
embedding_map = {}
full_path = os.path.join(dir, entry)
if "/3rdparty/" in full_path or "/.unioncode/" in full_path:
continue
Expand All @@ -335,14 +335,13 @@ def embeddingDirectory(dir: str, provider: ONNXEmbeddingsProvider, db: sqlite3.C
code = file.read()
max_chunk_size = 1024
chunks = list(chunk_document(full_path, code, max_chunk_size))
_insert_chunks_sync(db, "test", chunks)

for chunk in chunks:
content = chunk['content']
embedding = provider.embed_single(content)
provider.code_blocks.append(content)
_insert_embedding_sync(db, embedding.tobytes(), chunk)

embedding_map[chunk['digest']] = embedding.tobytes()
_insert_chunks_sync(db, "chunk", chunks)
_insert_embedding_sync(db, embedding_map, chunks)
elif os.path.isdir(full_path):
embeddingDirectory(full_path, provider, db)

Expand Down

0 comments on commit 08dcade

Please sign in to comment.