Skip to content

Commit

Permalink
Bridge between ChatCraft DB and DuckDB (#783)
Browse files Browse the repository at this point in the history
* Bridge between ChatCraft DB and DuckDB

* feat: add error handling and retry logic for missing chatcraft tables

* style: Remove trailing whitespace in duckdb-chatcraft.ts

* fix: add proper error typing and type guard in chatCraftQuery

* refactor: add stricter validation for chatcraft table sync in queries

* feat: sync all referenced chatcraft tables before query retry

* refactor: improve error handling flow in chatCraftQuery function

* lint

* todos

* Refactor

* Review fixes

---------

Co-authored-by: Taras Glek (aider) <[email protected]>
  • Loading branch information
humphd and tarasglek authored Jan 15, 2025
1 parent c2f7165 commit 20f34a3
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 87 deletions.
32 changes: 10 additions & 22 deletions src/lib/commands/DuckCommand.ts
Original file line number Diff line number Diff line change
@@ -1,45 +1,33 @@
import { ChatCraftCommand } from "../ChatCraftCommand";
import { ChatCraftChat } from "../ChatCraftChat";
import { ChatCraftHumanMessage } from "../ChatCraftMessage";
import db from "../../lib/db";
import { query, queryResultToJson, queryToMarkdown } from "../duckdb";
import { getTables, queryToMarkdown } from "../duckdb-chatcraft";

export class DuckCommand extends ChatCraftCommand {
constructor() {
super("duck", "/duck", "Do some SQL queries");
}

async execute(chat: ChatCraftChat, _user: User | undefined, args?: string[]) {
const exportResult = await db.exportToDuckDB();
let sql: string;
let markdown: string;

if (!args?.length) {
// Get a list of all tables and describe each one
const message: string[] = ["## DuckDB Tables"];

const tables = await query("SHOW TABLES");
const tableNames = queryResultToJson(tables).map((row: any) => row.name);

await Promise.all(
tableNames.map(async (name: string) => {
const rowCount = exportResult.tables.find((table) => table.name === name)?.rowCount || 0;
const tableDescription = await queryToMarkdown(`DESCRIBE ${name}`);
message.push(`### ${name} (${rowCount} rows)`, tableDescription);
})
);

return chat.addMessage(new ChatCraftHumanMessage({ text: message.join("\n\n") }));
sql = "SHOW TABLES;";
markdown = await getTables();
} else {
sql = args.join(" ");
markdown = await queryToMarkdown(sql);
}

const sql = args.join(" ");
const results = await queryToMarkdown(sql);
const message = [
// show query
"```sql",
sql,
"```",
// show results
results,
].join("\n\n");
markdown,
].join("\n");
return chat.addMessage(new ChatCraftHumanMessage({ text: message }));
}
}
67 changes: 26 additions & 41 deletions src/lib/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,17 @@ import Dexie, { Table } from "dexie";
import { ChatCraftChat, SerializedChatCraftChat } from "./ChatCraftChat";

import type { MessageType, FunctionCallParams, FunctionCallResult } from "./ChatCraftMessage";
import { insertJSON } from "./duckdb";

/**
 * Names of every ChatCraft table this module knows about. Declared `as const`
 * so the literal names can be turned into the `ChatCraftTableName` union.
 */
export const CHATCRAFT_TABLES = ["chats", "messages", "shared", "functions", "starred"] as const;

/** Union of the literal table names listed in `CHATCRAFT_TABLES`. */
export type ChatCraftTableName = (typeof CHATCRAFT_TABLES)[number];

/**
 * Type guard: reports whether `name` is one of the entries in `CHATCRAFT_TABLES`.
 * The comparison is exact (case-sensitive).
 * @param name Candidate table name
 * @returns `true` (narrowing `name` to `ChatCraftTableName`) when it is a known table
 */
export function isChatCraftTableName(name: string): name is ChatCraftTableName {
  // Widen the tuple to plain strings so `includes` accepts an arbitrary string.
  const knownNames: readonly string[] = CHATCRAFT_TABLES;
  return knownNames.includes(name);
}

export type ChatCraftChatTable = {
id: string;
Expand Down Expand Up @@ -170,48 +180,23 @@ class ChatCraftDatabase extends Dexie {
}

/**
* Exports all tables from Dexie to DuckDB
* @returns Object containing table names and row counts
* Get a ChatCraftTable by name
*/
async exportToDuckDB(): Promise<{
tables: { name: string; rowCount: number }[];
}> {
// Step 1: Get data from each Dexie table
const tableNames: Array<
keyof Pick<typeof this, "chats" | "messages" | "shared" | "functions" | "starred">
> = ["chats", "messages", "shared", "functions", "starred"];

const tableData = await Promise.all(
tableNames.map(async (name) => ({
name,
data: await this[name].toArray(),
}))
);

// Step 2: Create tables in DuckDB
const results = [];
for (const { name, data } of tableData) {
// Convert dates to ISO strings for JSON serialization
const jsonData = data.map((record) => ({
...record,
date: record.date instanceof Date ? record.date.toISOString() : record.date,
}));

try {
// Create table in DuckDB from JSON
await insertJSON(name, JSON.stringify(jsonData));

results.push({
name,
rowCount: data.length,
});
} catch (err) {
console.error(`Error creating table ${name} in DuckDB:`, err);
throw err;
}
byTableName(tableName: ChatCraftTableName) {
switch (tableName) {
case "chats":
return this.chats;
case "messages":
return this.messages;
case "shared":
return this.shared;
case "functions":
return this.functions;
case "starred":
return this.starred;
default:
throw new Error(`Unknown table name: ${tableName}`);
}

return { tables: results };
}
}

Expand Down
117 changes: 117 additions & 0 deletions src/lib/duckdb-chatcraft.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import { DataType } from "apache-arrow";
import db, { CHATCRAFT_TABLES, ChatCraftTableName, isChatCraftTableName } from "./db";
import {
withConnection,
insertJSON,
QueryResult,
query,
DuckDBCatalogError,
queryResultToJson,
} from "./duckdb";
import { jsonToMarkdownTable } from "./utils";

/**
 * Extracts chatcraft schema table references from a SQL query.
 *
 * A reference is any `chatcraft.<identifier>` occurrence, regardless of what
 * follows it (whitespace, `;`, `,`, `)`, `.column`, end of input, ...). The
 * schema name must start at a word boundary, so `mychatcraft.foo` is ignored.
 * Matching is case-insensitive and names are returned lowercased so they can be
 * compared directly against the lowercase ChatCraft table names.
 *
 * This is a textual heuristic: occurrences inside string literals or comments
 * are reported too.
 * TODO: Implement this function using json_serialize_sql and AST traversal
 * @param sql The SQL query to analyze
 * @returns De-duplicated, lowercased table names referenced in the chatcraft
 * schema, in order of first appearance
 */
function extractChatCraftTables(sql: string): string[] {
  // `\w+` is greedy, so the capture always spans the whole identifier; no
  // trailing delimiter is required (that requirement previously dropped
  // references followed by `,` or `)`).
  const referencePattern = /\bchatcraft\.(\w+)/gi;
  const names = Array.from(sql.matchAll(referencePattern), (match) => match[1].toLowerCase());
  return [...new Set(names)];
}

/**
 * Copies one ChatCraft table into DuckDB.
 *
 * Reads every record of the table via `db.byTableName(...)`, serializes the
 * records to JSON and hands them to `insertJSON` with the `chatcraft` schema
 * option.
 * @param tableName The name of the table to synchronize
 * @returns Promise resolving when sync is complete
 */
async function syncChatCraftTable(tableName: ChatCraftTableName): Promise<void> {
  const records = await db.byTableName(tableName).toArray();

  // `Date` values are turned into ISO strings up front; any other `date`
  // value is passed through unchanged.
  const serializable = records.map((record) => {
    const date = record.date instanceof Date ? record.date.toISOString() : record.date;
    return { ...record, date };
  });

  const payload = JSON.stringify(serializable);
  await insertJSON(tableName, payload, { schema: "chatcraft" });
}

/**
 * Query wrapper that lazily copies ChatCraft data into DuckDB.
 *
 * The query is first run as-is. Only if it fails with a `DuckDBCatalogError`
 * and the SQL text mentions `chatcraft.*` tables are those tables synced and
 * the query run a second time.
 * @param sql The SQL query to execute
 * @param params Optional parameters for prepared statement
 * @returns Query results as an Arrow Table
 * @throws The original error when it is not a catalog error, or when the SQL
 * references no chatcraft tables; otherwise whatever the retry throws
 */
async function chatCraftQuery<T extends { [key: string]: DataType } = any>(
  sql: string,
  params?: any[]
): Promise<QueryResult<T>> {
  try {
    // Optimistic path: assume every table the query needs already exists.
    return await query<T>(sql, params);
  } catch (error: unknown) {
    // Anything other than a missing-table error is not ours to fix.
    if (!(error instanceof DuckDBCatalogError)) {
      throw error;
    }

    // A missing table we can only help with if the query names chatcraft.* tables.
    // NOTE: syncing happens only after a failure, so a table the user created
    // under the same name (e.g., chatcraft.messages) is used as-is when the
    // query succeeds, and nothing is created before it is actually needed.
    const referencedTables = extractChatCraftTables(sql);
    if (referencedTables.length === 0) {
      throw error;
    }

    // Make sure the target schema exists before inserting into it.
    await withConnection(async (conn) => {
      await conn.query(`CREATE SCHEMA IF NOT EXISTS chatcraft`);
    });

    // Sync, one at a time, every referenced name that is a known ChatCraft table.
    const syncable = referencedTables.filter(isChatCraftTableName);
    for (const name of syncable) {
      await syncChatCraftTable(name);
    }

    // Second (and last) attempt now that the data is in place.
    return await query<T>(sql, params);
  }
}

// Replace the original query export with the enhanced, ChatCraft version
export { chatCraftQuery as query };

/**
 * Runs a SQL query through `chatCraftQuery` and renders the rows as a
 * Markdown table.
 * @param sql The SQL query to execute
 * @param params Optional parameters for prepared statement
 * @returns Promise resolving to a Markdown formatted table string
 * @throws {Error} If the query fails
 */
export async function queryToMarkdown(sql: string, params?: any[]): Promise<string> {
  const rows = queryResultToJson(await chatCraftQuery(sql, params));
  return jsonToMarkdownTable(rows);
}

/**
 * Lists the available tables as a Markdown table: the rows returned by
 * `show tables`, followed by one `chatcraft.<name>` row for every entry in
 * `CHATCRAFT_TABLES` (the "virtual" tables that can be synced into duckdb on
 * demand).
 */
export async function getTables() {
  const rows = queryResultToJson(await query("show tables"));

  // TODO: this isn't really accurate, since `show tables` only shows what's in the current schema (main)
  for (const table of CHATCRAFT_TABLES) {
    rows.push({ name: `chatcraft.${table}` });
  }

  return jsonToMarkdownTable(rows);
}
77 changes: 54 additions & 23 deletions src/lib/duckdb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import {
// NOTE: duckdb-wasm uses v17.0.0 currently vs. v18.x, see:
// https://github.com/duckdb/duckdb-wasm/blob/b42a8e78d60b30363139a966e42bd33a3dd305a5/packages/duckdb-wasm/package.json#L26C9-L26C34
import * as arrow from "apache-arrow";
import { jsonToMarkdownTable } from "./utils";

async function init(logToConsole = true) {
// NOTE: the wasm bundles are too large for CloudFlare pages, so we load externally
Expand Down Expand Up @@ -54,7 +53,7 @@ export function queryResultToJson(result: QueryResult) {
}

// Manage connection lifecycle, closing when done
async function withConnection<T>(
export async function withConnection<T>(
callback: (conn: AsyncDuckDBConnection, duckdb: AsyncDuckDB) => Promise<T>
): Promise<T> {
let conn: AsyncDuckDBConnection | null = null;
Expand All @@ -72,6 +71,7 @@ async function withConnection<T>(
* @param sql The SQL query to execute
* @param params Optional parameters for prepared statement
* @returns Promise resolving to an Arrow Table containing the results
* @throws {DuckDBCatalogError} If the query references a non-existent table
* @throws {Error} If the query fails
* @example
* // Simple query
Expand All @@ -88,15 +88,22 @@ export async function query<T extends { [key: string]: arrow.DataType } = any>(
params?: any[]
): Promise<QueryResult<T>> {
return withConnection(async (conn) => {
if (!params?.length) {
return await conn.query<T>(sql);
}

const stmt = await conn.prepare<T>(sql);
try {
return await stmt.query(...params);
} finally {
await stmt.close();
if (!params?.length) {
return await conn.query<T>(sql);
}

const stmt = await conn.prepare<T>(sql);
try {
return await stmt.query(...params);
} finally {
await stmt.close();
}
} catch (err) {
if (DuckDBCatalogError.isCatalogError(err)) {
throw new DuckDBCatalogError(err);
}
throw err;
}
});
}
Expand Down Expand Up @@ -281,23 +288,47 @@ export async function insertJSON(
* Resets the DuckDB instance, terminating the connection
* @throws {Error} If termination fails
*/
/**
* Executes a SQL query and returns the results as a Markdown table
* @param sql The SQL query to execute
* @param params Optional parameters for prepared statement
* @returns Promise resolving to a Markdown formatted table string
* @throws {Error} If the query fails
*/
export async function queryToMarkdown(sql: string, params?: any[]): Promise<string> {
const result = await query(sql, params);
const json = queryResultToJson(result);
return jsonToMarkdownTable(json);
}

/**
 * Tears down the current DuckDB instance, if there is one: calls
 * `dropFiles()` and `terminate()` on it, then clears the module-level
 * reference. Does nothing when no instance is set.
 */
export async function reset(): Promise<void> {
  if (!_duckdb) {
    return;
  }

  await _duckdb.dropFiles();
  await _duckdb.terminate();
  _duckdb = null;
}

/**
 * Custom error identifying a DuckDB Catalog Error caused by a missing table.
 *
 * Wraps an error whose message matches
 * `Catalog Error: Table with name <name> does not exist!`, exposing the missing
 * table's name and keeping the wrapped error available for diagnostics.
 */
export class DuckDBCatalogError extends Error {
  static readonly ERROR_NAME = "DuckDBCatalogError" as const;

  private static readonly catalogErrorPattern =
    /Catalog Error: Table with name (\w+) does not exist!/;

  /** Name of the missing table, as captured from the wrapped error's message. */
  readonly tableName: string;

  /** The error this instance was built from, with its full original message. */
  readonly originalError: Error;

  /**
   * @param error The error to wrap
   * @throws {Error} If `error` is not an `Error` whose message matches the
   * missing-table pattern
   */
  constructor(error: unknown) {
    // Match the message once: a successful match is exactly what makes the
    // error a catalog error, so no second "could not extract" check is needed.
    const tableName = error instanceof Error ? DuckDBCatalogError.extractTableName(error) : null;
    if (!(error instanceof Error) || tableName === null) {
      throw new Error("Not a DuckDB catalog error");
    }

    super(`Table '${tableName}' does not exist in DuckDB catalog`);

    this.tableName = tableName;
    this.originalError = error;
    this.name = DuckDBCatalogError.ERROR_NAME;
  }

  /**
   * Reports whether `error` is an `Error` whose message matches the
   * missing-table pattern.
   */
  static isCatalogError(error: unknown): error is Error {
    return error instanceof Error && DuckDBCatalogError.extractTableName(error) !== null;
  }

  /**
   * Pulls the table name out of a missing-table error message.
   * @returns The captured table name, or `null` when the message does not match
   */
  static extractTableName(error: Error): string | null {
    const match = error.message.match(DuckDBCatalogError.catalogErrorPattern);
    return match?.[1] ?? null;
  }
}
2 changes: 1 addition & 1 deletion src/lib/run-code.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import esbuildWasmUrl from "esbuild-wasm/esbuild.wasm?url";
import { queryToMarkdown } from "./duckdb";
import { queryToMarkdown } from "./duckdb-chatcraft";

// By default, we haven't loaded the esbuild wasm module, and
// the esbuild module doesn't have a concept of checking if it's
Expand Down

0 comments on commit 20f34a3

Please sign in to comment.