Skip to content

Commit

Permalink
feat: ✨ improve cache handling and add test script
Browse files (browse the repository at this point in the history)
  • Loading branch information
pelikhan committed Jan 6, 2025
1 parent b9a481f commit be80fe0
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 25 deletions.
43 changes: 27 additions & 16 deletions packages/core/src/cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,28 +196,39 @@ export class JSONLineCache<K, V> extends MemoryCache<K, V> {
return host.resolvePath(this.folder(), "db.jsonl")
}

/**
 * Load currently in flight, shared by concurrent `initialize()` callers.
 * Reset to `undefined` once that load settles (success or failure).
 */
private _initializePromise: Promise<void>
/**
 * Initialize the cache by loading entries from the file.
 * Identifies duplicate entries and rewrites the file if necessary.
 *
 * Entries are collected into a local map and only published to
 * `this._entries` after the whole load succeeded. Callers that arrive
 * while a load is running await that same load instead of starting
 * another one. If the load fails, the in-flight promise is dropped so
 * that a later call can retry.
 */
override async initialize() {
    if (this._entries) return // already loaded
    if (!this._initializePromise) {
        const pending = (async () => {
            await host.createDirectory(this.folder()) // Ensure directory exists
            const content = await tryReadText(this.path())
            const entries: Record<string, CacheEntry<K, V>> = {}
            const objs: CacheEntry<K, V>[] =
                (await JSONLTryParse(content)) ?? []
            let numdup = 0 // Counter for duplicates
            for (const obj of objs) {
                if (entries[obj.sha]) numdup++ // Count duplicates
                entries[obj.sha] = obj // the last occurrence of a sha wins
            }
            if (2 * numdup > objs.length) {
                // Rewrite file if more than half of the lines are duplicates,
                // keeping only the winning object of each sha.
                await writeJSONL(
                    this.path(),
                    objs.filter((o) => entries[o.sha] === o) // Preserve order
                )
            }
            // success
            // NOTE(review): super.initialize() is intentionally not awaited so
            // that `_entries` is assigned in the same synchronous step and no
            // other caller can run in between; presumably the base class
            // prepares `_entries` synchronously - confirm against MemoryCache.
            super.initialize()
            this._entries = entries
        })()
        this._initializePromise = pending
        // Drop the in-flight marker on success AND on failure; the identity
        // check keeps a newer load from being cleared by an older one.
        const release = () => {
            if (this._initializePromise === pending)
                this._initializePromise = undefined
        }
        pending.then(release, release)
    }
    return await this._initializePromise
}

override async appendEntry(ent: CacheEntry<K, V>) {
Expand Down
16 changes: 9 additions & 7 deletions packages/core/src/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -982,7 +982,7 @@ export async function executeChatSession(
: undefined,
messages,
}
updateChatFeatures(reqTrace, req)
updateChatFeatures(reqTrace, model, req)
logVerbose(
`chat: sending ${messages.length} messages to ${model} (~${tokens ?? "?"} tokens)\n`
)
Expand All @@ -996,6 +996,7 @@ export async function executeChatSession(
)
if (cacheStore) {
const cachedKey = deleteUndefinedValues({
modelid: model,
...req,
...cfgNoToken,
}) satisfies ChatCompletionRequestCacheKey
Expand All @@ -1010,12 +1011,12 @@ export async function executeChatSession(
)
resp = cacheRes.value
resp.cached = cacheRes.cached
reqTrace.itemValue("cache", cacheStore.name)
reqTrace.itemValue("cache_key", cacheRes.key)
logVerbose(
`chat: cache ${resp.cached ? "hit" : "miss"} (${cacheStore.name}/${cacheRes.key.slice(0, 7)})`
)
if (resp.cached) {
reqTrace.itemValue("cache", cacheStore.name)
reqTrace.itemValue("cache_key", cacheRes.key)
logVerbose(
`chat: cache hit (${cacheStore.name}/${cacheRes.key.slice(0, 7)})`
)
if (cacheRes.value.text)
partialCb({
responseSoFar: cacheRes.value.text,
Expand Down Expand Up @@ -1073,9 +1074,10 @@ export async function executeChatSession(

function updateChatFeatures(
trace: MarkdownTrace,
modelid: string,
req: CreateChatCompletionRequest
) {
const { provider, model } = parseModelIdentifier(req.model)
const { provider, model } = parseModelIdentifier(modelid)
const features = MODEL_PROVIDERS.find(({ id }) => id === provider)

if (!isNaN(req.seed) && features?.seed === false) {
Expand Down
4 changes: 2 additions & 2 deletions packages/core/src/jsonl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ export function JSONLTryParse(
): any[] {
if (!text) return []
const res: any[] = []
for (const line of text.split("\n")) {
if (!line) continue
const lines = text.split("\n")
for (const line of lines.filter((l) => !!l.trim())) {
const obj = JSON5TryParse(line, options)
if (obj !== undefined && obj !== null) res.push(obj)
}
Expand Down
36 changes: 36 additions & 0 deletions packages/sample/genaisrc/repomap-test.genai.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Test script for https://github.com/microsoft/genaiscript/issues/972
script({
    title: "generate repomap for the repo",
    cache: "repomap-test",
    files: [
        "src/greeter.ts",
        "src/counting.py",
        "src/fib.ts",
    ],
})

// Prompts that are run, one after the other, against every file.
const prompts = ["summarize the file in one sentence."]

/**
 * Runs each prompt against one file, sequentially, and collects the answers.
 *
 * Every prompt is sent through `runPrompt` with the file defined as `FILE`,
 * a separator line, and the prompt text, using no system prompts and the
 * "repomap-test" cache.
 *
 * @param current_file file passed to `_.def("FILE", ...)`
 * @param prompts prompt strings to run against the file
 * @returns the `text` of each prompt result, in prompt order
 */
async function processFile(current_file, prompts) {
    const result = []
    for (const prompt of prompts) {
        const { text } = await runPrompt(
            (_) => {
                _.def("FILE", current_file)
                _.$`=============`
                _.$`${prompt}`
            },
            { system: [], cache: "repomap-test" }
        )
        result.push(text)
    }
    console.log(result)
    // Return the answers as well, so callers that collect results
    // (e.g. through a promise queue) receive them instead of `undefined`.
    return result
}


// Queued run: this does not hit the "src/counting.py" cache.
const queue = host.promiseQueue(2)
const summarize = (file) => processFile(file, prompts)
const summaries = await queue.mapAll(env.files, summarize)

// Sequential alternative, which works fine:
// for (const file of env.files) {
//     await processFile(file, prompts)
// }

0 comments on commit be80fe0

Please sign in to comment.