-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.ts
104 lines (89 loc) · 3.18 KB
/
utils.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import { Pinecone } from "@pinecone-database/pinecone";
import { HfInference } from '@huggingface/inference'
// Module-level Hugging Face Inference client used by generateEmbeddings.
// The access token is read from the HF_TOKEN environment variable when this module loads.
// NOTE(review): behavior when HF_TOKEN is unset is not visible here — confirm against the SDK.
const hf = new HfInference(process.env.HF_TOKEN)
/**
 * Generates a flat embedding vector for `text` via the Hugging Face
 * feature-extraction API using the `mixedbread-ai/mxbai-embed-large-v1` model.
 *
 * Array output is flattened by up to two levels of nesting; any other output
 * is converted with `Array.from`. The result is then validated so that callers
 * never receive an unusable vector.
 *
 * @param text - The text to embed.
 * @returns The flattened embedding values.
 * @throws Error if the embedding is empty or contains values that are not
 *   finite numbers. Errors raised by the inference call are logged and rethrown.
 */
async function generateEmbeddings(text: string): Promise<number[]> {
  try {
    const apiOutput: unknown = await hf.featureExtraction({
      model: "mixedbread-ai/mxbai-embed-large-v1",
      inputs: text,
    });

    const flatEmbedding: unknown[] = Array.isArray(apiOutput)
      ? apiOutput.flat(2)
      : Array.from(apiOutput as Iterable<unknown>);

    // `every` is vacuously true on an empty array, so emptiness must be
    // rejected explicitly (a scalar output also ends up here as `[]`).
    if (flatEmbedding.length === 0) {
      throw new Error('Embedding is empty');
    }

    // `typeof NaN === 'number'`, so also require finiteness.
    const isFiniteNumber = (val: unknown): val is number =>
      typeof val === 'number' && Number.isFinite(val);
    if (!flatEmbedding.every(isFiniteNumber)) {
      throw new Error('Embedding contains non-numeric values');
    }

    return flatEmbedding;
  } catch (error) {
    console.error("Error generating embeddings:", error);
    throw error;
  }
}
/**
 * A piece of text to be embedded and stored in Pinecone by `upsertToPinecone`.
 */
interface Document {
// Used as the id of the upserted vector.
id: string;
// Text that is embedded; it is also stored under the `chunk` metadata key.
text: string;
// Optional extra metadata spread into the vector's metadata (a `chunk` key here is overwritten by `text`).
metadata?: Record<string, any>;
}
/**
 * Embeds the given documents and writes them to a Pinecone namespace.
 *
 * Documents are processed in consecutive batches of 100: every document in a
 * batch is embedded concurrently, then the batch is upserted before the next
 * one starts. Each vector's metadata is the document's metadata plus a
 * `chunk` entry holding the document text.
 *
 * @param client - Pinecone client used to resolve the index.
 * @param indexName - Name of the target index.
 * @param namespace - Namespace inside the index to upsert into.
 * @param documents - Documents to embed and store.
 * @throws Rethrows (after logging) any error from embedding or upserting.
 */
export async function upsertToPinecone(
  client: Pinecone,
  indexName: string,
  namespace: string,
  documents: Document[]
): Promise<void> {
  const BATCH_SIZE = 100;

  // Builds the Pinecone record for a single document.
  const toVector = async (doc: Document) => {
    const values = await generateEmbeddings(doc.text);
    return {
      id: doc.id,
      values,
      metadata: { ...doc.metadata, chunk: doc.text },
    };
  };

  try {
    const pineconeIndex = client.Index(indexName);

    for (
      let start = 0, batchNumber = 1;
      start < documents.length;
      start += BATCH_SIZE, batchNumber += 1
    ) {
      const currentBatch = documents.slice(start, start + BATCH_SIZE);
      const records = await Promise.all(currentBatch.map(toVector));

      await pineconeIndex.namespace(namespace).upsert(records);
      console.log(`Upserted batch ${batchNumber} to Pinecone`);
    }

    console.log("Successfully upserted all documents to Pinecone");
  } catch (error) {
    console.error("Error upserting to Pinecone:", error);
    throw error;
  }
}
/**
 * Embeds `searchQuery`, queries a Pinecone namespace for the nearest vectors
 * and returns their stored `chunk` metadata concatenated into one string.
 *
 * Each retrieved chunk is rendered as `"\nDocument Finding N: \n<chunk>"` and
 * the entries are joined with `". \n\n"`. Matches that carry no `chunk`
 * metadata are skipped so the literal text "undefined" never leaks into the
 * returned context.
 *
 * This is a best-effort lookup: any error (embedding or query) is logged and
 * reported to the caller as `"<nomatches>"` rather than thrown.
 *
 * @param client - Pinecone client used to resolve the index.
 * @param indexName - Name of the index to query.
 * @param namespace - Namespace inside the index to search.
 * @param searchQuery - Natural-language query to embed and search with.
 * @param topK - Maximum number of matches to request (defaults to 5).
 * @returns The concatenated chunks, or `"<nomatches>"` when nothing usable
 *   was found or an error occurred.
 */
export async function queryPineconeVectorStore(
  client: Pinecone,
  indexName: string,
  namespace: string,
  searchQuery: string,
  topK = 5
): Promise<string> {
  try {
    const queryEmbedding = await generateEmbeddings(searchQuery);
    const index = client.Index(indexName);
    const queryResponse = await index.namespace(namespace).query({
      topK,
      vector: queryEmbedding,
      includeMetadata: true,
      includeValues: false,
    });
    console.log("Pinecone Query Response:", queryResponse);

    // Keep only matches that actually have a chunk to show; numbering is
    // applied afterwards so findings stay consecutive.
    const chunks = (queryResponse.matches ?? [])
      .map((match) => match.metadata?.chunk)
      .filter((chunk) => chunk != null);

    if (chunks.length === 0) {
      console.log("No matches found in Pinecone.");
      return "<nomatches>";
    }

    return chunks
      .map((chunk, position) => `\nDocument Finding ${position + 1}: \n${chunk}`)
      .join(". \n\n");
  } catch (error) {
    console.error("Error in queryPineconeVectorStore:", error);
    return "<nomatches>";
  }
}