Skip to content

Commit

Permalink
Ids of vectors are associated and stored inside of vertex record on d…
Browse files Browse the repository at this point in the history
…isk.

Presence of vector id is mandatory.
Id is represented as a byte array of 16 bytes.
  • Loading branch information
andrii0lomakin committed Dec 1, 2023
1 parent 5433bb9 commit 25973c0
Show file tree
Hide file tree
Showing 17 changed files with 430 additions and 136 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,12 @@ public static void main(String[] args) {

var siftDir = rootDir.resolve("sift");
var siftDataName = "sift_base.fvecs";

var vectors = BenchUtils.readFVectors(siftDir.resolve(siftDataName), vectorDimensions);
var ids = new int[vectors.length];
for (int i = 0; i < ids.length; i++) {
ids[i] = i;
}

var indexName = "sift1m";
System.out.printf("%d data vectors loaded with dimension %d, building index %s...%n",
Expand All @@ -73,7 +78,7 @@ public static void main(String[] args) {

ts1 = System.currentTimeMillis();

client.uploadVectors(indexName, vectors, (current, count) -> {
client.uploadVectors(indexName, vectors, ids, (current, count) -> {
if (current >= 0 && current < Integer.MAX_VALUE) {
if (current % 1_000 == 0) {
System.out.printf("%d vectors uploaded out of %d%n", current, count);
Expand Down Expand Up @@ -133,7 +138,7 @@ public static void main(String[] args) {
System.out.printf("Iteration %d out of 5 %n", (i + 1));

for (int j = 0; j < queryVectors.length; j++) {
var vector = queryVectors[j];
var vector = queryVectors[j];
client.findNearestNeighbours(indexName, vector, 1);

if ((j + 1) % 1_000 == 0) {
Expand All @@ -149,7 +154,7 @@ public static void main(String[] args) {
for (var index = 0; index < queryVectors.length; index++) {
var vector = queryVectors[index];

var result = client.findNearestNeighbours(indexName, vector, 1);
var result = client.findIntNearestNeighbours(indexName, vector, 1);
if (groundTruth[index][0] != result[0]) {
errorsCount++;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ private DataStore(int dimensions, DistanceFunction distanceFunction, FileChannel
this.channel = channel;
this.distanceFunction = distanceFunction;

var vectorSize = dimensions * Float.BYTES;
var bufferSize = Math.min(64 * 1024 * 1024 / vectorSize, 1) * vectorSize;
//record contains vector and its associated id
var recordSize = dimensions * Float.BYTES + IndexBuilder.VECTOR_ID_SIZE;
var bufferSize = Math.min(64 * 1024 * 1024 / recordSize, 1) * recordSize;

this.buffer = ByteBuffer.allocate(bufferSize).order(ByteOrder.nativeOrder());
this.preprocessingResult = new float[dimensions];
Expand All @@ -54,7 +55,12 @@ public static DataStore create(final String name, final int dimensions,
return new DataStore(dimensions, distanceFunction, channel);
}

public void add(final float[] vector) throws IOException {
public void add(final float[] vector, @NotNull byte[] id) throws IOException {
if (id.length != IndexBuilder.VECTOR_ID_SIZE) {
throw new IllegalArgumentException("Vector id size should be equal to " + IndexBuilder.VECTOR_ID_SIZE +
". Vector id size : " + id.length);
}

var vectorToStore = distanceFunction.preProcess(vector, preprocessingResult);

if (buffer.remaining() == 0) {
Expand All @@ -70,6 +76,8 @@ public void add(final float[] vector) throws IOException {
for (var component : vectorToStore) {
buffer.putFloat(component);
}

buffer.put(id);
}

public static Path dataLocation(@NotNull final String name, final Path dataDirectoryPath) {
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
Expand Down Expand Up @@ -112,7 +113,7 @@ public IndexReader(String name, int vectorDim, int maxConnectionsPerVertex, int
logger.info("Vector index {} has been initialized.", name);
}

public void nearest(float[] vector, int[] result, int resultSize) {
public byte[][] nearest(float[] vector, int resultSize) {
if (closed) {
throw new IllegalStateException("Index is closed");
}
Expand Down Expand Up @@ -242,7 +243,25 @@ public void nearest(float[] vector, int[] result, int resultSize) {
diskCache.unlock(id, vertexToPreload[i], graphFilePath);
}

nearestCandidates.vertexIndices(result, resultSize);
var vertexIndexes = new int[resultSize];
nearestCandidates.vertexIndices(vertexIndexes, resultSize);

var result = new byte[resultSize][];
for (int i = 0; i < resultSize; i++) {
var vertexIndex = vertexIndexes[i];
var inMemoryPageIndex = diskCache.readLock(id, vertexIndex, graphFilePath);
try {
var vectorIdOffset = diskCache.vectorIdOffset(inMemoryPageIndex, vertexIndex);
var vectorId = new byte[IndexBuilder.VECTOR_ID_SIZE];
MemorySegment.copy(diskCache.pages, vectorIdOffset, MemorySegment.ofArray(vectorId), 0,
IndexBuilder.VECTOR_ID_SIZE);
result[i] = vectorId;
} finally {
diskCache.unlock(id, vertexIndex, graphFilePath);
}
}

return result;
}

private void preloadVertices(BoundedGreedyVertexPriorityQueue nearestCandidates, int[] vertexToPreload) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ public interface VectorReader extends AutoCloseable {
/**
 * Returns the size of this reader.
 *
 * <p>NOTE(review): presumably the number of vectors that can be addressed through
 * {@code read(int)} and {@code id(int)} — confirm against the implementations.
 */
int size();

/**
 * Returns the memory segment holding the vector at the given index.
 *
 * @param index index of the vector to read
 * @return segment with the vector data; NOTE(review): presumably the float components
 *         of the vector — confirm the layout against the implementations
 */
MemorySegment read(int index);

/**
 * Returns the id associated with the vector at the given index.
 *
 * @param index index of the vector whose id is requested
 * @return segment with the raw id bytes; NOTE(review): presumably expected to be
 *         {@code IndexBuilder.VECTOR_ID_SIZE} bytes long — confirm against the consumers
 */
MemorySegment id(int index);
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,15 @@ static void runSiftBenchmarks(

var indexName = "test_index";
try (var dataBuilder = DataStore.create(indexName, 128, L2DistanceFunction.INSTANCE, dbDir)) {
for (var vector : vectors) {
dataBuilder.add(vector);
for (int i = 0; i < vectors.length; i++) {
var buffer = ByteBuffer.allocate(IndexBuilder.VECTOR_ID_SIZE);
buffer.order(ByteOrder.LITTLE_ENDIAN);

buffer.putInt(i);
buffer.rewind();

var vector = vectors[i];
dataBuilder.add(vector, buffer.array());
}
}

Expand Down Expand Up @@ -91,11 +98,9 @@ static void runSiftBenchmarks(

//give GC chance to collect garbage
Thread.sleep(60 * 1000);

var result = new int[1];
for (int i = 0; i < 10; i++) {
for (float[] vector : queryVectors) {
indexReader.nearest(vector, result, 1);
indexReader.nearest(vector, 1);
}
}

Expand All @@ -110,8 +115,9 @@ static void runSiftBenchmarks(
var errorsCount = 0;
for (var index = 0; index < queryVectors.length; index++) {
var vector = queryVectors[index];
indexReader.nearest(vector, result, 1);
if (groundTruth[index][0] != result[0]) {
var rawId = indexReader.nearest(vector, 1);

if (groundTruth[index][0] != ByteBuffer.wrap(rawId[0]).order(ByteOrder.LITTLE_ENDIAN).getInt()) {
errorsCount++;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ public static void main(String[] args) {
vector[j] = buffer.get();
}

dataBuilder.add(vector);
var id = ByteBuffer.allocate(Integer.BYTES).
order(ByteOrder.LITTLE_ENDIAN).putInt((int) i).array();
dataBuilder.add(vector, id);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,15 @@ public MemorySegment read(int index) {
return segment.asSlice((long) index * recordSize, (long) Float.BYTES * vectorDimensions);
}

/**
 * Builds the id of the vector stored at the given index.
 *
 * <p>The id is the index itself, written as a little-endian {@code int} into the first
 * {@link Integer#BYTES} bytes of a freshly allocated buffer of
 * {@code IndexBuilder.VECTOR_ID_SIZE} bytes; the remaining bytes stay zero.
 *
 * @param index index of the vector, also used as the value of its id
 * @return segment covering the whole id buffer, starting at its first byte
 */
@Override
public MemorySegment id(int index) {
    var buffer = ByteBuffer.allocate(IndexBuilder.VECTOR_ID_SIZE);
    buffer.order(ByteOrder.LITTLE_ENDIAN);
    buffer.putInt(index);
    // MemorySegment.ofBuffer maps the range [position, limit). putInt advanced the position
    // past the id value, so the buffer has to be rewound; otherwise the segment would skip
    // the id bytes and be Integer.BYTES shorter than the id size.
    buffer.rewind();

    return MemorySegment.ofBuffer(buffer);
}

@Override
public void close() {
arena.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import jetbrains.vectoriadb.index.IndexReader;
import jetbrains.vectoriadb.index.diskcache.DiskCache;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
Expand Down Expand Up @@ -80,10 +82,9 @@ public static void main(String[] args) throws Exception {

System.out.println("Warming up ...");

var result = new int[1];
for (int i = 0; i < 50; i++) {
for (float[] vector : m1QueryVectors) {
indexReader.nearest(vector, result, 1);
indexReader.nearest(vector, 1);
}
}
}
Expand All @@ -101,11 +102,18 @@ public static void main(String[] args) throws Exception {
bigAnnDbDir, Distance.DOT, diskCache)) {

System.out.println("Running BigANN bench...");
var result = new int[recallCount];

var start = System.nanoTime();
for (int i = 0; i < bigAnnQueryVectors.length; i++) {
float[] vector = bigAnnQueryVectors[i];
indexReader.nearest(vector, result, recallCount);
var rawIds = indexReader.nearest(vector, recallCount);

var result = new int[recallCount];
for (int j = 0; j < rawIds.length; j++) {
var rawId = rawIds[j];
result[j] = ByteBuffer.wrap(rawId).order(ByteOrder.LITTLE_ENDIAN).getInt();
}

totalRecall += recall(result, bigAnnGroundTruth[i], recallCount);
}
var end = System.nanoTime();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ public static void main(String[] args) throws Exception {

System.out.println("Running queries...");
var errors = 0;
var result = new int[1];

try (var diskCache = new DiskCache(400 * 1024 * 1024, vectorDimensions,
IndexBuilder.DEFAULT_MAX_CONNECTIONS_PER_VERTEX)) {
Expand All @@ -72,8 +71,8 @@ public static void main(String[] args) throws Exception {
MemorySegment.copy(queryVectorSegment, ValueLayout.JAVA_FLOAT, 0, queryVector,
0, vectorDimensions);

indexReader.nearest(queryVector, result, 1);
if (result[0] != groundTruth[index]) {
var rawIds = indexReader.nearest(queryVector, 1);
if (ByteBuffer.wrap(rawIds[0]).order(ByteOrder.LITTLE_ENDIAN).getInt() != groundTruth[index]) {
errors++;
}
}
Expand Down
Loading

0 comments on commit 25973c0

Please sign in to comment.