Skip to content

Commit

Permalink
Initial version of VectoriadDB client and addition of benchmarks proj…
Browse files Browse the repository at this point in the history
…ect.
  • Loading branch information
andrii0lomakin committed Nov 2, 2023
1 parent 1d94c43 commit 5d59267
Show file tree
Hide file tree
Showing 9 changed files with 451 additions and 7 deletions.
9 changes: 2 additions & 7 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pluginManagement {
id("me.champeau.jmh") version ("0.7.1")
id("com.google.protobuf") version ("0.9.4")
id("org.springframework.boot") version ("3.1.5")
id("com.bmuschko.docker-remote-api") version("9.3.6")
id("com.bmuschko.docker-remote-api") version ("9.3.6")
}
repositories {
maven(url = "https://cache-redirector.jetbrains.com/plugins.gradle.org/m2")
Expand Down Expand Up @@ -176,11 +176,6 @@ include("vectoriadb-index")
project(":vectoriadb-index").name = "vectoriadb-index"

include("vectoriadb-server")
project(":vectoriadb-server").name = "vectoriadb-server"

include("vectoriadb-interface")
project(":vectoriadb-interface").name = "vectoriadb-interface"

include("vectoriadb-java-client")
project(":vectoriadb-java-client").name = "vectoriadb-java-client"
include("vectoriadb-docker")
include("vectoriadb-bench")
5 changes: 5 additions & 0 deletions vectoriadb-bench/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
dependencies {
implementation(libs.commons.net)

implementation(project(":vectoriadb-index"))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
package jetbrains.vectoriadb.bench;

import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPClient;

import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

public class BenchUtils {
public static void extractTarGzArchive(Path rootDir, Path archivePath) throws IOException {
System.out.println("Extracting " + archivePath.getFileName() + " into " + rootDir);

try (var fis = Files.newInputStream(archivePath)) {
try (var giz = new GzipCompressorInputStream(fis)) {
try (var tar = new TarArchiveInputStream(giz)) {
var entry = tar.getNextTarEntry();

while (entry != null) {
var name = entry.getName();
if (name.endsWith(".fvecs") || name.endsWith(".ivecs")) {
System.out.printf("Extracting %s%n", name);
var file = rootDir.resolve(name);
if (!Files.exists(file.getParent())) {
Files.createDirectories(file.getParent());
}

try (var fos = Files.newOutputStream(file)) {
IOUtils.copy(tar, fos);
}
}
entry = tar.getNextTarEntry();
}
}
}
}

System.out.printf("%s extracted%n", archivePath.getFileName());
}

public static void extractGzArchive(Path targetPath, Path archivePath) throws IOException {
System.out.println("Extracting " + archivePath.getFileName() + " into " + targetPath.getFileName());

try (var fis = Files.newInputStream(archivePath)) {
try (var giz = new GzipCompressorInputStream(fis)) {
Files.copy(giz, targetPath, StandardCopyOption.REPLACE_EXISTING);
}
}

System.out.printf("%s extracted%n", archivePath.getFileName());
}


public static Path downloadBenchFile(Path rootDir, String benchArchiveName) throws IOException {
var benchArchivePath = rootDir.resolve(benchArchiveName);

if (Files.exists(benchArchivePath)) {
System.out.println(benchArchiveName + " already exists in " + rootDir);
} else {
System.out.println("Downloading " + benchArchiveName +
" from ftp.irisa.fr into " + rootDir);

var ftpClient = new FTPClient();
ftpClient.connect("ftp.irisa.fr");
ftpClient.enterLocalPassiveMode();
var loggedIdn = ftpClient.login("anonymous", "anonymous");
ftpClient.setFileType(FTP.BINARY_FILE_TYPE);
if (!loggedIdn) {
throw new IllegalStateException("Failed to login to ftp.irisa.fr");
}

System.out.println("Logged in to ftp.irisa.fr");
try (var fos = Files.newOutputStream(benchArchivePath)) {
ftpClient.retrieveFile("/local/texmex/corpus/" + benchArchiveName, fos);
} finally {
ftpClient.logout();
ftpClient.disconnect();
}

System.out.println(benchArchiveName + " downloaded");
}

return benchArchivePath;
}

public static float[][] readFVectors(Path path, int vectorDimensions) throws IOException {
try (var channel = FileChannel.open(path)) {

var vectorBuffer = ByteBuffer.allocate(Float.BYTES * vectorDimensions + Integer.BYTES);
vectorBuffer.order(ByteOrder.LITTLE_ENDIAN);

var vectorsCount =
(int) (channel.size() / (Float.BYTES * vectorDimensions + Integer.BYTES));
var vectors = new float[vectorsCount][];
for (var i = 0; i < vectorsCount; i++) {
vectorBuffer.rewind();
readFully(channel, vectorBuffer);
vectorBuffer.rewind();

if (vectorBuffer.getInt() != vectorDimensions) {
throw new IllegalStateException("Vector dimensions mismatch");
}

var vector = new float[vectorDimensions];
for (var j = 0; j < vector.length; j++) {
vector[j] = vectorBuffer.getFloat();
}
vectors[i] = vector;
}
return vectors;
}
}

public static float[][] readFBVectors(Path path, int vectorDimensions, int size) throws IOException {
try (var channel = FileChannel.open(path)) {
var vectorBuffer = ByteBuffer.allocate(vectorDimensions + Integer.BYTES);
vectorBuffer.order(ByteOrder.LITTLE_ENDIAN);

var vectorsCount =
Math.min(size, (int) (channel.size() / (vectorDimensions + Integer.BYTES)));
var vectors = new float[vectorsCount][];

for (var i = 0; i < vectorsCount; i++) {
vectorBuffer.rewind();
readFully(channel, vectorBuffer);
vectorBuffer.rewind();

if (vectorBuffer.getInt() != vectorDimensions) {
throw new IllegalStateException("Vector dimensions mismatch");
}

var vector = new float[vectorDimensions];
for (var j = 0; j < vector.length; j++) {
vector[j] = vectorBuffer.get();
}
vectors[i] = vector;
}

return vectors;
}
}

@SuppressWarnings("SameParameterValue")
public static int[][] readIVectors(Path siftSmallBase, int vectorDimensions) throws IOException {
try (var channel = FileChannel.open(siftSmallBase)) {
var vectorBuffer = ByteBuffer.allocate(Integer.BYTES * vectorDimensions + Integer.BYTES);
vectorBuffer.order(ByteOrder.LITTLE_ENDIAN);

var vectorsCount =
(int) (channel.size() / ((long) Integer.BYTES * vectorDimensions + Integer.BYTES));
var vectors = new int[vectorsCount][];
for (var i = 0; i < vectorsCount; i++) {
vectorBuffer.rewind();
readFully(channel, vectorBuffer);
vectorBuffer.rewind();

if (vectorBuffer.getInt() != vectorDimensions) {
throw new IllegalStateException("Vector dimensions mismatch");
}

var vector = new int[vectorDimensions];
for (var j = 0; j < vector.length; j++) {
vector[j] = vectorBuffer.getInt();
}

vectors[i] = vector;
}
return vectors;
}
}

private static void readFully(FileChannel siftSmallBaseChannel, ByteBuffer vectorBuffer) throws IOException {
while (vectorBuffer.remaining() > 0) {
var r = siftSmallBaseChannel.read(vectorBuffer);
if (r < 0) {
throw new EOFException();
}
}
}
}
9 changes: 9 additions & 0 deletions vectoriadb-java-client/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
dependencies {
implementation(libs.grpc.java)
implementation(libs.grpc.protobuf)
implementation(libs.grpc.netty.shaded)
implementation(libs.grpc.stub)
implementation(libs.commons.net)

implementation(project(":vectoriadb-interface"))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package jetbrains.vectoriadb.client;

public enum Distance {
L2,
DOT,
COSINE
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package jetbrains.vectoriadb.client;

import java.util.List;

public interface IndexBuildStatusListener {
boolean onIndexBuildStatusUpdate(String indexName, List<Phase> phases);

record Phase(String name, double progress, String... parameters) {
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package jetbrains.vectoriadb.client;

public record IndexMetadata(int maximumConnectionsPerVertex, int maximumCandidatesReturned, int compressionRatio, float distanceMultiplier) {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package jetbrains.vectoriadb.client;

public enum IndexState {
CREATING,
CREATED,
UPLOADING,
UPLOADED,
IN_BUILD_QUEUE,
BUILDING,
BUILT,
BROKEN
}
Loading

0 comments on commit 5d59267

Please sign in to comment.