-
Notifications
You must be signed in to change notification settings - Fork 114
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial version of VectoriaDB client and addition of benchmarks project.
- Loading branch information
1 parent
1d94c43
commit 5d59267
Showing
9 changed files
with
451 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
dependencies { | ||
implementation(libs.commons.net) | ||
|
||
implementation(project(":vectoriadb-index")) | ||
} |
188 changes: 188 additions & 0 deletions
188
vectoriadb-bench/src/main/java/jetbrains/vectoriadb/bench/BenchUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
package jetbrains.vectoriadb.bench; | ||
|
||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; | ||
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; | ||
import org.apache.commons.compress.utils.IOUtils; | ||
import org.apache.commons.net.ftp.FTP; | ||
import org.apache.commons.net.ftp.FTPClient; | ||
|
||
import java.io.EOFException; | ||
import java.io.IOException; | ||
import java.nio.ByteBuffer; | ||
import java.nio.ByteOrder; | ||
import java.nio.channels.FileChannel; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.nio.file.StandardCopyOption; | ||
|
||
public class BenchUtils { | ||
public static void extractTarGzArchive(Path rootDir, Path archivePath) throws IOException { | ||
System.out.println("Extracting " + archivePath.getFileName() + " into " + rootDir); | ||
|
||
try (var fis = Files.newInputStream(archivePath)) { | ||
try (var giz = new GzipCompressorInputStream(fis)) { | ||
try (var tar = new TarArchiveInputStream(giz)) { | ||
var entry = tar.getNextTarEntry(); | ||
|
||
while (entry != null) { | ||
var name = entry.getName(); | ||
if (name.endsWith(".fvecs") || name.endsWith(".ivecs")) { | ||
System.out.printf("Extracting %s%n", name); | ||
var file = rootDir.resolve(name); | ||
if (!Files.exists(file.getParent())) { | ||
Files.createDirectories(file.getParent()); | ||
} | ||
|
||
try (var fos = Files.newOutputStream(file)) { | ||
IOUtils.copy(tar, fos); | ||
} | ||
} | ||
entry = tar.getNextTarEntry(); | ||
} | ||
} | ||
} | ||
} | ||
|
||
System.out.printf("%s extracted%n", archivePath.getFileName()); | ||
} | ||
|
||
public static void extractGzArchive(Path targetPath, Path archivePath) throws IOException { | ||
System.out.println("Extracting " + archivePath.getFileName() + " into " + targetPath.getFileName()); | ||
|
||
try (var fis = Files.newInputStream(archivePath)) { | ||
try (var giz = new GzipCompressorInputStream(fis)) { | ||
Files.copy(giz, targetPath, StandardCopyOption.REPLACE_EXISTING); | ||
} | ||
} | ||
|
||
System.out.printf("%s extracted%n", archivePath.getFileName()); | ||
} | ||
|
||
|
||
public static Path downloadBenchFile(Path rootDir, String benchArchiveName) throws IOException { | ||
var benchArchivePath = rootDir.resolve(benchArchiveName); | ||
|
||
if (Files.exists(benchArchivePath)) { | ||
System.out.println(benchArchiveName + " already exists in " + rootDir); | ||
} else { | ||
System.out.println("Downloading " + benchArchiveName + | ||
" from ftp.irisa.fr into " + rootDir); | ||
|
||
var ftpClient = new FTPClient(); | ||
ftpClient.connect("ftp.irisa.fr"); | ||
ftpClient.enterLocalPassiveMode(); | ||
var loggedIdn = ftpClient.login("anonymous", "anonymous"); | ||
ftpClient.setFileType(FTP.BINARY_FILE_TYPE); | ||
if (!loggedIdn) { | ||
throw new IllegalStateException("Failed to login to ftp.irisa.fr"); | ||
} | ||
|
||
System.out.println("Logged in to ftp.irisa.fr"); | ||
try (var fos = Files.newOutputStream(benchArchivePath)) { | ||
ftpClient.retrieveFile("/local/texmex/corpus/" + benchArchiveName, fos); | ||
} finally { | ||
ftpClient.logout(); | ||
ftpClient.disconnect(); | ||
} | ||
|
||
System.out.println(benchArchiveName + " downloaded"); | ||
} | ||
|
||
return benchArchivePath; | ||
} | ||
|
||
public static float[][] readFVectors(Path path, int vectorDimensions) throws IOException { | ||
try (var channel = FileChannel.open(path)) { | ||
|
||
var vectorBuffer = ByteBuffer.allocate(Float.BYTES * vectorDimensions + Integer.BYTES); | ||
vectorBuffer.order(ByteOrder.LITTLE_ENDIAN); | ||
|
||
var vectorsCount = | ||
(int) (channel.size() / (Float.BYTES * vectorDimensions + Integer.BYTES)); | ||
var vectors = new float[vectorsCount][]; | ||
for (var i = 0; i < vectorsCount; i++) { | ||
vectorBuffer.rewind(); | ||
readFully(channel, vectorBuffer); | ||
vectorBuffer.rewind(); | ||
|
||
if (vectorBuffer.getInt() != vectorDimensions) { | ||
throw new IllegalStateException("Vector dimensions mismatch"); | ||
} | ||
|
||
var vector = new float[vectorDimensions]; | ||
for (var j = 0; j < vector.length; j++) { | ||
vector[j] = vectorBuffer.getFloat(); | ||
} | ||
vectors[i] = vector; | ||
} | ||
return vectors; | ||
} | ||
} | ||
|
||
public static float[][] readFBVectors(Path path, int vectorDimensions, int size) throws IOException { | ||
try (var channel = FileChannel.open(path)) { | ||
var vectorBuffer = ByteBuffer.allocate(vectorDimensions + Integer.BYTES); | ||
vectorBuffer.order(ByteOrder.LITTLE_ENDIAN); | ||
|
||
var vectorsCount = | ||
Math.min(size, (int) (channel.size() / (vectorDimensions + Integer.BYTES))); | ||
var vectors = new float[vectorsCount][]; | ||
|
||
for (var i = 0; i < vectorsCount; i++) { | ||
vectorBuffer.rewind(); | ||
readFully(channel, vectorBuffer); | ||
vectorBuffer.rewind(); | ||
|
||
if (vectorBuffer.getInt() != vectorDimensions) { | ||
throw new IllegalStateException("Vector dimensions mismatch"); | ||
} | ||
|
||
var vector = new float[vectorDimensions]; | ||
for (var j = 0; j < vector.length; j++) { | ||
vector[j] = vectorBuffer.get(); | ||
} | ||
vectors[i] = vector; | ||
} | ||
|
||
return vectors; | ||
} | ||
} | ||
|
||
@SuppressWarnings("SameParameterValue") | ||
public static int[][] readIVectors(Path siftSmallBase, int vectorDimensions) throws IOException { | ||
try (var channel = FileChannel.open(siftSmallBase)) { | ||
var vectorBuffer = ByteBuffer.allocate(Integer.BYTES * vectorDimensions + Integer.BYTES); | ||
vectorBuffer.order(ByteOrder.LITTLE_ENDIAN); | ||
|
||
var vectorsCount = | ||
(int) (channel.size() / ((long) Integer.BYTES * vectorDimensions + Integer.BYTES)); | ||
var vectors = new int[vectorsCount][]; | ||
for (var i = 0; i < vectorsCount; i++) { | ||
vectorBuffer.rewind(); | ||
readFully(channel, vectorBuffer); | ||
vectorBuffer.rewind(); | ||
|
||
if (vectorBuffer.getInt() != vectorDimensions) { | ||
throw new IllegalStateException("Vector dimensions mismatch"); | ||
} | ||
|
||
var vector = new int[vectorDimensions]; | ||
for (var j = 0; j < vector.length; j++) { | ||
vector[j] = vectorBuffer.getInt(); | ||
} | ||
|
||
vectors[i] = vector; | ||
} | ||
return vectors; | ||
} | ||
} | ||
|
||
private static void readFully(FileChannel siftSmallBaseChannel, ByteBuffer vectorBuffer) throws IOException { | ||
while (vectorBuffer.remaining() > 0) { | ||
var r = siftSmallBaseChannel.read(vectorBuffer); | ||
if (r < 0) { | ||
throw new EOFException(); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
dependencies { | ||
implementation(libs.grpc.java) | ||
implementation(libs.grpc.protobuf) | ||
implementation(libs.grpc.netty.shaded) | ||
implementation(libs.grpc.stub) | ||
implementation(libs.commons.net) | ||
|
||
implementation(project(":vectoriadb-interface")) | ||
} |
7 changes: 7 additions & 0 deletions
7
vectoriadb-java-client/src/main/java/jetbrains/vectoriadb/client/Distance.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package jetbrains.vectoriadb.client; | ||
|
||
/**
 * Distance kinds the client can refer to.
 *
 * <p>NOTE(review): the constant names suggest Euclidean (L2), dot-product and cosine distance;
 * how each one is evaluated is not visible in this file — confirm against the index
 * implementation. The declaration order is kept as-is because callers may rely on it.
 */
public enum Distance {
    L2,
    DOT,
    COSINE
}
10 changes: 10 additions & 0 deletions
10
...riadb-java-client/src/main/java/jetbrains/vectoriadb/client/IndexBuildStatusListener.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package jetbrains.vectoriadb.client; | ||
|
||
import java.util.List; | ||
|
||
/**
 * Callback that receives index build status updates.
 *
 * <p>The interface has a single abstract method, so it can be implemented with a lambda.
 */
@FunctionalInterface
public interface IndexBuildStatusListener {
    /**
     * Called with the current build phases of an index.
     *
     * @param indexName name of the index the update belongs to
     * @param phases    phases reported for that index
     * @return a flag for the caller; NOTE(review): its meaning is not visible in this file —
     *         presumably whether the listener wants to keep receiving updates, confirm
     *         against the code that invokes the listener
     */
    boolean onIndexBuildStatusUpdate(String indexName, List<Phase> phases);

    /**
     * One reported build phase.
     *
     * <p>{@code parameters} is a varargs array and is stored as passed (no defensive copy),
     * so record equality compares it by reference.
     *
     * @param name       name of the phase
     * @param progress   progress value of the phase; NOTE(review): scale/range not visible
     *                   here — confirm
     * @param parameters additional phase parameters, possibly empty
     */
    record Phase(String name, double progress, String... parameters) {
    }
}
4 changes: 4 additions & 0 deletions
4
vectoriadb-java-client/src/main/java/jetbrains/vectoriadb/client/IndexMetadata.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
package jetbrains.vectoriadb.client; | ||
|
||
/**
 * Immutable bundle of index settings exchanged by the client.
 *
 * <p>NOTE(review): the descriptions below are derived from the component names only; valid
 * ranges and exact semantics are not visible in this file — confirm against the index
 * implementation.
 *
 * @param maximumConnectionsPerVertex presumably the maximum number of connections per vertex
 * @param maximumCandidatesReturned   presumably the maximum number of candidates returned
 * @param compressionRatio            presumably the compression ratio used by the index
 * @param distanceMultiplier          presumably a multiplier applied to distances
 */
public record IndexMetadata(
        int maximumConnectionsPerVertex,
        int maximumCandidatesReturned,
        int compressionRatio,
        float distanceMultiplier) {
}
12 changes: 12 additions & 0 deletions
12
vectoriadb-java-client/src/main/java/jetbrains/vectoriadb/client/IndexState.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package jetbrains.vectoriadb.client; | ||
|
||
/**
 * States an index can be reported in.
 *
 * <p>NOTE(review): the declaration order looks like a lifecycle (creation, upload, build),
 * with {@link #BROKEN} as a failure state, but the allowed transitions are not visible in
 * this file — confirm against the server. The order is kept as-is because callers may rely
 * on it.
 */
public enum IndexState {
    CREATING,
    CREATED,
    UPLOADING,
    UPLOADED,
    IN_BUILD_QUEUE,
    BUILDING,
    BUILT,
    BROKEN
}
Oops, something went wrong.