Skip to content

Commit

Permalink
Merge pull request #119 from JetBrains/embedded-id
Browse files Browse the repository at this point in the history
1. 16-byte IDs are used to identify vectors. Providing an ID for every vector is mandatory. IDs are embedded into the on-disk index representation of vertices.
2. Versions of Gradle and Kotlin were updated to the latest ones.
  • Loading branch information
andrii0lomakin authored Dec 5, 2023
2 parents 5433bb9 + c727086 commit 8b56467
Show file tree
Hide file tree
Showing 24 changed files with 453 additions and 154 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/gradle_21.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up JDKs 21
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
java-version: "21"
distribution: 'temurin'
Expand Down
2 changes: 1 addition & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ fun shouldApplyDokka(project: Project): Boolean {
}

tasks.wrapper {
gradleVersion = "8.4"
gradleVersion = "8.5"
}

defaultTasks("assemble")
Expand Down
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
3 changes: 2 additions & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
22 changes: 13 additions & 9 deletions gradlew
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ done
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
Expand Down Expand Up @@ -130,26 +131,29 @@ location of your Java installation."
fi
else
JAVACMD=java
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi

# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC3045
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC3045
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
Expand Down Expand Up @@ -198,11 +202,11 @@ fi
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Collect all arguments for the java command;
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
# shell script including quotes and variable substitutions, so put them in
# double quotes to make sure that they get re-expanded; and
# * put everything else in single quotes, so that it's not re-expanded.
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.

set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
Expand Down
4 changes: 2 additions & 2 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pluginManagement {
plugins {
id("org.jetbrains.kotlin.jvm") version ("1.9.20")
id("org.jetbrains.kotlin.jvm") version ("1.9.21")
id("org.jetbrains.dokka") version ("1.8.10")
id("com.github.hierynomus.license") version ("0.16.1")
id("io.codearte.nexus-staging") version ("0.30.0")
Expand All @@ -20,7 +20,7 @@ dependencyResolutionManagement {
versionCatalogs {
create("libs") {
version("kotlin-lang", "1.9")
version("kotlin", "1.9.20")
version("kotlin", "1.9.21")

version("kotlin-logging", "3.0.5")
version("lz4", "1.8.0")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,12 @@ public static void main(String[] args) {

var siftDir = rootDir.resolve("sift");
var siftDataName = "sift_base.fvecs";

var vectors = BenchUtils.readFVectors(siftDir.resolve(siftDataName), vectorDimensions);
var ids = new int[vectors.length];
for (int i = 0; i < ids.length; i++) {
ids[i] = i;
}

var indexName = "sift1m";
System.out.printf("%d data vectors loaded with dimension %d, building index %s...%n",
Expand All @@ -73,7 +78,7 @@ public static void main(String[] args) {

ts1 = System.currentTimeMillis();

client.uploadVectors(indexName, vectors, (current, count) -> {
client.uploadVectors(indexName, vectors, ids, (current, count) -> {
if (current >= 0 && current < Integer.MAX_VALUE) {
if (current % 1_000 == 0) {
System.out.printf("%d vectors uploaded out of %d%n", current, count);
Expand Down Expand Up @@ -133,7 +138,7 @@ public static void main(String[] args) {
System.out.printf("Iteration %d out of 5 %n", (i + 1));

for (int j = 0; j < queryVectors.length; j++) {
var vector = queryVectors[j];
var vector = queryVectors[j];
client.findNearestNeighbours(indexName, vector, 1);

if ((j + 1) % 1_000 == 0) {
Expand All @@ -149,7 +154,7 @@ public static void main(String[] args) {
for (var index = 0; index < queryVectors.length; index++) {
var vector = queryVectors[index];

var result = client.findNearestNeighbours(indexName, vector, 1);
var result = client.findIntNearestNeighbours(indexName, vector, 1);
if (groundTruth[index][0] != result[0]) {
errorsCount++;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ private DataStore(int dimensions, DistanceFunction distanceFunction, FileChannel
this.channel = channel;
this.distanceFunction = distanceFunction;

var vectorSize = dimensions * Float.BYTES;
var bufferSize = Math.min(64 * 1024 * 1024 / vectorSize, 1) * vectorSize;
//record contains vector and its associated id
var recordSize = dimensions * Float.BYTES + IndexBuilder.VECTOR_ID_SIZE;
var bufferSize = Math.min(64 * 1024 * 1024 / recordSize, 1) * recordSize;

this.buffer = ByteBuffer.allocate(bufferSize).order(ByteOrder.nativeOrder());
this.preprocessingResult = new float[dimensions];
Expand All @@ -54,7 +55,12 @@ public static DataStore create(final String name, final int dimensions,
return new DataStore(dimensions, distanceFunction, channel);
}

public void add(final float[] vector) throws IOException {
public void add(final float[] vector, @NotNull byte[] id) throws IOException {
if (id.length != IndexBuilder.VECTOR_ID_SIZE) {
throw new IllegalArgumentException("Vector id size should be equal to " + IndexBuilder.VECTOR_ID_SIZE +
". Vector id size : " + id.length);
}

var vectorToStore = distanceFunction.preProcess(vector, preprocessingResult);

if (buffer.remaining() == 0) {
Expand All @@ -70,6 +76,8 @@ public void add(final float[] vector) throws IOException {
for (var component : vectorToStore) {
buffer.putFloat(component);
}

buffer.put(id);
}

public static Path dataLocation(@NotNull final String name, final Path dataDirectoryPath) {
Expand Down
Loading

0 comments on commit 8b56467

Please sign in to comment.