Skip to content

Commit

Permalink
benchs
Browse files Browse the repository at this point in the history
  • Loading branch information
lianapatel committed Jun 11, 2024
1 parent 63c0212 commit b30fdc8
Show file tree
Hide file tree
Showing 48 changed files with 9,838 additions and 0 deletions.
11 changes: 11 additions & 0 deletions benchs/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.



# Benchmark executable built from bench_ivf_selector.cpp. EXCLUDE_FROM_ALL keeps
# it out of the default build, so it is only built when requested explicitly.
add_executable(bench_ivf_selector EXCLUDE_FROM_ALL bench_ivf_selector.cpp)
# Link the benchmark against the faiss target.
target_link_libraries(bench_ivf_selector PRIVATE faiss)

361 changes: 361 additions & 0 deletions benchs/README.md

Large diffs are not rendered by default.

81 changes: 81 additions & 0 deletions benchs/bench_6bit_codec.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <omp.h>
#include <cstdio>

#include <benchmark/benchmark.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/random.h>
#include <faiss/utils/utils.h>

using namespace faiss;

/**
 * Benchmark of the 6-bit scalar quantizer.
 *
 * Setup (not timed): fills n vectors of dimension d with random floats,
 * trains a QT_6bit ScalarQuantizer on them, encodes, decodes and re-encodes
 * the data, and publishes a few statistics through state.counters.
 *
 * Timed section: for every input vector used as a query, evaluates the
 * distance computer against each of the n encoded vectors.
 *
 * @param state  Google Benchmark state; drives the timed loop and receives
 *               the user counters.
 */
static void bench(benchmark::State& state) {
    const int d = 128;
    const int n = 2000;
    const size_t num_floats = d * n;

    // Random input vectors, generated with a fixed seed.
    std::vector<float> x(num_floats);
    float_rand(x.data(), num_floats, 12345);

    // Quantizer under test; the encode/decode/encode round trip below is
    // used to check that encoding is idempotent.
    ScalarQuantizer sq(d, ScalarQuantizer::QT_6bit);

    omp_set_num_threads(1);

    sq.train(n, x.data());

    const size_t code_size = sq.code_size;
    state.counters["code_size"] = sq.code_size;

    // First encoding pass.
    std::vector<uint8_t> codes(code_size * n);
    sq.compute_codes(x.data(), codes.data(), n);

    // Reconstruct the vectors from their codes.
    std::vector<float> x2(num_floats);
    sq.decode(codes.data(), x2.data(), n);

    // Squared L2 distance between input and reconstruction, divided by n.
    state.counters["sql2_recons_error"] =
            fvec_L2sqr(x.data(), x2.data(), num_floats) / n;

    // Second encoding pass, this time on the reconstructed vectors.
    std::vector<uint8_t> codes2(code_size * n);
    sq.compute_codes(x2.data(), codes2.data(), n);

    // Count the code bytes that changed between the two encoding passes.
    size_t num_mismatched = 0;
    for (size_t k = 0; k < codes.size(); k++) {
        num_mismatched += (codes[k] != codes2[k]) ? 1 : 0;
    }

    state.counters["ndiff_for_idempotence"] = num_mismatched;

    state.counters["code_size_two"] = codes.size();

    // Distance computer reading from the first set of codes.
    std::unique_ptr<ScalarQuantizer::SQDistanceComputer> dist(
            sq.get_distance_computer());
    dist->codes = codes.data();
    dist->code_size = sq.code_size;
    state.counters["code_size_three"] = dist->code_size;

    for (auto _ : state) {
        float total_dis = 0;
        for (int q = 0; q < n; q++) {
            dist->set_query(x.data() + q * d);
            for (int c = 0; c < n; c++) {
                // DoNotOptimize keeps the accumulation from being elided.
                benchmark::DoNotOptimize(total_dis += (*dist)(c));
            }
        }
    }
}
// NOTE: n and d are hard-coded inside bench(); it would probably make more
// sense to pass them in as benchmark arguments.
// Register bench with the iteration count set to 20.
BENCHMARK(bench)->Iterations(20);
// Google Benchmark macro that provides the program's main() entry point.
BENCHMARK_MAIN();
20 changes: 20 additions & 0 deletions benchs/bench_all_ivf/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Benchmark of IVF variants

This is a benchmark of IVF index variants, looking at compression vs. speed vs. accuracy.
The results are in [this wiki chapter](https://github.com/facebookresearch/faiss/wiki/Indexing-1G-vectors).


The code is organized as:

- `datasets.py`: code to access the datafiles, compute the ground-truth and report accuracies

- `bench_all_ivf.py`: evaluate one type of inverted file

- `run_on_cluster_generic.bash`: call `bench_all_ivf.py` for all tested types of indices.
Since the number of experiments is quite large, the script is structured so that the benchmark can be run on a cluster.

- `parse_bench_all_ivf.py`: make nice tradeoff plots from all the results.

The code depends on Faiss and can use 1 to 8 GPUs to do the k-means clustering for large vocabularies.

It was run in October 2018 for the results in the wiki.
Loading

0 comments on commit b30fdc8

Please sign in to comment.