-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
63c0212
commit b30fdc8
Showing
48 changed files
with
9,838 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
|
||
|
||
# Benchmark executable built from bench_ivf_selector.cpp and linked against | ||
# the faiss target. EXCLUDE_FROM_ALL keeps it out of the default build, so it | ||
# has to be requested explicitly as a target. | ||
add_executable(bench_ivf_selector EXCLUDE_FROM_ALL bench_ivf_selector.cpp) | ||
target_link_libraries(bench_ivf_selector PRIVATE faiss) | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
/** | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* This source code is licensed under the MIT license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#include <omp.h> | ||
#include <cstdio> | ||
|
||
#include <benchmark/benchmark.h> | ||
#include <faiss/impl/ScalarQuantizer.h> | ||
#include <faiss/utils/distances.h> | ||
#include <faiss/utils/random.h> | ||
#include <faiss/utils/utils.h> | ||
|
||
using namespace faiss; | ||
|
||
/**
 * Benchmark body for ScalarQuantizer distance computations.
 *
 * Setup (outside the timed loop): fills n = 2000 vectors of dimension
 * d = 128 with float_rand, trains a QT_6bit ScalarQuantizer on them with
 * OpenMP limited to one thread, then encodes, decodes and re-encodes the
 * data to report sanity counters:
 *   - code_size / code_size_three: code size as seen by the quantizer and
 *     by the distance computer,
 *   - code_size_two: total number of code bytes,
 *   - sql2_recons_error: fvec_L2sqr between input and decoded data,
 *     divided by n,
 *   - ndiff_for_idempotence: number of code bytes that differ between the
 *     first encoding and the encoding of the decoded vectors.
 *
 * Timed loop: for every input vector used as query, evaluates the distance
 * computer against every stored code (n * n evaluations per iteration).
 *
 * @param state  Google Benchmark state; drives the timed loop and receives
 *               the counters.
 */
static void bench(benchmark::State& state) {
    const int d = 128;
    const int n = 2000;
    const int total = d * n;

    // Input data; 12345 is the third argument handed to float_rand
    // (presumably the RNG seed -- confirm against faiss/utils/random.h).
    std::vector<float> vecs(total);
    float_rand(vecs.data(), total, 12345);

    ScalarQuantizer sq(d, ScalarQuantizer::QT_6bit);

    // Force single-threaded execution before training and encoding.
    omp_set_num_threads(1);
    sq.train(n, vecs.data());

    const size_t code_size = sq.code_size;
    state.counters["code_size"] = sq.code_size;

    // First pass: encode the original vectors ...
    std::vector<uint8_t> codes(code_size * n);
    sq.compute_codes(vecs.data(), codes.data(), n);

    // ... and decode them back to floats.
    std::vector<float> decoded(total);
    sq.decode(codes.data(), decoded.data(), n);

    state.counters["sql2_recons_error"] =
            fvec_L2sqr(vecs.data(), decoded.data(), total) / n;

    // Second pass: encoding the decoded vectors should ideally give back
    // the same codes (idempotence check).
    std::vector<uint8_t> recoded(code_size * n);
    sq.compute_codes(decoded.data(), recoded.data(), n);

    size_t ndiff = 0;
    for (size_t k = 0; k < codes.size(); ++k) {
        ndiff += (codes[k] != recoded[k]) ? 1 : 0;
    }
    state.counters["ndiff_for_idempotence"] = ndiff;
    state.counters["code_size_two"] = codes.size();

    // The distance computer reads directly from the encoded buffer.
    std::unique_ptr<ScalarQuantizer::SQDistanceComputer> dc(
            sq.get_distance_computer());
    dc->codes = codes.data();
    dc->code_size = sq.code_size;
    state.counters["code_size_three"] = dc->code_size;

    for (auto _ : state) {
        float sum_dis = 0;
        for (int q = 0; q < n; ++q) {
            dc->set_query(vecs.data() + q * d);
            for (int j = 0; j < n; ++j) {
                // Keep the accumulation observable so the distance
                // evaluation cannot be optimized away.
                benchmark::DoNotOptimize(sum_dis += (*dc)(j));
            }
        }
    }
}
// TODO: n and d should probably be benchmark input arguments | ||
// for the measurements to really make sense. | ||
// Register `bench` with a fixed count of 20 timed iterations. | ||
BENCHMARK(bench)->Iterations(20); | ||
// Google Benchmark macro that provides the program's main(). | ||
BENCHMARK_MAIN(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Benchmark of IVF variants | ||
|
||
This is a benchmark of IVF index variants, looking at compression vs. speed vs. accuracy. | ||
The results are in [this wiki chapter](https://github.com/facebookresearch/faiss/wiki/Indexing-1G-vectors). | ||
|
||
|
||
The code is organized as: | ||
|
||
- `datasets.py`: code to access the datafiles, compute the ground-truth and report accuracies | ||
|
||
- `bench_all_ivf.py`: evaluate one type of inverted file | ||
|
||
- `run_on_cluster_generic.bash`: call `bench_all_ivf.py` for all tested types of indices. | ||
Since the number of experiments is quite large the script is structured so that the benchmark can be run on a cluster. | ||
|
||
- `parse_bench_all_ivf.py`: make nice tradeoff plots from all the results. | ||
|
||
The code depends on Faiss and can use 1 to 8 GPUs to do the k-means clustering for large vocabularies. | ||
|
||
It was run in October 2018 for the results in the wiki. |
Oops, something went wrong.