Skip to content

Commit

Permalink
demos
Browse files Browse the repository at this point in the history
  • Loading branch information
lianapatel committed Jun 11, 2024
1 parent 98c682c commit db87eaf
Show file tree
Hide file tree
Showing 16 changed files with 3,046 additions and 0 deletions.
108 changes: 108 additions & 0 deletions demos/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Stock Faiss demo programs. Each name <demo> becomes an executable target
# built from <demo>.cpp and linked privately against the faiss library.
# EXCLUDE_FROM_ALL keeps them out of the default build; request a target by
# name to build it.
foreach(demo IN ITEMS
    demo_imi_flat
    demo_imi_pq
    demo_ivfpq_indexing
    demo_nndescent
    demo_sift1M
    demo_weighted_kmeans
    demo_residual_quantizer)
  add_executable(${demo} EXCLUDE_FROM_ALL ${demo}.cpp)
  target_link_libraries(${demo} PRIVATE faiss)
endforeach()

# add_executable(demo_new_test EXCLUDE_FROM_ALL demo_new_test.cpp)
# target_link_libraries(demo_new_test PRIVATE faiss)

# add_executable(demo_test_search EXCLUDE_FROM_ALL demo_test_search.cpp)
# target_link_libraries(demo_test_search PRIVATE faiss)

# add_executable(demo_test_search_small EXCLUDE_FROM_ALL demo_test_search_small.cpp)
# target_link_libraries(demo_test_search_small PRIVATE faiss)

# add_executable(demo_test_hybrid_small EXCLUDE_FROM_ALL demo_test_hybrid_small.cpp)
# target_link_libraries(demo_test_hybrid_small PRIVATE faiss)

# add_executable(demo_test_hybrid_large EXCLUDE_FROM_ALL demo_test_hybrid_large.cpp)
# target_link_libraries(demo_test_hybrid_large PRIVATE faiss)

# add_executable(make_indices EXCLUDE_FROM_ALL make_indices.cpp)
# target_link_libraries(make_indices PRIVATE faiss)

# add_executable(make_sift_indices EXCLUDE_FROM_ALL make_sift_indices.cpp)
# target_link_libraries(make_sift_indices PRIVATE faiss)

# add_executable(benchmark EXCLUDE_FROM_ALL benchmark.cpp)
# target_link_libraries(benchmark PRIVATE faiss)

# Opt-in executable target built from utils.cpp and linked against faiss.
# NOTE(review): this registers utils.cpp as a stand-alone program, so it
# presumably defines main(); if it only holds shared helpers, building this
# target would fail at link time -- confirm.
add_executable(utils EXCLUDE_FROM_ALL utils.cpp)
target_link_libraries(utils PRIVATE faiss)

# add_executable(correlation EXCLUDE_FROM_ALL correlation.cpp)
# target_link_libraries(correlation PRIVATE faiss)


# add_executable(check_queries EXCLUDE_FROM_ALL check_queries.cpp)
# target_link_libraries(check_queries PRIVATE faiss)

# add_executable(trace_query EXCLUDE_FROM_ALL trace_query.cpp)
# target_link_libraries(trace_query PRIVATE faiss)

# add_executable(make_debug_index EXCLUDE_FROM_ALL make_debug_index.cpp)
# target_link_libraries(make_debug_index PRIVATE faiss)

# add_executable(print_edges EXCLUDE_FROM_ALL print_edges.cpp)
# target_link_libraries(print_edges PRIVATE faiss)

# add_executable(profile_query EXCLUDE_FROM_ALL profile_query.cpp)
# target_link_libraries(profile_query PRIVATE faiss)

# add_executable(prefilter EXCLUDE_FROM_ALL prefilter.cpp)
# target_link_libraries(prefilter PRIVATE faiss)

# add_executable(make_tripclick_indices EXCLUDE_FROM_ALL make_tripclick_indices.cpp)
# target_link_libraries(make_tripclick_indices PRIVATE faiss)

# add_executable(test_tripclick_indices EXCLUDE_FROM_ALL test_tripclick_indices.cpp)
# target_link_libraries(test_tripclick_indices PRIVATE faiss)


# add_executable(trace_tripclick_query EXCLUDE_FROM_ALL trace_tripclick_query.cpp)
# target_link_libraries(trace_tripclick_query PRIVATE faiss)

# add_executable(test_tripclick_dates EXCLUDE_FROM_ALL test_tripclick_dates.cpp)
# target_link_libraries(test_tripclick_dates PRIVATE faiss)

# add_executable(make_tripclick_dates_indices EXCLUDE_FROM_ALL make_tripclick_dates_indices.cpp)
# target_link_libraries(make_tripclick_dates_indices PRIVATE faiss)


# add_executable(make_tripclick_oracle_indices EXCLUDE_FROM_ALL make_tripclick_oracle_indices.cpp)
# target_link_libraries(make_tripclick_oracle_indices PRIVATE faiss)


# add_executable(make_laion_indices EXCLUDE_FROM_ALL make_laion_indices.cpp)
# target_link_libraries(make_laion_indices PRIVATE faiss)

# add_executable(test_laion_indices EXCLUDE_FROM_ALL test_laion_indices.cpp)
# target_link_libraries(test_laion_indices PRIVATE faiss)

# add_executable(test_laion_arb_pred EXCLUDE_FROM_ALL test_laion_arb_pred.cpp)
# target_link_libraries(test_laion_arb_pred PRIVATE faiss)

# Opt-in executable target built from test_acorn.cpp and linked privately
# against faiss; EXCLUDE_FROM_ALL keeps it out of the default build.
add_executable(test_acorn EXCLUDE_FROM_ALL test_acorn.cpp)
target_link_libraries(test_acorn PRIVATE faiss)
28 changes: 28 additions & 0 deletions demos/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@


Demos for a few Faiss functionalities
=====================================


demo_auto_tune.py
-----------------

Demonstrates the auto-tuning functionality of Faiss


demo_ondisk_ivf.py
------------------

Shows how to construct a Faiss index that stores the inverted file
data on disk, e.g. when it does not fit in RAM. The script works on a
small dataset (sift1M) for demonstration and proceeds in stages:

0: train on the dataset

1-4: build 4 indexes, each containing 1/4 of the dataset. This can be
done in parallel on several machines

5: merge the 4 indexes into one that is written directly to disk
(the merged index does not need to fit in RAM)

6: load and test the index
170 changes: 170 additions & 0 deletions demos/demo_auto_tune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#!/usr/bin/env python2

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import print_function
import os
import time
import numpy as np

try:
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot
graphical_output = True
except ImportError:
graphical_output = False

import faiss

#################################################################
# Small I/O functions
#################################################################

def ivecs_read(fname):
    """Read an ``.ivecs`` file into a 2-D int32 array.

    The file is parsed as a flat stream of int32 values made of records of
    ``d + 1`` values each: a leading dimension field followed by ``d``
    components. ``d`` is taken from the first value of the file and is
    assumed to be the same for every record.

    Args:
        fname: path of the file to read.

    Returns:
        A freshly allocated int32 array of shape ``(n, d)`` with the leading
        dimension column of every record removed.

    Raises:
        ValueError: if the file is empty, or (raised by ``reshape``) if its
            length is not a whole number of ``d + 1``-value records.
    """
    a = np.fromfile(fname, dtype="int32")
    if a.size == 0:
        # Without a first record there is no dimension to read; fail with a
        # message that names the file instead of an IndexError on a[0].
        raise ValueError(
            "%s: empty file, cannot read the vector dimension" % (fname,))
    d = int(a[0])
    # Drop the per-record dimension column; copy() makes the result
    # contiguous and independent of the raw buffer.
    return a.reshape(-1, d + 1)[:, 1:].copy()

def fvecs_read(fname):
    """Read an ``.fvecs`` file into a 2-D float32 array.

    The file is parsed by ``ivecs_read`` (which strips the per-record
    dimension field), and the resulting int32 payload is then reinterpreted
    bit-for-bit as float32; no numeric conversion takes place.

    Args:
        fname: path of the file to read.

    Returns:
        The float32 view of the array produced by ``ivecs_read(fname)``.
    """
    raw_int32 = ivecs_read(fname)
    return raw_int32.view('float32')


def plot_OperatingPoints(ops, nq, **kwargs):
    """Plot the optimal operating points of ``ops`` as a staircase.

    For ``m`` optimal points, ``2 * m - 1`` vertices are emitted: each point
    ``(perf, t)`` is followed by a vertex that keeps its ``perf`` but takes
    the ``t`` of the next point, so consecutive points are joined by a
    vertical then a horizontal segment.

    Args:
        ops: object exposing ``optimal_pts``, a container with ``size()`` and
            ``at(i)`` whose elements have ``perf`` and ``t`` attributes.
        nq: divisor applied to every ``t`` before it is scaled by 1000.
        **kwargs: forwarded unchanged to ``pyplot.plot``.
    """
    pts = ops.optimal_pts
    n_vertices = 2 * pts.size() - 1

    perfs = []
    times = []
    for j in range(n_vertices):
        # x advances on even vertices, y on odd ones -> staircase shape
        perfs.append(pts.at(j // 2).perf)
        times.append(pts.at((j + 1) // 2).t / nq * 1000)

    pyplot.plot(perfs, times, **kwargs)


#################################################################
# prepare common data for all indexes
#################################################################



# Wall-clock origin: the progress messages further down print the number of
# seconds elapsed since this point.
t0 = time.time()

print("load data")

# The three SIFT1M vector sets, read relative to the working directory.
# Later in this script xt goes to index.train(), xb to index.add() and
# xq to params.explore().
xt = fvecs_read("sift1M/sift_learn.fvecs")
xb = fvecs_read("sift1M/sift_base.fvecs")
xq = fvecs_read("sift1M/sift_query.fvecs")

# vector dimensionality, handed to faiss.index_factory for every index key
d = xt.shape[-1]

print("load GT")

# ground-truth ids (presumably one row per query -- confirm), widened to int64
gt = ivecs_read("sift1M/sift_groundtruth.ivecs").astype('int64')
k = gt.shape[-1]

print("prepare criterion")

# criterion = 1-recall at 1
crit = faiss.OneRecallAtRCriterion(len(xq), 1)
# only ground-truth ids are supplied; the first argument is left as None
crit.set_groundtruth(None, gt)
# nnn is set to the number of ground-truth columns
crit.nnn = k

# Index factory strings grouped by memory budget / hardware target.

# indexes that are useful when there is no limitation on memory usage
unlimited_mem_keys = [
    "IMI2x10,Flat",
    "IMI2x11,Flat",
    "IVF4096,Flat",
    "IVF16384,Flat",
    "PCA64,IMI2x10,Flat",
]

# memory limited to 16 bytes / vector
keys_mem_16 = [
    "IMI2x10,PQ16",
    "IVF4096,PQ16",
    "IMI2x10,PQ8+8",
    "OPQ16_64,IMI2x10,PQ16",
]

# limited to 32 bytes / vector
keys_mem_32 = [
    "IMI2x10,PQ32",
    "IVF4096,PQ32",
    "IVF16384,PQ32",
    "IMI2x10,PQ16+16",
    "OPQ32,IVF4096,PQ32",
    "IVF4096,PQ16+16",
    "OPQ16,IMI2x10,PQ16+16",
]

# indexes that can run on the GPU
keys_gpu = [
    "PCA64,IVF4096,Flat",
    "PCA64,Flat",
    "Flat",
    "IVF4096,Flat",
    "IVF16384,Flat",
    "IVF4096,PQ32",
]

# Run configuration: which of the lists above is explored, and whether the
# indexes are moved to the GPU before exploration.
use_gpu = False
keys_to_test = unlimited_mem_keys


# Optional GPU setup: defines `res` and `dev_no`, which the exploration loop
# uses when use_gpu is set.
if use_gpu:
    # If this fails, it means that the GPU version was not compiled (or could
    # not be loaded). An explicit check is used instead of
    # `assert faiss.StandardGpuResources`: when the attribute is missing, the
    # attribute access itself raises AttributeError before the assert message
    # can be shown, and asserts are skipped entirely under `python -O`.
    if not getattr(faiss, "StandardGpuResources", None):
        raise RuntimeError(
            "FAISS was not compiled with GPU support, or loading _swigfaiss_gpu.so failed")
    res = faiss.StandardGpuResources()
    # index of the GPU device the indexes are transferred to
    dev_no = 0

# remember results from other index types
op_per_key = []


# keep track of optimal operating points seen so far
op = faiss.OperatingPoints()


for index_key in keys_to_test:

print("============ key", index_key)

# make the index described by the key
index = faiss.index_factory(d, index_key)


if use_gpu:
# transfer to GPU (may be partial)
index = faiss.index_cpu_to_gpu(res, dev_no, index)
params = faiss.GpuParameterSpace()
else:
params = faiss.ParameterSpace()

params.initialize(index)

print("[%.3f s] train & add" % (time.time() - t0))

index.train(xt)
index.add(xb)

print("[%.3f s] explore op points" % (time.time() - t0))

# find operating points for this index
opi = params.explore(index, xq, crit)

print("[%.3f s] result operating points:" % (time.time() - t0))
opi.display()

# update best operating points so far
op.merge_with(opi, index_key + " ")

op_per_key.append((index_key, opi))

if graphical_output:
# graphical output (to tmp/ subdirectory)

fig = pyplot.figure(figsize=(12, 9))
pyplot.xlabel("1-recall at 1")
pyplot.ylabel("search time (ms/query, %d threads)" % faiss.omp_get_max_threads())
pyplot.gca().set_yscale('log')
pyplot.grid()
for i2, opi2 in op_per_key:
plot_OperatingPoints(opi2, crit.nq, label = i2, marker = 'o')
# plot_OperatingPoints(op, crit.nq, label = 'best', marker = 'o', color = 'r')
pyplot.legend(loc=2)
fig.savefig('tmp/demo_auto_tune.png')


print("[%.3f s] final result:" % (time.time() - t0))

op.display()
Loading

0 comments on commit db87eaf

Please sign in to comment.