diff --git a/demos/CMakeLists.txt b/demos/CMakeLists.txt new file mode 100644 index 0000000..6b4141d --- /dev/null +++ b/demos/CMakeLists.txt @@ -0,0 +1,108 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +add_executable(demo_imi_flat EXCLUDE_FROM_ALL demo_imi_flat.cpp) +target_link_libraries(demo_imi_flat PRIVATE faiss) + +add_executable(demo_imi_pq EXCLUDE_FROM_ALL demo_imi_pq.cpp) +target_link_libraries(demo_imi_pq PRIVATE faiss) + +add_executable(demo_ivfpq_indexing EXCLUDE_FROM_ALL demo_ivfpq_indexing.cpp) +target_link_libraries(demo_ivfpq_indexing PRIVATE faiss) + +add_executable(demo_nndescent EXCLUDE_FROM_ALL demo_nndescent.cpp) +target_link_libraries(demo_nndescent PRIVATE faiss) + +add_executable(demo_sift1M EXCLUDE_FROM_ALL demo_sift1M.cpp) +target_link_libraries(demo_sift1M PRIVATE faiss) + +add_executable(demo_weighted_kmeans EXCLUDE_FROM_ALL demo_weighted_kmeans.cpp) +target_link_libraries(demo_weighted_kmeans PRIVATE faiss) + +add_executable(demo_residual_quantizer EXCLUDE_FROM_ALL demo_residual_quantizer.cpp) +target_link_libraries(demo_residual_quantizer PRIVATE faiss) + +# add_executable(demo_new_test EXCLUDE_FROM_ALL demo_new_test.cpp) +# target_link_libraries(demo_new_test PRIVATE faiss) + +# add_executable(demo_test_search EXCLUDE_FROM_ALL demo_test_search.cpp) +# target_link_libraries(demo_test_search PRIVATE faiss) + +# add_executable(demo_test_search_small EXCLUDE_FROM_ALL demo_test_search_small.cpp) +# target_link_libraries(demo_test_search_small PRIVATE faiss) + +# add_executable(demo_test_hybrid_small EXCLUDE_FROM_ALL demo_test_hybrid_small.cpp) +# target_link_libraries(demo_test_hybrid_small PRIVATE faiss) + +# add_executable(demo_test_hybrid_large EXCLUDE_FROM_ALL demo_test_hybrid_large.cpp) +# target_link_libraries(demo_test_hybrid_large PRIVATE faiss) + +# add_executable(make_indices EXCLUDE_FROM_ALL make_indices.cpp) +# target_link_libraries(make_indices PRIVATE faiss) + +# add_executable(make_sift_indices EXCLUDE_FROM_ALL make_sift_indices.cpp) +# target_link_libraries(make_sift_indices PRIVATE faiss) + +# add_executable(benchmark EXCLUDE_FROM_ALL benchmark.cpp) +# target_link_libraries(benchmark PRIVATE faiss) + +add_executable(utils EXCLUDE_FROM_ALL utils.cpp) +target_link_libraries(utils PRIVATE faiss) + +# add_executable(correlation EXCLUDE_FROM_ALL correlation.cpp) +# target_link_libraries(correlation PRIVATE faiss) + + +# add_executable(check_queries EXCLUDE_FROM_ALL check_queries.cpp) +# target_link_libraries(check_queries PRIVATE faiss) + +# add_executable(trace_query EXCLUDE_FROM_ALL trace_query.cpp) +# target_link_libraries(trace_query PRIVATE faiss) + +# add_executable(make_debug_index EXCLUDE_FROM_ALL make_debug_index.cpp) +# target_link_libraries(make_debug_index PRIVATE faiss) + +# add_executable(print_edges EXCLUDE_FROM_ALL print_edges.cpp) +# target_link_libraries(print_edges PRIVATE faiss) + +# add_executable(profile_query EXCLUDE_FROM_ALL profile_query.cpp) +# target_link_libraries(profile_query PRIVATE faiss) + +# add_executable(prefilter EXCLUDE_FROM_ALL prefilter.cpp) +# target_link_libraries(prefilter PRIVATE faiss) + +# add_executable(make_tripclick_indices EXCLUDE_FROM_ALL make_tripclick_indices.cpp) +# target_link_libraries(make_tripclick_indices PRIVATE faiss) + +# add_executable(test_tripclick_indices EXCLUDE_FROM_ALL test_tripclick_indices.cpp) +# 
target_link_libraries(test_tripclick_indices PRIVATE faiss)
+
+
+# add_executable(trace_tripclick_query EXCLUDE_FROM_ALL trace_tripclick_query.cpp)
+# target_link_libraries(trace_tripclick_query PRIVATE faiss)
+
+# add_executable(test_tripclick_dates EXCLUDE_FROM_ALL test_tripclick_dates.cpp)
+# target_link_libraries(test_tripclick_dates PRIVATE faiss)
+
+# add_executable(make_tripclick_dates_indices EXCLUDE_FROM_ALL make_tripclick_dates_indices.cpp)
+# target_link_libraries(make_tripclick_dates_indices PRIVATE faiss)
+
+
+# add_executable(make_tripclick_oracle_indices EXCLUDE_FROM_ALL make_tripclick_oracle_indices.cpp)
+# target_link_libraries(make_tripclick_oracle_indices PRIVATE faiss)
+
+
+# add_executable(make_laion_indices EXCLUDE_FROM_ALL make_laion_indices.cpp)
+# target_link_libraries(make_laion_indices PRIVATE faiss)
+
+# add_executable(test_laion_indices EXCLUDE_FROM_ALL test_laion_indices.cpp)
+# target_link_libraries(test_laion_indices PRIVATE faiss)
+
+# add_executable(test_laion_arb_pred EXCLUDE_FROM_ALL test_laion_arb_pred.cpp)
+# target_link_libraries(test_laion_arb_pred PRIVATE faiss)
+
+add_executable(test_acorn EXCLUDE_FROM_ALL test_acorn.cpp)
+target_link_libraries(test_acorn PRIVATE faiss)
diff --git a/demos/README.md b/demos/README.md
new file mode 100644
index 0000000..71a23f2
--- /dev/null
+++ b/demos/README.md
@@ -0,0 +1,28 @@
+
+
+Demos for a few Faiss functionalities
+=====================================
+
+
+demo_auto_tune.py
+-----------------
+
+Demonstrates the auto-tuning functionality of Faiss.
+
+
+demo_ondisk_ivf.py
+------------------
+
+Shows how to construct a Faiss index that stores the inverted file
+data on disk, e.g. when it does not fit in RAM. The script works on a
+small dataset (sift1M) for demonstration and proceeds in stages:
+
+0: train on the dataset
+
+1-4: build 4 indexes, each containing 1/4 of the dataset. This can be
+done in parallel on several machines
+
+5: merge the 4 indexes into one that is written directly to disk
+(it need not fit in RAM)
+
+6: load and test the index
diff --git a/demos/demo_auto_tune.py b/demos/demo_auto_tune.py
new file mode 100755
index 0000000..be1079a
--- /dev/null
+++ b/demos/demo_auto_tune.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python2
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
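+
+# Note: like demo_sift1M, this script expects the ANN_SIFT1M dataset
+# (http://corpus-texmex.irisa.fr/) unpacked into a sift1M/ subdirectory
+# containing sift_learn.fvecs, sift_base.fvecs, sift_query.fvecs and
+# sift_groundtruth.ivecs.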
+ +from __future__ import print_function +import os +import time +import numpy as np + +try: + import matplotlib + matplotlib.use('Agg') + from matplotlib import pyplot + graphical_output = True +except ImportError: + graphical_output = False + +import faiss + +################################################################# +# Small I/O functions +################################################################# + +def ivecs_read(fname): + a = np.fromfile(fname, dtype="int32") + d = a[0] + return a.reshape(-1, d + 1)[:, 1:].copy() + +def fvecs_read(fname): + return ivecs_read(fname).view('float32') + + +def plot_OperatingPoints(ops, nq, **kwargs): + ops = ops.optimal_pts + n = ops.size() * 2 - 1 + pyplot.plot([ops.at( i // 2).perf for i in range(n)], + [ops.at((i + 1) // 2).t / nq * 1000 for i in range(n)], + **kwargs) + + +################################################################# +# prepare common data for all indexes +################################################################# + + + +t0 = time.time() + +print("load data") + +xt = fvecs_read("sift1M/sift_learn.fvecs") +xb = fvecs_read("sift1M/sift_base.fvecs") +xq = fvecs_read("sift1M/sift_query.fvecs") + +d = xt.shape[1] + +print("load GT") + +gt = ivecs_read("sift1M/sift_groundtruth.ivecs") +gt = gt.astype('int64') +k = gt.shape[1] + +print("prepare criterion") + +# criterion = 1-recall at 1 +crit = faiss.OneRecallAtRCriterion(xq.shape[0], 1) +crit.set_groundtruth(None, gt) +crit.nnn = k + +# indexes that are useful when there is no limitation on memory usage +unlimited_mem_keys = [ + "IMI2x10,Flat", "IMI2x11,Flat", + "IVF4096,Flat", "IVF16384,Flat", + "PCA64,IMI2x10,Flat"] + +# memory limited to 16 bytes / vector +keys_mem_16 = [ + 'IMI2x10,PQ16', 'IVF4096,PQ16', + 'IMI2x10,PQ8+8', 'OPQ16_64,IMI2x10,PQ16' + ] + +# limited to 32 bytes / vector +keys_mem_32 = [ + 'IMI2x10,PQ32', 'IVF4096,PQ32', 'IVF16384,PQ32', + 'IMI2x10,PQ16+16', + 'OPQ32,IVF4096,PQ32', 'IVF4096,PQ16+16', 'OPQ16,IMI2x10,PQ16+16' + ] + +# indexes that can run on the GPU +keys_gpu = [ + "PCA64,IVF4096,Flat", + "PCA64,Flat", "Flat", "IVF4096,Flat", "IVF16384,Flat", + "IVF4096,PQ32"] + + +keys_to_test = unlimited_mem_keys +use_gpu = False + + +if use_gpu: + # if this fails, it means that the GPU version was not comp + assert faiss.StandardGpuResources, \ + "FAISS was not compiled with GPU support, or loading _swigfaiss_gpu.so failed" + res = faiss.StandardGpuResources() + dev_no = 0 + +# remember results from other index types +op_per_key = [] + + +# keep track of optimal operating points seen so far +op = faiss.OperatingPoints() + + +for index_key in keys_to_test: + + print("============ key", index_key) + + # make the index described by the key + index = faiss.index_factory(d, index_key) + + + if use_gpu: + # transfer to GPU (may be partial) + index = faiss.index_cpu_to_gpu(res, dev_no, index) + params = faiss.GpuParameterSpace() + else: + params = faiss.ParameterSpace() + + params.initialize(index) + + print("[%.3f s] train & add" % (time.time() - t0)) + + index.train(xt) + index.add(xb) + + print("[%.3f s] explore op points" % (time.time() - t0)) + + # find operating points for this index + opi = params.explore(index, xq, crit) + + print("[%.3f s] result operating points:" % (time.time() - t0)) + opi.display() + + # update best operating points so far + op.merge_with(opi, index_key + " ") + + op_per_key.append((index_key, opi)) + + if graphical_output: + # graphical output (to tmp/ subdirectory) + + fig = pyplot.figure(figsize=(12, 9)) + 
pyplot.xlabel("1-recall at 1") + pyplot.ylabel("search time (ms/query, %d threads)" % faiss.omp_get_max_threads()) + pyplot.gca().set_yscale('log') + pyplot.grid() + for i2, opi2 in op_per_key: + plot_OperatingPoints(opi2, crit.nq, label = i2, marker = 'o') + # plot_OperatingPoints(op, crit.nq, label = 'best', marker = 'o', color = 'r') + pyplot.legend(loc=2) + fig.savefig('tmp/demo_auto_tune.png') + + +print("[%.3f s] final result:" % (time.time() - t0)) + +op.display() diff --git a/demos/demo_client_server_ivf.py b/demos/demo_client_server_ivf.py new file mode 100755 index 0000000..82803d8 --- /dev/null +++ b/demos/demo_client_server_ivf.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import sys +import numpy as np +import faiss + +from faiss.contrib.client_server import run_index_server, ClientIndex + + +################################################################# +# Small I/O functions +################################################################# + + +def ivecs_read(fname): + a = np.fromfile(fname, dtype='int32') + d = a[0] + return a.reshape(-1, d + 1)[:, 1:].copy() + + +def fvecs_read(fname): + return ivecs_read(fname).view('float32') + + +################################################################# +# Main program +################################################################# + +stage = int(sys.argv[1]) + +tmpdir = '/tmp/' + +if stage == 0: + # train the index + xt = fvecs_read("sift1M/sift_learn.fvecs") + index = faiss.index_factory(xt.shape[1], "IVF4096,Flat") + print("training index") + index.train(xt) + print("write " + tmpdir + "trained.index") + faiss.write_index(index, tmpdir + "trained.index") + + +if 1 <= stage <= 4: + # add 1/4 of the database to 4 independent indexes + bno = stage - 1 + xb = fvecs_read("sift1M/sift_base.fvecs") + i0, i1 = int(bno * xb.shape[0] / 4), int((bno + 1) * xb.shape[0] / 4) + index = faiss.read_index(tmpdir + "trained.index") + print("adding vectors %d:%d" % (i0, i1)) + index.add_with_ids(xb[i0:i1], np.arange(i0, i1)) + print("write " + tmpdir + "block_%d.index" % bno) + faiss.write_index(index, tmpdir + "block_%d.index" % bno) + + +machine_ports = [ + ('localhost', 12010), + ('localhost', 12011), + ('localhost', 12012), + ('localhost', 12013), +] +v6 = False + +if 5 <= stage <= 8: + # load an index slice and launch index + bno = stage - 5 + + fname = tmpdir + "block_%d.index" % bno + print("read " + fname) + index = faiss.read_index(fname) + + port = machine_ports[bno][1] + run_index_server(index, port, v6=v6) + + +if stage == 9: + client_index = ClientIndex(machine_ports) + print('index size:', client_index.ntotal) + client_index.set_nprobe(16) + + # load query vectors and ground-truth + xq = fvecs_read("sift1M/sift_query.fvecs") + gt = ivecs_read("sift1M/sift_groundtruth.ivecs") + + D, I = client_index.search(xq, 5) + + recall_at_1 = (I[:, :1] == gt[:, :1]).sum() / float(xq.shape[0]) + print("recall@1: %.3f" % recall_at_1) diff --git a/demos/demo_imi_flat.cpp b/demos/demo_imi_flat.cpp new file mode 100644 index 0000000..7713da4 --- /dev/null +++ b/demos/demo_imi_flat.cpp @@ -0,0 +1,156 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+
+#include <sys/time.h>
+
+#include <faiss/IndexFlat.h>
+#include <faiss/IndexIVFFlat.h>
+#include <faiss/MetricType.h>
+#include <faiss/MultiIndexQuantizer.h>
+
+double elapsed() {
+    struct timeval tv;
+    gettimeofday(&tv, nullptr);
+    return tv.tv_sec + tv.tv_usec * 1e-6;
+}
+
+int main() {
+    double t0 = elapsed();
+
+    // dimension of the vectors to index
+    int d = 128;
+
+    // size of the database we plan to index
+    size_t nb = 1000 * 1000;
+
+    // make a set of nt training vectors in the unit cube
+    // (could be the database)
+    size_t nt = 100 * 1000;
+
+    //---------------------------------------------------------------
+    // Define the core quantizer
+    // We choose a multiple inverted index for faster training with less data
+    // and because it usually offers the best accuracy/speed trade-offs
+    //
+    // We assume here that the lifespan of this coarse quantizer will cover
+    // the lifespan of the inverted-file index IndexIVFFlat below
+    // With dynamic allocation, one may give the responsibility to free the
+    // quantizer to the inverted-file index (with attribute do_delete_quantizer)
+    //
+    // Note: a regular clustering algorithm would be defined as:
+    //       faiss::IndexFlatL2 coarse_quantizer (d);
+    //
+    // Use nhash=2 subquantizers to define the product coarse quantizer
+    // Number of bits: we will have 2^nbits_subq centroids per subquantizer
+    //                 meaning (2^nbits_subq)^nhash distinct inverted lists
+    size_t nhash = 2;
+    size_t nbits_subq = int(log2(nb + 1) / 2); // good choice in general
+    size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
+
+    faiss::MultiIndexQuantizer coarse_quantizer(d, nhash, nbits_subq);
+
+    printf("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)\n",
+           nhash,
+           nbits_subq,
+           ncentroids,
+           nb);
+
+    // the coarse quantizer should not be deallocated before the index
+    faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
+    faiss::IndexIVFFlat index(&coarse_quantizer, d, ncentroids, metric);
+    index.quantizer_trains_alone = true;
+
+    // define the number of probes. 2048 is for high-dim, overkill in practice
+    // Use 4-1024 depending on the speed/accuracy trade-off that you want
+    index.nprobe = 2048;
+
+    std::mt19937 rng;
+    std::uniform_real_distribution<> distrib;
+
+    { // training
+        printf("[%.3f s] Generating %ld vectors in %dD for training\n",
+               elapsed() - t0,
+               nt,
+               d);
+
+        std::vector<float> trainvecs(nt * d);
+        for (size_t i = 0; i < nt * d; i++) {
+            trainvecs[i] = distrib(rng);
+        }
+
+        printf("[%.3f s] Training the index\n", elapsed() - t0);
+        index.verbose = true;
+        index.train(nt, trainvecs.data());
+    }
+
+    size_t nq;
+    std::vector<float> queries;
+
+    { // populating the database
+        printf("[%.3f s] Building a dataset of %ld vectors to index\n",
+               elapsed() - t0,
+               nb);
+
+        std::vector<float> database(nb * d);
+        for (size_t i = 0; i < nb * d; i++) {
+            database[i] = distrib(rng);
+        }
+
+        printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
+
+        index.add(nb, database.data());
+
+        // remember a few elements from the database as queries
+        int i0 = 1234;
+        int i1 = 1244;
+
+        nq = i1 - i0;
+        queries.resize(nq * d);
+        for (int i = i0; i < i1; i++) {
+            for (int j = 0; j < d; j++) {
+                queries[(i - i0) * d + j] = database[i * d + j];
+            }
+        }
+    }
+
+    { // searching the database
+        int k = 5;
+        printf("[%.3f s] Searching the %d nearest neighbors "
+               "of %ld vectors in the index\n",
+               elapsed() - t0,
+               k,
+               nq);
+
+        std::vector<faiss::idx_t> nns(k * nq);
+        std::vector<float> dis(k * nq);
+
+        index.search(nq, queries.data(), k, dis.data(), nns.data());
+
+        printf("[%.3f s] Query results (vector ids, then distances):\n",
+               elapsed() - t0);
+
+        for (int i = 0; i < nq; i++) {
+            printf("query %2d: ", i);
+            for (int j = 0; j < k; j++) {
+                printf("%7ld ", nns[j + i * k]);
+            }
+            printf("\n     dis: ");
+            for (int j = 0; j < k; j++) {
+                printf("%7g ", dis[j + i * k]);
+            }
+            printf("\n");
+        }
+    }
+    return 0;
+}
diff --git a/demos/demo_imi_pq.cpp b/demos/demo_imi_pq.cpp
new file mode 100644
index 0000000..b20aefb
--- /dev/null
+++ b/demos/demo_imi_pq.cpp
@@ -0,0 +1,208 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
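+ *
+ * Same pipeline as demo_imi_flat, but the inverted lists store 16-byte
+ * PQ codes (IndexIVFPQ); the demo also adds vectors in batches with
+ * explicit ids and serializes the index to /tmp at two stages.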
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+
+#include <sys/time.h>
+
+#include <faiss/IndexIVFPQ.h>
+#include <faiss/MetricType.h>
+#include <faiss/MultiIndexQuantizer.h>
+#include <faiss/index_io.h>
+
+double elapsed() {
+    struct timeval tv;
+    gettimeofday(&tv, nullptr);
+    return tv.tv_sec + tv.tv_usec * 1e-6;
+}
+
+int main() {
+    double t0 = elapsed();
+
+    // dimension of the vectors to index
+    int d = 64;
+
+    // size of the database we plan to index
+    size_t nb = 1000 * 1000;
+    size_t add_bs = 10000; // size of the blocks to add
+
+    // make a set of nt training vectors in the unit cube
+    // (could be the database)
+    size_t nt = 100 * 1000;
+
+    //---------------------------------------------------------------
+    // Define the core quantizer
+    // We choose a multiple inverted index for faster training with less data
+    // and because it usually offers the best accuracy/speed trade-offs
+    //
+    // We assume here that the lifespan of this coarse quantizer will cover
+    // the lifespan of the inverted-file index IndexIVFPQ below
+    // With dynamic allocation, one may give the responsibility to free the
+    // quantizer to the inverted-file index (with attribute do_delete_quantizer)
+    //
+    // Note: a regular clustering algorithm would be defined as:
+    //       faiss::IndexFlatL2 coarse_quantizer (d);
+    //
+    // Use nhash=2 subquantizers to define the product coarse quantizer
+    // Number of bits: we will have 2^nbits_subq centroids per subquantizer
+    //                 meaning (2^nbits_subq)^nhash distinct inverted lists
+    //
+    // The parameter bytes_per_code is determined by the memory
+    // constraint, the dataset will use nb * (bytes_per_code + 8)
+    // bytes.
+    //
+    // The parameter nbits_subq is determined by the size of the dataset to
+    // index.
+    //
+    size_t nhash = 2;
+    size_t nbits_subq = 9;
+    size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
+    int bytes_per_code = 16;
+
+    faiss::MultiIndexQuantizer coarse_quantizer(d, nhash, nbits_subq);
+
+    printf("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)\n",
+           nhash,
+           nbits_subq,
+           ncentroids,
+           nb);
+
+    // the coarse quantizer should not be deallocated before the index
+    // 8 = nb of bits per sub-code (almost always 8)
+    faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
+    faiss::IndexIVFPQ index(
+            &coarse_quantizer, d, ncentroids, bytes_per_code, 8);
+    index.quantizer_trains_alone = true;
+
+    // define the number of probes. 2048 is for high-dim, overkill in practice
+    // Use 4-1024 depending on the speed/accuracy trade-off that you want
+    index.nprobe = 2048;
+
+    std::mt19937 rng;
+    std::uniform_real_distribution<> distrib;
+
+    { // training.
+
+        // The distribution of the training vectors should be the same
+        // as the database vectors. It could be a sub-sample of the
+        // database vectors, if sampling is not biased. Here we just
+        // randomly generate the vectors.
+
+        printf("[%.3f s] Generating %ld vectors in %dD for training\n",
+               elapsed() - t0,
+               nt,
+               d);
+
+        std::vector<float> trainvecs(nt * d);
+        for (size_t i = 0; i < nt; i++) {
+            for (size_t j = 0; j < d; j++) {
+                trainvecs[i * d + j] = distrib(rng);
+            }
+        }
+
+        printf("[%.3f s] Training the index\n", elapsed() - t0);
+        index.verbose = true;
+        index.train(nt, trainvecs.data());
+    }
+
+    // the index can be re-loaded later with
+    // faiss::Index* idx = faiss::read_index("/tmp/trained_index.faissindex");
+    faiss::write_index(&index, "/tmp/trained_index.faissindex");
+
+    size_t nq;
+    std::vector<float> queries;
+
+    { // populating the database
+        printf("[%.3f s] Building a dataset of %ld vectors to index\n",
+               elapsed() - t0,
+               nb);
+
+        std::vector<float> database(nb * d);
+        std::vector<faiss::idx_t> ids(nb);
+        for (size_t i = 0; i < nb; i++) {
+            for (size_t j = 0; j < d; j++) {
+                database[i * d + j] = distrib(rng);
+            }
+            ids[i] = 8760000000L + i;
+        }
+
+        printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
+
+        for (size_t begin = 0; begin < nb; begin += add_bs) {
+            size_t end = std::min(begin + add_bs, nb);
+            index.add_with_ids(
+                    end - begin,
+                    database.data() + d * begin,
+                    ids.data() + begin);
+        }
+
+        // remember a few elements from the database as queries
+        int i0 = 1234;
+        int i1 = 1244;
+
+        nq = i1 - i0;
+        queries.resize(nq * d);
+        for (int i = i0; i < i1; i++) {
+            for (int j = 0; j < d; j++) {
+                queries[(i - i0) * d + j] = database[i * d + j];
+            }
+        }
+    }
+
+    // A few notes on the internal format of the index:
+    //
+    // - the posting lists for PQ codes are index.codes, which is a
+    //   std::vector<std::vector<uint8_t>>
+    //   if n is the length of posting list #i, codes[i] has length
+    //   bytes_per_code * n
+    //
+    // - the corresponding ids are stored in index.ids
+    //
+    // - given a vector float *x, finding which k centroids are
+    //   closest to it (ie to find the nearest neighbors) can be done with
+    //
+    //   faiss::idx_t *centroid_ids = new faiss::idx_t[k];
+    //   float *distances = new float[k];
+    //   index.quantizer->search(1, x, k, distances, centroid_ids);
+
+    faiss::write_index(&index, "/tmp/populated_index.faissindex");
+
+    { // searching the database
+        int k = 5;
+        printf("[%.3f s] Searching the %d nearest neighbors "
+               "of %ld vectors in the index\n",
+               elapsed() - t0,
+               k,
+               nq);
+
+        std::vector<faiss::idx_t> nns(k * nq);
+        std::vector<float> dis(k * nq);
+
+        index.search(nq, queries.data(), k, dis.data(), nns.data());
+
+        printf("[%.3f s] Query results (vector ids, then distances):\n",
+               elapsed() - t0);
+
+        for (int i = 0; i < nq; i++) {
+            printf("query %2d: ", i);
+            for (int j = 0; j < k; j++) {
+                printf("%7ld ", nns[j + i * k]);
+            }
+            printf("\n     dis: ");
+            for (int j = 0; j < k; j++) {
+                printf("%7g ", dis[j + i * k]);
+            }
+            printf("\n");
+        }
+    }
+    return 0;
+}
diff --git a/demos/demo_ivfpq_indexing.cpp b/demos/demo_ivfpq_indexing.cpp
new file mode 100644
index 0000000..7f3efbd
--- /dev/null
+++ b/demos/demo_ivfpq_indexing.cpp
@@ -0,0 +1,139 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+
+#include <sys/time.h>
+
+#include <faiss/IndexFlat.h>
+#include <faiss/IndexIVFPQ.h>
+#include <faiss/index_io.h>
+
+double elapsed() {
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec + tv.tv_usec * 1e-6;
+}
+
+int main() {
+    double t0 = elapsed();
+
+    // dimension of the vectors to index
+    int d = 128;
+
+    // size of the database we plan to index
+    size_t nb = 200 * 1000;
+
+    // make a set of nt training vectors in the unit cube
+    // (could be the database)
+    size_t nt = 100 * 1000;
+
+    // make the index object and train it
+    faiss::IndexFlatL2 coarse_quantizer(d);
+
+    // a reasonable number of centroids to index nb vectors
+    int ncentroids = int(4 * sqrt(nb));
+
+    // the coarse quantizer should not be deallocated before the index
+    // 4 = nb of bytes per code (d must be a multiple of this)
+    // 8 = nb of bits per sub-code (almost always 8)
+    faiss::IndexIVFPQ index(&coarse_quantizer, d, ncentroids, 4, 8);
+
+    std::mt19937 rng;
+
+    { // training
+        printf("[%.3f s] Generating %ld vectors in %dD for training\n",
+               elapsed() - t0,
+               nt,
+               d);
+
+        std::vector<float> trainvecs(nt * d);
+        std::uniform_real_distribution<> distrib;
+        for (size_t i = 0; i < nt * d; i++) {
+            trainvecs[i] = distrib(rng);
+        }
+
+        printf("[%.3f s] Training the index\n", elapsed() - t0);
+        index.verbose = true;
+
+        index.train(nt, trainvecs.data());
+    }
+
+    { // I/O demo
+        const char* outfilename = "/tmp/index_trained.faissindex";
+        printf("[%.3f s] storing the pre-trained index to %s\n",
+               elapsed() - t0,
+               outfilename);
+
+        write_index(&index, outfilename);
+    }
+
+    size_t nq;
+    std::vector<float> queries;
+
+    { // populating the database
+        printf("[%.3f s] Building a dataset of %ld vectors to index\n",
+               elapsed() - t0,
+               nb);
+
+        std::vector<float> database(nb * d);
+        std::uniform_real_distribution<> distrib;
+        for (size_t i = 0; i < nb * d; i++) {
+            database[i] = distrib(rng);
+        }
+
+        printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
+
+        index.add(nb, database.data());
+
+        printf("[%.3f s] imbalance factor: %g\n",
+               elapsed() - t0,
+               index.invlists->imbalance_factor());
+
+        // remember a few elements from the database as queries
+        int i0 = 1234;
+        int i1 = 1243;
+
+        nq = i1 - i0;
+        queries.resize(nq * d);
+        for (int i = i0; i < i1; i++) {
+            for (int j = 0; j < d; j++) {
+                queries[(i - i0) * d + j] = database[i * d + j];
+            }
+        }
+    }
+
+    { // searching the database
+        int k = 5;
+        printf("[%.3f s] Searching the %d nearest neighbors "
+               "of %ld vectors in the index\n",
+               elapsed() - t0,
+               k,
+               nq);
+
+        std::vector<faiss::idx_t> nns(k * nq);
+        std::vector<float> dis(k * nq);
+
+        index.search(nq, queries.data(), k, dis.data(), nns.data());
+
+        printf("[%.3f s] Query results (vector ids, then distances):\n",
+               elapsed() - t0);
+
+        for (int i = 0; i < nq; i++) {
+            printf("query %2d: ", i);
+            for (int j = 0; j < k; j++) {
+                printf("%7ld ", nns[j + i * k]);
+            }
+            printf("\n     dis: ");
+            for (int j = 0; j < k; j++) {
+                printf("%7g ", dis[j + i * k]);
+            }
+            printf("\n");
+        }
+
+        printf("note that the nearest neighbor is not at "
+               "distance 0 due to quantization errors\n");
+    }
+
+    return 0;
+}
diff --git a/demos/demo_new_test.cpp b/demos/demo_new_test.cpp
new file mode 100644
index 0000000..2e66ab3
--- /dev/null
+++ b/demos/demo_new_test.cpp
@@ -0,0 +1,177 @@
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+
+#include <sys/time.h>
+
+#include <faiss/IndexHNSW.h>
+#include <faiss/MetricType.h>
+#include <faiss/index_io.h>
+
+// added these
+#include <stdlib.h>
+#include <stdio.h>
+#include <cassert>
+#include <cstring>
+#include <ctime>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <sstream> // for ostringstream
+#include <iomanip>
+#include <thread>
+#include <chrono>
+
+
+
+/*******************************************************
+ * Added for debugging
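+ * debugFlag = 1 prints messages plainly to stdout; debugFlag = 2 prefixes
+ * them with a timestamp and file/line/function info (see the debug() macro
+ * below).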
+ *******************************************************/
+const int debugFlag = 1;
+
+void debugTime() {
+    if (debugFlag) {
+        struct timeval tval;
+        gettimeofday(&tval, NULL);
+        struct tm* tm_info = localtime(&tval.tv_sec);
+        char timeBuff[25] = "";
+        strftime(timeBuff, 25, "%H:%M:%S", tm_info);
+        char timeBuffWithMilli[50] = "";
+        sprintf(timeBuffWithMilli, "%s.%06ld ", timeBuff, tval.tv_usec);
+        std::string timestamp(timeBuffWithMilli);
+        std::cout << timestamp << std::flush;
+    }
+}
+
+// needs at least 2 args always
+// alt debugFlag = 1 // fprintf(stderr, fmt, __VA_ARGS__);
+#define debug(fmt, ...) \
+    do { \
+        if (debugFlag == 1) { \
+            fprintf(stdout, "--" fmt, __VA_ARGS__); \
+        } \
+        if (debugFlag == 2) { \
+            debugTime(); \
+            fprintf(stdout, "%s:%d:%s(): " fmt, __FILE__, __LINE__, __func__, __VA_ARGS__); \
+        } \
+    } while (0)
+
+
+
+double elapsed() {
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec + tv.tv_usec * 1e-6;
+}
+
+/*******************************************************
+ * Run tests
+ *******************************************************/
+
+// args are nb, M, gamma
+int main(int argc, char* argv[]) {
+    printf("====================\nSTART: running tests for hnsw...\n");
+    double t0 = elapsed();
+    int opt;
+    int d = 128; // dimension of the vectors to index
+    size_t nb;
+    int M;
+    int gamma;
+    // int d = 128; // dimension of the vectors to index
+    // int M = 32 * 1000; // HNSW param M
+    // size_t nb = 1000; // size of the database we plan to index
+
+
+    { // parse arguments
+
+        if (argc != 4) {
+            fprintf(stderr, "Syntax: %s <nb> <M> <gamma>\n", argv[0]);
+            exit(1);
+        }
+
+        nb = strtoul(argv[1], NULL, 10);
+        debug("nb: %ld\n", nb);
+
+        M = atoi(argv[2]);
+        debug("M: %d\n", M);
+
+        gamma = atoi(argv[3]);
+        debug("gamma: %d\n", gamma);
+    }
+
+    printf("[%.3f s] Index Params -- d: %d, M: %d, nb: %ld, gamma: %d\n",
+           elapsed() - t0, d, M, nb, gamma);
+    faiss::IndexHNSWFlat index(d, M, gamma);
+    debug("HNSW index created%s\n", "");
+
+    std::mt19937 rng; // random generator to be used for creating vectors
+
+    size_t nq; // num queries
+    std::vector<float> queries;
+
+    { // populating the database
+        printf("[%.3f s] Building a dataset of %ld vectors to index\n",
+               elapsed() - t0,
+               nb);
+
+        std::vector<float> database(nb * d);
+        std::uniform_real_distribution<> distrib;
+        for (size_t i = 0; i < nb * d; i++) {
+            database[i] = distrib(rng);
+        }
+
+        printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
+
+        index.add(nb, database.data());
+
+        printf("[%.3f s] Vectors added\n", elapsed() - t0);
+
+        // TODO: print out stats here
+        // printf("[%.3f s] imbalance factor: %g\n",
+        //        elapsed() - t0,
+        //        index.invlists->imbalance_factor());
+
+        // remember a few elements from the database as queries
+        int i0 = 4;
+        int i1 = 8;
+
+        nq = i1 - i0;
+        queries.resize(nq * d);
+        for (int i = i0; i < i1; i++) {
+            for (int j = 0; j < d; j++) {
+                queries[(i - i0) * d + j] = database[i * d + j];
+            }
+        }
+    }
+
+    { // print out stats
+        index.printStats();
+    }
+
+    { // get index size
+
+        // file name
+        std::ostringstream ss;
+        ss << "./tmp/index_hnsw_N=" << nb << ".faissindex";
+        std::string s_tmp = ss.str();
+        const char* outfilename = s_tmp.c_str();
+        // const char* outfilename = "/tmp/index_hnsw.faissindex";
+        printf("[%.3f s] storing the hnsw index to %s\n",
+               elapsed() - t0,
+               outfilename);
+
+        // write index to disk
+        write_index(&index, outfilename);
+
+        // measure file size
+        std::ifstream in_file(outfilename, std::ios::binary);
+        in_file.seekg(0, std::ios::end);
+        int file_size = in_file.tellg();
+        std::cout << "====Size of the file is " << file_size << " bytes" << std::endl;
+    }
+
+    printf("-----DONE-----\n");
+}
\ No newline at end of file
diff --git a/demos/demo_nndescent.cpp b/demos/demo_nndescent.cpp
new file mode 100644
index 0000000..34594b0
--- /dev/null
+++ b/demos/demo_nndescent.cpp
@@ -0,0 +1,88 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <chrono>
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+
+#include <faiss/IndexFlat.h>
+#include <faiss/IndexNNDescent.h>
+
+using namespace std::chrono;
+
+int main(void) {
+    // dimension of the vectors to index
+    int d = 64;
+    int K = 64;
+
+    // size of the database we plan to index
+    size_t nb = 10000;
+
+    std::mt19937 rng(12345);
+
+    // make the index object and train it
+    faiss::IndexNNDescentFlat index(d, K, faiss::METRIC_L2);
+    index.nndescent.S = 10;
+    index.nndescent.R = 32;
+    index.nndescent.L = K;
+    index.nndescent.iter = 10;
+    index.verbose = true;
+
+    // generate labels by IndexFlat
+    faiss::IndexFlat bruteforce(d, faiss::METRIC_L2);
+
+    std::vector<float> database(nb * d);
+    for (size_t i = 0; i < nb * d; i++) {
+        database[i] = rng() % 1024;
+    }
+
+    { // populating the database
+        index.add(nb, database.data());
+        bruteforce.add(nb, database.data());
+    }
+
+    size_t nq = 1000;
+
+    { // searching the database
+        printf("Searching ...\n");
+        index.nndescent.search_L = 50;
+
+        std::vector<float> queries(nq * d);
+        for (size_t i = 0; i < nq * d; i++) {
+            queries[i] = rng() % 1024;
+        }
+
+        int k = 5;
+        std::vector<faiss::idx_t> nns(k * nq);
+        std::vector<faiss::idx_t> gt_nns(k * nq);
+        std::vector<float> dis(k * nq);
+
+        auto start = high_resolution_clock::now();
+        index.search(nq, queries.data(), k, dis.data(), nns.data());
+        auto end = high_resolution_clock::now();
+
+        // find exact kNNs by brute force search
+        bruteforce.search(nq, queries.data(), k, dis.data(), gt_nns.data());
+
+        int recalls = 0;
+        for (size_t i = 0; i < nq; ++i) {
+            for (int n = 0; n < k; n++) {
+                for (int m = 0; m < k; m++) {
+                    if (nns[i * k + n] == gt_nns[i * k + m]) {
+                        recalls += 1;
+                    }
+                }
+            }
+        }
+        float recall = 1.0f * recalls / (k * nq);
+        auto t = duration_cast<microseconds>(end - start).count();
+        int qps = nq * 1.0f * 1000 * 1000 / t;
+
+        printf("Recall@%d: %f, QPS: %d\n", k, recall, qps);
+    }
+}
diff --git a/demos/demo_ondisk_ivf.py b/demos/demo_ondisk_ivf.py
new file mode 100755
index 0000000..e4d6437
--- /dev/null
+++ b/demos/demo_ondisk_ivf.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
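+
+# The script is driven by a stage number given on the command line; a
+# sketch of the intended invocation (stages 1-4 are independent and can
+# run in parallel, e.g. on several machines):
+#
+#   python demo_ondisk_ivf.py 0   # train the index
+#   python demo_ondisk_ivf.py 1   # ...through 4: each adds 1/4 of the data
+#   python demo_ondisk_ivf.py 5   # merge the blocks on disk
+#   python demo_ondisk_ivf.py 6   # load the merged index and search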
+
+import sys
+import numpy as np
+import faiss
+from faiss.contrib.ondisk import merge_ondisk
+
+#################################################################
+# Small I/O functions
+#################################################################
+
+
+def ivecs_read(fname):
+    a = np.fromfile(fname, dtype='int32')
+    d = a[0]
+    return a.reshape(-1, d + 1)[:, 1:].copy()
+
+
+def fvecs_read(fname):
+    return ivecs_read(fname).view('float32')
+
+
+#################################################################
+# Main program
+#################################################################
+
+stage = int(sys.argv[1])
+
+tmpdir = '/tmp/'
+
+if stage == 0:
+    # train the index
+    xt = fvecs_read("sift1M/sift_learn.fvecs")
+    index = faiss.index_factory(xt.shape[1], "IVF4096,Flat")
+    print("training index")
+    index.train(xt)
+    print("write " + tmpdir + "trained.index")
+    faiss.write_index(index, tmpdir + "trained.index")
+
+
+if 1 <= stage <= 4:
+    # add 1/4 of the database to 4 independent indexes
+    bno = stage - 1
+    xb = fvecs_read("sift1M/sift_base.fvecs")
+    i0, i1 = int(bno * xb.shape[0] / 4), int((bno + 1) * xb.shape[0] / 4)
+    index = faiss.read_index(tmpdir + "trained.index")
+    print("adding vectors %d:%d" % (i0, i1))
+    index.add_with_ids(xb[i0:i1], np.arange(i0, i1))
+    print("write " + tmpdir + "block_%d.index" % bno)
+    faiss.write_index(index, tmpdir + "block_%d.index" % bno)
+
+if stage == 5:
+
+    print('loading trained index')
+    # construct the output index
+    index = faiss.read_index(tmpdir + "trained.index")
+
+    block_fnames = [
+        tmpdir + "block_%d.index" % bno
+        for bno in range(4)
+    ]
+
+    merge_ondisk(index, block_fnames, tmpdir + "merged_index.ivfdata")
+
+    print("write " + tmpdir + "populated.index")
+    faiss.write_index(index, tmpdir + "populated.index")
+
+
+if stage == 6:
+    # perform a search from disk
+    print("read " + tmpdir + "populated.index")
+    index = faiss.read_index(tmpdir + "populated.index")
+    index.nprobe = 16
+
+    # load query vectors and ground-truth
+    xq = fvecs_read("sift1M/sift_query.fvecs")
+    gt = ivecs_read("sift1M/sift_groundtruth.ivecs")
+
+    D, I = index.search(xq, 5)
+
+    recall_at_1 = (I[:, :1] == gt[:, :1]).sum() / float(xq.shape[0])
+    print("recall@1: %.3f" % recall_at_1)
diff --git a/demos/demo_residual_quantizer.cpp b/demos/demo_residual_quantizer.cpp
new file mode 100644
index 0000000..6166fc1
--- /dev/null
+++ b/demos/demo_residual_quantizer.cpp
@@ -0,0 +1,292 @@
+// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <faiss/IVFlib.h>
+#include <faiss/IndexAdditiveQuantizer.h>
+#include <faiss/IndexIVFAdditiveQuantizer.h>
+#include <faiss/impl/ResidualQuantizer.h>
+#include <faiss/impl/io.h>
+#include <faiss/utils/distances.h>
+#include <faiss/utils/random.h>
+#include <faiss/utils/utils.h>
+
+/* This demo file shows how to:
+ * - use a DistanceComputer to compute distances with encoded vectors
+ * - in the context of an IVF, how to split an additive quantizer into an
+ * AdditiveCoarseQuantizer and a ResidualQuantizer, in two different ways, with
+ * and without storing the prefix.
+ */
+
+int main() {
+    /******************************************
+     * Generate a test dataset
+     ******************************************/
+    using idx_t = faiss::idx_t;
+    size_t d = 128;
+    size_t nt = 10000;
+    size_t nb = 10000;
+    size_t nq = 100;
+    double t0 = faiss::getmillisecs();
+
+    auto tic = [t0]() {
+        printf("[%.3f s] ", (faiss::getmillisecs() - t0) / 1000);
+    };
+
+    tic();
+    printf("sampling dataset of %zd dim vectors, Q %zd B %zd T %zd\n",
+           d,
+           nq,
+           nb,
+           nt);
+
+    std::vector<float> buf(d * (nq + nt + nb));
+    faiss::rand_smooth_vectors(nq + nt + nb, d, buf.data(), 1234);
+    const float* xt = buf.data();
+    const float* xb = buf.data() + nt * d;
+    const float* xq = buf.data() + (nt + nb) * d;
+
+    idx_t k = 10;
+    std::vector<idx_t> gt(k * nq);
+    std::vector<float> unused(k * nq);
+    tic();
+    printf("compute ground truth, k=%zd\n", k);
+    faiss::knn_L2sqr(xq, xb, d, nq, nb, k, unused.data(), gt.data());
+
+    // a function to compute the accuracy
+    auto accuracy = [&](const idx_t* I) {
+        idx_t accu = 0;
+        for (idx_t q = 0; q < nq; q++) {
+            accu += faiss::ranklist_intersection_size(
+                    k, gt.data() + q * k, k, I + q * k);
+        }
+        return double(accu) / (k * nq);
+    };
+
+    /******************************************
+     * Prepare the residual quantizer
+     ******************************************/
+
+    faiss::ResidualQuantizer rq(
+            d, 7, 6, faiss::AdditiveQuantizer::ST_norm_qint8);
+    // do cheap and inaccurate training
+    rq.cp.niter = 5;
+    rq.max_beam_size = 5;
+    rq.train_type = 0;
+    tic();
+    printf("training the residual quantizer beam_size=%d\n", rq.max_beam_size);
+    rq.train(nt, xt);
+
+    tic();
+    printf("encoding the database, code_size=%zd\n", rq.code_size);
+    size_t code_size = rq.code_size;
+    std::vector<uint8_t> raw_codes(nb * code_size);
+    rq.compute_codes(xb, raw_codes.data(), nb);
+
+    /****************************************************************
+     * Make an index that uses that residual quantizer
+     * Verify that a distance computer gives the same distances
+     ****************************************************************/
+    {
+        faiss::IndexResidualQuantizer index(
+                rq.d, rq.nbits, faiss::METRIC_L2, rq.search_type);
+
+        // override trained index
+        index.rq = rq;
+        index.is_trained = true;
+
+        // override vectors
+        index.codes = raw_codes;
+        index.ntotal = nb;
+
+        tic();
+        printf("IndexResidualQuantizer ready, searching\n");
+
+        std::vector<float> D(k * nq);
+        std::vector<idx_t> I(k * nq);
+        index.search(nq, xq, k, D.data(), I.data());
+
+        tic();
+        printf("Accuracy (intersection @ %zd): %.3f\n", k, accuracy(I.data()));
+        std::unique_ptr<faiss::FlatCodesDistanceComputer> dc(
+                index.get_FlatCodesDistanceComputer());
+
+        float max_diff12 = 0, max_diff13 = 0;
+
+        for (idx_t q = 0; q < nq; q++) {
+            const float* query = xq + q * d;
+            dc->set_query(query);
+            for (int i = 0; i < k; i++) {
+                // 3 ways of computing the same distance
+
+                // distance returned by the index
+                float dis1 = D[q * k + i];
+
+                // distance returned by the DistanceComputer that accesses the
+                // index
+                idx_t db_index = I[q * k + i];
+                float dis2 = (*dc)(db_index);
+
+                // distance computed from a code that does not belong to the
+                // index
+                const uint8_t* code = raw_codes.data() + code_size * db_index;
+                float dis3 = dc->distance_to_code(code);
+
+                max_diff12 = std::max(std::abs(dis1 - dis2), max_diff12);
+                max_diff13 = std::max(std::abs(dis1 - dis3), max_diff13);
+            }
+        }
+        tic();
+        printf("Max DistanceComputer discrepancy 1-2: %g 1-3: %g\n",
+               max_diff12,
+               max_diff13);
+    }
+
+    /****************************************************************
+     * Make an IVF index that uses the first 2 levels as a coarse quantizer
+     * The IVF codes contain the full code (i.e. redundant with the coarse
+     * quantizer code)
+     ****************************************************************/
+    {
+        // build a coarse quantizer from the 2 first levels of the RQ
+        std::vector<size_t> nbits(2);
+        std::copy(rq.nbits.begin(), rq.nbits.begin() + 2, nbits.begin());
+        faiss::ResidualCoarseQuantizer rcq(rq.d, nbits);
+
+        // set the coarse quantizer from the 2 first quantizers
+        rcq.rq.initialize_from(rq);
+        rcq.is_trained = true;
+        rcq.ntotal = (idx_t)1 << rcq.rq.tot_bits;
+
+        // settings for exhaustive search in RCQ
+        rcq.centroid_norms.resize(rcq.ntotal);
+        rcq.aq->compute_centroid_norms(rcq.centroid_norms.data());
+        rcq.beam_factor = -1.0; // use exact search
+        size_t nlist = rcq.ntotal;
+        tic();
+        printf("RCQ nlist = %zd tot_bits=%zd\n", nlist, rcq.rq.tot_bits);
+
+        // build a IVFResidualQuantizer from that
+        faiss::IndexIVFResidualQuantizer index(
+                &rcq, rcq.d, nlist, rq.nbits, faiss::METRIC_L2, rq.search_type);
+        index.by_residual = false;
+        index.rq = rq;
+        index.is_trained = true;
+
+        // there are 3 ways of filling up the index...
+        for (std::string filled_with : {"add", "manual", "derived"}) {
+            tic();
+            printf("filling up the index with %s, code_size=%zd\n",
+                   filled_with.c_str(),
+                   index.code_size);
+
+            index.reset();
+
+            if (filled_with == "add") {
+                // standard add method
+                index.add(nb, xb);
+            } else if (filled_with == "manual") {
+                // compute inverted lists and add elements manually
+                // fill in the inverted index manually
+                faiss::InvertedLists& invlists = *index.invlists;
+
+                // assign vectors to inverted lists
+                std::vector<idx_t> listnos(nb);
+                std::vector<float> unused(nb);
+                rcq.search(nb, xb, 1, unused.data(), listnos.data());
+
+                // populate inverted lists
+                for (idx_t i = 0; i < nb; i++) {
+                    invlists.add_entry(
+                            listnos[i], i, &raw_codes[i * code_size]);
+                }
+
+                index.ntotal = nb;
+            } else if (filled_with == "derived") {
+                // Since we have the raw codes precomputed, their prefix is the
+                // inverted list index, so let's use that.
+                faiss::InvertedLists& invlists = *index.invlists;
+
+                // populate inverted lists
+                for (idx_t i = 0; i < nb; i++) {
+                    const uint8_t* code = &raw_codes[i * code_size];
+                    faiss::BitstringReader rd(code, code_size);
+                    idx_t list_no =
+                            rd.read(rcq.rq.tot_bits); // read the list number
+                    invlists.add_entry(list_no, i, code);
+                }
+
+                index.ntotal = nb;
+            }
+
+            tic();
+            printf("Index filled in\n");
+
+            for (int nprobe : {1, 4, 16, 64, int(nlist)}) {
+                printf("setting nprobe=%-4d", nprobe);
+
+                index.nprobe = nprobe;
+                std::vector<float> D(k * nq);
+                std::vector<idx_t> I(k * nq);
+                index.search(nq, xq, k, D.data(), I.data());
+
+                tic();
+                printf("Accuracy (intersection @ %zd): %.3f\n",
+                       k,
+                       accuracy(I.data()));
+            }
+        }
+    }
+
+    /****************************************************************
+     * Make an IVF index that uses the first 2 levels as a coarse
+     * quantizer, but this time does not store the code prefix from the index
+     ****************************************************************/
+
+    {
+        // build a coarse quantizer from the 2 first levels of the RQ
+        int nlevel = 2;
+
+        std::unique_ptr<faiss::IndexIVFResidualQuantizer> index(
+                faiss::ivflib::ivf_residual_from_quantizer(rq, nlevel));
+
+        // there are 2 ways of filling up the index...
+        for (std::string filled_with : {"add", "derived"}) {
+            tic();
+            printf("filling up the IVF index with %s, code_size=%zd\n",
+                   filled_with.c_str(),
+                   index->code_size);
+
+            index->reset();
+
+            if (filled_with == "add") {
+                // standard add method
+                index->add(nb, xb);
+            } else if (filled_with == "derived") {
+                faiss::ivflib::ivf_residual_add_from_flat_codes(
+                        index.get(), nb, raw_codes.data(), rq.code_size);
+            }
+
+            tic();
+            printf("Index filled in\n");
+
+            for (int nprobe : {1, 4, 16, 64, int(index->nlist)}) {
+                printf("setting nprobe=%-4d", nprobe);
+
+                index->nprobe = nprobe;
+                std::vector<float> D(k * nq);
+                std::vector<idx_t> I(k * nq);
+                index->search(nq, xq, k, D.data(), I.data());
+
+                tic();
+                printf("Accuracy (intersection @ %zd): %.3f\n",
+                       k,
+                       accuracy(I.data()));
+            }
+        }
+    }
+
+    return 0;
+}
diff --git a/demos/demo_sift1M.cpp b/demos/demo_sift1M.cpp
new file mode 100644
index 0000000..598565f
--- /dev/null
+++ b/demos/demo_sift1M.cpp
@@ -0,0 +1,256 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <unistd.h>
+
+#include <faiss/AutoTune.h>
+#include <faiss/index_factory.h>
+
+/**
+ * To run this demo, please download the ANN_SIFT1M dataset from
+ *
+ *   http://corpus-texmex.irisa.fr/
+ *
+ * and unzip it to the subdirectory sift1M.
+ **/
+
+/*****************************************************
+ * I/O functions for fvecs and ivecs
+ *****************************************************/
+
+float* fvecs_read(const char* fname, size_t* d_out, size_t* n_out) {
+    FILE* f = fopen(fname, "r");
+    if (!f) {
+        fprintf(stderr, "could not open %s\n", fname);
+        perror("");
+        abort();
+    }
+    int d;
+    fread(&d, 1, sizeof(int), f);
+    assert((d > 0 && d < 1000000) || !"unreasonable dimension");
+    fseek(f, 0, SEEK_SET);
+    struct stat st;
+    fstat(fileno(f), &st);
+    size_t sz = st.st_size;
+    assert(sz % ((d + 1) * 4) == 0 || !"weird file size");
+    size_t n = sz / ((d + 1) * 4);
+
+    *d_out = d;
+    *n_out = n;
+    float* x = new float[n * (d + 1)];
+    size_t nr = fread(x, sizeof(float), n * (d + 1), f);
+    assert(nr == n * (d + 1) || !"could not read whole file");
+
+    // shift array to remove row headers
+    for (size_t i = 0; i < n; i++)
+        memmove(x + i * d, x + 1 + i * (d + 1), d * sizeof(*x));
+
+    fclose(f);
+    return x;
+}
+
+// not very clean, but works as long as sizeof(int) == sizeof(float)
+int* ivecs_read(const char* fname, size_t* d_out, size_t* n_out) {
+    return (int*)fvecs_read(fname, d_out, n_out);
+}
+
+double elapsed() {
+    struct timeval tv;
+    gettimeofday(&tv, nullptr);
+    return tv.tv_sec + tv.tv_usec * 1e-6;
+}
+
+int main() {
+    double t0 = elapsed();
+
+    // this is typically the fastest one.
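+    // The key is a faiss::index_factory string: "IVF4096,Flat" builds an
+    // inverted-file index with 4096 lists that stores uncompressed
+    // vectors, while e.g. "IVF4096,PQ32" would store 32-byte PQ codes.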
+    const char* index_key = "IVF4096,Flat";
+
+    // these ones have better memory usage
+    // const char *index_key = "Flat";
+    // const char *index_key = "PQ32";
+    // const char *index_key = "PCA80,Flat";
+    // const char *index_key = "IVF4096,PQ8+16";
+    // const char *index_key = "IVF4096,PQ32";
+    // const char *index_key = "IMI2x8,PQ32";
+    // const char *index_key = "IMI2x8,PQ8+16";
+    // const char *index_key = "OPQ16_64,IMI2x8,PQ8+16";
+
+    faiss::Index* index;
+
+    size_t d;
+
+    {
+        printf("[%.3f s] Loading train set\n", elapsed() - t0);
+
+        size_t nt;
+        float* xt = fvecs_read("sift1M/sift_learn.fvecs", &d, &nt);
+
+        printf("[%.3f s] Preparing index \"%s\" d=%ld\n",
+               elapsed() - t0,
+               index_key,
+               d);
+        index = faiss::index_factory(d, index_key);
+
+        printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nt);
+
+        index->train(nt, xt);
+        delete[] xt;
+    }
+
+    {
+        printf("[%.3f s] Loading database\n", elapsed() - t0);
+
+        size_t nb, d2;
+        float* xb = fvecs_read("sift1M/sift_base.fvecs", &d2, &nb);
+        assert(d == d2 || !"dataset does not have same dimension as train set");
+
+        printf("[%.3f s] Indexing database, size %ld*%ld\n",
+               elapsed() - t0,
+               nb,
+               d);
+
+        index->add(nb, xb);
+
+        delete[] xb;
+    }
+
+    size_t nq;
+    float* xq;
+
+    {
+        printf("[%.3f s] Loading queries\n", elapsed() - t0);
+
+        size_t d2;
+        xq = fvecs_read("sift1M/sift_query.fvecs", &d2, &nq);
+        assert(d == d2 || !"query does not have same dimension as train set");
+    }
+
+    size_t k;         // nb of results per query in the GT
+    faiss::idx_t* gt; // nq * k matrix of ground-truth nearest-neighbors
+
+    {
+        printf("[%.3f s] Loading ground truth for %ld queries\n",
+               elapsed() - t0,
+               nq);
+
+        // load ground-truth and convert int to long
+        size_t nq2;
+        int* gt_int = ivecs_read("sift1M/sift_groundtruth.ivecs", &k, &nq2);
+        assert(nq2 == nq || !"incorrect nb of ground truth entries");
+
+        gt = new faiss::idx_t[k * nq];
+        for (int i = 0; i < k * nq; i++) {
+            gt[i] = gt_int[i];
+        }
+        delete[] gt_int;
+    }
+
+    // Result of the auto-tuning
+    std::string selected_params;
+
+    { // run auto-tuning
+
+        printf("[%.3f s] Preparing auto-tune criterion 1-recall at 1 "
+               "criterion, with k=%ld nq=%ld\n",
+               elapsed() - t0,
+               k,
+               nq);
+
+        faiss::OneRecallAtRCriterion crit(nq, 1);
+        crit.set_groundtruth(k, nullptr, gt);
+        crit.nnn = k; // by default, the criterion will request only 1 NN
+
+        printf("[%.3f s] Preparing auto-tune parameters\n", elapsed() - t0);
+
+        faiss::ParameterSpace params;
+        params.initialize(index);
+
+        printf("[%.3f s] Auto-tuning over %ld parameters (%ld combinations)\n",
+               elapsed() - t0,
+               params.parameter_ranges.size(),
+               params.n_combinations());
+
+        faiss::OperatingPoints ops;
+        params.explore(index, nq, xq, crit, &ops);
+
+        printf("[%.3f s] Found the following operating points: \n",
+               elapsed() - t0);
+
+        ops.display();
+
+        // keep the first parameter that obtains > 0.5 1-recall@1
+        for (int i = 0; i < ops.optimal_pts.size(); i++) {
+            if (ops.optimal_pts[i].perf > 0.5) {
+                selected_params = ops.optimal_pts[i].key;
+                break;
+            }
+        }
+        assert(selected_params.size() > 0 ||
+               !"could not find good enough op point");
+    }
+
+    { // Use the found configuration to perform a search
+
+        faiss::ParameterSpace params;
+
+        printf("[%.3f s] Setting parameter configuration \"%s\" on index\n",
+               elapsed() - t0,
+               selected_params.c_str());
+
+        params.set_index_parameters(index, selected_params.c_str());
+
+        printf("[%.3f s] Perform a search on %ld queries\n",
+               elapsed() - t0,
+               nq);
+
+        // output buffers
+        faiss::idx_t* I = new faiss::idx_t[nq * k];
+        float* D = new float[nq * k];
+
+        index->search(nq, xq, k, D, I);
+
+        printf("[%.3f s] Compute recalls\n", elapsed() - t0);
+
+        // evaluate result by hand.
+        int n_1 = 0, n_10 = 0, n_100 = 0;
+        for (int i = 0; i < nq; i++) {
+            int gt_nn = gt[i * k];
+            for (int j = 0; j < k; j++) {
+                if (I[i * k + j] == gt_nn) {
+                    if (j < 1)
+                        n_1++;
+                    if (j < 10)
+                        n_10++;
+                    if (j < 100)
+                        n_100++;
+                }
+            }
+        }
+        printf("R@1 = %.4f\n", n_1 / float(nq));
+        printf("R@10 = %.4f\n", n_10 / float(nq));
+        printf("R@100 = %.4f\n", n_100 / float(nq));
+
+        delete[] I;
+        delete[] D;
+    }
+
+    delete[] xq;
+    delete[] gt;
+    delete index;
+    return 0;
+}
diff --git a/demos/demo_weighted_kmeans.cpp b/demos/demo_weighted_kmeans.cpp
new file mode 100644
index 0000000..f6f89fa
--- /dev/null
+++ b/demos/demo_weighted_kmeans.cpp
@@ -0,0 +1,180 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+
+#include <faiss/Clustering.h>
+#include <faiss/IndexFlat.h>
+#include <faiss/IndexHNSW.h>
+#include <faiss/utils/distances.h>
+#include <faiss/utils/random.h>
+
+namespace {
+
+enum WeightedKMeansType {
+    WKMT_FlatL2,
+    WKMT_FlatIP,
+    WKMT_FlatIP_spherical,
+    WKMT_HNSW,
+};
+
+float weighted_kmeans_clustering(
+        size_t d,
+        size_t n,
+        size_t k,
+        const float* input,
+        const float* weights,
+        float* centroids,
+        WeightedKMeansType index_num) {
+    using namespace faiss;
+    Clustering clus(d, k);
+    clus.verbose = true;
+
+    std::unique_ptr<Index> index;
+
+    switch (index_num) {
+        case WKMT_FlatL2:
+            index.reset(new IndexFlatL2(d));
+            break;
+        case WKMT_FlatIP:
+            index.reset(new IndexFlatIP(d));
+            break;
+        case WKMT_FlatIP_spherical:
+            index.reset(new IndexFlatIP(d));
+            clus.spherical = true;
+            break;
+        case WKMT_HNSW:
+            IndexHNSWFlat* ihnsw = new IndexHNSWFlat(d, 32);
+            ihnsw->hnsw.efSearch = 128;
+            index.reset(ihnsw);
+            break;
+    }
+
+    clus.train(n, input, *index.get(), weights);
+    // on output the index contains the centroids.
+    memcpy(centroids, clus.centroids.data(), sizeof(*centroids) * d * k);
+    return clus.iteration_stats.back().obj;
+}
+
+int d = 32;
+float sigma = 0.1;
+
+#define BIGTEST
+
+#ifdef BIGTEST
+// the production setup = setting of https://fb.quip.com/CWgnAAYbwtgs
+int nc = 200000;
+int n_big = 4;
+int n_small = 2;
+#else
+int nc = 5;
+int n_big = 100;
+int n_small = 10;
+#endif
+
+int n; // number of training points
+
+void generate_trainset(
+        std::vector<float>& ccent,
+        std::vector<float>& x,
+        std::vector<float>& weights) {
+    // same sampling as test_build_blocks.py test_weighted
+
+    ccent.resize(d * 2 * nc);
+    faiss::float_randn(ccent.data(), d * 2 * nc, 123);
+    faiss::fvec_renorm_L2(d, 2 * nc, ccent.data());
+    n = nc * n_big + nc * n_small;
+    x.resize(d * n);
+    weights.resize(n);
+    faiss::float_randn(x.data(), x.size(), 1234);
+
+    float* xi = x.data();
+    float* w = weights.data();
+    for (int ci = 0; ci < nc * 2; ci++) { // loop over centroids
+        int np = ci < nc ? n_big : n_small; // nb of points around this centroid
+        for (int i = 0; i < np; i++) {
+            for (int j = 0; j < d; j++) {
+                xi[j] = xi[j] * sigma + ccent[ci * d + j];
+            }
+            *w++ = ci < nc ?
0.1 : 10; + xi += d; + } + } +} + +} // namespace + +int main(int argc, char** argv) { + std::vector ccent; + std::vector x; + std::vector weights; + + printf("generate training set\n"); + generate_trainset(ccent, x, weights); + + std::vector centroids; + centroids.resize(nc * d); + + int the_index_num = -1; + int the_with_weights = -1; + + if (argc == 3) { + the_index_num = atoi(argv[1]); + the_with_weights = atoi(argv[2]); + } + + for (int index_num = WKMT_FlatL2; index_num <= WKMT_HNSW; index_num++) { + if (the_index_num >= 0 && index_num != the_index_num) { + continue; + } + + for (int with_weights = 0; with_weights <= 1; with_weights++) { + if (the_with_weights >= 0 && with_weights != the_with_weights) { + continue; + } + + printf("=================== index_num=%d Run %s weights\n", + index_num, + with_weights ? "with" : "without"); + + weighted_kmeans_clustering( + d, + n, + nc, + x.data(), + with_weights ? weights.data() : nullptr, + centroids.data(), + (WeightedKMeansType)index_num); + + { // compute distance of points to centroids + faiss::IndexFlatL2 cent_index(d); + cent_index.add(nc, centroids.data()); + std::vector dis(n); + std::vector idx(n); + + cent_index.search( + nc * 2, ccent.data(), 1, dis.data(), idx.data()); + + float dis1 = 0, dis2 = 0; + for (int i = 0; i < nc; i++) { + dis1 += dis[i]; + } + printf("average distance of points from big clusters: %g\n", + dis1 / nc); + + for (int i = 0; i < nc; i++) { + dis2 += dis[i + nc]; + } + + printf("average distance of points from small clusters: %g\n", + dis2 / nc); + } + } + } + return 0; +} diff --git a/demos/metadata.txt b/demos/metadata.txt new file mode 100644 index 0000000..9257b63 --- /dev/null +++ b/demos/metadata.txt @@ -0,0 +1,61 @@ +## 1000 rand values between 1 and 10 - for testing nb=1000, s =.1 + +7, 2, 2, 6, 6, 4, 7, 9, 6, 2, 8, 1, 9, 4, 9, 3, 8, + 1, 5, 4, 6, 3, 9, 5, 5, 2, 6, 10, 7, 4, 9, 10, 9, 8, + 9, 6, 3, 5, 9, 6, 9, 3, 10, 2, 10, 3, 5, 7, 7, 8, 10, + 1, 2, 2, 8, 3, 5, 7, 9, 8, 10, 9, 1, 4, 6, 2, 7, 1, + 9, 10, 10, 7, 1, 1, 5, 5, 8, 1, 6, 2, 1, 5, 1, 4, 9, + 2, 10, 7, 5, 1, 10, 2, 1, 4, 4, 8, 7, 2, 2, 10, 5, 9, + 2, 2, 2, 1, 6, 5, 2, 5, 2, 6, 5, 7, 7, 4, 5, 1, 9, + 4, 1, 1, 3, 1, 2, 7, 9, 2, 9, 7, 10, 8, 8, 6, 9, 7, + 5, 8, 8, 4, 9, 3, 3, 6, 8, 6, 9, 6, 9, 9, 5, 8, 6, + 3, 5, 2, 6, 4, 8, 1, 7, 8, 3, 5, 1, 4, 2, 7, 1, 9, + 10, 2, 7, 7, 3, 3, 3, 6, 4, 3, 4, 5, 10, 10, 6, 6, 9, + 8, 5, 9, 1, 1, 3, 5, 8, 4, 5, 3, 10, 2, 4, 7, 1, 3, + 6, 1, 1, 7, 5, 6, 6, 6, 3, 6, 4, 1, 9, 3, 2, 6, 3, + 1, 5, 5, 1, 8, 8, 1, 1, 9, 4, 9, 4, 4, 4, 7, 2, 2, + 4, 10, 5, 5, 5, 6, 1, 4, 2, 4, 8, 1, 1, 3, 8, 9, 2, + 2, 8, 2, 6, 4, 7, 8, 5, 8, 7, 7, 3, 6, 2, 3, 7, 10, + 9, 10, 6, 6, 2, 2, 10, 2, 6, 3, 8, 3, 10, 7, 1, 3, 7, + 7, 1, 4, 10, 10, 5, 7, 3, 4, 4, 3, 8, 3, 1, 1, 4, 2, + 4, 6, 6, 9, 1, 10, 3, 2, 5, 9, 9, 9, 5, 7, 3, 1, 3, + 9, 1, 9, 1, 3, 1, 1, 7, 10, 5, 8, 7, 7, 9, 1, 1, 7, + 8, 6, 9, 3, 2, 3, 9, 1, 2, 2, 10, 3, 10, 5, 8, 2, 8, + 6, 1, 10, 5, 1, 6, 1, 4, 2, 5, 10, 7, 8, 10, 2, 3, 2, + 6, 1, 7, 3, 6, 8, 7, 8, 9, 4, 5, 10, 1, 8, 7, 9, 8, + 5, 2, 5, 7, 9, 10, 2, 5, 7, 1, 3, 9, 1, 5, 1, 3, 3, + 7, 2, 5, 8, 10, 8, 10, 7, 5, 3, 1, 6, 4, 2, 10, 10, 6, + 4, 8, 6, 5, 2, 7, 5, 3, 1, 1, 8, 3, 4, 10, 8, 1, 5, + 4, 9, 2, 8, 6, 5, 4, 3, 1, 9, 6, 4, 3, 3, 5, 2, 3, + 10, 2, 10, 5, 2, 9, 9, 1, 5, 4, 6, 3, 4, 10, 2, 9, 6, + 10, 3, 4, 5, 6, 5, 2, 1, 6, 3, 3, 6, 1, 3, 9, 6, 5, + 5, 1, 3, 2, 3, 7, 7, 4, 10, 4, 9, 9, 6, 2, 10, 2, 6, + 8, 5, 1, 10, 1, 9, 1, 3, 2, 4, 2, 7, 1, 10, 6, 9, 10, + 2, 1, 7, 10, 9, 2, 10, 4, 7, 8, 1, 5, 10, 4, 6, 4, 3, + 4, 
3, 5, 1, 9, 2, 7, 9, 6, 1, 7, 8, 3, 6, 5, 2, 3, + 6, 8, 2, 6, 1, 8, 3, 10, 3, 4, 7, 2, 1, 3, 8, 4, 4, + 7, 4, 1, 8, 7, 1, 2, 10, 10, 6, 8, 3, 9, 4, 3, 3, 3, + 5, 10, 5, 4, 5, 3, 6, 1, 9, 5, 9, 4, 5, 7, 5, 8, 5, + 7, 6, 4, 1, 7, 1, 2, 8, 1, 5, 2, 2, 1, 4, 10, 10, 5, + 8, 5, 6, 4, 2, 5, 6, 5, 1, 5, 10, 6, 10, 1, 1, 7, 4, + 8, 9, 7, 10, 3, 8, 6, 5, 7, 2, 1, 3, 2, 5, 5, 4, 2, + 4, 5, 9, 7, 9, 8, 4, 6, 6, 3, 1, 8, 9, 1, 7, 9, 1, + 8, 4, 4, 4, 3, 8, 9, 7, 6, 3, 10, 7, 2, 4, 7, 5, 4, + 9, 7, 6, 4, 6, 5, 1, 7, 2, 8, 2, 5, 3, 3, 2, 4, 10, + 1, 4, 9, 9, 3, 4, 1, 10, 2, 1, 1, 9, 3, 5, 3, 1, 1, + 3, 10, 3, 7, 10, 6, 7, 10, 6, 4, 9, 5, 3, 8, 8, 6, 9, + 6, 6, 5, 1, 8, 1, 8, 2, 2, 6, 10, 3, 7, 4, 2, 7, 4, + 1, 2, 1, 8, 9, 2, 10, 8, 5, 2, 6, 9, 4, 4, 9, 8, 7, + 7, 5, 6, 9, 6, 4, 1, 3, 2, 8, 3, 6, 1, 9, 2, 3, 4, + 3, 2, 7, 7, 3, 1, 2, 2, 1, 3, 7, 5, 5, 1, 4, 2, 10, + 3, 8, 1, 6, 4, 7, 4, 2, 9, 3, 8, 7, 9, 5, 6, 9, 6, + 10, 4, 8, 2, 10, 2, 8, 6, 5, 8, 2, 5, 3, 8, 1, 6, 2, + 7, 3, 5, 9, 2, 5, 7, 2, 1, 6, 9, 3, 7, 1, 4, 10, 6, + 3, 9, 8, 8, 10, 7, 2, 2, 3, 7, 4, 8, 7, 6, 9, 2, 8, + 2, 3, 3, 5, 9, 2, 2, 4, 9, 9, 7, 5, 2, 2, 3, 4, 3, + 6, 7, 9, 2, 1, 9, 4, 8, 3, 7, 6, 6, 7, 3, 9, 4, 6, + 7, 4, 1, 3, 3, 5, 10, 1, 5, 4, 4, 9, 4, 4, 9, 1, 1, + 1, 3, 5, 1, 6, 2, 2, 4, 6, 7, 9, 1, 7, 3, 1, 8, 9, + 1, 5, 5, 6, 7, 8, 5, 1, 2, 4, 9, 9, 9, 7, 10, 10, 7, + 10, 5, 1, 8, 10, 5, 6, 4, 5, 8, 2, 3, 7, 9, 3, 9, 4, + 7, 5, 8, 1, 9, 2, 9, 1, 6, 10, 3, 3, 9, 6 \ No newline at end of file diff --git a/demos/test_acorn.cpp b/demos/test_acorn.cpp new file mode 100644 index 0000000..91038a3 --- /dev/null +++ b/demos/test_acorn.cpp @@ -0,0 +1,442 @@ +#include +#include +#include +#include +#include + + +#include + +#include +#include +#include +#include + +#include +#include +#include + + +// added these +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // for ostringstream +#include +#include +#include +#include /* assert */ +#include +#include +#include +#include // for std::accumulate +#include // for std::mean and std::stdev +#include +#include "utils.cpp" + + + + +// create indices for debugging, write indices to file, and get recall stats for all queries +int main(int argc, char *argv[]) { + unsigned int nthreads = std::thread::hardware_concurrency(); + std::cout << "====================\nSTART: running TEST_ACORN for hnsw, sift data --" << nthreads << "cores\n" << std::endl; + // printf("====================\nSTART: running MAKE_INDICES for hnsw --...\n"); + double t0 = elapsed(); + + int efc = 40; // default is 40 + int efs = 16; // default is 16 + int k = 10; // search parameter + size_t d = 128; // dimension of the vectors to index - will be overwritten by the dimension of the dataset + int M; // HSNW param M TODO change M back + int M_beta; // param for compression + // float attr_sel = 0.001; + // int gamma = (int) 1 / attr_sel; + int gamma; + int n_centroids; + // int filter = 0; + std::string dataset; // must be sift1B or sift1M or tripclick + int test_partitions = 0; + int step = 10; //2 + + std::string assignment_type = "rand"; + int alpha = 0; + + srand(0); // seed for random number generator + int num_trials = 60; + + + size_t N = 0; // N will be how many we truncate nb from sift1M to + + int opt; + {// parse arguments + + if (argc < 6 || argc > 8) { + fprintf(stderr, "Syntax: %s [] [] \n", argv[0]); + exit(1); + } + + N = strtoul(argv[1], NULL, 10); + printf("N: %ld\n", N); + + + gamma = atoi(argv[2]); + printf("gamma: %d\n", gamma); + + + + + dataset = 
diff --git a/demos/test_acorn.cpp b/demos/test_acorn.cpp
new file mode 100644
index 0000000..91038a3
--- /dev/null
+++ b/demos/test_acorn.cpp
@@ -0,0 +1,442 @@
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+#include <cmath>
+
+#include <sys/time.h>
+#include <sys/stat.h>
+
+#include <faiss/Index.h>
+#include <faiss/IndexHNSW.h>
+#include <faiss/IndexACORN.h>
+#include <faiss/index_io.h>
+
+// added these
+#include <iostream>
+#include <sstream> // for ostringstream
+#include <fstream>
+#include <string>
+#include <set>
+#include <thread>
+#include <numeric> // for std::accumulate
+#include <assert.h> /* assert */
+#include <nlohmann/json.hpp>
+#include "utils.cpp"
+
+// create indices for debugging, write indices to file, and get recall stats for all queries
+int main(int argc, char* argv[]) {
+    unsigned int nthreads = std::thread::hardware_concurrency();
+    std::cout << "====================\nSTART: running TEST_ACORN for hnsw, sift data -- "
+              << nthreads << " cores\n" << std::endl;
+    double t0 = elapsed();
+
+    int efc = 40;   // default is 40
+    int efs = 16;   // default is 16
+    int k = 10;     // search parameter
+    size_t d = 128; // dimension of the vectors to index - will be overwritten by the dimension of the dataset
+    int M;          // HNSW param M, TODO change M back
+    int M_beta;     // param for compression
+    // float attr_sel = 0.001;
+    // int gamma = (int) 1 / attr_sel;
+    int gamma;
+    int n_centroids;
+    // int filter = 0;
+    std::string dataset; // must be one of [sift1M, sift1M_test, sift1B, tripclick, paper, paper_rand2m]
+    int test_partitions = 0;
+    int step = 10; // 2
+
+    std::string assignment_type = "rand";
+    int alpha = 0;
+
+    srand(0); // seed for random number generator
+    int num_trials = 60;
+
+    size_t N = 0; // N will be how many we truncate nb from sift1M to
+
+    int opt;
+    { // parse arguments
+        if (argc < 6 || argc > 8) {
+            fprintf(stderr,
+                    "Syntax: %s <N> <gamma> <dataset> <M> <M_beta> [] []\n",
+                    argv[0]);
+            exit(1);
+        }
+
+        N = strtoul(argv[1], NULL, 10);
+        printf("N: %ld\n", N);
+
+        gamma = atoi(argv[2]);
+        printf("gamma: %d\n", gamma);
+
+        dataset = argv[3];
+        printf("dataset: %s\n", dataset.c_str());
+        if (dataset != "sift1M" && dataset != "sift1M_test" &&
+            dataset != "sift1B" && dataset != "tripclick" &&
+            dataset != "paper" && dataset != "paper_rand2m") {
+            printf("got dataset: %s\n", dataset.c_str());
+            fprintf(stderr,
+                    "Invalid <dataset>; must be one of [sift1M, sift1M_test, sift1B, tripclick, paper, paper_rand2m]\n");
+            exit(1);
+        }
+
+        M = atoi(argv[4]);
+        printf("M: %d\n", M);
+
+        M_beta = atoi(argv[5]);
+        printf("M_beta: %d\n", M_beta);
+    }
+
+    // load metadata
+    n_centroids = gamma;
+
+    std::vector<int> metadata = load_ab(dataset, gamma, assignment_type, N);
+    metadata.resize(N);
+    assert(N == metadata.size());
+    printf("[%.3f s] Loaded metadata, %ld attr's found\n",
+           elapsed() - t0, metadata.size());
+
+    size_t nq;
+    float* xq;
+    std::vector<int> aq;
+    { // load query vectors and attributes
+        printf("[%.3f s] Loading query vectors and attributes\n",
+               elapsed() - t0);
+
+        size_t d2;
+        bool is_base = 0;
+        std::string filename = get_file_name(dataset, is_base);
+        xq = fvecs_read(filename.c_str(), &d2, &nq);
+        assert(d == d2 || !"query does not have same dimension as expected 128");
+        if (d != d2) {
+            d = d2;
+        }
+
+        std::cout << "query vecs data loaded, with dim: " << d2 << ", nq=" << nq << std::endl;
+        printf("[%.3f s] Loaded query vectors from %s\n",
+               elapsed() - t0, filename.c_str());
+        aq = load_aq(dataset, n_centroids, alpha, N);
+        printf("[%.3f s] Loaded %ld %s queries\n",
+               elapsed() - t0, nq, dataset.c_str());
+    }
+
+    int gt_size = 100;
+    if (dataset == "sift1M_test" || dataset == "paper") {
+        gt_size = 10;
+    }
+    std::vector<faiss::idx_t> gt(gt_size * nq);
+    { // load ground truth
+        gt = load_gt(dataset, gamma, alpha, assignment_type, N);
+        printf("[%.3f s] Loaded ground truth, gt_size: %d\n",
+               elapsed() - t0, gt_size);
+    }
+
+    // create normal (base) and hybrid index
+    printf("[%.3f s] Index Params -- d: %ld, M: %d, N: %ld, gamma: %d\n",
+           elapsed() - t0, d, M, N, gamma);
+    // base HNSW index
+    faiss::IndexHNSWFlat base_index(d, M, 1); // gamma = 1
+    base_index.hnsw.efConstruction = efc; // default is 40 in HNSW.cpp
+    base_index.hnsw.efSearch = efs;       // default is 16 in HNSW.cpp
+
+    // ACORN-gamma
+    faiss::IndexACORNFlat hybrid_index(d, M, gamma, metadata, M_beta);
+    hybrid_index.acorn.efSearch = efs; // default is 16 in HybridHNSW.cpp
+    debug("ACORN index created%s\n", "");
+
+    // ACORN-1
+    faiss::IndexACORNFlat hybrid_index_gamma1(d, M, 1, metadata, M * 2);
+    hybrid_index_gamma1.acorn.efSearch = efs; // default is 16 in HybridHNSW.cpp
+
+    { // populating the database
+        std::cout << "====================Vectors====================\n" << std::endl;
+
+        printf("[%.3f s] Loading database\n", elapsed() - t0);
+
+        size_t nb, d2;
+        bool is_base = 1;
+        std::string filename = get_file_name(dataset, is_base);
+        float* xb = fvecs_read(filename.c_str(), &d2, &nb);
+        assert(d == d2 || !"dataset does not have dim 128 as expected");
+        printf("[%.3f s] Loaded base vectors from file: %s\n",
+               elapsed() - t0, filename.c_str());
+
+        std::cout << "data loaded, with dim: " << d2 << ", nb=" << nb << std::endl;
+
+        printf("[%.3f s] Indexing database, size %ld*%ld from max %ld\n",
+               elapsed() - t0, N, d2, nb);
+
+        printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
+
+        base_index.add(N, xb);
+        printf("[%.3f s] Vectors added to base index\n", elapsed() - t0);
+        std::cout << "Base index vectors added: " << N << std::endl;
+
+        hybrid_index.add(N, xb);
+        printf("[%.3f s] Vectors added to hybrid index\n", elapsed() - t0);
+        std::cout << "Hybrid index vectors added: " << N << std::endl;
+
+        hybrid_index_gamma1.add(N, xb);
+        printf("[%.3f s] Vectors added to hybrid index with gamma=1\n",
+               elapsed() - t0);
+        std::cout << "Hybrid index with gamma=1 vectors added: " << N << std::endl;
+
+        delete[] xb;
+    }
+
+    // write hybrid index and partition indices to files
+    {
+        std::cout << "====================Write Index====================\n" << std::endl;
+        // write hybrid index
+        std::stringstream filepath_stream;
+        if (dataset == "sift1M" || dataset == "sift1B") {
+            filepath_stream << "./tmp/hybrid_" << (int)(N / 1000 / 1000)
+                            << "m_nc=" << n_centroids
+                            << "_assignment=" << assignment_type
+                            << "_alpha=" << alpha << ".json";
+        } else {
+            filepath_stream << "./tmp/" << dataset << "/hybrid"
+                            << "_M=" << M << "_efc" << efc << "_Mb=" << M_beta
+                            << "_gamma=" << gamma << ".json";
+        }
+        std::string filepath = filepath_stream.str();
+        write_index(&hybrid_index, filepath.c_str());
+        printf("[%.3f s] Wrote hybrid index to file: %s\n",
+               elapsed() - t0, filepath.c_str());
+
+        // write hybrid_gamma1 index
+        std::stringstream filepath_stream2;
+        if (dataset == "sift1M" || dataset == "sift1B") {
+            filepath_stream2 << "./tmp/hybrid_gamma1_" << (int)(N / 1000 / 1000)
+                             << "m_nc=" << n_centroids
+                             << "_assignment=" << assignment_type
+                             << "_alpha=" << alpha << ".json";
+        } else {
+            filepath_stream2 << "./tmp/" << dataset << "/hybrid"
+                             << "_M=" << M << "_efc" << efc << "_Mb=" << M_beta
+                             << "_gamma=" << 1 << ".json";
+        }
+        std::string filepath2 = filepath_stream2.str();
+        write_index(&hybrid_index_gamma1, filepath2.c_str());
+        printf("[%.3f s] Wrote hybrid_gamma1 index to file: %s\n",
+               elapsed() - t0, filepath2.c_str());
+
+        { // write base index
+            std::stringstream filepath_stream;
+            if (dataset == "sift1M" || dataset == "sift1B") {
+                filepath_stream << "./tmp/base_" << (int)(N / 1000 / 1000)
+                                << "m_nc=" << n_centroids
+                                << "_assignment=" << assignment_type
+                                << "_alpha=" << alpha << ".json";
+            } else {
+                filepath_stream << "./tmp/" << dataset << "/base"
+                                << "_M=" << M << "_efc=" << efc << ".json";
+            }
+            std::string filepath = filepath_stream.str();
+            write_index(&base_index, filepath.c_str());
+            printf("[%.3f s] Wrote base index to file: %s\n",
+                   elapsed() - t0, filepath.c_str());
+        }
+    }
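+    // The indices written above can be read back with Faiss's standard I/O
+    // (sketch; assumes this fork's read_index also round-trips ACORN
+    // indices, as the write_index calls above imply):
+    //
+    //   faiss::Index* loaded = faiss::read_index("<filepath written above>");
+    //   auto* acorn = dynamic_cast<faiss::IndexACORNFlat*>(loaded);
+    //   // ... acorn->acorn.efSearch, acorn->search(...), etc. ...
+    //   delete loaded;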
+    { // print out stats
+        printf("====================================\n");
+        printf("============ BASE INDEX ============\n");
+        printf("====================================\n");
+        base_index.printStats(false);
+        printf("====================================\n");
+        printf("============ ACORN INDEX ===========\n");
+        printf("====================================\n");
+        hybrid_index.printStats(false);
+    }
+
+    printf("==============================================\n");
+    printf("====================Search====================\n");
+    printf("==============================================\n");
+    double t1 = elapsed();
+
+    { // searching the base database
+        printf("====================HNSW INDEX====================\n");
+        printf("[%.3f s] Searching the %d nearest neighbors "
+               "of %ld vectors in the index, efsearch %d\n",
+               elapsed() - t0,
+               k,
+               nq,
+               base_index.hnsw.efSearch);
+
+        std::vector<faiss::idx_t> nns(k * nq);
+        std::vector<float> dis(k * nq);
+
+        std::cout << "nn and dis size: " << nns.size() << " " << dis.size() << std::endl;
+
+        double t1 = elapsed();
+        base_index.search(nq, xq, k, dis.data(), nns.data());
+        double t2 = elapsed();
+
+        printf("[%.3f s] Query results (vector ids, then distances):\n",
+               elapsed() - t0);
+
+        // print at most 5 queries
+        int nq_print = std::min(5, (int)nq);
+        for (int i = 0; i < nq_print; i++) {
+            printf("query %2d nn's: ", i);
+            for (int j = 0; j < k; j++) {
+                printf("%7ld (%d) ", nns[j + i * k], metadata[nns[j + i * k]]);
+            }
+            printf("\n     dis: \t");
+            for (int j = 0; j < k; j++) {
+                printf("%7g ", dis[j + i * k]);
+            }
+            printf("\n");
+        }
+
+        printf("[%.3f s] *** Query time: %f\n",
+               elapsed() - t0, t2 - t1);
+
+        std::cout << "finished base index examples" << std::endl;
+    }
+
+    { // look at stats
+        const faiss::HNSWStats& stats = faiss::hnsw_stats;
+
+        std::cout << "============= BASE HNSW QUERY PROFILING STATS =============" << std::endl;
+        printf("[%.3f s] Timing results for search of k=%d nearest neighbors of nq=%ld vectors in the index\n",
+               elapsed() - t0,
+               k,
+               nq);
+        std::cout << "n1: " << stats.n1 << std::endl;
+        std::cout << "n2: " << stats.n2 << std::endl;
+        std::cout << "n3 (number distance comps at level 0): " << stats.n3 << std::endl;
+        std::cout << "ndis: " << stats.ndis << std::endl;
+        std::cout << "nreorder: " << stats.nreorder << std::endl;
+        printf("average distance computations per query: %f\n",
+               (float)stats.n3 / stats.n1);
+    }
+
+    { // searching the hybrid database
+        printf("==================== ACORN INDEX ====================\n");
+        printf("[%.3f s] Searching the %d nearest neighbors "
+               "of %ld vectors in the index, efsearch %d\n",
+               elapsed() - t0,
+               k,
+               nq,
+               hybrid_index.acorn.efSearch);
+
+        std::vector<faiss::idx_t> nns2(k * nq);
+        std::vector<float> dis2(k * nq);
+
+        // create filter_ids_map, ie a bitmap of the ids that pass each
+        // query's attribute filter
+        std::vector<char> filter_ids_map(nq * N);
+        for (int xq = 0; xq < nq; xq++) {
+            for (int xb = 0; xb < N; xb++) {
+                filter_ids_map[xq * N + xb] = (bool)(metadata[xb] == aq[xq]);
+            }
+        }
+
+        double t1_x = elapsed();
+        hybrid_index.search(nq, xq, k, dis2.data(), nns2.data(), filter_ids_map.data());
+        double t2_x = elapsed();
+
+        printf("[%.3f s] Query results (vector ids, then distances):\n",
+               elapsed() - t0);
+
+        int nq_print = std::min(5, (int)nq);
+        for (int i = 0; i < nq_print; i++) {
+            printf("query %2d nn's (%d): ", i, aq[i]);
+            for (int j = 0; j < k; j++) {
+                printf("%7ld (%d) ", nns2[j + i * k], metadata[nns2[j + i * k]]);
+            }
+            printf("\n     dis: \t");
+            for (int j = 0; j < k; j++) {
+                printf("%7g ", dis2[j + i * k]);
+            }
+            printf("\n");
+        }
+
+        printf("[%.3f s] *** Query time: %f\n",
+               elapsed() - t0, t2_x - t1_x);
+
+        std::cout << "finished hybrid index examples" << std::endl;
+    }
+
+    { // look at stats
+        const faiss::ACORNStats& stats = faiss::acorn_stats;
+
+        std::cout << "============= ACORN QUERY PROFILING STATS =============" << std::endl;
+        printf("[%.3f s] Timing results for search of k=%d nearest neighbors of nq=%ld vectors in the index\n",
+               elapsed() - t0,
+               k,
+               nq);
+        std::cout << "n1: " << stats.n1 << std::endl;
+        std::cout << "n2: " << stats.n2 << std::endl;
+        std::cout << "n3 (number distance comps at level 0): " << stats.n3 << std::endl;
+        std::cout << "ndis: " << stats.ndis << std::endl;
+        std::cout << "nreorder: " << stats.nreorder << std::endl;
+        printf("average distance computations per query: %f\n",
+               (float)stats.n3 / stats.n1);
+    }
+
+    printf("[%.3f s] -----DONE-----\n", elapsed() - t0);
+}
\ No newline at end of file
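test_acorn.cpp loads the ground truth but stops short of reporting recall. A minimal sketch of how the compute_recall helper from demos/utils.cpp (below) could be wired in, assuming the demo's variable layout (gt, gt_size, nns2, nq, k, gamma); not part of the patch:

```cpp
// report_recall.cpp (hypothetical): Recall@10 for results laid out as in
// test_acorn.cpp, i.e. k ids per query in row-major order.
#include <cstdio>
#include <vector>
#include "utils.cpp" // brings in faiss::idx_t and compute_recall

void report_recall(std::vector<faiss::idx_t>& gt, int gt_size,
                   std::vector<faiss::idx_t>& nns, int nq, int k, int gamma) {
    // compute_recall keeps k/gamma ground-truth ids per query and counts
    // how many returned ids fall in that set
    float r10 = compute_recall(gt, gt_size, nns, nq, k, gamma);
    std::printf("Recall@10: %.4f\n", r10);
}
```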
diff --git a/demos/utils.cpp b/demos/utils.cpp
new file mode 100644
index 0000000..e3455be
--- /dev/null
+++ b/demos/utils.cpp
@@ -0,0 +1,563 @@
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+#include <cmath>
+
+#include <sys/time.h>
+#include <sys/stat.h>
+
+#include <faiss/Index.h>
+#include <faiss/IndexHNSW.h>
+#include <faiss/IndexACORN.h>
+#include <faiss/index_io.h>
+
+// added these
+#include <iostream>
+#include <sstream> // for ostringstream
+#include <fstream>
+#include <string>
+#include <set>
+#include <thread>
+#include <numeric> // for std::accumulate
+#include <assert.h> /* assert */
+
+#include <nlohmann/json.hpp>
+// for convenience
+using json = nlohmann::json;
+
+/**
+ * To run this demo, please download the ANN_SIFT1M dataset from
+ *
+ *   http://corpus-texmex.irisa.fr/
+ *
+ *   -> wget -r ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz
+ *   -> cd ftp.irisa.fr/local/texmex/corpus
+ *   -> tar -xf sift.tar.gz
+ *
+ * and unzip it to the subdirectory sift1M.
+ **/
+
+// MACRO
+#define TESTING_DATA_DIR "./testing_data"
+
+/*****************************************************
+ * I/O functions for fvecs and ivecs
+ *****************************************************/
+
+bool fileExists(const std::string& filePath) {
+    std::ifstream file(filePath);
+    return file.good();
+}
+
+float* fvecs_read(const char* fname, size_t* d_out, size_t* n_out) {
+    FILE* f = fopen(fname, "r");
+    if (!f) {
+        fprintf(stderr, "could not open %s\n", fname);
+        perror("");
+        abort();
+    }
+    int d;
+    fread(&d, 1, sizeof(int), f);
+    assert((d > 0 && d < 1000000) || !"unreasonable dimension");
+    fseek(f, 0, SEEK_SET);
+    struct stat st;
+    fstat(fileno(f), &st);
+    size_t sz = st.st_size;
+    assert(sz % ((d + 1) * 4) == 0 || !"weird file size");
+    size_t n = sz / ((d + 1) * 4);
+
+    *d_out = d;
+    *n_out = n;
+    float* x = new float[n * (d + 1)];
+    size_t nr = fread(x, sizeof(float), n * (d + 1), f);
+    assert(nr == n * (d + 1) || !"could not read whole file");
+
+    // shift array to remove row headers
+    for (size_t i = 0; i < n; i++)
+        memmove(x + i * d, x + 1 + i * (d + 1), d * sizeof(*x));
+
+    fclose(f);
+    return x;
+}
+
+// not very clean, but works as long as sizeof(int) == sizeof(float)
+int* ivecs_read(const char* fname, size_t* d_out, size_t* n_out) {
+    return (int*)fvecs_read(fname, d_out, n_out);
+}
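+
+// Example use of fvecs_read (sketch): each fvecs record is an int32
+// dimension header followed by d floats; the headers are stripped and the
+// buffer is allocated with new[], so free it with delete[].
+//
+//   size_t d, nb;
+//   float* xb = fvecs_read("./Datasets/sift1M/sift_base.fvecs", &d, &nb);
+//   // xb[i * d + j] is component j of vector i
+//   delete[] xb;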
"base_vecs_tripclick" : "query_vecs_tripclick_min100") + ".fvecs"; + } else if (dataset == "paper" || dataset == "paper_rand2m") { + return std::string("./Datasets/paper/") + (is_base ? "paper_base" : "paper_query") + ".fvecs"; + } else { + std::cerr << "Invalid datset in get_file_name" << std::endl; + return ""; + } +} + +// return name is in arg file_path +void get_index_name(int N, int n_centroids, std::string assignment_type, float alpha, int M_beta, std::string& file_path) { + std::stringstream filepath_stream; + filepath_stream << "./tmp/hybrid_" << (int) (N / 1000 / 1000) << "m_nc=" << n_centroids << "_assignment=" << assignment_type << "_alpha=" << alpha << "Mb=" << M_beta << ".json"; + // copy filepath_stream to file_path + file_path = filepath_stream.str(); +} + + + + + +/******************************************************* + * Added for debugging + *******************************************************/ +const int debugFlag = 1; + +void debugTime() { + if (debugFlag) { + struct timeval tval; + gettimeofday(&tval, NULL); + struct tm *tm_info = localtime(&tval.tv_sec); + char timeBuff[25] = ""; + strftime(timeBuff, 25, "%H:%M:%S", tm_info); + char timeBuffWithMilli[50] = ""; + sprintf(timeBuffWithMilli, "%s.%06ld ", timeBuff, tval.tv_usec); + std::string timestamp(timeBuffWithMilli); + std::cout << timestamp << std::flush; + } +} + +//needs atleast 2 args always +// alt debugFlag = 1 // fprintf(stderr, fmt, __VA_ARGS__); +#define debug(fmt, ...) \ + do { \ + if (debugFlag == 1) { \ + fprintf(stdout, "--" fmt, __VA_ARGS__);\ + } \ + if (debugFlag == 2) { \ + debugTime(); \ + fprintf(stdout, "%s:%d:%s(): " fmt, __FILE__, __LINE__, __func__, __VA_ARGS__); \ + } \ + } while (0) + + + +double elapsed() { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + tv.tv_usec * 1e-6; +} + +/******************************************************* + * performance testing helpers + *******************************************************/ +std::pair get_mean_and_std(std::vector& times) { + // compute mean + float total = 0; + // for (int num: times) { + for (int i=0; i < times.size(); i++) { + // printf("%f, ", times[i]); // for debugging + total = total + times[i]; + } + float mean = (total / times.size()); + + // compute stdev from variance, using computed mean + float result = 0; + for (int i=0; i < times.size(); i++) { + result = result + (times[i] - mean)*(times[i] - mean); + } + float variance = result / (times.size() - 1); + // for debugging + // printf("variance: %f\n", variance); + + float std = std::sqrt(variance); + + // return + return std::make_pair(mean, std); +} + + + + +// ground truth labels @gt, results to evaluate @I with @nq queries, returns @gt_size-Recall@k where gt had max gt_size NN's per query +float compute_recall(std::vector& gt, int gt_size, std::vector& I, int nq, int k, int gamma=1) { + // printf("compute_recall params: gt.size(): %ld, gt_size: %d, I.size(): %ld, nq: %d, k: %d, gamma: %d\n", gt.size(), gt_size, I.size(), nq, k, gamma); + + int n_1 = 0, n_10 = 0, n_100 = 0; + for (int i = 0; i < nq; i++) { // loop over all queries + // int gt_nn = gt[i * k]; + std::vector::const_iterator first = gt.begin() + i*gt_size; + std::vector::const_iterator last = gt.begin() + i*gt_size + (k / gamma); + std::vector gt_nns_tmp(first, last); + // if (gt_nns_tmp.size() > 10) { + // printf("gt_nns size: %ld\n", gt_nns_tmp.size()); + // } + + // gt_nns_tmp.resize(k); // truncate if gt_size > k + std::set gt_nns(gt_nns_tmp.begin(), gt_nns_tmp.end()); + // if 
+
+// ground truth labels @gt, results to evaluate @I with @nq queries; returns
+// Recall@k, where gt holds up to gt_size NN ids per query
+float compute_recall(std::vector<faiss::idx_t>& gt, int gt_size, std::vector<faiss::idx_t>& I, int nq, int k, int gamma = 1) {
+    int n_1 = 0, n_10 = 0, n_100 = 0;
+    for (int i = 0; i < nq; i++) { // loop over all queries
+        std::vector<faiss::idx_t>::const_iterator first = gt.begin() + i * gt_size;
+        std::vector<faiss::idx_t>::const_iterator last = gt.begin() + i * gt_size + (k / gamma);
+        std::vector<faiss::idx_t> gt_nns_tmp(first, last);
+        // gt_nns_tmp.resize(k); // truncate if gt_size > k
+        std::set<faiss::idx_t> gt_nns(gt_nns_tmp.begin(), gt_nns_tmp.end());
+
+        for (int j = 0; j < k; j++) { // iterate over returned nn results
+            if (gt_nns.count(I[i * k + j]) != 0) {
+                if (j < 1 * gamma)
+                    n_1++;
+                if (j < 10 * gamma)
+                    n_10++;
+                if (j < 100 * gamma)
+                    n_100++;
+            }
+        }
+    }
+    return n_10 / float(nq);
+}
+
+template <typename T>
+void log_values(std::string annotation, std::vector<T>& values) {
+    std::cout << annotation;
+    for (int i = 0; i < values.size(); i++) {
+        std::cout << values[i];
+        if (i < values.size() - 1) {
+            std::cout << ", ";
+        }
+    }
+    std::cout << std::endl;
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// FOR CORRELATION TESTING
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+template <typename T>
+std::vector<T> load_json_to_vector(std::string filepath) {
+    // Open the JSON file
+    std::ifstream file(filepath);
+    if (!file.is_open()) {
+        std::cerr << "Failed to open JSON file " << filepath << std::endl;
+    }
+
+    // Parse the JSON data
+    json data;
+    try {
+        file >> data;
+    } catch (const std::exception& e) {
+        std::cerr << "Failed to parse JSON data from " << filepath << ": " << e.what() << std::endl;
+    }
+
+    // Convert data to a vector
+    std::vector<T> v = data.get<std::vector<T>>();
+
+    // print size
+    std::cout << "metadata or vector loaded from json, size: " << v.size() << std::endl;
+    return v;
+}
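+
+// The attribute, query-filter, and ground-truth files parsed by the loaders
+// below are flat JSON arrays. A file of that shape could be produced with
+// nlohmann::json (sketch, hypothetical values):
+//
+//   json j = std::vector<int>{3, 7, 1, 9};
+//   std::ofstream out(std::string(TESTING_DATA_DIR) + "/sift_attr.json");
+//   out << j; // writes [3,7,1,9]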
+
+std::vector<int> load_aq(std::string dataset, int n_centroids, int alpha, int N) {
+    if (dataset == "sift1M" || dataset == "sift1B") {
+        assert((alpha == -2 || alpha == 0 || alpha == 2) || !"alpha must be a value in [-2, 0, 2]");
+
+        // Compose File Name
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/query_filters_sift" << (int)(N / 1000 / 1000)
+                        << "m_nc=" << n_centroids << "_alpha=" << alpha << ".json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v = load_json_to_vector<int>(filepath);
+        printf("loaded query attributes from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "tripclick") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/query_filters_tripclick_sample_subset_min100.json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v = load_json_to_vector<int>(filepath);
+        printf("loaded query attributes from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "sift1M_test" || dataset == "paper") {
+        // return a vector of N query filters, all with attribute value 5
+        std::vector<int> v(N, 5);
+        printf("made query filters with value %d, length %ld\n", v[0], v.size());
+        return v;
+    } else if (dataset == "paper_rand2m") {
+        // Compose File Name
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/query_filters_paper_rand2m_nc=12_alpha=0.json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v = load_json_to_vector<int>(filepath);
+        printf("loaded query attributes from: %s\n", filepath.c_str());
+        return v;
+    } else {
+        std::cerr << "Invalid dataset in load_aq" << std::endl;
+        return std::vector<int>();
+    }
+}
+
+// assignment_type can be "rand", "soft", "soft_squared", "hard"
+std::vector<int> load_ab(std::string dataset, int n_centroids, std::string assignment_type, int N) {
+    // Compose File Name
+    if (dataset == "sift1M" || dataset == "sift1B") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/base_attrs_sift" << (int)(N / 1000 / 1000)
+                        << "m_nc=" << n_centroids << "_assignment=" << assignment_type << ".json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v = load_json_to_vector<int>(filepath);
+        printf("loaded base attributes from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "sift1M_test") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/sift_attr" << ".json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v = load_json_to_vector<int>(filepath);
+        printf("loaded base attributes from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "paper") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/paper_attr.json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v = load_json_to_vector<int>(filepath);
+        printf("loaded base attributes from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "paper_rand2m") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/base_attrs_paper_rand2m_nc=12_assignment=rand.json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v = load_json_to_vector<int>(filepath);
+        printf("loaded base attributes from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "tripclick") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/base_attrs_tripclick.json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v = load_json_to_vector<int>(filepath);
+        printf("loaded base attributes from: %s\n", filepath.c_str());
+        return v;
+    } else {
+        std::cerr << "Invalid dataset in load_ab" << std::endl;
+        return std::vector<int>();
+    }
+}
+
+// assignment_type can be "rand", "soft", "soft_squared", "hard"
+// alpha can be -2, 0, 2
+std::vector<faiss::idx_t> load_gt(std::string dataset, int n_centroids, int alpha, std::string assignment_type, int N) {
+    if (dataset == "sift1M" || dataset == "sift1B") {
+        // Compose File Name
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/gt_sift" << (int)(N / 1000 / 1000)
+                        << "m_nc=" << n_centroids << "_assignment=" << assignment_type
+                        << "_alpha=" << alpha << ".json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v_tmp = load_json_to_vector<int>(filepath);
+        std::vector<faiss::idx_t> v(v_tmp.begin(), v_tmp.end());
+        printf("loaded gt from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "sift1M_test") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/sift_gt_5.json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v_tmp = load_json_to_vector<int>(filepath);
+        std::vector<faiss::idx_t> v(v_tmp.begin(), v_tmp.end());
+        printf("loaded gt from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "paper") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/paper_gt_5.json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v_tmp = load_json_to_vector<int>(filepath);
+        std::vector<faiss::idx_t> v(v_tmp.begin(), v_tmp.end());
+        printf("loaded gt from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "paper_rand2m") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/gt_paper_rand2m_nc=12_assignment=rand_alpha=0.json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v_tmp = load_json_to_vector<int>(filepath);
+        std::vector<faiss::idx_t> v(v_tmp.begin(), v_tmp.end());
+        printf("loaded gt from: %s\n", filepath.c_str());
+        return v;
+    } else if (dataset == "tripclick") {
+        std::stringstream filepath_stream;
+        filepath_stream << TESTING_DATA_DIR << "/gt_tripclick_sample_subset_min100.json";
+        std::string filepath = filepath_stream.str();
+
+        std::vector<int> v_tmp = load_json_to_vector<int>(filepath);
+        std::vector<faiss::idx_t> v(v_tmp.begin(), v_tmp.end());
+        printf("loaded gt from: %s\n", filepath.c_str());
+        return v;
+    } else {
+        std::cerr << "Invalid dataset in load_gt" << std::endl;
+        return std::vector<faiss::idx_t>();
+    }
+}