I am using FALCONN on my own dataset, based on the example glove.py. The dataset has 10000 points, each with 1024 dimensions. When I used 8000 points of the dataset for queries, everything was OK, but when I set it to 9000 or more points, the process crashes with "Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)". My computer has 16 GB of RAM and memory usage is only 17%, so why does this happen? Thanks.
```python
import numpy as np
import falconn
import timeit
import pandas as pd
import sys
import psutil

if __name__ == '__main__':
    dataset_file = pd.read_csv('./feature_day_lr.csv', dtype=np.float32)
    # number_of_queries = 10000
    number_of_tables = 50
    dataset = np.array(dataset_file.iloc[:, 1:])  # dataset with 20000 rows and 1024 columns
    assert dataset.dtype == np.float32
    # Normalize all the lengths, since we care about the cosine similarity.
    print('Normalizing the dataset')
    dataset /= np.linalg.norm(dataset, axis=1).reshape(-1, 1)
    print('Done')
    # Choose random data points to be queries.
    print('Generating queries')
    np.random.seed(4057218)
    np.random.shuffle(dataset)
    # queries = dataset[:len(dataset) - number_of_queries]
    queries = dataset[0:100]
    dataset = dataset[10000:16000]
    print('Done')
    # Center the dataset and the queries: this improves the performance of LSH quite a bit.
    print('Centering the dataset and queries')
    center = np.mean(dataset, axis=0)
    dataset -= center
    queries -= center
    print('Done')

    params_cp = falconn.LSHConstructionParameters()
    params_cp.dimension = len(dataset[0])
    params_cp.lsh_family = falconn.LSHFamily.CrossPolytope
    params_cp.distance_function = falconn.DistanceFunction.EuclideanSquared
    params_cp.l = number_of_tables
    # We set one rotation, since the data is dense enough;
    # for sparse data, set it to 2.
    params_cp.num_rotations = 1
    params_cp.seed = 5721840
    # 0 means use all available threads for setup.
    params_cp.num_setup_threads = 0
    params_cp.storage_hash_table = falconn.StorageHashTable.BitPackedFlatHashTable
    # params_cp.storage_hash_table = falconn.StorageHashTable.LinearProbingHashTable
    # We build 15-bit hashes so that each table has 2^15 bins;
    # this is a good choice since 2^15 is of the same
    # order of magnitude as the number of data points.
    falconn.compute_number_of_hash_functions(15, params_cp)

    print('Constructing the LSH table')
    t1 = timeit.default_timer()
    table = falconn.LSHIndex(params_cp)
    table.setup(dataset)
    t2 = timeit.default_timer()
    print('Done')
    print('Construction time: {}'.format(t2 - t1))
    # Report the memory usage.
    info = psutil.virtual_memory()
    print(info.percent)
    # print(sys.getsizeof(table))
    # Initialize the query object.
    query_object = table.construct_query_object()
    print('Choosing number of probes')
    number_of_probes = number_of_tables
    query_object.set_num_probes(number_of_probes)
    # Final evaluation.
    t1 = timeit.default_timer()
    result = []
    for query in queries:
        result.append(query_object.find_k_nearest_neighbors(query, 5))
    t2 = timeit.default_timer()
    print('Query time: {}'.format((t2 - t1) / len(queries)))
```
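One thing worth ruling out before suspecting FALCONN itself: the question says the CSV holds 10000 points, yet the script slices `dataset = dataset[10000:16000]`, which would come out empty in that case. Below is a minimal, hypothetical sanity check (not part of the original script, and whether an empty or ill-typed input is the actual cause of the SIGSEGV is an assumption) that could be called just before `table.setup(dataset)`:

```python
import numpy as np

# Hypothetical pre-setup checks (not in the original script). The FALCONN
# Python wrapper takes a two-dimensional float32 NumPy matrix; these
# assertions verify the slice is non-empty, correctly shaped and typed,
# and C-contiguous before it is handed to table.setup().
def check_dataset(dataset, expected_dim):
    assert dataset.size > 0, 'dataset slice is empty'
    assert dataset.ndim == 2 and dataset.shape[1] == expected_dim, \
        'unexpected shape: {}'.format(dataset.shape)
    assert dataset.dtype == np.float32, 'dtype must be float32'
    assert dataset.flags['C_CONTIGUOUS'], 'array must be C-contiguous'

# Usage (before table.setup(dataset)):
# check_dataset(dataset, params_cp.dimension)
```

If the empty-slice assertion fires, the exit code 139 would be coming from indexing an empty matrix rather than from memory pressure, which would be consistent with the 17% memory usage observed.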