From ff48d9733c3cabef67c831f63fb4f03bd0668c13 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Thu, 30 Jan 2025 23:28:28 +0100
Subject: [PATCH] tweaks

Raise BLOOM1_HASHES from 3 to 4 and move the per-index hash computation
into a shared inline helper, bloom1_get_one_hash(), which is now used by
both the bloom filter builder and ts_bloom1_matches(). The helper derives
two hashes from the value hash with hash_combine() and the two seeds, and
returns h1 + index * h2 + index * index.

Address the filter as 64-bit words instead of bytes on both sides, and
assert that the number of bits is a multiple of 64. The builder no longer
counts the bits it sets.

Build the bit mask as (uint64) 1 << bit on both sides. The words are
uint64 and the bit index can be as large as 63, while 0x01 << bit is
evaluated as int and is undefined for shift counts that do not fit.

---
Review notes (not part of the commit message):

 * The builder and ts_bloom1_matches() must keep using the same mask
   expression, otherwise the matcher tests a different bit than the one
   the builder set.
 * NOTE(review): the expected output in compress_bloom_sparse.out was
   generated with the previous mask expression and may need to be
   regenerated -- confirm by running the test.
 * NOTE(review): both sides read the bytea payload through a uint64
   pointer; confirm the payload is suitably aligned for that on all
   supported platforms.

 src/utils/bloom1_sparse_index_params.h        | 12 +++++++-
 src/utils/ts_bloom1_matches.c                 | 26 +++++++-----------
 .../batch_metadata_builder_bloom1.c           | 28 +++++++------------
 tsl/test/expected/compress_bloom_sparse.out   |  8 +++---
 4 files changed, 35 insertions(+), 39 deletions(-)

diff --git a/src/utils/bloom1_sparse_index_params.h b/src/utils/bloom1_sparse_index_params.h
index 5ac32bf03de..8a745ebfd4c 100644
--- a/src/utils/bloom1_sparse_index_params.h
+++ b/src/utils/bloom1_sparse_index_params.h
@@ -5,6 +5,16 @@
  */
 #pragma once
 
-#define BLOOM1_HASHES 3
+#include <common/hashfn.h>
+
+#define BLOOM1_HASHES 4
 #define BLOOM1_SEED_1 0x71d924af
 #define BLOOM1_SEED_2 0xba48b314
+
+static inline uint32
+bloom1_get_one_hash(uint32 value_hash, uint32 index)
+{
+	const uint32 h1 = hash_combine(value_hash, BLOOM1_SEED_1);
+	const uint32 h2 = hash_combine(value_hash, BLOOM1_SEED_2);
+	return h1 + index * h2 + index * index;
+}
diff --git a/src/utils/ts_bloom1_matches.c b/src/utils/ts_bloom1_matches.c
index f24342d301b..093a2395978 100644
--- a/src/utils/ts_bloom1_matches.c
+++ b/src/utils/ts_bloom1_matches.c
@@ -20,11 +20,6 @@ TS_FUNCTION_INFO_V1(ts_bloom1_matches);
 Datum
 ts_bloom1_matches(PG_FUNCTION_ARGS)
 {
-	bytea *bloom = PG_GETARG_VARLENA_PP(0);
-	Datum val = PG_GETARG_DATUM(1);
-
-	const int nbits = VARSIZE_ANY_EXHDR(bloom) * 8;
-
 	Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
 	Ensure(OidIsValid(val_type), "cannot determine argument type");
 	TypeCacheEntry *val_entry = lookup_type_cache(val_type, TYPECACHE_HASH_PROC);
@@ -32,22 +27,21 @@ ts_bloom1_matches(PG_FUNCTION_ARGS)
 	const Oid hash_proc_oid = val_entry->hash_proc;
 
 	/* compute the hashes, used for the bloom filter */
-	uint32 datum_hash = DatumGetUInt32(OidFunctionCall1Coll(hash_proc_oid, C_COLLATION_OID, val));
-	uint32 h1 = hash_bytes_uint32_extended(datum_hash, BLOOM1_SEED_1) % nbits;
-	uint32 h2 = hash_bytes_uint32_extended(datum_hash, BLOOM1_SEED_2) % nbits;
+	Datum val = PG_GETARG_DATUM(1);
+	const uint32 datum_hash =
+		DatumGetUInt32(OidFunctionCall1Coll(hash_proc_oid, C_COLLATION_OID, val));
 
 	/* compute the requested number of hashes */
-	const char *words = VARDATA_ANY(bloom);
+	bytea *bloom = PG_GETARG_VARLENA_PP(0);
+	const int nbits = VARSIZE_ANY_EXHDR(bloom) * 8;
+	const uint64 *words = (const uint64 *) VARDATA_ANY(bloom);
 	const int word_bits = sizeof(*words) * 8;
 	bool match = true;
 	for (int i = 0; i < BLOOM1_HASHES; i++)
 	{
-		/* h1 + h2 + f(i) */
-		uint32 h = (h1 + i * h2) % nbits;
-		uint32 word_index = (h / word_bits);
-		uint32 bit = (h % word_bits);
-
-		/* if the bit is not set, set it and remember we did that */
-		match = (words[word_index] & (0x01 << bit)) && match;
+		const uint32 h = bloom1_get_one_hash(datum_hash, i) % nbits;
+		const uint32 word_index = (h / word_bits);
+		const uint32 bit = (h % word_bits);
+		match = (words[word_index] & ((uint64) 1 << bit)) && match;
 	}
 
diff --git a/tsl/src/compression/batch_metadata_builder_bloom1.c b/tsl/src/compression/batch_metadata_builder_bloom1.c
index 66bffe54ef6..42c1640ac90 100644
--- a/tsl/src/compression/batch_metadata_builder_bloom1.c
+++ b/tsl/src/compression/batch_metadata_builder_bloom1.c
@@ -76,7 +76,7 @@ batch_metadata_builder_bloom1_create(Oid type_oid, int bloom_attr_offset)
 		.nbits_set = 0,
 	};
 
-	Assert(builder->nbits % 8 == 0);
+	Assert(builder->nbits % 64 == 0);
 	const int bytea_size = VARHDRSZ + builder->nbits / 8;
 	builder->bloom_bytea = palloc0(bytea_size);
 	SET_VARSIZE(builder->bloom_bytea, bytea_size);
@@ -89,30 +89,22 @@ bloom1_update_val(void *builder_, Datum val)
 {
 	Bloom1MetadataBuilder *builder = (Bloom1MetadataBuilder *) builder_;
 
-	const int nbits = builder->nbits;
 	const Oid hash_proc_oid = builder->hash_proc_oid;
 
 	/* compute the hashes, used for the bloom filter */
-	uint32 datum_hash =
-		DatumGetUInt32(OidFunctionCall1Coll(hash_proc_oid, /* collation = */ C_COLLATION_OID, val));
-	uint32 h1 = hash_bytes_uint32_extended(datum_hash, BLOOM1_SEED_1) % nbits;
-	uint32 h2 = hash_bytes_uint32_extended(datum_hash, BLOOM1_SEED_2) % nbits;
+	const uint32 datum_hash =
+		DatumGetUInt32(OidFunctionCall1Coll(hash_proc_oid, C_COLLATION_OID, val));
 
 	/* compute the requested number of hashes */
-	char *restrict words = VARDATA(builder->bloom_bytea);
+	const int nbits = builder->nbits;
+	uint64 *restrict words = (uint64 *restrict) VARDATA(builder->bloom_bytea);
+	const int word_bits = sizeof(*words) * 8;
 	for (int i = 0; i < BLOOM1_HASHES; i++)
 	{
-		/* h1 + h2 + f(i) */
-		uint32 h = (h1 + i * h2) % builder->nbits;
-		uint32 byte = (h / 8);
-		uint32 bit = (h % 8);
-
-		/* if the bit is not set, set it and remember we did that */
-		if (!(words[byte] & (0x01 << bit)))
-		{
-			words[byte] |= (0x01 << bit);
-			builder->nbits_set++;
-		}
+		const uint32 h = bloom1_get_one_hash(datum_hash, i) % nbits;
+		const uint32 byte = (h / word_bits);
+		const uint32 bit = (h % word_bits);
+		words[byte] |= ((uint64) 1 << bit);
 	}
 }
 
diff --git a/tsl/test/expected/compress_bloom_sparse.out b/tsl/test/expected/compress_bloom_sparse.out
index eeef58d7409..a8e79e7c889 100644
--- a/tsl/test/expected/compress_bloom_sparse.out
+++ b/tsl/test/expected/compress_bloom_sparse.out
@@ -53,13 +53,13 @@ select count(*) from bloom where value = md5(7248::text);
    Output: count(*)
    ->  Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk (actual rows=1 loops=1)
          Vectorized Filter: (_hyper_1_1_chunk.value = '1f4183315762e30ea441d3caef5e64ad'::text)
-         Rows Removed by Filter: 1999
-         Batches Removed by Filter: 1
+         Rows Removed by Filter: 2999
+         Batches Removed by Filter: 2
          Bulk Decompression: true
-         ->  Seq Scan on _timescaledb_internal.compress_hyper_2_2_chunk (actual rows=2 loops=1)
+         ->  Seq Scan on _timescaledb_internal.compress_hyper_2_2_chunk (actual rows=3 loops=1)
                Output: compress_hyper_2_2_chunk._ts_meta_count, compress_hyper_2_2_chunk._ts_meta_min_1, compress_hyper_2_2_chunk._ts_meta_max_1, compress_hyper_2_2_chunk.ts, compress_hyper_2_2_chunk._ts_meta_v2_bloom1_value, compress_hyper_2_2_chunk.value
                Filter: _timescaledb_functions.ts_bloom1_matches(compress_hyper_2_2_chunk._ts_meta_v2_bloom1_value, '1f4183315762e30ea441d3caef5e64ad'::text)
-               Rows Removed by Filter: 8
+               Rows Removed by Filter: 7
 (11 rows)
 
 select count(*) from bloom where value = md5(7248::text);