Skip to content

Commit

Permalink
tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
akuzm committed Jan 30, 2025
1 parent e27915e commit ff48d97
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 38 deletions.
12 changes: 11 additions & 1 deletion src/utils/bloom1_sparse_index_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@
*/
#pragma once

#define BLOOM1_HASHES 3
#include <common/hashfn.h>

#define BLOOM1_HASHES 4
#define BLOOM1_SEED_1 0x71d924af
#define BLOOM1_SEED_2 0xba48b314

/*
 * Derive the index-th bloom filter hash from a single 32-bit value hash.
 *
 * Two independent base hashes are produced by combining the value hash with
 * fixed seeds; successive filter hashes are then generated from them using
 * enhanced double hashing (base + i * step + i^2), which avoids computing a
 * separate full hash per bloom bit.
 */
static inline uint32
bloom1_get_one_hash(uint32 value_hash, uint32 index)
{
	const uint32 base = hash_combine(value_hash, BLOOM1_SEED_1);
	const uint32 step = hash_combine(value_hash, BLOOM1_SEED_2);

	/* uint32 arithmetic wraps on overflow, which is fine for hashing. */
	return base + index * step + index * index;
}
24 changes: 9 additions & 15 deletions src/utils/ts_bloom1_matches.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,34 +20,28 @@ TS_FUNCTION_INFO_V1(ts_bloom1_matches);
Datum
ts_bloom1_matches(PG_FUNCTION_ARGS)
{
bytea *bloom = PG_GETARG_VARLENA_PP(0);
Datum val = PG_GETARG_DATUM(1);

const int nbits = VARSIZE_ANY_EXHDR(bloom) * 8;

Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
Ensure(OidIsValid(val_type), "cannot determine argument type");
TypeCacheEntry *val_entry = lookup_type_cache(val_type, TYPECACHE_HASH_PROC);
Ensure(OidIsValid(val_entry->hash_proc), "cannot determine hash function");
const Oid hash_proc_oid = val_entry->hash_proc;

/* compute the hashes, used for the bloom filter */
uint32 datum_hash = DatumGetUInt32(OidFunctionCall1Coll(hash_proc_oid, C_COLLATION_OID, val));
uint32 h1 = hash_bytes_uint32_extended(datum_hash, BLOOM1_SEED_1) % nbits;
uint32 h2 = hash_bytes_uint32_extended(datum_hash, BLOOM1_SEED_2) % nbits;
Datum val = PG_GETARG_DATUM(1);
const uint32 datum_hash =
DatumGetUInt32(OidFunctionCall1Coll(hash_proc_oid, C_COLLATION_OID, val));

/* compute the requested number of hashes */
const char *words = VARDATA_ANY(bloom);
bytea *bloom = PG_GETARG_VARLENA_PP(0);
const int nbits = VARSIZE_ANY_EXHDR(bloom) * 8;
const uint64 *words = (const uint64 *) VARDATA_ANY(bloom);
const int word_bits = sizeof(*words) * 8;
bool match = true;
for (int i = 0; i < BLOOM1_HASHES; i++)
{
/* h1 + h2 + f(i) */
uint32 h = (h1 + i * h2) % nbits;
uint32 word_index = (h / word_bits);
uint32 bit = (h % word_bits);

/* if the bit is not set, set it and remember we did that */
const uint32 h = bloom1_get_one_hash(datum_hash, i) % nbits;
const uint32 word_index = (h / word_bits);
const uint32 bit = (h % word_bits);
match = (words[word_index] & (0x01 << bit)) && match;
}

Expand Down
28 changes: 10 additions & 18 deletions tsl/src/compression/batch_metadata_builder_bloom1.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ batch_metadata_builder_bloom1_create(Oid type_oid, int bloom_attr_offset)
.nbits_set = 0,
};

Assert(builder->nbits % 8 == 0);
Assert(builder->nbits % 64 == 0);
const int bytea_size = VARHDRSZ + builder->nbits / 8;
builder->bloom_bytea = palloc0(bytea_size);
SET_VARSIZE(builder->bloom_bytea, bytea_size);
Expand All @@ -89,30 +89,22 @@ bloom1_update_val(void *builder_, Datum val)
{
Bloom1MetadataBuilder *builder = (Bloom1MetadataBuilder *) builder_;

const int nbits = builder->nbits;
const Oid hash_proc_oid = builder->hash_proc_oid;

/* compute the hashes, used for the bloom filter */
uint32 datum_hash =
DatumGetUInt32(OidFunctionCall1Coll(hash_proc_oid, /* collation = */ C_COLLATION_OID, val));
uint32 h1 = hash_bytes_uint32_extended(datum_hash, BLOOM1_SEED_1) % nbits;
uint32 h2 = hash_bytes_uint32_extended(datum_hash, BLOOM1_SEED_2) % nbits;
const uint32 datum_hash =
DatumGetUInt32(OidFunctionCall1Coll(hash_proc_oid, C_COLLATION_OID, val));

/* compute the requested number of hashes */
char *restrict words = VARDATA(builder->bloom_bytea);
const int nbits = builder->nbits;
uint64 *restrict words = (uint64 *restrict) VARDATA(builder->bloom_bytea);
const int word_bits = sizeof(*words) * 8;
for (int i = 0; i < BLOOM1_HASHES; i++)
{
/* h1 + h2 + f(i) */
uint32 h = (h1 + i * h2) % builder->nbits;
uint32 byte = (h / 8);
uint32 bit = (h % 8);

/* if the bit is not set, set it and remember we did that */
if (!(words[byte] & (0x01 << bit)))
{
words[byte] |= (0x01 << bit);
builder->nbits_set++;
}
const uint32 h = bloom1_get_one_hash(datum_hash, i) % nbits;
const uint32 byte = (h / word_bits);
const uint32 bit = (h % word_bits);
words[byte] |= (0x01 << bit);
}
}

Expand Down
8 changes: 4 additions & 4 deletions tsl/test/expected/compress_bloom_sparse.out
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,13 @@ select count(*) from bloom where value = md5(7248::text);
Output: count(*)
-> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk (actual rows=1 loops=1)
Vectorized Filter: (_hyper_1_1_chunk.value = '1f4183315762e30ea441d3caef5e64ad'::text)
Rows Removed by Filter: 1999
Batches Removed by Filter: 1
Rows Removed by Filter: 2999
Batches Removed by Filter: 2
Bulk Decompression: true
-> Seq Scan on _timescaledb_internal.compress_hyper_2_2_chunk (actual rows=2 loops=1)
-> Seq Scan on _timescaledb_internal.compress_hyper_2_2_chunk (actual rows=3 loops=1)
Output: compress_hyper_2_2_chunk._ts_meta_count, compress_hyper_2_2_chunk._ts_meta_min_1, compress_hyper_2_2_chunk._ts_meta_max_1, compress_hyper_2_2_chunk.ts, compress_hyper_2_2_chunk._ts_meta_v2_bloom1_value, compress_hyper_2_2_chunk.value
Filter: _timescaledb_functions.ts_bloom1_matches(compress_hyper_2_2_chunk._ts_meta_v2_bloom1_value, '1f4183315762e30ea441d3caef5e64ad'::text)
Rows Removed by Filter: 8
Rows Removed by Filter: 7
(11 rows)

select count(*) from bloom where value = md5(7248::text);
Expand Down

0 comments on commit ff48d97

Please sign in to comment.