From ded5b85c629fe14652cd837bd9e1ec58938c0725 Mon Sep 17 00:00:00 2001 From: William Casarin Date: Sat, 23 Dec 2023 13:20:52 -0800 Subject: [PATCH] varint: switch to 64 bit varints --- src/cursor.h | 113 ++++++++++++++++++++--------------------------- src/nostrdb.c | 39 ++++++++-------- src/nostrdb.h | 2 +- src/print_util.h | 2 +- src/typedefs.h | 14 ------ test.c | 54 ++++++++++++++++++++++ 6 files changed, 123 insertions(+), 101 deletions(-) diff --git a/src/cursor.h b/src/cursor.h index 9c3d9bb..028f399 100644 --- a/src/cursor.h +++ b/src/cursor.h @@ -2,10 +2,10 @@ #ifndef JB55_CURSOR_H #define JB55_CURSOR_H -#include "typedefs.h" #include "bolt11/likely.h" #include +#include #include #include #include @@ -32,14 +32,14 @@ static inline void wipe_cursor(struct cursor *cursor) memset(cursor->start, 0, cursor->end - cursor->start); } -static inline void make_cursor(u8 *start, u8 *end, struct cursor *cursor) +static inline void make_cursor(unsigned char *start, unsigned char *end, struct cursor *cursor) { cursor->start = start; cursor->p = start; cursor->end = end; } -static inline void make_array(struct array *a, u8* start, u8 *end, unsigned int elem_size) +static inline void make_array(struct array *a, unsigned char* start, unsigned char *end, unsigned int elem_size) { make_cursor(start, end, &a->cur); a->elem_size = elem_size; @@ -77,7 +77,7 @@ static inline void *cursor_alloc(struct cursor *mem, unsigned long size) static inline int cursor_slice(struct cursor *mem, struct cursor *slice, size_t size) { - u8 *p; + unsigned char *p; if (!(p = cursor_alloc(mem, size))) { return 0; } @@ -103,7 +103,7 @@ static inline int cursor_skip(struct cursor *cursor, int n) return 1; } -static inline int pull_byte(struct cursor *cursor, u8 *c) +static inline int cursor_pull_byte(struct cursor *cursor, unsigned char *c) { if (unlikely(cursor->p >= cursor->end)) return 0; @@ -114,7 +114,7 @@ static inline int pull_byte(struct cursor *cursor, u8 *c) return 1; } -static inline int parse_byte(struct cursor *cursor, u8 *c) +static inline int parse_byte(struct cursor *cursor, unsigned char *c) { if (unlikely(cursor->p >= cursor->end)) return 0; @@ -159,7 +159,7 @@ static inline int cursor_pull_c_str(struct cursor *cursor, const char **str) } -static inline int cursor_push_byte(struct cursor *cursor, u8 c) +static inline int cursor_push_byte(struct cursor *cursor, unsigned char c) { if (unlikely(cursor->p + 1 > cursor->end)) { return 0; @@ -171,7 +171,7 @@ static inline int cursor_push_byte(struct cursor *cursor, u8 c) return 1; } -static inline int cursor_pull(struct cursor *cursor, u8 *data, int len) +static inline int cursor_pull(struct cursor *cursor, unsigned char *data, int len) { if (unlikely(cursor->p + len > cursor->end)) { return 0; @@ -241,7 +241,7 @@ static inline unsigned char *cursor_top(struct cursor *cur, int len) static inline int cursor_top_int(struct cursor *cur, int *i) { - u8 *p; + unsigned char *p; if (unlikely(!(p = cursor_top(cur, sizeof(*i))))) { return 0; } @@ -249,7 +249,7 @@ static inline int cursor_top_int(struct cursor *cur, int *i) return 1; } -static inline int cursor_pop(struct cursor *cur, u8 *data, int len) +static inline int cursor_pop(struct cursor *cur, unsigned char *data, int len) { if (unlikely(cur->p - len < cur->start)) { return 0; @@ -261,7 +261,7 @@ static inline int cursor_pop(struct cursor *cur, u8 *data, int len) return 1; } -static inline int cursor_push(struct cursor *cursor, u8 *data, int len) +static inline int cursor_push(struct cursor *cursor, unsigned char *data, int len) { if (unlikely(cursor->p + len >= cursor->end)) { return 0; @@ -277,7 +277,7 @@ static inline int cursor_push(struct cursor *cursor, u8 *data, int len) static inline int cursor_push_int(struct cursor *cursor, int i) { - return cursor_push(cursor, (u8*)&i, sizeof(i)); + return cursor_push(cursor, (unsigned char*)&i, sizeof(i)); } static inline size_t cursor_count(struct cursor *cursor, size_t elem_size) @@ -285,73 +285,61 @@ static inline size_t cursor_count(struct cursor *cursor, size_t elem_size) return (cursor->p - cursor->start)/elem_size; } -/* TODO: push_varint */ -static inline int push_varint(struct cursor *cursor, int n) +/* Encodes a 64-bit integer into a variable-length format and pushes it into a cursor. + * Returns the number of bytes used or -1 in case of an error. */ +static inline int cursor_push_varint(struct cursor *cursor, uint64_t n) { - int ok, len; - unsigned char b; - len = 0; - - while (1) { - b = (n & 0xFF) | 0x80; + int len = 0; + do { + unsigned char b = (n & 0x7F) | (n > 0x7F ? 0x80 : 0); n >>= 7; - if (n == 0) { - b &= 0x7F; - ok = cursor_push_byte(cursor, b); - len++; - if (!ok) return 0; - break; - } - - ok = cursor_push_byte(cursor, b); + if (!cursor_push_byte(cursor, b)) + return -1; // Error handling len++; - if (!ok) return 0; - } + } while (n != 0); return len; } -/* TODO: pull_varint */ -static inline int pull_varint(struct cursor *cursor, int *n) +static inline int cursor_pull_varint(struct cursor *cursor, uint64_t *n) { int ok, i; unsigned char b; + *n = 0; - for (i = 0;; i++) { - ok = pull_byte(cursor, &b); + for (i = 0; i < 10; i++) { // Loop up to 10 bytes for 64-bit + ok = cursor_pull_byte(cursor, &b); if (!ok) return 0; - *n |= ((int)b & 0x7F) << (i * 7); + *n |= ((int64_t)b & 0x7F) << (i * 7); - /* is_last */ if ((b & 0x80) == 0) { - return i+1; + return i + 1; // Successfully read i+1 bytes } - - if (i == 4) return 0; } - return 0; + return 10; // Successfully read 10 bytes for a full 64-bit integer } + static inline int cursor_pull_int(struct cursor *cursor, int *i) { - return cursor_pull(cursor, (u8*)i, sizeof(*i)); + return cursor_pull(cursor, (unsigned char*)i, sizeof(*i)); } static inline int cursor_push_u32(struct cursor *cursor, uint32_t i) { return cursor_push(cursor, (unsigned char*)&i, sizeof(i)); } -static inline int cursor_push_u16(struct cursor *cursor, u16 i) +static inline int cursor_push_u16(struct cursor *cursor, unsigned short i) { - return cursor_push(cursor, (u8*)&i, sizeof(i)); + return cursor_push(cursor, (unsigned char*)&i, sizeof(i)); } static inline void *index_cursor(struct cursor *cursor, unsigned int index, int elem_size) { - u8 *p; + unsigned char *p; p = &cursor->start[elem_size * index]; if (unlikely(p >= cursor->end)) @@ -363,7 +351,7 @@ static inline void *index_cursor(struct cursor *cursor, unsigned int index, int static inline int push_sized_str(struct cursor *cursor, const char *str, int len) { - return cursor_push(cursor, (u8*)str, len); + return cursor_push(cursor, (unsigned char*)str, len); } static inline int cursor_push_lowercase(struct cursor *cur, const char *str, int len) @@ -382,7 +370,7 @@ static inline int cursor_push_lowercase(struct cursor *cur, const char *str, int static inline int cursor_push_str(struct cursor *cursor, const char *str) { - return cursor_push(cursor, (u8*)str, (int)strlen(str)); + return cursor_push(cursor, (unsigned char*)str, (int)strlen(str)); } static inline int cursor_push_c_str(struct cursor *cursor, const char *str) @@ -393,30 +381,27 @@ static inline int cursor_push_c_str(struct cursor *cursor, const char *str) /* TODO: push varint size */ static inline int push_prefixed_str(struct cursor *cursor, const char *str) { - int ok, len; - len = (int)strlen(str); - ok = push_varint(cursor, len); - if (!ok) return 0; + uint64_t len; + len = strlen(str); + if (!cursor_push_varint(cursor, len)) + return 0; return push_sized_str(cursor, str, len); } static inline int pull_prefixed_str(struct cursor *cursor, struct cursor *dest_buf, const char **str) { - int len, ok; - - ok = pull_varint(cursor, &len); - if (!ok) return 0; + uint64_t len; - if (unlikely(dest_buf->p + len > dest_buf->end)) { + if (!cursor_pull_varint(cursor, &len)) return 0; - } - ok = pull_data_into_cursor(cursor, dest_buf, (unsigned char**)str, len); - if (!ok) return 0; + if (unlikely(dest_buf->p + len > dest_buf->end)) + return 0; - ok = cursor_push_byte(dest_buf, 0); + if (!pull_data_into_cursor(cursor, dest_buf, (unsigned char**)str, len)) + return 0; - return 1; + return cursor_push_byte(dest_buf, 0); } static inline int cursor_remaining_capacity(struct cursor *cursor) @@ -450,7 +435,7 @@ static inline void cursor_print_around(struct cursor *cur, int range) } #undef max -static inline int pull_bytes(struct cursor *cur, int count, const u8 **bytes) { +static inline int pull_bytes(struct cursor *cur, int count, const unsigned char **bytes) { if (cur->p + count > cur->end) return 0; @@ -489,13 +474,13 @@ static inline int is_underscore(char c) { return c == '_'; } -static inline int is_utf8_byte(u8 c) { +static inline int is_utf8_byte(unsigned char c) { return c & 0x80; } static inline int parse_utf8_char(struct cursor *cursor, unsigned int *code_point, unsigned int *utf8_length) { - u8 first_byte; + unsigned char first_byte; if (!parse_byte(cursor, &first_byte)) return 0; // Not enough data diff --git a/src/nostrdb.c b/src/nostrdb.c index 4d6e58c..628f66b 100644 --- a/src/nostrdb.c +++ b/src/nostrdb.c @@ -225,11 +225,11 @@ static int ndb_make_text_search_key(unsigned char *buf, int bufsize, // TODO: need update this to uint64_t // we push this first because our query function can pull this off // quicky to check matches - if (!push_varint(&cur, (int32_t)note_id)) + if (!cursor_push_varint(&cur, (int32_t)note_id)) return 0; // string length - if (!push_varint(&cur, word_len)) + if (!cursor_push_varint(&cur, word_len)) return 0; // non-null terminated, lowercase string @@ -237,12 +237,12 @@ static int ndb_make_text_search_key(unsigned char *buf, int bufsize, return 0; // TODO: need update this to uint64_t - if (!push_varint(&cur, (int)timestamp)) + if (!cursor_push_varint(&cur, (int)timestamp)) return 0; // the index of the word in the content so that we can do more accurate // phrase searches - if (!push_varint(&cur, word_index)) + if (!cursor_push_varint(&cur, word_index)) return 0; size = cur.p - cur.start; @@ -314,19 +314,18 @@ static int mdb_cmp_memn(const MDB_val *a, const MDB_val *b) { static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b) { struct cursor ca, cb; - int sa, sb; - int nid_a, nid_b; + uint64_t sa, sb, nid_a, nid_b; MDB_val a2, b2; make_cursor(a->mv_data, a->mv_data + a->mv_size, &ca); make_cursor(b->mv_data, b->mv_data + b->mv_size, &cb); // note_id - if (unlikely(!pull_varint(&ca, &nid_a) || !pull_varint(&cb, &nid_b))) + if (unlikely(!cursor_pull_varint(&ca, &nid_a) || !cursor_pull_varint(&cb, &nid_b))) return 0; // string size - if (unlikely(!pull_varint(&ca, &sa) || !pull_varint(&cb, &sb))) + if (unlikely(!cursor_pull_varint(&ca, &sa) || !cursor_pull_varint(&cb, &sb))) return 0; a2.mv_data = ca.p; @@ -343,7 +342,7 @@ static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b) cb.p += sb; // timestamp - if (unlikely(!pull_varint(&ca, &sa) || !pull_varint(&cb, &sb))) + if (unlikely(!cursor_pull_varint(&ca, &sa) || !cursor_pull_varint(&cb, &sb))) return 0; if (sa < sb) return -1; @@ -354,7 +353,7 @@ static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b) else if (nid_a > nid_b) return 1; // word index - if (unlikely(!pull_varint(&ca, &sa) || !pull_varint(&cb, &sb))) + if (unlikely(!cursor_pull_varint(&ca, &sa) || !cursor_pull_varint(&cb, &sb))) return 0; if (sa < sb) return -1; @@ -366,11 +365,9 @@ static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b) static inline int ndb_unpack_text_search_key_noteid( struct cursor *cur, uint64_t *note_id) { - int inote_id; - if (!pull_varint(cur, &inote_id)) + if (!cursor_pull_varint(cur, note_id)) return 0; - *note_id = inote_id; return 1; } @@ -381,9 +378,13 @@ static inline int ndb_unpack_text_search_key_string(struct cursor *cur, const char **str, int *str_len) { - if (!pull_varint(cur, str_len)) + uint64_t len; + + if (!cursor_pull_varint(cur, &len)) return 0; + *str_len = len; + *str = (const char *)cur->p; if (!cursor_skip(cur, *str_len)) @@ -398,16 +399,12 @@ static inline int ndb_unpack_remaining_text_search_key(struct cursor *cur, struct ndb_text_search_key *key) { - int timestamp; - - if (!pull_varint(cur, ×tamp)) + if (!cursor_pull_varint(cur, &key->timestamp)) return 0; - if (!pull_varint(cur, &key->word_index)) + if (!cursor_pull_varint(cur, &key->word_index)) return 0; - key->timestamp = timestamp; - return 1; } @@ -2332,7 +2329,7 @@ static int prefix_count(const char *str1, int len1, const char *str2, int len2) static void ndb_print_text_search_key(struct ndb_text_search_key *key) { - printf("K<'%.*s' %d %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str, + printf("K<'%.*s' %" PRIu64 " %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str, key->word_index, key->timestamp, key->note_id); diff --git a/src/nostrdb.h b/src/nostrdb.h index 1a01c6f..bf90e2b 100644 --- a/src/nostrdb.h +++ b/src/nostrdb.h @@ -266,7 +266,7 @@ struct ndb_text_search_key const char *str; uint64_t timestamp; uint64_t note_id; - int word_index; + uint64_t word_index; }; struct ndb_text_search_result { diff --git a/src/print_util.h b/src/print_util.h index 1f60b07..93c8516 100644 --- a/src/print_util.h +++ b/src/print_util.h @@ -1,7 +1,7 @@ static void ndb_print_text_search_key(struct ndb_text_search_key *key) { - printf("K<'%.*s' %d %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str, + printf("K<'%.*s' %" PRIu64 " %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str, key->word_index, key->timestamp, key->note_id); diff --git a/src/typedefs.h b/src/typedefs.h index d68388d..e69de29 100644 --- a/src/typedefs.h +++ b/src/typedefs.h @@ -1,14 +0,0 @@ - -#ifndef PROTOVERSE_TYPEDEFS_H -#define PROTOVERSE_TYPEDEFS_H - -#include - -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned short u16; -typedef uint64_t u64; -typedef int64_t s64; - - -#endif /* PROTOVERSE_TYPEDEFS_H */ diff --git a/test.c b/test.c index 921413e..36b402f 100644 --- a/test.c +++ b/test.c @@ -964,9 +964,63 @@ static void test_fulltext() ndb_destroy(ndb); free(json); + +} + +static void test_varint(int64_t value) { + unsigned char buffer[10]; + struct cursor cursor; + uint64_t result; + + // Initialize cursor + cursor.start = buffer; + cursor.p = buffer; + cursor.end = buffer + sizeof(buffer); + + // Push the value + assert(cursor_push_varint(&cursor, value)); + + // Reset cursor for reading + cursor.p = buffer; + + // Pull the value + if (!cursor_pull_varint(&cursor, &result)) { + printf("Test failed for value %" PRIu64 " \n", value); + assert(!"Failed to pull value"); + } + + // Check if the pushed and pulled values are the same + if (value != result) { + printf("Test failed for value %" PRIu64 "\n", value); + assert(!"test failed"); + } +} + +static int test_varints() { + test_varint(0); + test_varint(127); // Edge case for 1-byte varint + test_varint(128); // Edge case for 2-byte varint + test_varint(16383); // Edge case for 2-byte varint + test_varint(16384); // Edge case for 3-byte varint + test_varint(2097151); // Edge case for 3-byte varint + test_varint(2097152); // Edge case for 4-byte varint + test_varint(268435455); // Edge case for 4-byte varint + test_varint(268435456); // Edge case for 5-byte varint + test_varint(34359738367LL); // Edge case for 5-byte varint + test_varint(34359738368LL); // Edge case for 6-byte varint + test_varint(4398046511103LL); // Edge case for 6-byte varint + test_varint(4398046511104LL); // Edge case for 7-byte varint + test_varint(562949953421311LL); // Edge case for 7-byte varint + test_varint(562949953421312LL); // Edge case for 8-byte varint + test_varint(72057594037927935LL); // Edge case for 8-byte varint + test_varint(72057594037927936LL); // Edge case for 9-byte varint + test_varint(9223372036854775807LL); // Maximum 64-bit integer + + return 0; } int main(int argc, const char *argv[]) { + test_varints(); test_filters(); //test_migrate(); test_fetched_at();