Skip to content

Commit

Permalink
MONGOCRYPT-762 Generate text search token sets from StrEncode output (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
erwee authored Feb 3, 2025
1 parent ca98747 commit ecb7614
Show file tree
Hide file tree
Showing 7 changed files with 458 additions and 96 deletions.
3 changes: 2 additions & 1 deletion src/mc-fle2-insert-update-payload-private-v2.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
_mongocrypt_buffer_t encryptedTokens; \
} mc_Text##Type##TokenSet_t; \
void mc_Text##Type##TokenSet_init(mc_Text##Type##TokenSet_t *); \
void mc_Text##Type##TokenSet_cleanup(mc_Text##Type##TokenSet_t *)
void mc_Text##Type##TokenSet_cleanup(mc_Text##Type##TokenSet_t *); \
void mc_Text##Type##TokenSet_shallow_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dest)

DEF_TEXT_SEARCH_TOKEN_SET(Exact);
DEF_TEXT_SEARCH_TOKEN_SET(Substring);
Expand Down
8 changes: 8 additions & 0 deletions src/mc-fle2-insert-update-payload-v2.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@
_mongocrypt_buffer_cleanup(&ts->escDerivedToken); \
_mongocrypt_buffer_cleanup(&ts->serverDerivedFromDataToken); \
_mongocrypt_buffer_cleanup(&ts->encryptedTokens); \
} \
void mc_Text##Type##TokenSet_shallow_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dst) { \
BSON_ASSERT_PARAM(src); \
BSON_ASSERT_PARAM(dst); \
_mongocrypt_buffer_set_to(&src->edcDerivedToken, &dst->edcDerivedToken); \
_mongocrypt_buffer_set_to(&src->escDerivedToken, &dst->escDerivedToken); \
_mongocrypt_buffer_set_to(&src->serverDerivedFromDataToken, &dst->serverDerivedFromDataToken); \
_mongocrypt_buffer_set_to(&src->encryptedTokens, &dst->encryptedTokens); \
}

DEF_TEXT_SEARCH_TOKEN_SET_INIT_CLEANUP(Exact)
Expand Down
6 changes: 6 additions & 0 deletions src/mongocrypt-buffer-private.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,10 @@ bool _mongocrypt_buffer_from_subrange(_mongocrypt_buffer_t *out,
uint32_t offset,
uint32_t len) MONGOCRYPT_WARN_UNUSED_RESULT;

/* _mongocrypt_buffer_copy_from_string_as_bson_value initializes @out, wraps the provided string
 * into a BSON value, and copies the BSON value to @out. No BSON validation is performed on @str.
 *
 * @str must not be NULL and @len must be non-negative; the implementation asserts both.
 * @len is the number of bytes of @str to wrap, so @str is not required to be
 * null-terminated at @len.
 *
 * On return, @out holds the serialized string value: a 4-byte length, the @len
 * string bytes, and a trailing null byte (i.e. @out->len == 4 + @len + 1).
 *
 * Caller must call _mongocrypt_buffer_cleanup.
 */
void _mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_buffer_t *out, const char *str, int len);

#endif /* MONGOCRYPT_BUFFER_H */
29 changes: 26 additions & 3 deletions src/mongocrypt-buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,10 @@ bool _mongocrypt_buffer_to_bson_value(_mongocrypt_buffer_t *plaintext, uint8_t t
return ret;
}

void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *iter) {
static void _mongocrypt_buffer_copy_as_bson_value(_mongocrypt_buffer_t *plaintext,
bool (*append_func)(bson_t *bson, const void *data, int len),
const void *data,
int len) {
bson_t wrapper = BSON_INITIALIZER;
int32_t offset = INT32_LEN /* skips document size */
+ TYPE_LEN /* element type */
Expand All @@ -326,13 +329,14 @@ void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *
uint8_t *wrapper_data;

BSON_ASSERT_PARAM(plaintext);
BSON_ASSERT_PARAM(iter);
BSON_ASSERT_PARAM(append_func);

/* It is not straightforward to transform a bson_value_t to a string of
* bytes. As a workaround, we wrap the value in a bson document with an empty
* key, then use the raw buffer from inside the new bson_t, skipping the
* length and type header information and the key name. */
bson_append_iter(&wrapper, "", 0, iter);
append_func(&wrapper, data, len);

wrapper_data = ((uint8_t *)bson_get_data(&wrapper));
BSON_ASSERT(wrapper.len >= (uint32_t)offset + NULL_BYTE_LEN);
plaintext->len = wrapper.len - (uint32_t)offset - NULL_BYTE_LEN; /* the final null byte */
Expand All @@ -345,6 +349,25 @@ void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *
bson_destroy(&wrapper);
}

/* Adapter that lets a bson_iter_t be passed through the generic append_func
 * callback taken by _mongocrypt_buffer_copy_as_bson_value.
 *
 * @bson is the document to append to.
 * @iter is a `const bson_iter_t *` carried as `const void *`.
 * @len is present only to match the shared callback signature; it is ignored.
 *
 * Forwards to bson_append_iter with an empty key ("" with key length 0) and
 * returns its result. */
static bool _append_iter(bson_t *bson, const void *iter, int len) {
    (void)len; /* deliberately unused; avoids -Wunused-parameter */
    return bson_append_iter(bson, "", 0, (const bson_iter_t *)iter);
}

/* Adapter that lets a raw string be passed through the generic append_func
 * callback taken by _mongocrypt_buffer_copy_as_bson_value.
 *
 * @bson is the document to append to.
 * @str is a `const char *` carried as `const void *`.
 * @len is forwarded unchanged as the string length argument.
 *
 * Forwards to bson_append_utf8 with an empty key ("" with key length 0) and
 * returns its result. */
static bool _append_utf8(bson_t *bson, const void *str, int len) {
    const char *const text = (const char *)str;
    const bool appended = bson_append_utf8(bson, "", 0, text, len);

    return appended;
}

/* Wraps the first @len bytes of @str as a BSON string value and stores the
 * serialized value in @plaintext.
 *
 * @str must be non-NULL and @len must be >= 0; both are asserted here.
 * @plaintext is checked for NULL by _mongocrypt_buffer_copy_as_bson_value.
 *
 * The work is delegated to _mongocrypt_buffer_copy_as_bson_value, using
 * _append_utf8 as the callback that writes the string into the temporary
 * wrapper document. */
void _mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_buffer_t *plaintext, const char *str, int len) {
    BSON_ASSERT_PARAM(str);
    BSON_ASSERT(len >= 0);
    _mongocrypt_buffer_copy_as_bson_value(plaintext, _append_utf8, str, len);
}

/* Stores in @plaintext the serialized BSON value that @iter currently refers to.
 *
 * @iter must be non-NULL (asserted here). @plaintext is checked for NULL by
 * _mongocrypt_buffer_copy_as_bson_value.
 *
 * The work is delegated to _mongocrypt_buffer_copy_as_bson_value, using
 * _append_iter as the callback. The length argument is passed as 0 because
 * _append_iter does not read it. */
void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *iter) {
    BSON_ASSERT_PARAM(iter);
    _mongocrypt_buffer_copy_as_bson_value(plaintext, _append_iter, iter, 0);
}

bool _mongocrypt_buffer_from_uuid_iter(_mongocrypt_buffer_t *buf, bson_iter_t *iter) {
const uint8_t *data;
bson_subtype_t subtype;
Expand Down
207 changes: 143 additions & 64 deletions src/mongocrypt-marking.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
#include "mc-range-edge-generation-private.h"
#include "mc-range-encoding-private.h"
#include "mc-range-mincover-private.h"
#include "mc-str-encode-string-sets-private.h"
#include "mc-text-search-str-encode-private.h"
#include "mc-tokens-private.h"
#include "mongocrypt-buffer-private.h"
#include "mongocrypt-ciphertext-private.h"
Expand Down Expand Up @@ -1126,26 +1128,22 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb,
mc_FLE2InsertUpdatePayloadV2_t *payload,
const _mongocrypt_buffer_t *indexKeyId,
const mc_FLE2TextSearchInsertSpec_t *spec,
const _mongocrypt_buffer_t *value,
int64_t contentionFactor,
mongocrypt_status_t *status) {
BSON_ASSERT_PARAM(kb);
BSON_ASSERT_PARAM(payload);
BSON_ASSERT_PARAM(indexKeyId);
BSON_ASSERT_PARAM(spec);
BSON_ASSERT_PARAM(value);

_mongocrypt_crypto_t *crypto = kb->crypt->crypto;
mc_TextSearchTokenSets_t *tsts = &payload->textSearchTokenSets.tsts;
_FLE2EncryptedPayloadCommon_t common = {{0}};
bool res = false;

// TODO MONGOCRYPT-759 implement case folding; for now let foldedValue be a copy of value.
_mongocrypt_buffer_t foldedValue = {0};
_mongocrypt_buffer_init(&foldedValue);
_mongocrypt_buffer_copy_to(value, &foldedValue);

// TODO MONGOCRYPT-762 do StrEncode here to get substring sets to encode
mc_str_encode_sets_t *encodeSets = mc_text_search_str_encode(spec, status);
if (!encodeSets) {
goto fail;
}

// Start the token derivations
if (!_get_tokenKey(kb, indexKeyId, &common.tokenKey, status)) {
Expand All @@ -1164,72 +1162,154 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb,
goto fail;
}

if (!_fle2_generate_TextExactTokenSet(kb,
&tsts->exact,
&foldedValue,
contentionFactor,
common.collectionsLevel1Token,
common.serverTokenDerivationLevel1Token,
status)) {
goto fail;
// Generate exact token set singleton
{
_mongocrypt_buffer_t asBsonValue;
_mongocrypt_buffer_init(&asBsonValue);
BSON_ASSERT(encodeSets->exact.len < INT_MAX);
_mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue,
(const char *)encodeSets->exact.data,
(int)encodeSets->exact.len);
if (!_fle2_generate_TextExactTokenSet(kb,
&tsts->exact,
&asBsonValue,
contentionFactor,
common.collectionsLevel1Token,
common.serverTokenDerivationLevel1Token,
status)) {
_mongocrypt_buffer_cleanup(&asBsonValue);
goto fail;
}
_mongocrypt_buffer_cleanup(&asBsonValue);
}

if (spec->substr.set) {
// TODO MONGOCRYPT-762 iterate on StrEncode substrings set
mc_TextSubstringTokenSet_t substrSet = {{0}};
mc_TextSubstringTokenSet_init(&substrSet);
const char *substring;
uint32_t bytelen;
uint32_t appendCount;

if (!_fle2_generate_TextSubstringTokenSet(kb,
&substrSet,
&foldedValue,
contentionFactor,
common.collectionsLevel1Token,
common.serverTokenDerivationLevel1Token,
status)) {
mc_TextSubstringTokenSet_cleanup(&substrSet);
goto fail;
// Generate array of substring token sets
if (encodeSets->substring_set) {
mc_substring_set_iter_t set_itr;
mc_substring_set_iter_init(&set_itr, encodeSets->substring_set);

while (mc_substring_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) {
BSON_ASSERT(appendCount > 0);
BSON_ASSERT(bytelen < INT_MAX);

mc_TextSubstringTokenSet_t tset = {{0}};

_mongocrypt_buffer_t asBsonValue;
_mongocrypt_buffer_init(&asBsonValue);
_mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen);

if (!_fle2_generate_TextSubstringTokenSet(kb,
&tset,
&asBsonValue,
contentionFactor,
common.collectionsLevel1Token,
common.serverTokenDerivationLevel1Token,
status)) {
_mongocrypt_buffer_cleanup(&asBsonValue);
mc_TextSubstringTokenSet_cleanup(&tset);
goto fail;
}
_mongocrypt_buffer_cleanup(&asBsonValue);

if (appendCount > 1) {
mc_TextSubstringTokenSet_t tset_copy;
mc_TextSubstringTokenSet_shallow_copy(&tset, &tset_copy);
for (; appendCount > 1; appendCount--) {
_mc_array_append_val(&tsts->substringArray, tset_copy);
}
}
_mc_array_append_val(&tsts->substringArray, tset); // array now owns tset
}
_mc_array_append_val(&tsts->substringArray, substrSet);
}
if (spec->suffix.set) {
// TODO MONGOCRYPT-762 iterate on StrEncode suffixes set
mc_TextSuffixTokenSet_t suffixSet = {{0}};
mc_TextSuffixTokenSet_init(&suffixSet);

if (!_fle2_generate_TextSuffixTokenSet(kb,
&suffixSet,
&foldedValue,
contentionFactor,
common.collectionsLevel1Token,
common.serverTokenDerivationLevel1Token,
status)) {
mc_TextSuffixTokenSet_cleanup(&suffixSet);
goto fail;
}

// Generate array of suffix token sets
if (encodeSets->suffix_set) {
mc_affix_set_iter_t set_itr;
mc_affix_set_iter_init(&set_itr, encodeSets->suffix_set);

while (mc_affix_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) {
BSON_ASSERT(appendCount > 0);
BSON_ASSERT(bytelen < INT_MAX);

mc_TextSuffixTokenSet_t tset = {{0}};
mc_TextSuffixTokenSet_init(&tset);

_mongocrypt_buffer_t asBsonValue;
_mongocrypt_buffer_init(&asBsonValue);
_mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen);

if (!_fle2_generate_TextSuffixTokenSet(kb,
&tset,
&asBsonValue,
contentionFactor,
common.collectionsLevel1Token,
common.serverTokenDerivationLevel1Token,
status)) {
_mongocrypt_buffer_cleanup(&asBsonValue);
mc_TextSuffixTokenSet_cleanup(&tset);
goto fail;
}
_mongocrypt_buffer_cleanup(&asBsonValue);

if (appendCount > 1) {
mc_TextSuffixTokenSet_t tset_copy;
mc_TextSuffixTokenSet_shallow_copy(&tset, &tset_copy);
for (; appendCount > 1; appendCount--) {
_mc_array_append_val(&tsts->suffixArray, tset_copy);
}
}
_mc_array_append_val(&tsts->suffixArray, tset); // array now owns tset
}
_mc_array_append_val(&tsts->suffixArray, suffixSet);
}
if (spec->prefix.set) {
// TODO MONGOCRYPT-762 iterate on StrEncode suffixes set
mc_TextPrefixTokenSet_t prefixSet = {{0}};
mc_TextPrefixTokenSet_init(&prefixSet);

if (!_fle2_generate_TextPrefixTokenSet(kb,
&prefixSet,
&foldedValue,
contentionFactor,
common.collectionsLevel1Token,
common.serverTokenDerivationLevel1Token,
status)) {
mc_TextPrefixTokenSet_cleanup(&prefixSet);
goto fail;
}

// Generate array of prefix token sets
if (encodeSets->prefix_set) {
mc_affix_set_iter_t set_itr;
mc_affix_set_iter_init(&set_itr, encodeSets->prefix_set);

while (mc_affix_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) {
BSON_ASSERT(appendCount > 0);
BSON_ASSERT(bytelen < INT_MAX);

mc_TextPrefixTokenSet_t tset = {{0}};
mc_TextPrefixTokenSet_init(&tset);

_mongocrypt_buffer_t asBsonValue;
_mongocrypt_buffer_init(&asBsonValue);
_mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen);

if (!_fle2_generate_TextPrefixTokenSet(kb,
&tset,
&asBsonValue,
contentionFactor,
common.collectionsLevel1Token,
common.serverTokenDerivationLevel1Token,
status)) {
_mongocrypt_buffer_cleanup(&asBsonValue);
mc_TextPrefixTokenSet_cleanup(&tset);
goto fail;
}
_mongocrypt_buffer_cleanup(&asBsonValue);

if (appendCount > 1) {
mc_TextPrefixTokenSet_t tset_copy;
mc_TextPrefixTokenSet_shallow_copy(&tset, &tset_copy);
for (; appendCount > 1; appendCount--) {
_mc_array_append_val(&tsts->prefixArray, tset_copy); // array now owns tset_copy
}
}
_mc_array_append_val(&tsts->prefixArray, tset); // moves ownership of tset
}
_mc_array_append_val(&tsts->prefixArray, prefixSet);
}
payload->textSearchTokenSets.set = true;
res = true;
fail:
_FLE2EncryptedPayloadCommon_cleanup(&common);
_mongocrypt_buffer_cleanup(&foldedValue);
mc_str_encode_sets_destroy(encodeSets);
return res;
}

Expand Down Expand Up @@ -1350,7 +1430,6 @@ static bool _mongocrypt_fle2_placeholder_to_insert_update_ciphertextForTextSearc
&payload,
&placeholder->index_key_id,
&insertSpec,
&value,
payload.contentionFactor,
status)) {
goto fail;
Expand Down
25 changes: 25 additions & 0 deletions test/test-mongocrypt-buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <mongocrypt-marking-private.h>

#include "mongocrypt-buffer-private.h"
#include "test-mongocrypt-assert.h"
#include "test-mongocrypt.h"

Expand Down Expand Up @@ -232,11 +233,35 @@ static void _test_mongocrypt_buffer_from_subrange(_mongocrypt_tester_t *tester)
_mongocrypt_buffer_cleanup(&input);
}

/* Checks the byte layout produced by
 * _mongocrypt_buffer_copy_from_string_as_bson_value for the input "foobar":
 * a 4-byte length prefix (bytes 07 00 00 00, i.e. 6 characters plus the
 * terminator), followed by the characters and a trailing null byte. */
static void _test_mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_tester_t *tester) {
    const char *data = "foobar";
    const size_t dataLen = strlen(data);

    // expect output to contain 4-byte length + data + null string terminator
    const size_t expectedLen = sizeof(int32_t) + dataLen + sizeof(uint8_t);

    _mongocrypt_buffer_t expectedLenBuf;
    _mongocrypt_buffer_t buf;

    _mongocrypt_buffer_copy_from_hex(&expectedLenBuf, "07000000");
    _mongocrypt_buffer_copy_from_string_as_bson_value(&buf, data, (int)dataLen);

    // total size first
    ASSERT(buf.len == expectedLen);

    // then the 4-byte length prefix at the front of the output
    ASSERT_CMPBYTES(expectedLenBuf.data, expectedLenBuf.len, buf.data, expectedLenBuf.len);

    // then everything after the prefix: the characters plus the null byte
    ASSERT_CMPBYTES((const uint8_t *)data,
                    dataLen + 1,
                    buf.data + expectedLenBuf.len,
                    buf.len - expectedLenBuf.len);

    _mongocrypt_buffer_cleanup(&buf);
    _mongocrypt_buffer_cleanup(&expectedLenBuf);
}

/* Installs the _mongocrypt_buffer_t unit tests, one INSTALL_TEST call per test
 * function, in the order listed below.
 *
 * NOTE(review): INSTALL_TEST is defined outside this view; it presumably
 * registers the named function on the `tester` parameter — confirm. */
void _mongocrypt_tester_install_buffer(_mongocrypt_tester_t *tester) {
    INSTALL_TEST(_test_mongocrypt_buffer_from_iter);
    INSTALL_TEST(_test_mongocrypt_buffer_copy_from_data_and_size);
    INSTALL_TEST(_test_mongocrypt_buffer_steal_from_data_and_size);
    INSTALL_TEST(_test_mongocrypt_buffer_steal_from_string);
    INSTALL_TEST(_test_mongocrypt_buffer_copy_from_uint64_le);
    INSTALL_TEST(_test_mongocrypt_buffer_from_subrange);
    /* covers the string-to-BSON-value helper */
    INSTALL_TEST(_test_mongocrypt_buffer_copy_from_string_as_bson_value);
}
Loading

0 comments on commit ecb7614

Please sign in to comment.