From d57ce794b1d1fd494bdb2a6b22b0fd9fad38f007 Mon Sep 17 00:00:00 2001 From: madschemas <155993105+MadSchemas@users.noreply.github.com> Date: Sat, 28 Dec 2024 10:58:51 +0300 Subject: [PATCH] Update to version v3.31.0 --- .github/workflows/test.yml | 6 +- bindings/consts.go | 2 +- changelog.md | 26 ++ cjson/decoder.go | 3 +- cpp_src/CMakeLists.txt | 2 +- cpp_src/core/cjson/cjsondecoder.cc | 6 +- cpp_src/core/cjson/cjsontools.cc | 5 + cpp_src/core/cjson/cjsontools.h | 26 ++ cpp_src/core/cjson/jsondecoder.cc | 7 +- cpp_src/core/cjson/msgpackdecoder.cc | 52 ++- cpp_src/core/cjson/msgpackdecoder.h | 1 - cpp_src/core/cjson/protobufdecoder.cc | 7 +- cpp_src/core/defnsconfigs.h | 4 +- cpp_src/core/ft/config/ftfastconfig.cc | 8 +- cpp_src/core/ft/config/ftfastconfig.h | 2 +- cpp_src/core/ft/ft_fast/dataholder.cc | 2 +- cpp_src/core/ft/ft_fast/dataholder.h | 7 +- cpp_src/core/ft/ft_fast/dataprocessor.cc | 44 +- cpp_src/core/ft/ft_fast/dataprocessor.h | 2 +- cpp_src/core/ft/ft_fast/selecter.cc | 2 +- cpp_src/core/ft/ftsetcashe.h | 4 +- cpp_src/core/ft/numtotext.cc | 195 +++------ cpp_src/core/ft/numtotext.h | 2 +- cpp_src/core/ft/stopwords/stop_en.cc | 25 +- cpp_src/core/ft/stopwords/stop_ru.cc | 378 ++---------------- cpp_src/core/idsetcache.h | 11 +- cpp_src/core/index/index.cc | 11 +- cpp_src/core/index/index.h | 13 +- cpp_src/core/index/indexfastupdate.cc | 65 +++ cpp_src/core/index/indexfastupdate.h | 18 + cpp_src/core/index/indexordered.cc | 25 +- cpp_src/core/index/indexstore.cc | 6 +- cpp_src/core/index/indextext/fastindextext.cc | 46 +-- cpp_src/core/index/indextext/fastindextext.h | 4 +- cpp_src/core/index/indextext/fieldsgetter.h | 2 +- cpp_src/core/index/indextext/indextext.cc | 32 +- cpp_src/core/index/indextext/indextext.h | 15 +- cpp_src/core/index/indexunordered.cc | 74 ++-- cpp_src/core/index/indexunordered.h | 23 +- cpp_src/core/index/rtree/indexrtree.cc | 6 +- cpp_src/core/indexopts.cc | 5 + cpp_src/core/indexopts.h | 7 +- cpp_src/core/joincache.h | 8 +- cpp_src/core/lrucache.cc | 63 ++- cpp_src/core/lrucache.h | 156 ++++++-- cpp_src/core/namespace/namespaceimpl.cc | 61 +-- cpp_src/core/namespace/namespaceimpl.h | 8 +- cpp_src/core/namespace/namespacestat.cc | 41 +- cpp_src/core/namespace/namespacestat.h | 30 +- cpp_src/core/namespacedef.h | 2 +- .../comparator/comparator_not_indexed.cc | 19 + .../comparator/comparator_not_indexed.h | 15 +- cpp_src/core/nsselecter/comparator/helpers.h | 11 +- cpp_src/core/nsselecter/joinedselector.cc | 18 +- cpp_src/core/nsselecter/nsselecter.cc | 15 +- cpp_src/core/nsselecter/querypreprocessor.cc | 5 +- .../nsselecter/selectiteratorcontainer.cc | 9 +- cpp_src/core/nsselecter/sortexpression.cc | 3 + cpp_src/core/payload/payloadfieldtype.cc | 9 + cpp_src/core/payload/payloadfieldtype.h | 7 +- cpp_src/core/payload/payloadtype.cc | 3 - cpp_src/core/query/query.cc | 56 ++- cpp_src/core/query/query.h | 16 +- cpp_src/core/querycache.h | 9 +- cpp_src/core/reindexer_impl/reindexerimpl.cc | 11 +- cpp_src/estl/elist.h | 2 +- cpp_src/estl/suffix_map.h | 14 +- cpp_src/gtests/tests/API/base_tests.cc | 49 +++ .../fixtures/fuzzing/random_generator.cc | 2 +- .../gtests/tests/fixtures/reindexertestapi.cc | 15 + .../gtests/tests/fixtures/reindexertestapi.h | 2 + .../tests/unit/composite_indexes_test.cc | 77 ++++ cpp_src/gtests/tests/unit/ft/ft_generic.cc | 40 +- cpp_src/gtests/tests/unit/rpcclient_test.cc | 14 +- .../gtests/tests/unit/string_function_test.cc | 61 +++ cpp_src/gtests/tests/unit/tolal_lru_cache.cc | 4 +- cpp_src/net/listener.cc | 4 +- 
cpp_src/replicator/replicator.cc | 2 +- cpp_src/server/CMakeLists.txt | 2 +- cpp_src/server/contrib/server.md | 22 +- cpp_src/server/contrib/server.yml | 33 +- describer.go | 29 ++ ftfastconfig.go | 6 +- fulltext.md | 8 +- reflect.go | 14 +- test/compatibility_test/compatibility_test.sh | 17 +- test/dsl_test.go | 4 +- test/encdec_test.go | 164 ++++---- test/index_struct_test.go | 6 +- 89 files changed, 1372 insertions(+), 975 deletions(-) create mode 100644 cpp_src/core/index/indexfastupdate.cc create mode 100644 cpp_src/core/index/indexfastupdate.h diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a1ede3c9b..e00a87d8d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -209,7 +209,7 @@ jobs: run: | if [[ $OS == ubuntu* ]]; then sudo ./dependencies.sh - python3 -m pip install setuptools + python3 -m pip install setuptools build else ./dependencies.sh fi @@ -238,7 +238,9 @@ jobs: with: repository: restream/reindexer-py - name: Install PyReindexer - run: sudo python3 setup.py install + run: | + python -m build + python -m pip install . - name: Test PyReindexer run: | cd pyreindexer diff --git a/bindings/consts.go b/bindings/consts.go index 4a7704316..b8c33cc13 100644 --- a/bindings/consts.go +++ b/bindings/consts.go @@ -2,7 +2,7 @@ package bindings const CInt32Max = int(^uint32(0) >> 1) -const ReindexerVersion = "v3.30.0" +const ReindexerVersion = "v3.31.0" // public go consts from type_consts.h and reindexer_ctypes.h const ( diff --git a/changelog.md b/changelog.md index 4960947dd..b9e05db97 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,29 @@ +# Version 3.31.0 (28.12.2024) +## Core +- [fea] Added fast path for index updates for some specific cases (when index switches type between `tree`/`hash`/`-` and/or changes its `is_dense`-flag) +- [fea] Allowed to update part of the composite index for some specific cases (when underlying index uses `fast path` update) +- [fea] Added information about index/namespace cache hits into `#perfstats`-namespace +- [fea] Slight comparators optimization (better inlining) +- [fea] Allowed to update scalar-index to array-index and vice versa for the case when the namespace is empty +- [fix] Fixed incorrect [EqualPosition's](readme.md#search-in-array-fields-with-matching-array-indexes) interaction with brackets optimizer in `SELECT`-queries. Now the optimizer should not remove the brackets containing `EqualPosition` +- [fix] Fixed crash in `ORDER BY` arithmetic sorting expression with composite indexes +- [fix] Fixed assertion on attempt to use 'null'-values with `=`, `IN()`, `<`, `>`, `<=`, `>=` and `RANGE()` operators +- [fix] Added extra type validations for indexed document content during CJSON/JSON/MsgPack/Protobuf deserialization +- [fix] Added JSON-validation for `SetObject`-method in `UPDATE`-queries + +## Fulltext +- [fix] Fixed areas positions for `number search` +- [fix] Removed some of the default stop-words + +## Go connector +- [fea] Added support for empty `reindex`/`json` tags with default index/jsonpath-names (i.e.
constructions like ``StrField string `reindex:"" json:""` ``) + +## Face +- [fea] Added `splitter` field to the Full text index settings +- [fix] Fixed wrong input field clearing on the cache settings page +- [fix] Fixed sort order icon on the tables +- [fix] Fixed data saving in JSON view on the Index settings page + # Version 3.30.0 (29.11.2024) ## Core - [fea] Optimized memory layout for `-tuple`-index (this slightly reduces memory consumation for any `reindexer` database) diff --git a/cjson/decoder.go b/cjson/decoder.go index 317942572..ed6aba6a8 100644 --- a/cjson/decoder.go +++ b/cjson/decoder.go @@ -36,8 +36,7 @@ func fieldByTag(t reflect.Type, tag string) (result reflect.StructField, ok bool } for i := 0; i < t.NumField(); i++ { result = t.Field(i) - if ftag := result.Tag.Get("json"); len(ftag) > 0 { - ftag, _ = splitStr(ftag, ',') + if ftag, _ := splitStr(result.Tag.Get("json"), ','); len(ftag) > 0 { if tag == ftag { return result, true } diff --git a/cpp_src/CMakeLists.txt b/cpp_src/CMakeLists.txt index 2900e1514..508bdbb7c 100644 --- a/cpp_src/CMakeLists.txt +++ b/cpp_src/CMakeLists.txt @@ -44,7 +44,7 @@ else() option(LINK_RESOURCES "Link web resources as binary data" ON) endif() -set(REINDEXER_VERSION_DEFAULT "3.30.0") +set(REINDEXER_VERSION_DEFAULT "3.31.0") if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "RelWithDebInfo") diff --git a/cpp_src/core/cjson/cjsondecoder.cc b/cpp_src/core/cjson/cjsondecoder.cc index 3064f14e8..b1ab6a5f4 100644 --- a/cpp_src/core/cjson/cjsondecoder.cc +++ b/cpp_src/core/cjson/cjsondecoder.cc @@ -40,11 +40,12 @@ bool CJsonDecoder::decodeCJson(Payload& pl, Serializer& rdser, WrSerializer& wrs const auto& fieldRef{pl.Type().Field(field)}; const KeyValueType fieldType{fieldRef.Type()}; if (tagType == TAG_ARRAY) { + const carraytag atag = rdser.GetCArrayTag(); + const auto count = atag.Count(); if rx_unlikely (!fieldRef.IsArray()) { throwUnexpectedArrayError(fieldRef); } - const carraytag atag = rdser.GetCArrayTag(); - const auto count = atag.Count(); + validateArrayFieldRestrictions(fieldRef, count, "cjson"); const int ofs = pl.ResizeArray(field, count, true); const TagType atagType = atag.Type(); if (atagType != TAG_OBJECT) { @@ -61,6 +62,7 @@ bool CJsonDecoder::decodeCJson(Payload& pl, Serializer& rdser, WrSerializer& wrs wrser.PutVarUint(count); } else { validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, fieldRef, field, isInArray(), "cjson"); + validateArrayFieldRestrictions(fieldRef, 1, "cjson"); objectScalarIndexes_.set(field); pl.Set(field, cjsonValueToVariant(tagType, rdser, fieldType), true); fieldType.EvaluateOneOf( diff --git a/cpp_src/core/cjson/cjsontools.cc b/cpp_src/core/cjson/cjsontools.cc index ecce2bdc4..f08e4d8b0 100644 --- a/cpp_src/core/cjson/cjsontools.cc +++ b/cpp_src/core/cjson/cjsontools.cc @@ -207,6 +207,11 @@ void throwScalarMultipleEncodesError(const Payload& pl, const PayloadFieldType& throw Error(errLogic, "Non-array field '%s' [%d] from '%s' can only be encoded once.", f.Name(), field, pl.Type().Name()); } +void throwUnexpectedArraySizeError(std::string_view parserName, const PayloadFieldType& f, int arraySize) { + throw Error(errParams, "%s array field '%s' for this index type must contain %d elements, but got %d", parserName, f.Name(), + f.ArrayDim(), arraySize); +} + static void dumpCjsonValue(TagType type, Serializer& cjson, std::ostream& dump) { switch (type) { case TAG_VARINT: diff --git a/cpp_src/core/cjson/cjsontools.h b/cpp_src/core/cjson/cjsontools.h index 93c618524..b0a10dec2 100644 --- 
a/cpp_src/core/cjson/cjsontools.h +++ b/cpp_src/core/cjson/cjsontools.h @@ -18,6 +18,7 @@ void skipCjsonTag(ctag tag, Serializer& rdser, std::array [[noreturn]] void throwUnexpectedNestedArrayError(std::string_view parserName, const PayloadFieldType& f); [[noreturn]] void throwScalarMultipleEncodesError(const Payload& pl, const PayloadFieldType& f, int field); +[[noreturn]] void throwUnexpectedArraySizeError(std::string_view parserName, const PayloadFieldType& f, int arraySize); RX_ALWAYS_INLINE void validateNonArrayFieldRestrictions(const ScalarIndexesSetT& scalarIndexes, const Payload& pl, const PayloadFieldType& f, int field, bool isInArray, std::string_view parserName) { if (!f.IsArray()) { @@ -30,6 +31,14 @@ RX_ALWAYS_INLINE void validateNonArrayFieldRestrictions(const ScalarIndexesSetT& } } +RX_ALWAYS_INLINE void validateArrayFieldRestrictions(const PayloadFieldType& f, int arraySize, std::string_view parserName) { + if (f.IsArray()) { + if rx_unlikely (arraySize && f.ArrayDim() > 0 && f.ArrayDim() != arraySize) { + throwUnexpectedArraySizeError(parserName, f, arraySize); + } + } +} + void DumpCjson(Serializer& cjson, std::ostream& dump, const ConstPayload*, const TagsMatcher* = nullptr, std::string_view tab = " "); inline void DumpCjson(Serializer&& cjson, std::ostream& dump, const ConstPayload* pl, const TagsMatcher* tm = nullptr, std::string_view tab = " ") { @@ -49,4 +58,21 @@ inline void DumpCjson(Serializer&& cjson, std::ostream& dump, const TagsMatcher* DumpCjson(cjson, dump, tm, tab); } +static inline Variant convertValueForPayload(Payload& pl, int field, Variant&& value, std::string_view source) { + if (field < 0) { + return value; + } + + auto plFieldType = pl.Type().Field(field).Type(); + if (plFieldType.IsSame(value.Type())) { + return value; + } else if ((plFieldType.IsNumeric() && value.Type().IsNumeric()) || + (plFieldType.Is() && value.Type().Is())) { + return value.convert(pl.Type().Field(field).Type()); + } else { + throw Error(errLogic, "Error parsing %s field '%s' - got %s, expected %s", source, pl.Type().Field(field).Name(), + value.Type().Name(), plFieldType.Name()); + } +} + } // namespace reindexer diff --git a/cpp_src/core/cjson/jsondecoder.cc b/cpp_src/core/cjson/jsondecoder.cc index 7696741cb..2a0e2eba8 100644 --- a/cpp_src/core/cjson/jsondecoder.cc +++ b/cpp_src/core/cjson/jsondecoder.cc @@ -52,6 +52,7 @@ void JsonDecoder::decodeJsonObject(Payload& pl, CJsonBuilder& builder, const gas (void)subelem; ++count; } + validateArrayFieldRestrictions(f, count, "json"); int pos = pl.ResizeArray(field, count, true); for (auto& subelem : elem.value) { pl.Set(field, pos++, jsonValue2Variant(subelem.value, f.Type(), f.Name())); @@ -70,6 +71,7 @@ void JsonDecoder::decodeJsonObject(Payload& pl, CJsonBuilder& builder, const gas case gason::JSON_TRUE: case gason::JSON_FALSE: { validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, f, field, isInArray(), "json"); + validateArrayFieldRestrictions(f, 1, "json"); objectScalarIndexes_.set(field); Variant value = jsonValue2Variant(elem.value, f.Type(), f.Name()); builder.Ref(tagName, value, field); @@ -150,7 +152,10 @@ class TagsPathGuard { void JsonDecoder::decodeJsonObject(const gason::JsonValue& root, CJsonBuilder& builder) { for (const auto& elem : root) { - int tagName = tagsMatcher_.name2tag(elem.key, true); + const int tagName = tagsMatcher_.name2tag(elem.key, true); + if (tagName == 0) { + throw Error(errParseJson, "Unsupported JSON format. 
Unnamed field detected"); + } TagsPathGuard tagsPathGuard(tagsPath_, tagName); decodeJson(nullptr, builder, elem.value, tagName, true); } diff --git a/cpp_src/core/cjson/msgpackdecoder.cc b/cpp_src/core/cjson/msgpackdecoder.cc index 8b72c9f21..0c7cf525c 100644 --- a/cpp_src/core/cjson/msgpackdecoder.cc +++ b/cpp_src/core/cjson/msgpackdecoder.cc @@ -11,22 +11,20 @@ template void MsgPackDecoder::setValue(Payload& pl, CJsonBuilder& builder, const T& value, int tagName) { int field = tm_.tags2field(tagsPath_.data(), tagsPath_.size()); if (field > 0) { - validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, pl.Type().Field(field), field, isInArray(), "msgpack"); + const auto& f = pl.Type().Field(field); + validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, f, field, isInArray(), "msgpack"); + if (!isInArray()) { + validateArrayFieldRestrictions(f, 1, "msgpack"); + } Variant val(value); builder.Ref(tagName, val, field); - pl.Set(field, std::move(val), true); + pl.Set(field, convertValueForPayload(pl, field, std::move(val), "msgpack")); objectScalarIndexes_.set(field); } else { builder.Put(tagName, value); } } -void MsgPackDecoder::iterateOverArray(const msgpack_object* begin, const msgpack_object* end, Payload& pl, CJsonBuilder& array) { - for (const msgpack_object* p = begin; p != end; ++p) { - decode(pl, array, *p, 0); - } -} - int MsgPackDecoder::decodeKeyToTag(const msgpack_object_kv& obj) { using namespace std::string_view_literals; switch (obj.key.type) { @@ -95,11 +93,43 @@ void MsgPackDecoder::decode(Payload& pl, CJsonBuilder& builder, const msgpack_ob if rx_unlikely (!f.IsArray()) { throw Error(errLogic, "Error parsing msgpack field '%s' - got array, expected scalar %s", f.Name(), f.Type().Name()); } - auto& array = builder.ArrayRef(tagName, field, count); - iterateOverArray(begin, end, pl, array); + validateArrayFieldRestrictions(f, count, "msgpack"); + int pos = pl.ResizeArray(field, count, true); + for (const msgpack_object* p = begin; p != end; ++p) { + pl.Set(field, pos++, + convertValueForPayload( + pl, field, + [&] { + switch (p->type) { + case MSGPACK_OBJECT_BOOLEAN: + return Variant{p->via.boolean}; + case MSGPACK_OBJECT_POSITIVE_INTEGER: + return Variant{int64_t(p->via.u64)}; + case MSGPACK_OBJECT_NEGATIVE_INTEGER: + return Variant{p->via.i64}; + case MSGPACK_OBJECT_FLOAT32: + case MSGPACK_OBJECT_FLOAT64: + return Variant{p->via.f64}; + case MSGPACK_OBJECT_STR: + return Variant{p_string(reinterpret_cast(&p->via.str)), Variant::hold_t{}}; + case MSGPACK_OBJECT_NIL: + case MSGPACK_OBJECT_ARRAY: + case MSGPACK_OBJECT_MAP: + case MSGPACK_OBJECT_BIN: + case MSGPACK_OBJECT_EXT: + default: + throw Error(errParams, "Unsupported MsgPack array field type: %s(%d)", ToString(p->type), + int(p->type)); + } + }(), + "msgpack")); + } + builder.ArrayRef(tagName, field, count); } else { auto array = builder.Array(tagName, type); - iterateOverArray(begin, end, pl, array); + for (const msgpack_object* p = begin; p != end; ++p) { + decode(pl, array, *p, 0); + } } break; } diff --git a/cpp_src/core/cjson/msgpackdecoder.h b/cpp_src/core/cjson/msgpackdecoder.h index 042b05263..e89f2ff8e 100644 --- a/cpp_src/core/cjson/msgpackdecoder.h +++ b/cpp_src/core/cjson/msgpackdecoder.h @@ -19,7 +19,6 @@ class MsgPackDecoder { private: void decode(Payload& pl, CJsonBuilder& builder, const msgpack_object& obj, int tagName); - void iterateOverArray(const msgpack_object* begin, const msgpack_object* end, Payload& pl, CJsonBuilder& builder); int decodeKeyToTag(const msgpack_object_kv& obj); diff 
--git a/cpp_src/core/cjson/protobufdecoder.cc b/cpp_src/core/cjson/protobufdecoder.cc index 7d2dcc04a..45d43d53b 100644 --- a/cpp_src/core/cjson/protobufdecoder.cc +++ b/cpp_src/core/cjson/protobufdecoder.cc @@ -1,4 +1,5 @@ #include "protobufdecoder.h" +#include "core/cjson/cjsontools.h" #include "core/schema.h" #include "estl/protobufparser.h" @@ -51,9 +52,10 @@ void ProtobufDecoder::setValue(Payload& pl, CJsonBuilder& builder, ProtobufValue if (item.isArray) { arraysStorage_.UpdateArraySize(item.tagName, field); } else { + validateArrayFieldRestrictions(f, 1, "protobuf"); builder.Ref(item.tagName, value, field); } - pl.Set(field, std::move(value), true); + pl.Set(field, convertValueForPayload(pl, field, std::move(value), "protobuf"), true); objectScalarIndexes_.set(field); } else { if (item.isArray) { @@ -78,13 +80,14 @@ Error ProtobufDecoder::decodeArray(Payload& pl, CJsonBuilder& builder, const Pro if (packed) { int count = 0; while (!parser.IsEof()) { - pl.Set(field, parser.ReadArrayItem(item.itemType), true); + pl.Set(field, convertValueForPayload(pl, field, parser.ReadArrayItem(item.itemType), "protobuf"), true); ++count; } builder.ArrayRef(item.tagName, field, count); } else { setValue(pl, builder, item); } + validateArrayFieldRestrictions(f, reinterpret_cast(pl.Field(field).p_)->len, "protobuf"); } else { CJsonBuilder& array = arraysStorage_.GetArray(item.tagName); if (packed) { diff --git a/cpp_src/core/defnsconfigs.h b/cpp_src/core/defnsconfigs.h index 525524bdb..41a43bde7 100644 --- a/cpp_src/core/defnsconfigs.h +++ b/cpp_src/core/defnsconfigs.h @@ -12,7 +12,7 @@ constexpr char kConfigNamespace[] = "#config"; constexpr char kActivityStatsNamespace[] = "#activitystats"; constexpr char kClientsStatsNamespace[] = "#clientsstats"; constexpr char kNsNameField[] = "name"; -const std::vector kDefDBConfig = { +constexpr std::string_view kDefDBConfig[] = { R"json({ "type":"profiling", "profiling":{ @@ -90,7 +90,7 @@ const std::vector kDefDBConfig = { } })json"}; -const std::vector kSystemNsDefs = { +const NamespaceDef kSystemNsDefs[] = { NamespaceDef(kConfigNamespace, StorageOpts().Enabled().CreateIfMissing().DropOnFileFormatError()) .AddIndex("type", "hash", "string", IndexOpts().PK()), NamespaceDef(kPerfStatsNamespace, StorageOpts()) diff --git a/cpp_src/core/ft/config/ftfastconfig.cc b/cpp_src/core/ft/config/ftfastconfig.cc index 5a1ec4cb0..e900abf27 100644 --- a/cpp_src/core/ft/config/ftfastconfig.cc +++ b/cpp_src/core/ft/config/ftfastconfig.cc @@ -131,8 +131,8 @@ void FtFastConfig::parse(std::string_view json, const RHashMap const std::string splitterStr = toLower(root["splitter"].As("fast")); if (splitterStr == "fast") { splitterType = Splitter::Fast; - } else if (splitterStr == "friso") { - splitterType = Splitter::Friso; + } else if (splitterStr == "friso" || splitterStr == "mmseg_cn") { + splitterType = Splitter::MMSegCN; } else { throw Error(errParseJson, "FtFastConfig: unknown splitter value: %s", splitterStr); } @@ -185,8 +185,8 @@ std::string FtFastConfig::GetJson(const fast_hash_map& fields) case Splitter::Fast: jsonBuilder.Put("splitter", "fast"); break; - case Splitter::Friso: - jsonBuilder.Put("splitter", "friso"); + case Splitter::MMSegCN: + jsonBuilder.Put("splitter", "mmseg_cn"); break; } diff --git a/cpp_src/core/ft/config/ftfastconfig.h b/cpp_src/core/ft/config/ftfastconfig.h index 76a9a99bd..3e8fdfd59 100644 --- a/cpp_src/core/ft/config/ftfastconfig.h +++ b/cpp_src/core/ft/config/ftfastconfig.h @@ -53,7 +53,7 @@ struct FtFastConfig : public BaseFTConfig { int 
maxAreasInDoc = 5; int maxTotalAreasToCache = -1; - enum class Splitter { Fast, Friso } splitterType = Splitter::Fast; + enum class Splitter { Fast, MMSegCN } splitterType = Splitter::Fast; RVector fieldsCfg; enum class Optimization { CPU, Memory } optimization = Optimization::Memory; diff --git a/cpp_src/core/ft/ft_fast/dataholder.cc b/cpp_src/core/ft/ft_fast/dataholder.cc index 55ab52d97..fad988b72 100644 --- a/cpp_src/core/ft/ft_fast/dataholder.cc +++ b/cpp_src/core/ft/ft_fast/dataholder.cc @@ -132,7 +132,7 @@ DataHolder::DataHolder(FtFastConfig* c) { cfg_ = c; if (cfg_->splitterType == FtFastConfig::Splitter::Fast) { splitter_ = make_intrusive(cfg_->extraWordSymbols); - } else if (cfg_->splitterType == FtFastConfig::Splitter::Friso) { + } else if (cfg_->splitterType == FtFastConfig::Splitter::MMSegCN) { splitter_ = make_intrusive(); } else { assertrx_throw(false); diff --git a/cpp_src/core/ft/ft_fast/dataholder.h b/cpp_src/core/ft/ft_fast/dataholder.h index 360a274e5..39b93ffff 100644 --- a/cpp_src/core/ft/ft_fast/dataholder.h +++ b/cpp_src/core/ft/ft_fast/dataholder.h @@ -50,17 +50,16 @@ class PackedWordEntry { class WordEntry { public: WordEntry() noexcept = default; - WordEntry(const IdRelSet& _vids, bool _virtualWord) : vids(_vids), virtualWord(_virtualWord) {} + WordEntry(const IdRelSet& _vids) : vids_(_vids) {} WordEntry(const WordEntry&) = delete; WordEntry(WordEntry&&) noexcept = default; WordEntry& operator=(const WordEntry&) = delete; WordEntry& operator=(WordEntry&&) noexcept = default; // Explicit copy - WordEntry MakeCopy() const { return WordEntry(this->vids, this->virtualWord); } + WordEntry MakeCopy() const { return WordEntry(vids_); } - IdRelSet vids; - bool virtualWord = false; + IdRelSet vids_; }; enum ProcessStatus { FullRebuild, RecommitLast, CreateNew }; diff --git a/cpp_src/core/ft/ft_fast/dataprocessor.cc b/cpp_src/core/ft/ft_fast/dataprocessor.cc index 240bfc8df..eefe51c88 100644 --- a/cpp_src/core/ft/ft_fast/dataprocessor.cc +++ b/cpp_src/core/ft/ft_fast/dataprocessor.cc @@ -14,8 +14,6 @@ using std::chrono::milliseconds; namespace reindexer { -constexpr int kDigitUtfSizeof = 1; - template void DataProcessor::Process(bool multithread) { ExceptionPtrWrapper exwr; @@ -77,7 +75,6 @@ typename DataProcessor::WordsVector DataProcessor::insertIntoSuf auto& suffix = holder.GetSuffix(); suffix.reserve(words_um.size() * 20, words_um.size()); - const bool enableNumbersSearch = holder.cfg_->enableNumbersSearch; WordsVector found; found.reserve(words_um.size()); @@ -96,11 +93,7 @@ typename DataProcessor::WordsVector DataProcessor::insertIntoSuf words.emplace_back(); pos = holder.BuildWordId(id); - if (enableNumbersSearch && keyIt.second.virtualWord) { - suffix.insert(keyIt.first, pos, kDigitUtfSizeof); - } else { - suffix.insert(keyIt.first, pos); - } + suffix.insert(keyIt.first, pos); } return found; } @@ -127,9 +120,9 @@ size_t DataProcessor::commitIdRelSets(const WordsVector& preprocWords, w idsetcnt += sizeof(*wIt); } - word->vids.insert(word->vids.end(), std::make_move_iterator(keyIt->second.vids.begin()), - std::make_move_iterator(keyIt->second.vids.end())); - keyIt->second.vids = IdRelSet(); + word->vids.insert(word->vids.end(), std::make_move_iterator(keyIt->second.vids_.begin()), + std::make_move_iterator(keyIt->second.vids_.end())); + keyIt->second.vids_ = IdRelSet(); word->vids.shrink_to_fit(); idsetcnt += word->vids.heap_size(); } @@ -240,7 +233,7 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea // build words map parallel in 
maxIndexWorkers threads auto worker = [this, &ctxs, &vdocsTexts, offset, fieldscount, &cfg, &vdocs, &textSplitter](int i) { auto ctx = &ctxs[i]; - std::vector virtualWords; + std::vector virtualWords; const size_t start = ctx->from; const size_t fin = ctx->to; const bool enableNumbersSearch = cfg->enableNumbersSearch; @@ -271,7 +264,7 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea auto [idxIt, emplaced] = ctx->words_um.try_emplace_prehashed(whash, word); (void)emplaced; - const int mfcnt = idxIt->second.vids.Add(vdocId, insertPos, rfield); + const int mfcnt = idxIt->second.vids_.Add(vdocId, insertPos, rfield); if (mfcnt > vdoc.mostFreqWordCount[rfield]) { vdoc.mostFreqWordCount[rfield] = mfcnt; } @@ -302,19 +295,18 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea #if defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) const auto fBeforeMove = it.first; const auto sBeforeMove = it.second.MakeCopy(); - const auto sCapacityBeforeMove = it.second.vids.capacity(); + const auto sCapacityBeforeMove = it.second.vids_.capacity(); #endif // defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) auto [idxIt, emplaced] = words_um.try_emplace(std::move(it.first), std::move(it.second)); if (!emplaced) { #if defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) // Make sure, that try_emplace did not moved the values assertrx(it.first == fBeforeMove); - assertrx(it.second.virtualWord == sBeforeMove.virtualWord); - assertrx(it.second.vids.size() == sBeforeMove.vids.size()); - assertrx(it.second.vids.capacity() == sCapacityBeforeMove); + assertrx(it.second.vids_.size() == sBeforeMove.vids_.size()); + assertrx(it.second.vids_.capacity() == sCapacityBeforeMove); #endif // defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) - auto& resultVids = idxIt->second.vids; - auto& newVids = it.second.vids; + auto& resultVids = idxIt->second.vids_; + auto& newVids = it.second.vids_; resultVids.insert(resultVids.end(), std::make_move_iterator(newVids.begin()), std::make_move_iterator(newVids.end())); } } @@ -347,8 +339,8 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea if (holder_.cfg_->logLevel >= LogInfo) { WrSerializer out; for (auto& w : words_um) { - if (w.second.vids.size() > vdocs.size() / 5 || int64_t(w.second.vids.size()) > holder_.cfg_->mergeLimit) { - out << w.first << "(" << w.second.vids.size() << ") "; + if (w.second.vids_.size() > vdocs.size() / 5 || int64_t(w.second.vids_.size()) > holder_.cfg_->mergeLimit) { + out << w.first << "(" << w.second.vids_.size() << ") "; } } logPrintf(LogInfo, "Total documents: %d. 
Potential stop words (with corresponding docs count): %s", vdocs.size(), out.Slice()); @@ -359,19 +351,17 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea template void DataProcessor::buildVirtualWord(std::string_view word, words_map& words_um, VDocIdType docType, int rfield, size_t insertPos, - std::vector& container) { + std::vector& container) { auto& vdoc(holder_.vdocs_[docType]); NumToText::convert(word, container); - for (std::string& numberWord : container) { + for (const auto numberWord : container) { WordEntry wentry; - wentry.virtualWord = true; - auto idxIt = words_um.emplace(std::move(numberWord), std::move(wentry)).first; - const int mfcnt = idxIt->second.vids.Add(docType, insertPos, rfield); + auto idxIt = words_um.emplace(numberWord, std::move(wentry)).first; + const int mfcnt = idxIt->second.vids_.Add(docType, insertPos, rfield); if (mfcnt > vdoc.mostFreqWordCount[rfield]) { vdoc.mostFreqWordCount[rfield] = mfcnt; } ++vdoc.wordsCount[rfield]; - insertPos += kDigitUtfSizeof; } } diff --git a/cpp_src/core/ft/ft_fast/dataprocessor.h b/cpp_src/core/ft/ft_fast/dataprocessor.h index 50fe679fe..936e435b3 100644 --- a/cpp_src/core/ft/ft_fast/dataprocessor.h +++ b/cpp_src/core/ft/ft_fast/dataprocessor.h @@ -86,7 +86,7 @@ class DataProcessor { [[nodiscard]] size_t buildWordsMap(words_map& m, bool multithread, intrusive_ptr textSplitter); void buildVirtualWord(std::string_view word, words_map& words_um, VDocIdType docType, int rfield, size_t insertPos, - std::vector& container); + std::vector& container); void buildTyposMap(uint32_t startPos, const WordsVector& preprocWords); [[nodiscard]] static WordsVector insertIntoSuffix(words_map& words_um, DataHolder& holder); [[nodiscard]] static size_t commitIdRelSets(const WordsVector& preprocWords, words_map& words_um, DataHolder& holder, diff --git a/cpp_src/core/ft/ft_fast/selecter.cc b/cpp_src/core/ft/ft_fast/selecter.cc index 8684ac34d..14d2d4795 100644 --- a/cpp_src/core/ft/ft_fast/selecter.cc +++ b/cpp_src/core/ft/ft_fast/selecter.cc @@ -966,7 +966,7 @@ void Selector::mergeIterationGroup(TextSearchResults& rawRes, index_t ra } if (!curMergedPos.posTmp.empty()) { present[vid] = true; - double normDist = bound(1.0 / minDist, holder_.cfg_->distanceWeight, holder_.cfg_->distanceBoost); + double normDist = bound(1.0 / (minDist < 1 ? 1 : minDist), holder_.cfg_->distanceWeight, holder_.cfg_->distanceBoost); int finalRank = normDist * termRank; //'rank' of the current subTerm is greater than the previous subTerm, update the overall 'rank' and save the rank of the // subTerm for possible diff --git a/cpp_src/core/ft/ftsetcashe.h b/cpp_src/core/ft/ftsetcashe.h index ed1ea8f74..9500b779b 100644 --- a/cpp_src/core/ft/ftsetcashe.h +++ b/cpp_src/core/ft/ftsetcashe.h @@ -10,11 +10,13 @@ struct FtIdSetCacheVal { FtIdSetCacheVal(IdSet::Ptr&& i) noexcept : ids(std::move(i)) {} FtIdSetCacheVal(IdSet::Ptr&& i, FtCtxData::Ptr&& c) noexcept : ids(std::move(i)), ctx(std::move(c)) {} size_t Size() const noexcept { return ids ? 
(sizeof(*ids.get()) + ids->heap_size()) : 0; } + bool IsInitialized() const noexcept { return bool(ids); } IdSet::Ptr ids; FtCtxData::Ptr ctx; }; -using FtIdSetCache = LRUCache; +using FtIdSetCache = + LRUCache, LRUWithAtomicPtr::No>; } // namespace reindexer diff --git a/cpp_src/core/ft/numtotext.cc b/cpp_src/core/ft/numtotext.cc index 44fe81df2..b7405f653 100644 --- a/cpp_src/core/ft/numtotext.cc +++ b/cpp_src/core/ft/numtotext.cc @@ -1,163 +1,100 @@ #include "numtotext.h" - -#include -#include -#include #include "tools/errors.h" namespace reindexer { constexpr std::string_view units[] = {"", "один", "два", "три", "четыре", "пять", "шесть", "семь", "восемь", "девять"}; -constexpr std::string_view unitsNominat[] = {"", "одна", "две"}; +constexpr std::string_view unitsNominat[] = {"", "одна", "две", "три", "четыре", "пять", "шесть", "семь", "восемь", "девять"}; constexpr std::string_view tens[] = {"", "одиннадцать", "двенадцать", "тринадцать", "четырнадцать", "пятнадцать", "шестнадцать", "семнадцать", "восемнадцать", "девятнадцать"}; constexpr std::string_view decades[] = {"", "десять", "двадцать", "тридцать", "сорок", "пятьдесят", "шестьдесят", "семьдесят", "восемьдесят", "девяносто"}; constexpr std::string_view hundreads[] = {"", "сто", "двести", "триста", "четыреста", "пятьсот", "шестьсот", "семьсот", "восемьсот", "девятьсот"}; -constexpr std::string_view thousands[] = {"тысяча", "тысячи", "тысяч"}; -constexpr std::string_view millions[] = {"миллион", "миллиона", "миллионов"}; -constexpr std::string_view billions[] = {"миллиард", "миллиарда", "миллиардов"}; -constexpr std::string_view trillions[] = {"триллион", "триллиона", "триллионов"}; -constexpr std::string_view quadrillion[] = {"квадриллион", "квадриллиона", "квадриллионов"}; -constexpr std::string_view quintillion[] = {"квинтиллион", "квинтиллиона", "квинтиллионов"}; -constexpr std::string_view sextillion[] = {"секстиллион", "секстиллиона", "секстиллионов"}; -constexpr std::string_view septillion[] = {"септиллион", "септиллиона", "септиллионов"}; -enum Numorders : int { Thousands, Millions, Billions, Trillions, Quadrillion, Quintillion, Sextillion, Septillion }; +// clang-format off +constexpr static std::string_view kNumOrders[][10] = { +// 0 1 2 3 4 5 6 7 8 9 + {"тысяч", "тысяча", "тысячи", "тысячи", "тысячи", "тысяч", "тысяч", "тысяч", "тысяч", "тысяч"}, + {"миллионов", "миллион", "миллиона", "миллиона", "миллиона", "миллионов", "миллионов", "миллионов", "миллионов", "миллионов"}, + {"миллиардов", "миллиард", "миллиарда", "миллиарда", "миллиарда", "миллиардов", "миллиардов", "миллиардов", "миллиардов", "миллиардов"}, + {"триллионов", "триллион", "триллиона", "триллиона", "триллиона", "триллионов", "триллионов", "триллионов", "триллионов", "триллионов"}, + {"квадриллионов", "квадриллион", "квадриллиона", "квадриллиона", "квадриллиона", "квадриллионов", "квадриллионов", "квадриллионов", "квадриллионов", "квадриллионов"}, + {"квинтиллионов", "квинтиллион", "квинтиллиона", "квинтиллиона", "квинтиллиона", "квинтиллионов", "квинтиллионов", "квинтиллионов", "квинтиллионов", "квинтиллионов"}, + {"секстиллионов", "секстиллион", "секстиллиона", "секстиллиона", "секстиллиона", "секстиллионов", "секстиллионов", "секстиллионов", "секстиллионов", "секстиллионов"}, + {"септиллионов", "септиллион", "септиллиона", "септиллиона", "септиллиона", "септиллионов", "септиллионов", "септиллионов", "септиллионов", "септиллионов"}}; +// clang-format on +RX_ALWAYS_INLINE static int ansiCharacterToDigit(char ch) noexcept { return static_cast(ch - 48); } 
-static std::string_view getNumorder(int numorder, int i) { - switch (numorder) { - case Thousands: - return thousands[i]; - case Millions: - return millions[i]; - case Billions: - return billions[i]; - case Trillions: - return trillions[i]; - case Quadrillion: - return quadrillion[i]; - case Quintillion: - return quintillion[i]; - case Sextillion: - return sextillion[i]; - case Septillion: - return septillion[i]; - default: - throw Error(errParams, "Incorrect order [%s]: too big", numorder); +static std::vector& formTextString(std::string_view str, std::vector& words) { + if (str.empty()) { + return words; } -} - -RX_ALWAYS_INLINE int ansiCharacterToDigit(char ch) noexcept { return static_cast(ch - 48); } - -static std::vector getOrders(std::string_view str) { - std::string numStr(str); - std::reverse(numStr.begin(), numStr.end()); - int numChars = numStr.length(); - std::vector orders; - orders.reserve(numChars / 3); - for (int i = 0; i < numChars; i += 3) { - std::string tempString; - if (i <= numChars - 3) { - tempString += numStr[i + 2]; - tempString += numStr[i + 1]; - tempString += numStr[i]; - } else { - int lostChars = numChars - i; - switch (lostChars) { - case 1: - tempString = numStr[i]; - break; - case 2: - tempString += numStr[i + 1]; - tempString += numStr[i]; - break; - default: - throw Error(errLogic, "Unexpected lost characters number: %d", lostChars); - } + unsigned int ordersMax = (str.length() - 1) / 3 + 1; + unsigned int orderDigitCount = str.length() - (ordersMax - 1) * 3; + unsigned int baseOffset = 0; + for (int k = ordersMax; k > 0; k--) { + unsigned int hundreadsIndx = 0; + unsigned int tenIndex = 0; + unsigned int numIndex = 0; + switch (orderDigitCount) { + case 1: + numIndex = ansiCharacterToDigit(str[baseOffset]); + break; + case 2: + tenIndex = ansiCharacterToDigit(str[baseOffset]); + numIndex = ansiCharacterToDigit(str[baseOffset + 1]); + break; + case 3: + hundreadsIndx = ansiCharacterToDigit(str[baseOffset]); + tenIndex = ansiCharacterToDigit(str[baseOffset + 1]); + numIndex = ansiCharacterToDigit(str[baseOffset + 2]); + break; + default: + throw Error(errLogic, "Incorrect orderDigitCount %d", orderDigitCount); + } + if (hundreadsIndx != 0) { + words.emplace_back(hundreads[hundreadsIndx]); } - orders.emplace_back(std::move(tempString)); - } - return orders; -} - -static std::vector getDecimal(const std::string& str, int i) { - std::vector words; - int v = std::stoi(str); - if (v < 10) { - words.emplace_back(units[v]); - } else if (v % 10 == 0) { - words.emplace_back(decades[v / 10]); - } else if (v < 20) { - words.emplace_back(tens[v % 10]); - } else if (v % 10 < 3 && i == 1) { - words.emplace_back(decades[ansiCharacterToDigit(str[0])]); - words.emplace_back(unitsNominat[ansiCharacterToDigit(str[1])]); - } else { - words.emplace_back(decades[ansiCharacterToDigit(str[0])]); - words.emplace_back(units[ansiCharacterToDigit(str[1])]); - } - return words; -} -static std::string getNumOrders(int i, int num) { - std::string orders; - if (i > 0) { - if (num % 10 > 4 || (num % 100 > 10 && num % 100 < 20) || num % 10 == 0) { - orders = getNumorder(i - 1, 2); - } else if (num % 10 > 1 && num % 10 < 5) { - orders = getNumorder(i - 1, 1); - } else { - orders = getNumorder(i - 1, 0); + if (tenIndex == 1 && numIndex != 0) { + words.emplace_back(tens[numIndex]); + } else if (tenIndex != 0) { + words.emplace_back(decades[tenIndex]); } - } - return orders; -} -static std::vector formTextString(const std::string& str, int i) { - std::vector words; - int strlen = 
str.length(); - if (strlen == 3) { - words.emplace_back(hundreads[ansiCharacterToDigit(str[0])]); - std::string decimal; - decimal += str[1]; - decimal += str[2]; - std::vector decimalWords(getDecimal(decimal, i)); - words.insert(words.end(), make_move_iterator(decimalWords.begin()), make_move_iterator(decimalWords.end())); - } else if (strlen == 2) { - words = getDecimal(str, i); - } else { - if ((i == 1) && std::stoi(str) < 3) { - words.emplace_back(unitsNominat[std::stoi(str)]); - } else { - words.emplace_back(units[std::stoi(str)]); + if (numIndex != 0 && tenIndex != 1) { + if (k == 2) { // thousands + words.emplace_back(unitsNominat[numIndex]); + } else { + words.emplace_back(units[numIndex]); + } } - } - if (i > 0) { - words.emplace_back(getNumOrders(i, std::stoi(str))); + bool isAllNull = hundreadsIndx == 0 && tenIndex == 0 && numIndex == 0; + if (k > 1 && !isAllNull) { + words.emplace_back(kNumOrders[k - 2][numIndex]); + } + baseOffset += orderDigitCount; + orderDigitCount = 3; } return words; } -std::vector& NumToText::convert(std::string_view str, std::vector& output) { +std::vector& NumToText::convert(std::string_view str, std::vector& output) { output.resize(0); - if ((str.length() == 1) && (str[0] == '0')) { - output = {"ноль"}; - return output; + unsigned int k = 0; + for (; k < str.length() && str[k] == '0'; ++k) { + output.emplace_back("ноль"); } + str = str.substr(k); // unreasonably big if (str.length() > 27) { + output.resize(0); return output; } - std::vector orders(getOrders(str)); - for (size_t i = 0; i < orders.size(); ++i) { - size_t oppositeSideIndex = orders.size() - 1 - i; - std::vector digits(formTextString(orders[oppositeSideIndex], oppositeSideIndex)); - output.insert(output.end(), make_move_iterator(digits.begin()), make_move_iterator(digits.end())); - } - return output; + + return formTextString(str, output); } } // namespace reindexer diff --git a/cpp_src/core/ft/numtotext.h b/cpp_src/core/ft/numtotext.h index db87cbd38..2c0a5b755 100644 --- a/cpp_src/core/ft/numtotext.h +++ b/cpp_src/core/ft/numtotext.h @@ -10,7 +10,7 @@ namespace reindexer { class NumToText { public: - static std::vector& convert(std::string_view numStr, std::vector& output); + static std::vector& convert(std::string_view numStr, std::vector& output); }; } // namespace reindexer diff --git a/cpp_src/core/ft/stopwords/stop_en.cc b/cpp_src/core/ft/stopwords/stop_en.cc index 8caf56a90..d04a1daa2 100644 --- a/cpp_src/core/ft/stopwords/stop_en.cc +++ b/cpp_src/core/ft/stopwords/stop_en.cc @@ -1,15 +1,16 @@ namespace reindexer { const char* stop_words_en[] = { - "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", - "aren", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", - "by", "can", "cannot", "could", "couldn", "did", "didn", "do", "does", "doesn", "doing", "dont", - "down", "during", "each", "few", "for", "from", "further", "had", "hadnt", "has", "hasnt", "have", - "havent", "having", "he", "hed", "hell", "hes", "her", "here", "hers", "herself", "him", "himself", - "his", "how", "hows", "i", "id", "im", "if", "in", "into", "is", "it", "its", - "itself", "me", "more", "most", "must", "my", "myself", "no", "nor", "not", "of", "off", - "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", - "same", "she", "should", "so", "some", "such", "than", "that", "the", "their", "theirs", "them", - "themselves", "then", "there", "these", "they", "this", "those", "through", "to", 
"too", "under", "until", - "up", "very", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", - "why", "with", "would", "you", "your", "yours", "yourself", "yourselves", nullptr}; + "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", + "aren", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", + "by", "can", "cannot", "could", "couldn", "did", "didn", "do", "does", "doesn", "doing", "dont", + "down", "during", "each", "few", "for", "from", "further", "had", "hadn", "hadnt", "has", "hasn", + "hasnt", "have", "havent", "having", "he", "hed", "hell", "hes", "her", "here", "hers", "herself", + "him", "himself", "his", "how", "hows", "i", "id", "im", "if", "in", "into", "is", + "it", "its", "itself", "ll", "me", "more", "most", "must", "my", "myself", "no", "nor", + "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", + "out", "over", "own", "same", "she", "should", "so", "some", "such", "than", "that", "the", + "their", "theirs", "them", "themselves", "then", "there", "these", "they", "this", "those", "through", "to", + "too", "under", "until", "up", "ve", "very", "was", "we", "were", "what", "when", "where", + "which", "while", "who", "whom", "why", "with", "will", "would", "you", "your", "yours", "yourself", + "yourselves", nullptr}; } diff --git a/cpp_src/core/ft/stopwords/stop_ru.cc b/cpp_src/core/ft/stopwords/stop_ru.cc index 659bfd905..217501036 100644 --- a/cpp_src/core/ft/stopwords/stop_ru.cc +++ b/cpp_src/core/ft/stopwords/stop_ru.cc @@ -1,353 +1,31 @@ namespace reindexer { const char* stop_words_ru[] = { - // clang-format off - "а", - "е", - "и", - "ж", - "м", - "о", - "на", - "не", - "ни", - "об", - "но", - "он", - "мне", - "мои", - "мож", - "она", - "они", - "оно", - "мной", - "много", - "мною", - "мой", - "мог", - "могут", - "можно", - "может", - "моя", - "моё", - "мочь", - "над", - "нее", - "оба", - "нам", - "нем", - "нами", - "ними", - "мимо", - "одной", - "одного", - "менее", - "однажды", - "однако", - "меня", - "нему", - "меньше", - "ней", - "наверху", - "него", - "ниже", - "мало", - "надо", - "назад", - "недавно", - "миллионов", - "недалеко", - "между", - "низко", - "нельзя", - "нибудь", - "наконец", - "никогда", - "никуда", - "нас", - "наш", - "нет", - "нею", - "неё", - "них", - "наша", - "наше", - "наши", - "ничего", - "нередко", - "обычно", - "опять", - "около", - "мы", - "ну", - "нх", - "от", - "нужно", - "очень", - "отсюда", - "в", - "во", - "вон", - "вниз", - "внизу", - "вокруг", - "вот", - "вверх", - "вам", - "вами", - "важное", - "важная", - "важные", - "важный", - "вдали", - "везде", - "ведь", - "вас", - "ваш", - "ваша", - "ваше", - "ваши", - "впрочем", - "весь", - "вдруг", - "вы", - "все", - "всем", - "всеми", - "всему", - "всего", - "всегда", - "всех", - "всею", - "всю", - "вся", - "всё", - "всюду", - "год", - "года", - "году", - "где", - "да", - "ее", - "за", - "из", - "ли", - "же", - "им", - "до", - "по", - "ими", - "под", - "иногда", - "довольно", - "именно", - "долго", - "позже", - "более", - "должно", - "пожалуйста", - "значит", - "иметь", - "больше", - "пока", - "ему", - "имя", - "пор", - "пора", - "потом", - "потому", - "после", - "почему", - "почти", - "посреди", - "ей", - "его", - "дел", - "или", - "без", - "день", - "занят", - "занята", - "занято", - "заняты", - "давно", - "даже", - "алло", - "жизнь", - "далеко", - "близко", - "здесь", - "дальше", - "для", - "лет", - "зато", - "даром", - "перед", - "затем", - 
"зачем", - "лишь", - "ею", - "её", - "их", - "бы", - "еще", - "при", - "был", - "про", - "против", - "просто", - "бывает", - "бывь", - "если", - "люди", - "была", - "были", - "было", - "будем", - "будет", - "будете", - "будешь", - "буду", - "будь", - "будто", - "будут", - "ещё", - "друго", - "другое", - "другой", - "другие", - "другая", - "других", - "есть", - "быть", - "лучше", - "к", - "ком", - "конечно", - "кому", - "кого", - "когда", - "которой", - "которого", - "которая", - "которые", - "который", - "которых", - "кем", - "каждое", - "каждая", - "каждые", - "каждый", - "кажется", - "как", - "какой", - "какая", - "кто", - "кроме", - "куда", - "кругом", - "с", - "у", - "я", - "та", - "те", - "уж", - "со", - "то", - "том", - "снова", - "тому", - "совсем", - "того", - "тогда", - "тоже", - "собой", - "тобой", - "собою", - "тобою", - "сначала", - "только", - "уметь", - "тот", - "тою", - "хорошо", - "хотеть", - "хочешь", - "хоть", - "хотя", - "свое", - "свои", - "твой", - "своей", - "своего", - "своих", - "свою", - "твоя", - "твоё", - "раз", - "уже", - "сам", - "там", - "тем", - "чем", - "сама", - "сами", - "теми", - "само", - "рано", - "самом", - "самому", - "самой", - "самого", - "самим", - "самими", - "самих", - "саму", - "чему", - "чего", - "себе", - "тебе", - "разве", - "теперь", - "себя", - "тебя", - "спасибо", - "слишком", - "так", - "такое", - "такой", - "такие", - "также", - "такая", - "сих", - "тех", - "чаще", - "через", - "часто", - "сколько", - "ту", - "ты", - "эта", - "эти", - "что", - "это", - "чтоб", - "этом", - "этому", - "этой", - "этого", - "чтобы", - "этот", - "стал", - "туда", - "этим", - "этими", - "рядом", - "этих", - "тут", - "эту", - "суть", - "чуть", - "тысяч", - nullptr}; -// clang-format on + "а", "и", "ж", "о", "на", "не", "ни", "об", "но", "он", "мне", "мои", + "мож", "она", "они", "оно", "мной", "много", "мною", "мой", "мог", "могут", "можно", "может", + "моя", "моё", "мочь", "над", "нее", "оба", "нам", "нем", "нами", "ними", "мимо", "одной", + "одного", "менее", "однажды", "однако", "меня", "нему", "меньше", "ней", "него", "ниже", "мало", "надо", + "назад", "между", "низко", "нельзя", "нибудь", "наконец", "никогда", "никуда", "нас", "наш", "нет", "нею", + "неё", "них", "наша", "наше", "наши", "ничего", "опять", "около", "мы", "ну", "нх", "от", + "нужно", "очень", "отсюда", "в", "во", "вон", "вниз", "внизу", "вот", "вверх", "вам", "вами", + "вдали", "везде", "ведь", "вас", "ваш", "ваша", "ваше", "ваши", "впрочем", "весь", "вдруг", "вы", + "все", "всем", "всеми", "всему", "всего", "всегда", "всех", "всею", "всю", "вся", "всё", "всюду", + "год", "года", "году", "где", "да", "ее", "за", "из", "ли", "же", "им", "до", + "по", "ими", "под", "иногда", "довольно", "именно", "долго", "позже", "более", "должно", "пожалуйста", "значит", + "иметь", "больше", "пока", "ему", "имя", "пор", "пора", "потом", "потому", "после", "почему", "почти", + "посреди", "ей", "его", "дел", "или", "без", "день", "давно", "даже", "алло", "жизнь", "далеко", + "близко", "здесь", "дальше", "для", "лет", "зато", "даром", "перед", "затем", "зачем", "лишь", "ею", + "её", "их", "бы", "еще", "при", "был", "про", "против", "просто", "бывает", "бывь", "если", + "люди", "была", "были", "было", "будем", "будет", "будете", "будешь", "буду", "будь", "будто", "будут", + "ещё", "друго", "другое", "другой", "другие", "другая", "других", "есть", "быть", "лучше", "к", "ком", + "конечно", "кому", "кого", "когда", "которой", "которого", "которая", "которые", "который", "которых", "кем", "каждое", + "каждая", 
"каждые", "каждый", "кажется", "как", "какой", "какая", "кто", "кроме", "куда", "кругом", "с", + "у", "я", "та", "те", "уж", "со", "то", "том", "снова", "тому", "совсем", "того", + "тогда", "тоже", "собой", "тобой", "собою", "тобою", "сначала", "только", "уметь", "тот", "тою", "хорошо", + "хотеть", "хочешь", "хоть", "хотя", "свое", "свои", "твой", "своей", "своего", "своих", "свою", "твоя", + "твоё", "раз", "уже", "сам", "там", "тем", "чем", "сама", "сами", "теми", "само", "самом", + "самому", "самой", "самого", "самим", "самими", "самих", "саму", "чему", "чего", "себе", "тебе", "разве", + "теперь", "себя", "тебя", "спасибо", "слишком", "так", "такое", "такой", "такие", "также", "такая", "сих", + "тех", "чаще", "через", "часто", "сколько", "ту", "ты", "эта", "эти", "что", "это", "чтоб", + "этом", "этому", "этой", "этого", "чтобы", "этот", "стал", "туда", "этим", "этими", "рядом", "этих", + "тут", "эту", "суть", "чуть", nullptr}; } // namespace reindexer diff --git a/cpp_src/core/idsetcache.h b/cpp_src/core/idsetcache.h index d70d7f266..62efdd654 100644 --- a/cpp_src/core/idsetcache.h +++ b/cpp_src/core/idsetcache.h @@ -61,6 +61,7 @@ struct IdSetCacheVal { IdSetCacheVal() = default; IdSetCacheVal(IdSet::Ptr&& i) noexcept : ids(std::move(i)) {} size_t Size() const noexcept { return ids ? (sizeof(*ids.get()) + ids->heap_size()) : 0; } + bool IsInitialized() const noexcept { return bool(ids); } IdSet::Ptr ids; }; @@ -76,17 +77,23 @@ T& operator<<(T& os, const IdSetCacheVal& v) { struct equal_idset_cache_key { bool operator()(const IdSetCacheKey& lhs, const IdSetCacheKey& rhs) const noexcept { - return lhs.cond == rhs.cond && lhs.sort == rhs.sort && *lhs.keys == *rhs.keys; + try { + return lhs.cond == rhs.cond && lhs.sort == rhs.sort && *lhs.keys == *rhs.keys; + } catch (...) 
{ + return false; // For non-comparable variant arrays (really rare case in this context) + } } }; struct hash_idset_cache_key { size_t operator()(const IdSetCacheKey& s) const noexcept { return (size_t(s.cond) << 8) ^ (size_t(s.sort) << 16) ^ s.keys->Hash(); } }; -using IdSetCacheBase = LRUCache; +using IdSetCacheBase = + LRUCache, LRUWithAtomicPtr::Yes>; class IdSetCache : public IdSetCacheBase { public: + IdSetCache() = default; IdSetCache(size_t sizeLimit, uint32_t hitCount) : IdSetCacheBase(sizeLimit, hitCount) {} void ClearSorted(const std::bitset& s) { if (s.any()) { diff --git a/cpp_src/core/index/index.cc b/cpp_src/core/index/index.cc index c2fa3a9a9..1e6e52c95 100644 --- a/cpp_src/core/index/index.cc +++ b/cpp_src/core/index/index.cc @@ -61,7 +61,16 @@ std::unique_ptr Index::New(const IndexDef& idef, PayloadType&& payloadTyp case IndexUuidHash: return IndexUuid_New(idef, std::move(payloadType), std::move(fields), cacheCfg); } - throw Error(errParams, "Ivalid index type %d for index '%s'", idef.Type(), idef.name_); + throw Error(errParams, "Invalid index type %d for index '%s'", idef.Type(), idef.name_); +} + +IndexPerfStat Index::GetIndexPerfStat() { + return IndexPerfStat(name_, selectPerfCounter_.Get(), commitPerfCounter_.Get()); +} + +void Index::ResetIndexPerfStat() { + this->selectPerfCounter_.Reset(); + this->commitPerfCounter_.Reset(); } template diff --git a/cpp_src/core/index/index.h b/cpp_src/core/index/index.h index 7e4875c6e..7750b0c67 100644 --- a/cpp_src/core/index/index.h +++ b/cpp_src/core/index/index.h @@ -118,13 +118,8 @@ class Index { PerfStatCounterMT& GetSelectPerfCounter() { return selectPerfCounter_; } PerfStatCounterMT& GetCommitPerfCounter() { return commitPerfCounter_; } - IndexPerfStat GetIndexPerfStat() { - return IndexPerfStat(name_, selectPerfCounter_.Get(), commitPerfCounter_.Get()); - } - void ResetIndexPerfStat() { - selectPerfCounter_.Reset(); - commitPerfCounter_.Reset(); - } + virtual IndexPerfStat GetIndexPerfStat(); + virtual void ResetIndexPerfStat(); virtual bool HoldsStrings() const noexcept = 0; virtual void DestroyCache() {} virtual void ClearCache() {} @@ -139,7 +134,7 @@ class Index { protected: // Index type. Can be one of enum IndexType IndexType type_; - // Name of index (usualy name of field). + // Name of index (usually name of field). std::string name_; // Vector or ids, sorted by this index. 
Available only for ordered indexes std::vector sortOrders_; @@ -160,7 +155,7 @@ class Index { PerfStatCounterMT selectPerfCounter_; KeyValueType keyType_ = KeyValueType::Undefined{}; KeyValueType selectKeyType_ = KeyValueType::Undefined{}; - // Count of sorted indexes in namespace to resereve additional space in idsets + // Count of sorted indexes in namespace to reserve additional space in idsets int sortedIdxCount_ = 0; bool isBuilt_{false}; diff --git a/cpp_src/core/index/indexfastupdate.cc b/cpp_src/core/index/indexfastupdate.cc new file mode 100644 index 000000000..9a92ea3d6 --- /dev/null +++ b/cpp_src/core/index/indexfastupdate.cc @@ -0,0 +1,65 @@ +#include "core/index/indexfastupdate.h" +#include "core/index/index.h" +#include "core/namespace/namespaceimpl.h" +#include "tools/logger.h" + +namespace reindexer { +bool IndexFastUpdate::Try(NamespaceImpl& ns, const IndexDef& from, const IndexDef& to) { + if (RelaxedEqual(from, to)) { + logFmt(LogInfo, "[{}]:{} Start fast update index '{}'", ns.name_, ns.serverId_, from.name_); + + const auto idxNo = ns.indexesNames_.find(from.name_)->second; + auto& index = ns.indexes_[idxNo]; + auto newIndex = Index::New(to, PayloadType(index->GetPayloadType()), FieldsSet{index->Fields()}, ns.config_.cacheConfig); + VariantArray keys, resKeys; + for (size_t rowId = 0; rowId < ns.items_.size(); ++rowId) { + if (ns.items_[rowId].IsFree()) { + continue; + } + + bool needClearCache = false; + ConstPayload(ns.payloadType_, ns.items_[rowId]).Get(idxNo, keys); + newIndex->Upsert(resKeys, keys, rowId, needClearCache); + } + if (index->IsOrdered()) { + auto indexesCacheCleaner{ns.GetIndexesCacheCleaner()}; + indexesCacheCleaner.Add(index->SortId()); + } + + index = std::move(newIndex); + + ns.updateSortedIdxCount(); + ns.markUpdated(IndexOptimization::Full); + + logFmt(LogInfo, "[{}]:{} Index '{}' successfully updated using a fast strategy", ns.name_, ns.serverId_, from.name_); + + return true; + } + return false; +} + +bool IndexFastUpdate::RelaxedEqual(const IndexDef& from, const IndexDef& to) noexcept { + if (!isLegalTypeTransform(from.Type(), to.Type())) { + return false; + } + auto comparisonIndex = from; + comparisonIndex.indexType_ = to.indexType_; + comparisonIndex.opts_.Dense(to.opts_.IsDense()); + comparisonIndex.opts_.SetCollateMode(to.opts_.GetCollateMode()); + comparisonIndex.opts_.SetCollateSortOrder(to.opts_.GetCollateSortOrder()); + return comparisonIndex.IsEqual(to, IndexComparison::WithConfig); +} + +bool IndexFastUpdate::isLegalTypeTransform(IndexType from, IndexType to) noexcept { + return std::find_if(kTransforms.begin(), kTransforms.end(), [from, to](const auto& set) { + return set.find(from) != set.end() && set.find(to) != set.end(); + }) != kTransforms.end(); +} +const std::vector> IndexFastUpdate::kTransforms = { + {IndexType::IndexIntBTree, IndexType::IndexIntHash, IndexType::IndexIntStore}, + {IndexType::IndexInt64BTree, IndexType::IndexInt64Hash, IndexType::IndexInt64Store}, + {IndexType::IndexStrBTree, IndexType::IndexStrHash, IndexType::IndexStrStore}, + {IndexType::IndexDoubleStore, IndexType::IndexDoubleBTree}, + {IndexType::IndexUuidStore, IndexType::IndexUuidHash}, +}; +} // namespace reindexer diff --git a/cpp_src/core/index/indexfastupdate.h b/cpp_src/core/index/indexfastupdate.h new file mode 100644 index 000000000..03aba3636 --- /dev/null +++ b/cpp_src/core/index/indexfastupdate.h @@ -0,0 +1,18 @@ +#include "core/type_consts.h" +#include "estl/fast_hash_set.h" + +namespace reindexer { + +class NamespaceImpl; +struct 
IndexDef; + +struct IndexFastUpdate { + static bool Try(NamespaceImpl& ns, const IndexDef& from, const IndexDef& to); + static bool RelaxedEqual(const IndexDef& from, const IndexDef& to) noexcept; + +private: + static bool isLegalTypeTransform(IndexType from, IndexType to) noexcept; + static const std::vector> kTransforms; +}; + +} // namespace reindexer \ No newline at end of file diff --git a/cpp_src/core/index/indexordered.cc b/cpp_src/core/index/indexordered.cc index ba30c2c73..7fd7172b1 100644 --- a/cpp_src/core/index/indexordered.cc +++ b/cpp_src/core/index/indexordered.cc @@ -1,4 +1,3 @@ - #include "indexordered.h" #include "core/nsselecter/btreeindexiterator.h" #include "core/rdxcontext.h" @@ -11,7 +10,7 @@ template Variant IndexOrdered::Upsert(const Variant& key, IdType id, bool& clearCache) { if (key.Type().Is()) { if (this->empty_ids_.Unsorted().Add(id, IdSet::Auto, this->sortedIdxCount_)) { - this->cache_.reset(); + this->cache_.ResetImpl(); clearCache = true; this->isBuilt_ = false; } @@ -29,7 +28,7 @@ Variant IndexOrdered::Upsert(const Variant& key, IdType id, bool& clearCache) if (keyIt->second.Unsorted().Add(id, this->opts_.IsPK() ? IdSet::Ordered : IdSet::Auto, this->sortedIdxCount_)) { this->isBuilt_ = false; - this->cache_.reset(); + this->cache_.ResetImpl(); clearCache = true; } this->tracker_.markUpdated(this->idx_map, keyIt); @@ -49,7 +48,7 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray& keys, CondType c // Get set of keys or single key if (!IsOrderedCondition(condition)) { if (opts.unbuiltSortOrders && keys.size() > 1) { - throw Error(errLogic, "Attemt to use btree index '%s' for sort optimization with unordered multivalue condition (%s)", + throw Error(errLogic, "Attempt to use btree index '%s' for sort optimization with unordered multivalued condition (%s)", this->Name(), CondTypeToStr(condition)); } return IndexUnordered::SelectKey(keys, condition, sortId, opts, ctx, rdxCtx); @@ -58,7 +57,10 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray& keys, CondType c SelectKeyResult res; auto startIt = this->idx_map.begin(); auto endIt = this->idx_map.end(); - auto key1 = *keys.begin(); + const auto& key1 = *keys.begin(); + if (key1.IsNullValue() || (keys.size() > 1 && keys[1].IsNullValue())) { + throw Error(errParams, "Can not use 'null'-value with operators '>','<','<=','>=' and 'RANGE()' (index: '%s')", this->Name()); + } switch (condition) { case CondLt: endIt = this->idx_map.lower_bound(static_cast(key1)); @@ -137,15 +139,15 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray& keys, CondType c T* i_map; SortType sortId; typename T::iterator startIt, endIt; - } ctx = {&this->idx_map, sortId, startIt, endIt}; + } selectorCtx = {&this->idx_map, sortId, startIt, endIt}; - auto selector = [&ctx, count](SelectKeyResult& res, size_t& idsCount) { + auto selector = [&selectorCtx, count](SelectKeyResult& res, size_t& idsCount) { idsCount = 0; res.reserve(count); - for (auto it = ctx.startIt; it != ctx.endIt; ++it) { - assertrx_dbg(it != ctx.i_map->end()); + for (auto it = selectorCtx.startIt; it != selectorCtx.endIt; ++it) { + assertrx_dbg(it != selectorCtx.i_map->end()); idsCount += it->second.Unsorted().Size(); - res.emplace_back(it->second, ctx.sortId); + res.emplace_back(it->second, selectorCtx.sortId); } res.deferedExplicitSort = false; return false; @@ -198,8 +200,7 @@ void IndexOrdered::MakeSortOrders(UpdateSortedContext& ctx) { } } } - // fill unexist indexs - + // fill non-existent indexs for (auto it = ids2Sorts.begin(); it != 
ids2Sorts.end(); ++it) { if (*it == SortIdUnfilled) { *it = idx; diff --git a/cpp_src/core/index/indexstore.cc b/cpp_src/core/index/indexstore.cc index 1fc21dcad..64e8797fa 100644 --- a/cpp_src/core/index/indexstore.cc +++ b/cpp_src/core/index/indexstore.cc @@ -23,7 +23,7 @@ void IndexStore::Delete(const Variant& key, IdType /*id*/, StringsHo return; } auto keyIt = str_map.find(std::string_view(key)); - // assertf(keyIt != str_map.end(), "Delete unexists key from index '%s' id=%d", name_, id); + // assertf(keyIt != str_map.end(), "Delete non-existent key from index '%s' id=%d", name_, id); if (keyIt == str_map.end()) { return; } @@ -127,11 +127,11 @@ SelectKeyResults IndexStore::SelectKey(const VariantArray& keys, CondType con const BaseFunctionCtx::Ptr& /*ctx*/, const RdxContext& rdxCtx) { const auto indexWard(rdxCtx.BeforeIndexWork()); if (condition == CondEmpty && !this->opts_.IsArray() && !this->opts_.IsSparse()) { - throw Error(errParams, "The 'is NULL' condition is suported only by 'sparse' or 'array' indexes"); + throw Error(errParams, "The 'is NULL' condition is supported only by 'sparse' or 'array' indexes"); } if (condition == CondAny && !this->opts_.IsArray() && !this->opts_.IsSparse() && !sopts.distinct) { - throw Error(errParams, "The 'NOT NULL' condition is suported only by 'sparse' or 'array' indexes"); + throw Error(errParams, "The 'NOT NULL' condition is supported only by 'sparse' or 'array' indexes"); } return ComparatorIndexed{ diff --git a/cpp_src/core/index/indextext/fastindextext.cc b/cpp_src/core/index/indextext/fastindextext.cc index ade4732b6..1ae446e89 100644 --- a/cpp_src/core/index/indextext/fastindextext.cc +++ b/cpp_src/core/index/indextext/fastindextext.cc @@ -60,9 +60,7 @@ Variant FastIndexText::Upsert(const Variant& key, IdType id, bool& clearCache } if (keyIt->second.Unsorted().Add(id, this->opts_.IsPK() ? IdSet::Ordered : IdSet::Auto, 0)) { this->isBuilt_ = false; - if (this->cache_ft_) { - this->cache_ft_->Clear(); - } + this->cache_ft_.Clear(); clearCache = true; } this->addMemStat(keyIt); @@ -88,8 +86,8 @@ void FastIndexText::Delete(const Variant& key, IdType id, StringsHolder& strH int delcnt = keyIt->second.Unsorted().Erase(id); (void)delcnt; // TODO: we have to implement removal of composite indexes (doesn't work right now) - assertf(this->opts_.IsArray() || this->Opts().IsSparse() || delcnt, "Delete unexists id from index '%s' id=%d,key=%s", this->name_, id, - key.As()); + assertf(this->opts_.IsArray() || this->Opts().IsSparse() || delcnt, "Delete non-existent id from index '%s' id=%d,key=%s", this->name_, + id, key.As()); if (keyIt->second.Unsorted().IsEmpty()) { this->tracker_.markDeleted(keyIt); @@ -107,9 +105,7 @@ void FastIndexText::Delete(const Variant& key, IdType id, StringsHolder& strH } else { this->addMemStat(keyIt); } - if (this->cache_ft_) { - this->cache_ft_->Clear(); - } + this->cache_ft_.Clear(); clearCache = true; } @@ -119,12 +115,12 @@ IndexMemStat FastIndexText::GetMemStat(const RdxContext& ctx) { contexted_shared_lock lck(this->mtx_, ctx); ret.fulltextSize = this->holder_->GetMemStat(); - ret.idsetCache = this->cache_ft_ ? 
this->cache_ft_->GetMemStat() : LRUCacheMemStat(); + ret.idsetCache = this->cache_ft_.GetMemStat(); return ret; } template template -typename MergeType::iterator FastIndexText::unstableRemoveIf(MergeType& md, int minRelevancy, double scalingFactor, size_t& releventDocs, +typename MergeType::iterator FastIndexText::unstableRemoveIf(MergeType& md, int minRelevancy, double scalingFactor, size_t& relevantDocs, int& cnt) { if (md.empty()) { return md.begin(); @@ -144,7 +140,7 @@ typename MergeType::iterator FastIndexText::unstableRemoveIf(MergeType& md, i auto& vdoc = holder.vdocs_[first->id]; assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); cnt += vdoc.keyEntry->Sorted(0).size(); - ++releventDocs; + ++relevantDocs; ++first; } @@ -161,7 +157,7 @@ typename MergeType::iterator FastIndexText::unstableRemoveIf(MergeType& md, i auto& vdoc = holder.vdocs_[last->id]; assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); cnt += vdoc.keyEntry->Sorted(0).size(); - ++releventDocs; + ++relevantDocs; *first = std::move(*last); ++first; @@ -183,10 +179,10 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS const double scalingFactor = mergeData.maxRank > 255 ? 255.0 / mergeData.maxRank : 1.0; const int minRelevancy = getConfig()->minRelevancy * 100 * scalingFactor; - size_t releventDocs = 0; + size_t relevantDocs = 0; switch (ftSortType) { case FtSortType::RankAndID: { - auto itF = unstableRemoveIf(mergeData, minRelevancy, scalingFactor, releventDocs, cnt); + auto itF = unstableRemoveIf(mergeData, minRelevancy, scalingFactor, relevantDocs, cnt); mergeData.erase(itF, mergeData.end()); break; } @@ -199,7 +195,7 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS } assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); cnt += vdoc.keyEntry->Sorted(0).size(); - ++releventDocs; + ++relevantDocs; } break; } @@ -211,13 +207,13 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS mergedIds->reserve(cnt); if constexpr (std::is_same_v) { if (useExternSt == FtUseExternStatuses::No) { - appendMergedIds(mergeData, releventDocs, + appendMergedIds(mergeData, relevantDocs, [&fctx, &mergedIds](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { fctx.Add(ebegin, eend, vid.proc); mergedIds->Append(ebegin, eend, IdSet::Unordered); }); } else { - appendMergedIds(mergeData, releventDocs, + appendMergedIds(mergeData, relevantDocs, [&fctx, &mergedIds, &statuses](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { fctx.Add(ebegin, eend, vid.proc, statuses.rowIds); mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); @@ -225,7 +221,7 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS } } else if constexpr (std::is_same_v, MergeType> || std::is_same_v, MergeType>) { if (useExternSt == FtUseExternStatuses::No) { - appendMergedIds(mergeData, releventDocs, + appendMergedIds(mergeData, relevantDocs, [&fctx, &mergedIds, &mergeData](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { fctx.Add(ebegin, eend, vid.proc, std::move(mergeData.vectorAreas[vid.areaIndex])); mergedIds->Append(ebegin, eend, IdSet::Unordered); @@ -233,7 +229,7 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS } else { appendMergedIds( - mergeData, releventDocs, + mergeData, relevantDocs, [&fctx, &mergedIds, &statuses, &mergeData](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { fctx.Add(ebegin, eend, 
vid.proc, statuses.rowIds, std::move(mergeData.vectorAreas[vid.areaIndex])); mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); @@ -480,9 +476,9 @@ void FastIndexText::buildVdocs(Container& data) { template template -RX_ALWAYS_INLINE void FastIndexText::appendMergedIds(MergeType& mergeData, size_t releventDocs, F&& appender) { +RX_ALWAYS_INLINE void FastIndexText::appendMergedIds(MergeType& mergeData, size_t relevantDocs, F&& appender) { auto& holder = *this->holder_; - for (size_t i = 0; i < releventDocs; i++) { + for (size_t i = 0; i < relevantDocs; i++) { auto& vid = mergeData[i]; auto& vdoc = holder.vdocs_[vid.id]; appender(vdoc.keyEntry->Sorted(0).begin(), vdoc.keyEntry->Sorted(0).end(), vid); @@ -525,17 +521,13 @@ void FastIndexText::SetOpts(const IndexOpts& opts) { this->holder_->Clear(); } this->holder_->status_ = FullRebuild; - if (this->cache_ft_) { - this->cache_ft_->Clear(); - } + this->cache_ft_.Clear(); for (auto& idx : this->idx_map) { idx.second.SetVDocID(FtKeyEntryData::ndoc); } } else { logPrintf(LogInfo, "FulltextIndex config changed, cache cleared"); - if (this->cache_ft_) { - this->cache_ft_->Clear(); - } + this->cache_ft_.Clear(); } this->holder_->synonyms_->SetConfig(&newCfg); } diff --git a/cpp_src/core/index/indextext/fastindextext.h b/cpp_src/core/index/indextext/fastindextext.h index 3cb5127a7..4cac98953 100644 --- a/cpp_src/core/index/indextext/fastindextext.h +++ b/cpp_src/core/index/indextext/fastindextext.h @@ -66,9 +66,9 @@ class FastIndexText : public IndexText { template void buildVdocs(Data& data); template - void appendMergedIds(MergeType& merged, size_t releventDocs, F&& appender); + void appendMergedIds(MergeType& merged, size_t relevantDocs, F&& appender); template - typename MergeType::iterator unstableRemoveIf(MergeType& md, int minRelevancy, double scalingFactor, size_t& releventDocs, int& cnt); + typename MergeType::iterator unstableRemoveIf(MergeType& md, int minRelevancy, double scalingFactor, size_t& relevantDocs, int& cnt); std::unique_ptr holder_; }; diff --git a/cpp_src/core/index/indextext/fieldsgetter.h b/cpp_src/core/index/indextext/fieldsgetter.h index bce1e0579..303392a15 100644 --- a/cpp_src/core/index/indextext/fieldsgetter.h +++ b/cpp_src/core/index/indextext/fieldsgetter.h @@ -19,7 +19,7 @@ class FieldsGetter { VariantArray krefs; - // Specific implemetation for composite index + // Specific implementation for composite index RVector, 8> getDocFields(const PayloadValue& doc, std::vector>& strsBuf) { ConstPayload pl(plt_, doc); diff --git a/cpp_src/core/index/indextext/indextext.cc b/cpp_src/core/index/indextext/indextext.cc index e1e5db9c0..f7fc46af3 100644 --- a/cpp_src/core/index/indextext/indextext.cc +++ b/cpp_src/core/index/indextext/indextext.cc @@ -10,13 +10,14 @@ namespace reindexer { template IndexText::IndexText(const IndexText& other) : IndexUnordered(other), - cache_ft_(std::make_unique(other.cacheMaxSize_, other.hitsToCache_)), + cache_ft_(other.cacheMaxSize_, other.hitsToCache_), cacheMaxSize_(other.cacheMaxSize_), hitsToCache_(other.hitsToCache_) { + cache_ft_.CopyInternalPerfStatsFrom(other.cache_ft_); initSearchers(); } -// Generic implemetation for string index +// Generic implementation for string index template void IndexText::initSearchers() { size_t jsonPathIdx = 0; @@ -36,7 +37,7 @@ void IndexText::initSearchers() { throw Error(errParams, "Composite fulltext index '%s' contains duplicated fields", this->name_); } if rx_unlikely (ftFields_.size() > kMaxFtCompositeFields) { - throw 
Error(errParams, "Unable to create composite fulltext '%s' index with %d fields. Fileds count limit is %d", this->name_, + throw Error(errParams, "Unable to create composite fulltext '%s' index with %d fields. Fields count limit is %d", this->name_, ftFields_.size(), kMaxFtCompositeFields); } } @@ -64,13 +65,26 @@ void IndexText::ReconfigureCache(const NamespaceCacheConfigData& cacheCfg) { if (cacheMaxSize_ != cacheCfg.ftIdxCacheSize || hitsToCache_ != cacheCfg.ftIdxHitsToCache) { cacheMaxSize_ = cacheCfg.ftIdxCacheSize; hitsToCache_ = cacheCfg.ftIdxHitsToCache; - if (cache_ft_) { - cache_ft_ = std::make_unique(cacheMaxSize_, hitsToCache_); + if (cache_ft_.IsActive()) { + cache_ft_.Reinitialize(cacheMaxSize_, hitsToCache_); } } Base::ReconfigureCache(cacheCfg); } +template +IndexPerfStat IndexText::GetIndexPerfStat() { + auto stats = Base::GetIndexPerfStat(); + stats.cache = cache_ft_.GetPerfStat(); + return stats; +} + +template +void IndexText::ResetIndexPerfStat() { + Base::ResetIndexPerfStat(); + cache_ft_.ResetPerfStat(); +} + template void IndexText::build(const RdxContext& rdxCtx) { smart_lock lck(mtx_, rdxCtx); @@ -84,7 +98,7 @@ void IndexText::build(const RdxContext& rdxCtx) { } } -// Generic implemetation for string index +// Generic implementation for string index template SelectKeyResults IndexText::SelectKey(const VariantArray& keys, CondType condition, SortType, Index::SelectOpts opts, const BaseFunctionCtx::Ptr& ctx, const RdxContext& rdxCtx) { @@ -96,9 +110,9 @@ SelectKeyResults IndexText::SelectKey(const VariantArray& keys, CondType cond auto mergeStatuses = this->GetFtMergeStatuses(rdxCtx); bool needPutCache = false; IdSetCacheKey ckey{keys, condition, 0}; - auto cache_ft = cache_ft_->Get(ckey); + auto cache_ft = cache_ft_.Get(ckey); if (cache_ft.valid) { - if (!cache_ft.val.ids) { + if (!cache_ft.val.IsInitialized()) { needPutCache = true; } else if (ctx->type == BaseFunctionCtx::CtxType::kFtArea && (!cache_ft.val.ctx || !(cache_ft.val.ctx->type == BaseFunctionCtx::CtxType::kFtArea))) { @@ -170,7 +184,7 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray& keys, const std:: } } if (need_put && mergedIds->size()) { - cache_ft_->Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(ftCtxDataBase)}); + cache_ft_.Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(ftCtxDataBase)}); } res.emplace_back(std::move(mergedIds)); diff --git a/cpp_src/core/index/indextext/indextext.h b/cpp_src/core/index/indextext/indextext.h index 3a0af5a12..cf20caaf7 100644 --- a/cpp_src/core/index/indextext/indextext.h +++ b/cpp_src/core/index/indextext/indextext.h @@ -1,6 +1,5 @@ #pragma once -#include #include "core/ft/config/baseftconfig.h" #include "core/ft/filters/itokenfilter.h" #include "core/ft/ft_fast/dataholder.h" @@ -21,7 +20,7 @@ class IndexText : public IndexUnordered { IndexText(const IndexText& other); IndexText(const IndexDef& idef, PayloadType&& payloadType, FieldsSet&& fields, const NamespaceCacheConfigData& cacheCfg) : IndexUnordered(idef, std::move(payloadType), std::move(fields), cacheCfg), - cache_ft_(std::make_unique(cacheCfg.ftIdxCacheSize, cacheCfg.ftIdxHitsToCache)), + cache_ft_(cacheCfg.ftIdxCacheSize, cacheCfg.ftIdxHitsToCache), cacheMaxSize_(cacheCfg.ftIdxCacheSize), hitsToCache_(cacheCfg.ftIdxHitsToCache) { this->selectKeyType_ = KeyValueType::String{}; @@ -42,25 +41,25 @@ class IndexText : public IndexUnordered { // Rebuild will be done on first select } void CommitFulltext() override final { - cache_ft_ = std::make_unique(cacheMaxSize_, 
hitsToCache_); + cache_ft_.Reinitialize(cacheMaxSize_, hitsToCache_); commitFulltextImpl(); this->isBuilt_ = true; } void SetSortedIdxCount(int) override final {} void DestroyCache() override { Base::DestroyCache(); - cache_ft_.reset(); + cache_ft_.ResetImpl(); } void ClearCache() override { Base::ClearCache(); - if (cache_ft_) { - cache_ft_->Clear(); - } + cache_ft_.Clear(); } void ClearCache(const std::bitset& s) override { Base::ClearCache(s); } void MarkBuilt() noexcept override { assertrx(0); } bool IsFulltext() const noexcept override final { return true; } void ReconfigureCache(const NamespaceCacheConfigData& cacheCfg) override final; + IndexPerfStat GetIndexPerfStat() override final; + void ResetIndexPerfStat() override final; protected: using Mutex = MarkedMutex; @@ -76,7 +75,7 @@ class IndexText : public IndexUnordered { void initSearchers(); FieldsGetter Getter(); - std::unique_ptr cache_ft_; + FtIdSetCache cache_ft_; size_t cacheMaxSize_; uint32_t hitsToCache_; diff --git a/cpp_src/core/index/indexunordered.cc b/cpp_src/core/index/indexunordered.cc index 18af49bea..640871f75 100644 --- a/cpp_src/core/index/indexunordered.cc +++ b/cpp_src/core/index/indexunordered.cc @@ -120,11 +120,12 @@ template IndexUnordered::IndexUnordered(const IndexUnordered& other) : Base(other), idx_map(other.idx_map), - cache_(nullptr), cacheMaxSize_(other.cacheMaxSize_), hitsToCache_(other.hitsToCache_), empty_ids_(other.empty_ids_), - tracker_(other.tracker_) {} + tracker_(other.tracker_) { + cache_.CopyInternalPerfStatsFrom(other.cache_); +} template size_t heap_size(const key_type& /*kt*/) { @@ -175,7 +176,7 @@ Variant IndexUnordered::Upsert(const Variant& key, IdType id, bool& clearCach // reset cache if (key.Type().Is()) { // TODO maybe error or default value if the index is not sparse if (this->empty_ids_.Unsorted().Add(id, IdSet::Auto, this->sortedIdxCount_)) { - cache_.reset(); + cache_.ResetImpl(); clearCache = true; this->isBuilt_ = false; } @@ -191,7 +192,7 @@ Variant IndexUnordered::Upsert(const Variant& key, IdType id, bool& clearCach } if (keyIt->second.Unsorted().Add(id, this->opts_.IsPK() ? 
IdSet::Ordered : IdSet::Auto, this->sortedIdxCount_)) { - cache_.reset(); + cache_.ResetImpl(); clearCache = true; this->isBuilt_ = false; } @@ -207,7 +208,7 @@ void IndexUnordered::Delete(const Variant& key, IdType id, StringsHolder& str if (key.Type().Is()) { this->empty_ids_.Unsorted().Erase(id); // ignore result this->isBuilt_ = false; - cache_.reset(); + cache_.ResetImpl(); clearCache = true; return; } @@ -218,7 +219,7 @@ void IndexUnordered::Delete(const Variant& key, IdType id, StringsHolder& str delMemStat(keyIt); delcnt = keyIt->second.Unsorted().Erase(id); this->isBuilt_ = false; - cache_.reset(); + cache_.ResetImpl(); clearCache = true; } assertf(delcnt || this->opts_.IsArray() || this->Opts().IsSparse(), "Delete non-existing id from index '%s' id=%d,key=%s (%s)", @@ -254,23 +255,23 @@ template bool IndexUnordered::tryIdsetCache(const VariantArray& keys, CondType condition, SortType sortId, const std::function& selector, SelectKeyResult& res) { size_t idsCount; - if (!cache_ || IsComposite(this->Type())) { + if (!cache_.IsActive() || IsComposite(this->Type())) { selector(res, idsCount); return false; } bool scanWin = false; IdSetCacheKey ckey{keys, condition, sortId}; - auto cached = cache_->Get(ckey); + auto cached = cache_.Get(ckey); if (cached.valid) { - if (!cached.val.ids) { + if (!cached.val.IsInitialized()) { scanWin = selector(res, idsCount); if (!scanWin) { // Do not use generic sort, when expecting duplicates in the id sets const bool useGenericSort = res.deferedExplicitSort && !(this->opts_.IsArray() && (condition == CondEq || condition == CondSet)); - cache_->Put(ckey, - res.MergeIdsets(SelectKeyResult::MergeOptions{.genericSort = useGenericSort, .shrinkResult = true}, idsCount)); + cache_.Put(ckey, + res.MergeIdsets(SelectKeyResult::MergeOptions{.genericSort = useGenericSort, .shrinkResult = true}, idsCount)); } } else { res.emplace_back(std::move(cached.val.ids)); @@ -294,20 +295,29 @@ SelectKeyResults IndexUnordered::SelectKey(const VariantArray& keys, CondType switch (condition) { case CondEmpty: if (!this->opts_.IsArray() && !this->opts_.IsSparse()) { - throw Error(errParams, "The 'is NULL' condition is suported only by 'sparse' or 'array' indexes"); + throw Error(errParams, "The 'is NULL' condition is supported only by 'sparse' or 'array' indexes"); } res.emplace_back(this->empty_ids_, sortId); break; // Get set of keys or single key case CondEq: case CondSet: { + for (const auto& key : keys) { + if (key.IsNullValue()) { + throw Error(errParams, + "Can not use 'null'-value with operators '=' and 'IN()' (index: '%s'). 
Use 'IS NULL'/'IS NOT NULL' instead", + this->Name()); + } + } + struct { T* i_map; const VariantArray& keys; + std::string_view indexName; SortType sortId; Index::SelectOpts opts; bool isSparse; - } ctx = {&this->idx_map, keys, sortId, opts, this->opts_.IsSparse()}; + } ctx = {&this->idx_map, keys, this->Name(), sortId, opts, this->opts_.IsSparse()}; bool selectorWasSkipped = false; // should return true, if fallback to comparator required auto selector = [&ctx, &selectorWasSkipped](SelectKeyResult& res, size_t& idsCount) -> bool { @@ -357,7 +367,10 @@ SelectKeyResults IndexUnordered::SelectKey(const VariantArray& keys, CondType case CondAllSet: { // Get set of key, where all request keys are present SelectKeyResults rslts; - for (auto key : keys) { + for (const auto& key : keys) { + if (key.IsNullValue()) { + throw Error(errParams, "Can not use 'null'-value with operator 'allset' (index: '%s')", this->Name()); + } SelectKeyResult res1; auto keyIt = this->idx_map.find(static_cast(key.convert(this->KeyType()))); if (keyIt == this->idx_map.end()) { @@ -398,8 +411,8 @@ template void IndexUnordered::Commit() { this->empty_ids_.Unsorted().Commit(); - if (!cache_) { - cache_.reset(new IdSetCache(cacheMaxSize_, hitsToCache_)); + if (!cache_.IsActive()) { + cache_.Reinitialize(cacheMaxSize_, hitsToCache_); } if (!tracker_.isUpdated()) { @@ -442,17 +455,28 @@ void IndexUnordered::SetSortedIdxCount(int sortedIdxCount) { } } +template +IndexPerfStat IndexUnordered::GetIndexPerfStat() { + auto stats = Base::GetIndexPerfStat(); + stats.cache = cache_.GetPerfStat(); + return stats; +} + +template +void IndexUnordered::ResetIndexPerfStat() { + Base::ResetIndexPerfStat(); + cache_.ResetPerfStat(); +} + template IndexMemStat IndexUnordered::GetMemStat(const RdxContext& ctx) { IndexMemStat ret = Base::GetMemStat(ctx); ret.uniqKeysCount = idx_map.size(); - if (cache_) { - ret.idsetCache = cache_->GetMemStat(); - } + ret.idsetCache = cache_.GetMemStat(); ret.trackedUpdatesCount = tracker_.updatesSize(); ret.trackedUpdatesBuckets = tracker_.updatesBuckets(); ret.trackedUpdatesSize = tracker_.allocated(); - ret.trackedUpdatesOveflow = tracker_.overflow(); + ret.trackedUpdatesOverflow = tracker_.overflow(); return ret; } @@ -478,11 +502,7 @@ void IndexUnordered::dump(S& os, std::string_view step, std::string_view offs os << '\n' << newOffset; } os << "},\n" << newOffset << "cache: "; - if (cache_) { - cache_->Dump(os, step, newOffset); - } else { - os << "empty"; - } + cache_.Dump(os, step, newOffset); os << ",\n" << newOffset << "empty_ids: "; empty_ids_.Dump(os, step, newOffset); os << "\n" << offset << '}'; @@ -501,8 +521,8 @@ void IndexUnordered::ReconfigureCache(const NamespaceCacheConfigData& cacheCf if (cacheMaxSize_ != cacheCfg.idxIdsetCacheSize || hitsToCache_ != cacheCfg.idxIdsetHitsToCache) { cacheMaxSize_ = cacheCfg.idxIdsetCacheSize; hitsToCache_ = cacheCfg.idxIdsetHitsToCache; - if (cache_) { - cache_.reset(new IdSetCache(cacheMaxSize_, hitsToCache_)); + if (cache_.IsActive()) { + cache_.Reinitialize(cacheMaxSize_, hitsToCache_); } } } diff --git a/cpp_src/core/index/indexunordered.h b/cpp_src/core/index/indexunordered.h index 418931c9e..f8dbfae26 100644 --- a/cpp_src/core/index/indexunordered.h +++ b/cpp_src/core/index/indexunordered.h @@ -5,7 +5,6 @@ #include "core/idsetcache.h" #include "core/index/indexstore.h" #include "core/index/updatetracker.h" -#include "estl/atomic_unique_ptr.h" namespace reindexer { @@ -29,8 +28,8 @@ class IndexUnordered : public IndexStore> { IndexUnordered(const 
IndexDef& idef, PayloadType&& payloadType, FieldsSet&& fields, const NamespaceCacheConfigData& cacheCfg); IndexUnordered(const IndexUnordered& other); - Variant Upsert(const Variant& key, IdType id, bool& chearCache) override; - void Delete(const Variant& key, IdType id, StringsHolder&, bool& chearCache) override; + Variant Upsert(const Variant& key, IdType id, bool& clearCache) override; + void Delete(const Variant& key, IdType id, StringsHolder&, bool& clearCache) override; SelectKeyResults SelectKey(const VariantArray& keys, CondType cond, SortType stype, Index::SelectOpts opts, const BaseFunctionCtx::Ptr& ctx, const RdxContext&) override; void Commit() override; @@ -39,18 +38,12 @@ class IndexUnordered : public IndexStore> { IndexMemStat GetMemStat(const RdxContext&) override; size_t Size() const noexcept override final { return idx_map.size(); } void SetSortedIdxCount(int sortedIdxCount) override; + IndexPerfStat GetIndexPerfStat() override; + void ResetIndexPerfStat() override; bool HoldsStrings() const noexcept override; - void DestroyCache() override { cache_.reset(); } - void ClearCache() override { - if (cache_) { - cache_->Clear(); - } - } - void ClearCache(const std::bitset& s) override { - if (cache_) { - cache_->ClearSorted(s); - } - } + void DestroyCache() override { cache_.ResetImpl(); } + void ClearCache() override { cache_.Clear(); } + void ClearCache(const std::bitset& s) override { cache_.ClearSorted(s); } void Dump(std::ostream& os, std::string_view step = " ", std::string_view offset = "") const override { dump(os, step, offset); } void EnableUpdatesCountingMode(bool val) noexcept override { tracker_.enableCountingMode(val); } @@ -66,7 +59,7 @@ class IndexUnordered : public IndexStore> { // Index map T idx_map; // Merged idsets cache - atomic_unique_ptr cache_; + IdSetCache cache_; size_t cacheMaxSize_; uint32_t hitsToCache_; // Empty ids diff --git a/cpp_src/core/index/rtree/indexrtree.cc b/cpp_src/core/index/rtree/indexrtree.cc index fe6628dbf..8bf61d162 100644 --- a/cpp_src/core/index/rtree/indexrtree.cc +++ b/cpp_src/core/index/rtree/indexrtree.cc @@ -82,7 +82,7 @@ void IndexRTree::Upsert(VariantArra if (keyIt->second.Unsorted().Add(id, this->opts_.IsPK() ? 
IdSet::Ordered : IdSet::Auto, this->sortedIdxCount_)) { this->isBuilt_ = false; // reset cache - this->cache_.reset(); + this->cache_.ResetImpl(); clearCache = true; } this->tracker_.markUpdated(this->idx_map, keyIt); @@ -105,7 +105,7 @@ void IndexRTree::Delete(const Varia if (keyIt == this->idx_map.end()) { return; } - this->cache_.reset(); + this->cache_.ResetImpl(); clearCache = true; this->isBuilt_ = false; @@ -113,7 +113,7 @@ void IndexRTree::Delete(const Varia delcnt = keyIt->second.Unsorted().Erase(id); (void)delcnt; // TODO: we have to implement removal of composite indexes (doesn't work right now) - assertf(this->Opts().IsSparse() || delcnt, "Delete unexists id from index '%s' id=%d,key=%s (%s)", this->name_, id, + assertf(this->Opts().IsSparse() || delcnt, "Delete non-existent id from index '%s' id=%d,key=%s (%s)", this->name_, id, Variant(keys).template As(this->payloadType_, this->Fields()), Variant(keyIt->first).As(this->payloadType_, this->Fields())); diff --git a/cpp_src/core/indexopts.cc b/cpp_src/core/indexopts.cc index b103a6b8c..d626c8312 100644 --- a/cpp_src/core/indexopts.cc +++ b/cpp_src/core/indexopts.cc @@ -59,6 +59,11 @@ IndexOpts& IndexOpts::SetCollateMode(CollateMode mode) & noexcept { return *this; } +IndexOpts& IndexOpts::SetCollateSortOrder(reindexer::SortingPrioritiesTable&& sortOrder) & noexcept { + collateOpts_.sortOrderTable = std::move(sortOrder); + return *this; +} + template void IndexOpts::Dump(T& os) const { os << '{'; diff --git a/cpp_src/core/indexopts.h b/cpp_src/core/indexopts.h index 90446e8bb..420581820 100644 --- a/cpp_src/core/indexopts.h +++ b/cpp_src/core/indexopts.h @@ -43,6 +43,10 @@ struct IndexOpts { [[nodiscard]] IndexOpts&& RTreeType(RTreeIndexType type) && noexcept { return std::move(RTreeType(type)); } IndexOpts& SetCollateMode(CollateMode mode) & noexcept; [[nodiscard]] IndexOpts&& SetCollateMode(CollateMode mode) && noexcept { return std::move(SetCollateMode(mode)); } + IndexOpts& SetCollateSortOrder(reindexer::SortingPrioritiesTable&& sortOrder) & noexcept; + [[nodiscard]] IndexOpts&& SetCollateSortOrder(reindexer::SortingPrioritiesTable&& sortOrder) && noexcept { + return std::move(SetCollateSortOrder(std::move(sortOrder))); + } template >* = nullptr> IndexOpts& SetConfig(Str&& conf) & { config = std::forward(conf); @@ -52,7 +56,8 @@ struct IndexOpts { [[nodiscard]] IndexOpts&& SetConfig(Str&& config) && { return std::move(SetConfig(std::forward(config))); } - CollateMode GetCollateMode() const noexcept { return static_cast(collateOpts_.mode); } + CollateMode GetCollateMode() const noexcept { return collateOpts_.mode; } + reindexer::SortingPrioritiesTable GetCollateSortOrder() const noexcept { return collateOpts_.sortOrderTable; } bool IsEqual(const IndexOpts& other, IndexComparison cmpType) const noexcept; diff --git a/cpp_src/core/joincache.h b/cpp_src/core/joincache.h index 63314f8d0..446a3e3d3 100644 --- a/cpp_src/core/joincache.h +++ b/cpp_src/core/joincache.h @@ -48,14 +48,16 @@ struct JoinPreResult; struct JoinCacheVal { JoinCacheVal() = default; - size_t Size() const noexcept { return ids_ ? (sizeof(*ids_.get()) + ids_->heap_size()) : 0; } - IdSet::Ptr ids_; + size_t Size() const noexcept { return ids ? 
(sizeof(*ids.get()) + ids->heap_size()) : 0; } + bool IsInitialized() const noexcept { return inited; } + + IdSet::Ptr ids; bool matchedAtLeastOnce = false; bool inited = false; std::shared_ptr preResult; }; -using JoinCache = LRUCache; +using JoinCache = LRUCache, LRUWithAtomicPtr::No>; struct JoinCacheRes { bool haveData = false; diff --git a/cpp_src/core/lrucache.cc b/cpp_src/core/lrucache.cc index 9170122f7..922f0105c 100644 --- a/cpp_src/core/lrucache.cc +++ b/cpp_src/core/lrucache.cc @@ -9,8 +9,12 @@ namespace reindexer { constexpr uint32_t kMaxHitCountToCache = 1024; -template -typename LRUCache::Iterator LRUCache::Get(const K& key) { +template +LRUCacheImpl::LRUCacheImpl(size_t sizeLimit, uint32_t hitCount) noexcept + : totalCacheSize_(0), cacheSizeLimit_(sizeLimit), hitCountToCache_(hitCount) {} + +template +typename LRUCacheImpl::Iterator LRUCacheImpl::Get(const K& key) { if rx_unlikely (cacheSizeLimit_ == 0) { return Iterator(); } @@ -36,8 +40,8 @@ typename LRUCache::Iterator LRUCache::Get( return Iterator(true, it->second.val); } -template -void LRUCache::Put(const K& key, V&& v) { +template +void LRUCacheImpl::Put(const K& key, V&& v) { if rx_unlikely (cacheSizeLimit_ == 0) { return; } @@ -65,8 +69,8 @@ void LRUCache::Put(const K& key, V&& v) { } } -template -RX_ALWAYS_INLINE bool LRUCache::eraseLRU() { +template +RX_ALWAYS_INLINE bool LRUCacheImpl::eraseLRU() { typename LRUList::iterator it = lru_.begin(); while (totalCacheSize_ > cacheSizeLimit_) { @@ -98,11 +102,11 @@ RX_ALWAYS_INLINE bool LRUCache::eraseLRU() { return !lru_.empty(); } -template -bool LRUCache::clearAll() { +template +bool LRUCacheImpl::clearAll() { const bool res = !items_.empty(); totalCacheSize_ = 0; - std::unordered_map().swap(items_); + std::unordered_map().swap(items_); LRUList().swap(lru_); getCount_ = 0; putCount_ = 0; @@ -110,8 +114,8 @@ bool LRUCache::clearAll() { return res; } -template -LRUCacheMemStat LRUCache::GetMemStat() { +template +LRUCacheMemStat LRUCacheImpl::GetMemStat() const { LRUCacheMemStat ret; std::lock_guard lk(lock_); @@ -125,9 +129,38 @@ LRUCacheMemStat LRUCache::GetMemStat() { return ret; } -template class LRUCache; -template class LRUCache; -template class LRUCache; -template class LRUCache; + +template +void LRUCacheImpl::Clear() { + std::lock_guard lk(lock_); + clearAll(); +} + +template +void LRUCacheImpl::Clear(std::function cond) { + std::lock_guard lock(lock_); + for (auto it = lru_.begin(); it != lru_.end();) { + if (!cond(**it)) { + ++it; + continue; + } + auto mIt = items_.find(**it); + assertrx(mIt != items_.end()); + const size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); + if rx_unlikely (oldSize > totalCacheSize_) { + clearAll(); + return; + } + totalCacheSize_ -= oldSize; + items_.erase(mIt); + it = lru_.erase(it); + ++eraseCount_; + } +} + +template class LRUCacheImpl; +template class LRUCacheImpl; +template class LRUCacheImpl; +template class LRUCacheImpl; } // namespace reindexer diff --git a/cpp_src/core/lrucache.h b/cpp_src/core/lrucache.h index 3acfb7dca..03f946284 100644 --- a/cpp_src/core/lrucache.h +++ b/cpp_src/core/lrucache.h @@ -1,21 +1,22 @@ #pragma once -#include #include #include #include #include "dbconfig.h" +#include "estl/atomic_unique_ptr.h" #include "namespace/namespacestat.h" namespace reindexer { constexpr size_t kElemSizeOverhead = 256; -template -class LRUCache { +template +class LRUCacheImpl { public: using Key = K; - LRUCache(size_t sizeLimit, uint32_t hitCount) noexcept : 
totalCacheSize_(0), cacheSizeLimit_(sizeLimit), hitCountToCache_(hitCount) {} + using Value = V; + LRUCacheImpl(size_t sizeLimit, uint32_t hitCount) noexcept; struct Iterator { Iterator(bool k = false, const V& v = V()) : valid(k), val(v) {} Iterator(const Iterator& other) = delete; @@ -32,17 +33,13 @@ class LRUCache { bool valid; V val; }; - // Get cached val. Create new entry in cache if unexists + // Get cached val. Create new entry in cache if does not exist Iterator Get(const K& k); // Put cached val void Put(const K& k, V&& v); - - LRUCacheMemStat GetMemStat(); - - bool Clear() { - std::lock_guard lk(lock_); - return clearAll(); - } + LRUCacheMemStat GetMemStat() const; + void Clear(); + void Clear(std::function cond); template void Dump(T& os, std::string_view step, std::string_view offset) const { @@ -78,29 +75,7 @@ class LRUCache { os << "]\n" << offset << '}'; } - template - void Clear(const F& cond) { - std::lock_guard lock(lock_); - for (auto it = lru_.begin(); it != lru_.end();) { - if (!cond(**it)) { - ++it; - continue; - } - auto mIt = items_.find(**it); - assertrx(mIt != items_.end()); - const size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); - if rx_unlikely (oldSize > totalCacheSize_) { - clearAll(); - return; - } - totalCacheSize_ -= oldSize; - items_.erase(mIt); - it = lru_.erase(it); - ++eraseCount_; - } - } - -protected: +private: typedef std::list LRUList; struct Entry { V val; @@ -115,7 +90,7 @@ class LRUCache { bool eraseLRU(); bool clearAll(); - std::unordered_map items_; + std::unordered_map items_; LRUList lru_; mutable std::mutex lock_; size_t totalCacheSize_; @@ -125,4 +100,113 @@ class LRUCache { uint64_t getCount_ = 0, putCount_ = 0, eraseCount_ = 0; }; +enum class LRUWithAtomicPtr : bool { Yes, No }; + +template +class LRUCache { + using CachePtrT = std::conditional_t, std::unique_ptr>; + +public: + using Iterator = typename CacheT::Iterator; + + LRUCache() = default; + template + LRUCache(Args&&... args) noexcept : ptr_(makePtr(std::forward(args)...)) { + (void)alignment1_; + (void)alignment2_; +#if defined(__x86_64__) + static_assert(sizeof(LRUCache) == 128, "Unexpected size. Check alignment"); +#endif // defined(__x86_64__) + } + virtual ~LRUCache() = default; + + typename CacheT::Iterator Get(const typename CacheT::Key& k) const { + typename CacheT::Iterator it; + if (ptr_) { + it = ptr_->Get(k); + if (it.valid && it.val.IsInitialized()) { + stats_.hits.fetch_add(1, std::memory_order_relaxed); + } else { + stats_.misses.fetch_add(1, std::memory_order_relaxed); + } + } + return it; + } + void Put(const typename CacheT::Key& k, typename CacheT::Value&& v) const { + if (ptr_) { + ptr_->Put(k, std::move(v)); + } + } + LRUCacheMemStat GetMemStat() const { return ptr_ ? ptr_->GetMemStat() : LRUCacheMemStat(); } + LRUCachePerfStat GetPerfStat() const noexcept { + auto stats = stats_.GetPerfStat(); + stats.state = ptr_ ? LRUCachePerfStat::State::Active : LRUCachePerfStat::State::Inactive; + return stats; + } + void ResetPerfStat() noexcept { stats_.Reset(); } + void Clear() { + if (ptr_) { + ptr_->Clear(); + } + } + template + void Clear(const F& cond) { + if (ptr_) { + ptr_->Clear(cond); + } + } + + template + void Dump(T& os, std::string_view step, std::string_view offset) const { + if (ptr_) { + ptr_->Dump(os, step, offset); + } else { + os << ""; + } + } + void ResetImpl() noexcept { ptr_.reset(); } + template + void Reinitialize(Args&&... 
args) { + ptr_ = makePtr(std::forward(args)...); + } + bool IsActive() const noexcept { return ptr_.get(); } + void CopyInternalPerfStatsFrom(const LRUCache& o) noexcept { stats_ = o.stats_; } + +private: + template + CachePtrT makePtr(Args&&... args) { + return CachePtrT(new CacheT(std::forward(args)...)); + } + + class Stats { + public: + Stats(uint64_t _hits = 0, uint64_t _misses = 0) noexcept : hits{_hits}, misses{_misses} {} + Stats(const Stats& o) : hits(o.hits.load(std::memory_order_relaxed)), misses(o.misses.load(std::memory_order_relaxed)) {} + LRUCachePerfStat GetPerfStat() const noexcept { + return LRUCachePerfStat{.hits = hits.load(std::memory_order_relaxed), .misses = misses.load(std::memory_order_relaxed)}; + } + void Reset() noexcept { + hits.store(0, std::memory_order_relaxed); + misses.store(0, std::memory_order_relaxed); + } + Stats& operator=(const Stats& o) { + if (&o != this) { + hits.store(o.hits.load()); + misses.store(o.misses.load()); + } + return *this; + } + + std::atomic_uint64_t hits; + std::atomic_uint64_t misses; + }; + + // Cache line alignment to avoid contention betwee atomic cache ptr and cache stats (alignas would be better, but it does not work + // properly with tcmalloc on CentOS7) + uint8_t alignment1_[48]; + CachePtrT ptr_; + uint8_t alignment2_[48]; + mutable Stats stats_; +}; + } // namespace reindexer diff --git a/cpp_src/core/namespace/namespaceimpl.cc b/cpp_src/core/namespace/namespaceimpl.cc index 8805fbd4e..283e54596 100644 --- a/cpp_src/core/namespace/namespaceimpl.cc +++ b/cpp_src/core/namespace/namespaceimpl.cc @@ -7,6 +7,7 @@ #include "core/cjson/uuid_recoders.h" #include "core/formatters/lsn_fmt.h" #include "core/index/index.h" +#include "core/index/indexfastupdate.h" #include "core/index/ttlindex.h" #include "core/itemimpl.h" #include "core/itemmodifier.h" @@ -80,9 +81,8 @@ NamespaceImpl::NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& schema_{src.schema_}, enablePerfCounters_{src.enablePerfCounters_.load()}, config_{src.config_}, - queryCountCache_{ - std::make_unique(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache)}, - joinCache_{std::make_unique(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache)}, + queryCountCache_{config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache}, + joinCache_{config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache}, wal_{src.wal_, storage_}, repl_{src.repl_}, observers_{src.observers_}, @@ -102,6 +102,8 @@ NamespaceImpl::NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& for (auto& idxIt : src.indexes_) { indexes_.push_back(idxIt->Clone()); } + queryCountCache_.CopyInternalPerfStatsFrom(src.queryCountCache_); + joinCache_.CopyInternalPerfStatsFrom(src.joinCache_); markUpdated(IndexOptimization::Full); logPrintf(LogInfo, "Namespace::CopyContentsFrom (%s).Workers: %d, timeout: %d, tm: { state_token: 0x%08X, version: %d }", name_, @@ -115,9 +117,8 @@ NamespaceImpl::NamespaceImpl(const std::string& name, UpdatesObservers& observer payloadType_{name}, tagsMatcher_{payloadType_}, enablePerfCounters_{false}, - queryCountCache_{ - std::make_unique(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache)}, - joinCache_{std::make_unique(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache)}, + queryCountCache_{config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache}, + joinCache_{config_.cacheConfig.joinCacheSize, 
config_.cacheConfig.joinHitsToCache}, wal_(getWalSize(config_)), observers_{&observers}, lastSelectTime_{0}, @@ -269,13 +270,12 @@ void NamespaceImpl::OnConfigUpdated(DBConfigProvider& configProvider, const RdxC config_.cacheConfig.ftIdxCacheSize / 1024, config_.cacheConfig.ftIdxHitsToCache); } if (needReconfigureJoinCache) { - joinCache_ = std::make_unique(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache); + joinCache_.Reinitialize(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache); logPrintf(LogTrace, "[%s] Join cache has been reconfigured: { max_size %lu KB; hits: %u }", name_, config_.cacheConfig.joinCacheSize / 1024, config_.cacheConfig.joinHitsToCache); } if (needReconfigureQueryCountCache) { - queryCountCache_ = - std::make_unique(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache); + queryCountCache_.Reinitialize(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache); logPrintf(LogTrace, "[%s] Queries count cache has been reconfigured: { max_size %lu KB; hits: %u }", name_, config_.cacheConfig.queryCountCacheSize / 1024, config_.cacheConfig.queryCountHitsToCache); } @@ -752,8 +752,8 @@ void NamespaceImpl::dumpIndex(std::ostream& os, std::string_view index) const { } void NamespaceImpl::clearNamespaceCaches() { - queryCountCache_->Clear(); - joinCache_->Clear(); + queryCountCache_.Clear(); + joinCache_.Clear(); } void NamespaceImpl::dropIndex(const IndexDef& index) { @@ -926,9 +926,12 @@ void NamespaceImpl::verifyUpdateIndex(const IndexDef& indexDef) const { throw Error(errConflict, "Cannot add PK index '%s.%s'. Already exists another PK index - '%s'", name_, indexDef.name_, indexes_[currentPKIt->second]->Name()); } - if (indexDef.opts_.IsArray() != oldIndex->Opts().IsArray()) { - throw Error(errParams, "Cannot update index '%s' in namespace '%s'. Can't convert array index to not array and vice versa", - indexDef.name_, name_); + if (indexDef.opts_.IsArray() != oldIndex->Opts().IsArray() && !items_.empty()) { + // Array may be converted to scalar and scalar to array only if there are no items in namespace + throw Error( + errParams, + "Cannot update index '%s' in namespace '%s'. Can't convert array index to not array and vice versa for non-empty namespace", + indexDef.name_, name_); } if (indexDef.opts_.IsPK() && indexDef.opts_.IsArray()) { throw Error(errParams, "Cannot update index '%s' in namespace '%s'. 
PK field can't be array", indexDef.name_, name_); @@ -967,7 +970,7 @@ void NamespaceImpl::verifyUpdateIndex(const IndexDef& indexDef) const { FieldsSet changedFields{idxNameIt->second}; PayloadType newPlType = payloadType_; newPlType.Drop(indexDef.name_); - newPlType.Add(PayloadFieldType(newIndex->KeyType(), indexDef.name_, indexDef.jsonPaths_, indexDef.opts_.IsArray())); + newPlType.Add(PayloadFieldType(*newIndex, indexDef)); verifyConvertTypes(oldIndex->KeyType(), newIndex->KeyType(), newPlType, changedFields); } } @@ -1168,7 +1171,7 @@ bool NamespaceImpl::addIndex(const IndexDef& indexDef) { } else { PayloadType oldPlType = payloadType_; auto newIndex = Index::New(indexDef, PayloadType(), FieldsSet(), config_.cacheConfig); - payloadType_.Add(PayloadFieldType{newIndex->KeyType(), indexName, jsonPaths, newIndex->Opts().IsArray()}); + payloadType_.Add(PayloadFieldType(*newIndex, indexDef)); rollbacker.SetOldPayloadType(std::move(oldPlType)); tagsMatcher_.UpdatePayloadType(payloadType_); rollbacker.NeedResetPayloadTypeInTagsMatcher(); @@ -1220,8 +1223,11 @@ bool NamespaceImpl::updateIndex(const IndexDef& indexDef) { } verifyUpdateIndex(indexDef); - dropIndex(indexDef); - addIndex(indexDef); + + if (!IndexFastUpdate::Try(*this, foundIndex, indexDef)) { + dropIndex(indexDef); + addIndex(indexDef); + } return true; } @@ -2188,7 +2194,7 @@ void NamespaceImpl::doModifyItem(Item& item, ItemModifyMode mode, const NsContex } for (int field = 1, regularIndexes = indexes_.firstCompositePos(); field < regularIndexes; ++field) { - Index& index = *indexes_[field]; + const Index& index = *indexes_[field]; if (index.Opts().GetCollateMode() == CollateUTF8 && index.KeyType().Is()) { if (index.Opts().IsSparse()) { assertrx(index.Fields().getTagsPathsLength() > 0); @@ -2480,8 +2486,8 @@ NamespaceMemStat NamespaceImpl::GetMemStat(const RdxContext& ctx) { NamespaceMemStat ret; auto rlck = rLock(ctx); ret.name = name_; - ret.joinCache = joinCache_->GetMemStat(); - ret.queryCache = queryCountCache_->GetMemStat(); + ret.joinCache = joinCache_.GetMemStat(); + ret.queryCache = queryCountCache_.GetMemStat(); ret.itemsCount = itemsCount(); *(static_cast(&ret.replication)) = getReplState(); @@ -2550,6 +2556,9 @@ NamespacePerfStat NamespaceImpl::GetPerfStat(const RdxContext& ctx) { ret.name = name_; ret.selects = selectPerfCounter_.Get(); ret.updates = updatePerfCounter_.Get(); + ret.joinCache = joinCache_.GetPerfStat(); + ret.queryCountCache = queryCountCache_.GetPerfStat(); + ret.indexes.reserve(indexes_.size() - 1); for (unsigned i = 1; i < indexes_.size(); i++) { ret.indexes.emplace_back(indexes_[i]->GetIndexPerfStat()); } @@ -2563,6 +2572,8 @@ void NamespaceImpl::ResetPerfStat(const RdxContext& ctx) { for (auto& i : indexes_) { i->ResetIndexPerfStat(); } + queryCountCache_.ResetPerfStat(); + joinCache_.ResetPerfStat(); } Error NamespaceImpl::loadLatestSysRecord(std::string_view baseSysTag, uint64_t& version, std::string& content) { @@ -3282,11 +3293,11 @@ void NamespaceImpl::getFromJoinCache(const Query& q, JoinCacheRes& out) const { } void NamespaceImpl::getFromJoinCacheImpl(JoinCacheRes& ctx) const { - auto it = joinCache_->Get(ctx.key); + auto it = joinCache_.Get(ctx.key); ctx.needPut = false; ctx.haveData = false; if (it.valid) { - if (!it.val.inited) { + if (!it.val.IsInitialized()) { ctx.needPut = true; } else { ctx.haveData = true; @@ -3307,11 +3318,11 @@ void NamespaceImpl::putToJoinCache(JoinCacheRes& res, JoinPreResult::CPtr preRes res.needPut = false; joinCacheVal.inited = true; joinCacheVal.preResult = 
std::move(preResult); - joinCache_->Put(res.key, std::move(joinCacheVal)); + joinCache_.Put(res.key, std::move(joinCacheVal)); } void NamespaceImpl::putToJoinCache(JoinCacheRes& res, JoinCacheVal&& val) const { val.inited = true; - joinCache_->Put(res.key, std::move(val)); + joinCache_.Put(res.key, std::move(val)); } const FieldsSet& NamespaceImpl::pkFields() { diff --git a/cpp_src/core/namespace/namespaceimpl.h b/cpp_src/core/namespace/namespaceimpl.h index 2bd31dd74..c009578cb 100644 --- a/cpp_src/core/namespace/namespaceimpl.h +++ b/cpp_src/core/namespace/namespaceimpl.h @@ -3,8 +3,6 @@ #include #include #include -#include -#include #include #include "asyncstorage.h" #include "core/cjson/tagsmatcher.h" @@ -334,6 +332,8 @@ class NamespaceImpl final : public intrusive_atomic_rc_base { // NOLINT(*perfor lsn_t lsn; }; + friend struct IndexFastUpdate; + Error rebuildIndexesTagsPaths(const TagsMatcher& newTm); ReplicationState getReplState() const; std::string sysRecordName(std::string_view sysTag, uint64_t version); @@ -493,8 +493,8 @@ class NamespaceImpl final : public intrusive_atomic_rc_base { // NOLINT(*perfor std::atomic_bool enablePerfCounters_{false}; NamespaceConfigData config_; - std::unique_ptr queryCountCache_; - std::unique_ptr joinCache_; + QueryCountCache queryCountCache_; + JoinCache joinCache_; // Replication variables WALTracker wal_; ReplicationState repl_; diff --git a/cpp_src/core/namespace/namespacestat.cc b/cpp_src/core/namespace/namespacestat.cc index de8a1d2a1..67b143800 100644 --- a/cpp_src/core/namespace/namespacestat.cc +++ b/cpp_src/core/namespace/namespacestat.cc @@ -1,8 +1,6 @@ - #include "namespacestat.h" #include "core/cjson/jsonbuilder.h" #include "gason/gason.h" -#include "tools/jsontools.h" #include "tools/logger.h" namespace reindexer { @@ -75,8 +73,8 @@ void IndexMemStat::GetJSON(JsonBuilder& builder) { if (trackedUpdatesSize) { builder.Put("tracked_updates_size", trackedUpdatesSize); } - if (trackedUpdatesOveflow) { - builder.Put("tracked_updates_overflow", trackedUpdatesOveflow); + if (trackedUpdatesOverflow) { + builder.Put("tracked_updates_overflow", trackedUpdatesOverflow); } if (dataSize) { builder.Put("data_size", dataSize); @@ -133,6 +131,14 @@ void NamespacePerfStat::GetJSON(WrSerializer& ser) { auto obj = builder.Object("transactions"); transactions.GetJSON(obj); } + if (queryCountCache.state != LRUCachePerfStat::State::DoesNotExist) { + auto obj = builder.Object("query_count_cache"); + queryCountCache.GetJSON(obj); + } + if (joinCache.state != LRUCachePerfStat::State::DoesNotExist) { + auto obj = builder.Object("join_cache"); + joinCache.GetJSON(obj); + } auto arr = builder.Array("indexes"); @@ -152,6 +158,10 @@ void IndexPerfStat::GetJSON(JsonBuilder& builder) { auto obj = builder.Object("commits"); commits.GetJSON(obj); } + if (cache.state != LRUCachePerfStat::State::DoesNotExist) { + auto obj = builder.Object("cache"); + cache.GetJSON(obj); + } } void MasterState::GetJSON(JsonBuilder& builder) { @@ -318,4 +328,27 @@ void TxPerfStat::GetJSON(JsonBuilder& builder) { builder.Put("max_copy_time_us", maxCopyTimeUs); } +void LRUCachePerfStat::GetJSON(JsonBuilder& builder) { + switch (state) { + case State::DoesNotExist: + return; + case State::Inactive: + builder.Put("is_active", false); + break; + case State::Active: + builder.Put("is_active", true); + break; + } + + builder.Put("total_queries", TotalQueries()); + builder.Put("cache_hit_rate", HitRate()); +} + +uint64_t LRUCachePerfStat::TotalQueries() const noexcept { return hits + misses; } + 
+double LRUCachePerfStat::HitRate() const noexcept { + const auto tq = TotalQueries(); + return tq ? (double(hits) / double(tq)) : 0.0; +} + } // namespace reindexer diff --git a/cpp_src/core/namespace/namespacestat.h b/cpp_src/core/namespace/namespacestat.h index 2f3e861e8..244c8c894 100644 --- a/cpp_src/core/namespace/namespacestat.h +++ b/cpp_src/core/namespace/namespacestat.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include "core/lsn.h" @@ -37,7 +35,7 @@ struct IndexMemStat { size_t trackedUpdatesCount = 0; size_t trackedUpdatesBuckets = 0; size_t trackedUpdatesSize = 0; - size_t trackedUpdatesOveflow = 0; + size_t trackedUpdatesOverflow = 0; LRUCacheMemStat idsetCache; size_t GetIndexStructSize() const noexcept { return idsetPlainSize + idsetBTreeSize + sortOrdersSize + fulltextSize + columnSize + trackedUpdatesSize; @@ -62,7 +60,9 @@ struct MasterState { struct ReplicationState { enum class Status { None, Idle, Error, Fatal, Syncing }; - void GetJSON(JsonBuilder& builder); + virtual ~ReplicationState() = default; + + virtual void GetJSON(JsonBuilder& builder); void FromJSON(span); // LSN of last change @@ -94,8 +94,9 @@ struct ReplicationState { lsn_t lastUpstreamLSN; }; -struct ReplicationStat : public ReplicationState { - void GetJSON(JsonBuilder& builder); +struct ReplicationStat final : public ReplicationState { + void GetJSON(JsonBuilder& builder) override; + size_t walCount = 0; size_t walSize = 0; }; @@ -125,6 +126,18 @@ struct NamespaceMemStat { std::vector indexes; }; +struct LRUCachePerfStat { + enum class State { DoesNotExist, Active, Inactive }; + + void GetJSON(JsonBuilder& builder); + uint64_t TotalQueries() const noexcept; + double HitRate() const noexcept; + + State state = State::DoesNotExist; + uint64_t hits = 0; + uint64_t misses = 0; +}; + struct PerfStat { void GetJSON(JsonBuilder& builder); @@ -160,13 +173,14 @@ struct TxPerfStat { struct IndexPerfStat { IndexPerfStat() = default; - IndexPerfStat(const std::string& n, const PerfStat& s, const PerfStat& c) : name(n), selects(s), commits(c) {} + IndexPerfStat(const std::string& n, PerfStat&& s, PerfStat&& c) : name(n), selects(std::move(s)), commits(std::move(c)) {} void GetJSON(JsonBuilder& builder); std::string name; PerfStat selects; PerfStat commits; + LRUCachePerfStat cache; }; struct NamespacePerfStat { @@ -177,6 +191,8 @@ struct NamespacePerfStat { PerfStat selects; TxPerfStat transactions; std::vector indexes; + LRUCachePerfStat joinCache; + LRUCachePerfStat queryCountCache; }; } // namespace reindexer diff --git a/cpp_src/core/namespacedef.h b/cpp_src/core/namespacedef.h index 05f12ecd5..6450b262f 100644 --- a/cpp_src/core/namespacedef.h +++ b/cpp_src/core/namespacedef.h @@ -14,7 +14,7 @@ class WrSerializer; struct NamespaceDef { NamespaceDef() = default; - NamespaceDef(const std::string& iname, StorageOpts istorage = StorageOpts().Enabled().CreateIfMissing()) + explicit NamespaceDef(const std::string& iname, StorageOpts istorage = StorageOpts().Enabled().CreateIfMissing()) : name(iname), storage(istorage) {} NamespaceDef& AddIndex(const std::string& iname, const std::string& indexType, const std::string& fieldType, diff --git a/cpp_src/core/nsselecter/comparator/comparator_not_indexed.cc b/cpp_src/core/nsselecter/comparator/comparator_not_indexed.cc index 1f9350ed1..2ed129f86 100644 --- a/cpp_src/core/nsselecter/comparator/comparator_not_indexed.cc +++ b/cpp_src/core/nsselecter/comparator/comparator_not_indexed.cc @@ -25,6 +25,13 @@ 
ComparatorNotIndexedImplBase::ComparatorNotIndexedImplBase(const VariantAr ComparatorNotIndexedImplBase::ComparatorNotIndexedImplBase(const VariantArray& values) : value1_{GetValue(CondRange, values, 0)}, value2_{GetValue(CondRange, values, 1)} {} +ComparatorNotIndexedImplBase::ComparatorNotIndexedImplBase(const VariantArray& values) : values_{values.size()} { + for (const Variant& v : values) { + throwOnNull(v, CondSet); + values_.insert(v); + } +} + ComparatorNotIndexedImplBase::ComparatorNotIndexedImplBase(const VariantArray& values) : value_{GetValue(CondLike, values, 0)}, valueView_{p_string{value_}} {} @@ -35,6 +42,18 @@ ComparatorNotIndexedImpl::ComparatorNotIndexedImpl(const Var point_{GetValue(CondDWithin, values, 0)}, distance_{GetValue(CondDWithin, values, 1)} {} +reindexer::comparators::ComparatorNotIndexedImpl::ComparatorNotIndexedImpl(const VariantArray& values, + const PayloadType& payloadType, + const TagsPath& fieldPath) + : payloadType_{payloadType}, fieldPath_{fieldPath}, values_{values.size()} { + int i = 0; + for (const Variant& v : values) { + throwOnNull(v, CondAllSet); + values_.emplace(v, i); + ++i; + } +} + template [[nodiscard]] std::string ComparatorNotIndexedImplBase::ConditionStr() const { return fmt::sprintf("%s %s", CondToStr(), value_.As()); diff --git a/cpp_src/core/nsselecter/comparator/comparator_not_indexed.h b/cpp_src/core/nsselecter/comparator/comparator_not_indexed.h index e74ee8843..ad755d104 100644 --- a/cpp_src/core/nsselecter/comparator/comparator_not_indexed.h +++ b/cpp_src/core/nsselecter/comparator/comparator_not_indexed.h @@ -66,11 +66,7 @@ class ComparatorNotIndexedImplBase { template <> class ComparatorNotIndexedImplBase { protected: - ComparatorNotIndexedImplBase(const VariantArray& values) : values_{values.size()} { - for (const Variant& v : values) { - values_.insert(v); - } - } + ComparatorNotIndexedImplBase(const VariantArray& values); ComparatorNotIndexedImplBase(const ComparatorNotIndexedImplBase&) = default; ComparatorNotIndexedImplBase& operator=(const ComparatorNotIndexedImplBase&) = delete; ComparatorNotIndexedImplBase(ComparatorNotIndexedImplBase&&) = default; @@ -364,14 +360,7 @@ class ComparatorNotIndexedImpl : private ComparatorNotIndexed template <> class ComparatorNotIndexedImpl { public: - ComparatorNotIndexedImpl(const VariantArray& values, const PayloadType& payloadType, const TagsPath& fieldPath) - : payloadType_{payloadType}, fieldPath_{fieldPath}, values_{values.size()} { - int i = 0; - for (const Variant& v : values) { - values_.emplace(v, i); - ++i; - } - } + ComparatorNotIndexedImpl(const VariantArray& values, const PayloadType& payloadType, const TagsPath& fieldPath); ComparatorNotIndexedImpl(const ComparatorNotIndexedImpl&) = default; ComparatorNotIndexedImpl& operator=(const ComparatorNotIndexedImpl&) = delete; ComparatorNotIndexedImpl(ComparatorNotIndexedImpl&&) = default; diff --git a/cpp_src/core/nsselecter/comparator/helpers.h b/cpp_src/core/nsselecter/comparator/helpers.h index f5a2d947c..7b51fc5df 100644 --- a/cpp_src/core/nsselecter/comparator/helpers.h +++ b/cpp_src/core/nsselecter/comparator/helpers.h @@ -74,13 +74,20 @@ template } } +inline static void throwOnNull(const Variant& v, CondType cond) { + if (v.IsNullValue()) { + throw Error{errParams, "Can not use 'null'-value directly with '%s' condition in comparator", CondTypeToStr(cond)}; + } +} + template [[nodiscard]] T GetValue(CondType cond, const VariantArray& values, size_t i) { if (values.size() <= i) { throw Error{errQueryExec, "Too many 
arguments for condition %s", CondTypeToStr(cond)}; - } else { - return GetValue(values[i]); } + const auto& val = values[i]; + throwOnNull(val, cond); + return GetValue(values[i]); } } // namespace comparators diff --git a/cpp_src/core/nsselecter/joinedselector.cc b/cpp_src/core/nsselecter/joinedselector.cc index d726ae1f9..f1f77567c 100644 --- a/cpp_src/core/nsselecter/joinedselector.cc +++ b/cpp_src/core/nsselecter/joinedselector.cc @@ -21,9 +21,9 @@ void JoinedSelector::selectFromRightNs(QueryResults& joinItemR, const Query& que rightNs_->putToJoinCache(joinRes_, preSelectCtx_.ResultPtr()); } if (joinResLong.haveData) { - found = joinResLong.it.val.ids_->size(); + found = joinResLong.it.val.ids->size(); matchedAtLeastOnce = joinResLong.it.val.matchedAtLeastOnce; - rightNs_->FillResult(joinItemR, *joinResLong.it.val.ids_); + rightNs_->FillResult(joinItemR, *joinResLong.it.val.ids); } else { SelectCtxWithJoinPreSelect ctx(query, nullptr, preSelectCtx_); ctx.matchedAtLeastOnce = false; @@ -41,10 +41,10 @@ void JoinedSelector::selectFromRightNs(QueryResults& joinItemR, const Query& que } if (joinResLong.needPut) { JoinCacheVal val; - val.ids_ = make_intrusive>(); + val.ids = make_intrusive>(); val.matchedAtLeastOnce = matchedAtLeastOnce; for (auto& r : joinItemR.Items()) { - val.ids_->Add(r.Id(), IdSet::Unordered, 0); + val.ids->Add(r.Id(), IdSet::Unordered, 0); } rightNs_->putToJoinCache(joinResLong, std::move(val)); } @@ -199,7 +199,7 @@ void JoinedSelector::AppendSelectIteratorOfJoinIndexData(SelectIteratorContainer assertrx_throw(!IsFullText(leftIndex->Type())); // Avoiding to use 'GetByJsonPath' during values extraction - // TODO: Sometimes this substituition may be effective even with 'GetByJsonPath', so we should allow user to hint this optimization. + // TODO: Sometimes this substitution may be effective even with 'GetByJsonPath', so we should allow user to hint this optimization. bool hasSparse = false; for (int field : joinEntry.RightFields()) { if (field == SetByJsonPath) { @@ -213,14 +213,14 @@ void JoinedSelector::AppendSelectIteratorOfJoinIndexData(SelectIteratorContainer const VariantArray values = std::visit(overloaded{[&](const IdSet& preselected) { - const std::vector* sortOrderes = nullptr; + const std::vector* sortOrders = nullptr; if (preresult.sortOrder.index) { - sortOrderes = &(preresult.sortOrder.index->SortOrders()); + sortOrders = &(preresult.sortOrder.index->SortOrders()); } return readValuesOfRightNsFrom( preselected, - [this, sortOrderes](IdType rowId) noexcept { - const auto properRowId = sortOrderes ? (*sortOrderes)[rowId] : rowId; + [this, sortOrders](IdType rowId) noexcept { + const auto properRowId = sortOrders ? 
(*sortOrders)[rowId] : rowId; return ConstPayload{rightNs_->payloadType_, rightNs_->items_[properRowId]}; }, joinEntry, rightNs_->payloadType_); diff --git a/cpp_src/core/nsselecter/nsselecter.cc b/cpp_src/core/nsselecter/nsselecter.cc index 312e9d622..87a8c721d 100644 --- a/cpp_src/core/nsselecter/nsselecter.cc +++ b/cpp_src/core/nsselecter/nsselecter.cc @@ -53,9 +53,9 @@ void NsSelecter::operator()(QueryResults& result, SelectCtxWithJoinPreSelectqueryCountCache_->Get(ckey); - if (cached.valid && cached.val.total_count >= 0) { - result.totalCount += cached.val.total_count; + auto cached = ns_->queryCountCache_.Get(ckey); + if (cached.valid && cached.val.IsInitialized()) { + result.totalCount += cached.val.totalCount; if (logLevel >= LogTrace) { logPrintf(LogInfo, "[%s] using total count value from cache: %d", ns_->name_, result.totalCount); } @@ -442,7 +442,7 @@ void NsSelecter::operator()(QueryResults& result, SelectCtxWithJoinPreSelect= LogTrace) { logPrintf(LogInfo, "[%s] put totalCount value into query cache: %d ", ns_->name_, result.totalCount); } - ns_->queryCountCache_->Put(ckey, {static_cast(result.totalCount - initTotalCount)}); + ns_->queryCountCache_.Put(ckey, {static_cast(result.totalCount - initTotalCount)}); } if constexpr (std::is_same_v) { if rx_unlikely (logLevel >= LogTrace) { @@ -669,11 +669,8 @@ It NsSelecter::applyForcedSortImpl(NamespaceImpl& ns, It begin, It end, const It VariantArray keyRefs; const auto boundary = std::stable_partition(begin, end, [&](const ItemRef& itemRef) { valueGetter.Payload(itemRef).Get(idx, keyRefs); - if constexpr (desc) { - return keyRefs.empty() || (sortMap.find(keyRefs[0]) == sortMap.end()); - } else { - return !keyRefs.empty() && (sortMap.find(keyRefs[0]) != sortMap.end()); - } + const auto descOrder = keyRefs.empty() || (sortMap.find(keyRefs[0]) == sortMap.end()); + return desc ? 
descOrder : !descOrder; }); VariantArray lhsItemValue; diff --git a/cpp_src/core/nsselecter/querypreprocessor.cc b/cpp_src/core/nsselecter/querypreprocessor.cc index e4c57e877..1c520a826 100644 --- a/cpp_src/core/nsselecter/querypreprocessor.cc +++ b/cpp_src/core/nsselecter/querypreprocessor.cc @@ -403,9 +403,12 @@ size_t QueryPreprocessor::removeBrackets(size_t begin, size_t end) { if (begin != end && GetOperation(begin) == OpOr) { throw Error{errQueryExec, "OR operator in first condition or after left join"}; } + if (!equalPositions.empty()) { + return 0; + } size_t deleted = 0; for (size_t i = begin; i < end - deleted; i = Next(i)) { - if (!IsSubTree(i)) { + if (!IsSubTree(i) || (Is(i) && !Get(i).equalPositions.empty())) { continue; } deleted += removeBrackets(i + 1, Next(i)); diff --git a/cpp_src/core/nsselecter/selectiteratorcontainer.cc b/cpp_src/core/nsselecter/selectiteratorcontainer.cc index a0194ecd7..31ff5f288 100644 --- a/cpp_src/core/nsselecter/selectiteratorcontainer.cc +++ b/cpp_src/core/nsselecter/selectiteratorcontainer.cc @@ -670,14 +670,11 @@ bool SelectIteratorContainer::checkIfSatisfyAllConditions(iterator begin, iterat }, [&] RX_PRE_LMBD_ALWAYS_INLINE(SelectIterator & sit) RX_POST_LMBD_ALWAYS_INLINE { return checkIfSatisfyCondition(sit, &lastFinish, rowId); }, - [&] /*RX_PRE_LMBD_ALWAYS_INLINE*/ (JoinSelectIterator & jit) /*RX_POST_LMBD_ALWAYS_INLINE*/ { - return checkIfSatisfyCondition(jit, pv, properRowId, match); - }, + [&] RX_PRE_LMBD_ALWAYS_INLINE(JoinSelectIterator & jit) + RX_POST_LMBD_ALWAYS_INLINE { return checkIfSatisfyCondition(jit, pv, properRowId, match); }, Restricted>{}( - [&pv, properRowId] /*RX_PRE_LMBD_ALWAYS_INLINE*/ (auto& c) /*RX_POST_LMBD_ALWAYS_INLINE*/ { - return c.Compare(pv, properRowId); - }), + [&pv, properRowId] RX_PRE_LMBD_ALWAYS_INLINE(auto& c) RX_POST_LMBD_ALWAYS_INLINE { return c.Compare(pv, properRowId); }), [] RX_PRE_LMBD_ALWAYS_INLINE(AlwaysTrue&) RX_POST_LMBD_ALWAYS_INLINE noexcept { return true; }); if (op == OpOr) { result |= lastResult; diff --git a/cpp_src/core/nsselecter/sortexpression.cc b/cpp_src/core/nsselecter/sortexpression.cc index d8523a680..b193f16c7 100644 --- a/cpp_src/core/nsselecter/sortexpression.cc +++ b/cpp_src/core/nsselecter/sortexpression.cc @@ -26,6 +26,9 @@ static reindexer::VariantArray getFieldValues(reindexer::ConstPayload pv, reinde if (index == IndexValueType::SetByJsonPath) { pv.GetByJsonPath(column, tagsMatcher, values, reindexer::KeyValueType::Undefined{}); } else { + if (index >= pv.NumFields()) { + throw reindexer::Error(errQueryExec, "Composite fields in sort expression are not supported"); + } pv.Get(index, values); } return values; diff --git a/cpp_src/core/payload/payloadfieldtype.cc b/cpp_src/core/payload/payloadfieldtype.cc index a66ce588f..2b5f6ac28 100644 --- a/cpp_src/core/payload/payloadfieldtype.cc +++ b/cpp_src/core/payload/payloadfieldtype.cc @@ -1,5 +1,6 @@ #include "payloadfieldtype.h" #include +#include "core/index/index.h" #include "core/keyvalue/p_string.h" #include "core/keyvalue/uuid.h" #include "estl/one_of.h" @@ -7,6 +8,14 @@ namespace reindexer { +PayloadFieldType::PayloadFieldType(const Index& index, const IndexDef& indexDef) noexcept + : type_(index.KeyType()), + name_(indexDef.name_), + jsonPaths_(indexDef.jsonPaths_), + offset_(0), + isArray_(index.Opts().IsArray()), + arrayDim_(indexDef.Type() == IndexType::IndexRTree ? 
2 : -1) {} + size_t PayloadFieldType::Sizeof() const noexcept { if (IsArray()) { return sizeof(PayloadFieldValue::Array); diff --git a/cpp_src/core/payload/payloadfieldtype.h b/cpp_src/core/payload/payloadfieldtype.h index 438b3563b..5f6af0b06 100644 --- a/cpp_src/core/payload/payloadfieldtype.h +++ b/cpp_src/core/payload/payloadfieldtype.h @@ -5,16 +5,20 @@ namespace reindexer { +class Index; +struct IndexDef; // Type of field class PayloadFieldType { public: + explicit PayloadFieldType(const Index&, const IndexDef&) noexcept; PayloadFieldType(KeyValueType t, std::string n, std::vector j, bool a) noexcept - : type_(t), name_(std::move(n)), jsonPaths_(std::move(j)), offset_(0), isArray_(a) {} + : type_(t), name_(std::move(n)), jsonPaths_(std::move(j)), offset_(0), isArray_(a), arrayDim_(-1) {} size_t Sizeof() const noexcept; size_t ElemSizeof() const noexcept; size_t Alignof() const noexcept; bool IsArray() const noexcept { return isArray_; } + int8_t ArrayDim() const noexcept { return arrayDim_; } void SetArray() noexcept { isArray_ = true; } void SetOffset(size_t o) noexcept { offset_ = o; } size_t Offset() const noexcept { return offset_; } @@ -32,6 +36,7 @@ class PayloadFieldType { std::vector jsonPaths_; size_t offset_; bool isArray_; + int8_t arrayDim_; }; } // namespace reindexer diff --git a/cpp_src/core/payload/payloadtype.cc b/cpp_src/core/payload/payloadtype.cc index 0f9bfd33d..2cc553053 100644 --- a/cpp_src/core/payload/payloadtype.cc +++ b/cpp_src/core/payload/payloadtype.cc @@ -193,9 +193,6 @@ void PayloadTypeImpl::deserialize(Serializer& ser) { PayloadFieldType ft(t, name, jsonPaths, isArray); - if (isArray) { - ft.SetArray(); - } ft.SetOffset(offset); fieldsByName_.emplace(name, fields_.size()); if (t.Is()) { diff --git a/cpp_src/core/query/query.cc b/cpp_src/core/query/query.cc index e57e1d95d..ce4cf0cab 100644 --- a/cpp_src/core/query/query.cc +++ b/cpp_src/core/query/query.cc @@ -45,7 +45,7 @@ void Query::checkSubQuery() const { void Query::checkSubQueryNoData() const { if rx_unlikely (!aggregations_.empty()) { - throw Error{errQueryExec, "Aggregaton cannot be in subquery with condition Any or Empty"}; + throw Error{errQueryExec, "Aggregation cannot be in subquery with condition Any or Empty"}; } if rx_unlikely (HasLimit() && Limit() != 0) { throw Error{errQueryExec, "Limit cannot be in subquery with condition Any or Empty"}; @@ -175,7 +175,7 @@ void Query::checkSetObjectValue(const Variant& value) const { } } -VariantArray Query::deserializeValues(Serializer& ser, CondType cond) { +VariantArray Query::deserializeValues(Serializer& ser, CondType cond) const { VariantArray values; auto cnt = ser.GetVarUint(); if (cond == CondDWithin) { @@ -198,6 +198,10 @@ VariantArray Query::deserializeValues(Serializer& ser, CondType cond) { return values; } +void Query::deserializeJoinOn(Serializer&) { + throw Error(errLogic, "Unexpected call. 
JoinOn actual only for JoinQuery"); +} + void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { bool end = false; std::vector> equalPositions; @@ -252,7 +256,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { aggregations_.emplace_back(type, std::move(fields)); auto& ae = aggregations_.back(); while (!ser.Eof() && !aggEnd) { - int atype = ser.GetVarUint(); + auto atype = ser.GetVarUint(); switch (atype) { case QueryAggregationSort: { auto fieldName = ser.GetVString(); @@ -287,7 +291,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { if (sortingEntry.expression.length()) { sortingEntries_.push_back(std::move(sortingEntry)); } - int cnt = ser.GetVarUint(); + auto cnt = ser.GetVarUint(); if (cnt != 0 && sortingEntries_.size() != 1) { throw Error(errParams, "Forced sort order is allowed for the first sorting entry only"); } @@ -298,12 +302,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { break; } case QueryJoinOn: { - const OpType op = static_cast(ser.GetVarUint()); - const CondType condition = static_cast(ser.GetVarUint()); - std::string leftFieldName{ser.GetVString()}; - std::string rightFieldName{ser.GetVString()}; - reinterpret_cast(this)->joinEntries_.emplace_back(op, condition, std::move(leftFieldName), - std::move(rightFieldName)); + deserializeJoinOn(ser); break; } case QueryDebugLevel: @@ -350,7 +349,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { VariantArray val; std::string field(ser.GetVString()); bool isArray = ser.GetVarUint(); - int numValues = ser.GetVarUint(); + auto numValues = ser.GetVarUint(); bool hasExpressions = false; while (numValues--) { hasExpressions = ser.GetVarUint(); @@ -362,7 +361,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { case QueryUpdateField: { VariantArray val; std::string field(ser.GetVString()); - int numValues = ser.GetVarUint(); + auto numValues = ser.GetVarUint(); bool isArray = numValues > 1; bool hasExpressions = false; while (numValues--) { @@ -376,7 +375,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { VariantArray val; std::string field(ser.GetVString()); bool hasExpressions = false; - int numValues = ser.GetVarUint(); + auto numValues = ser.GetVarUint(); val.MarkArray(ser.GetVarUint() == 1); while (numValues--) { hasExpressions = ser.GetVarUint(); @@ -428,7 +427,10 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { entries_.Get(eqPos.first - 1).equalPositions.emplace_back(std::move(eqPos.second)); } } - return; +} + +void Query::serializeJoinEntries(WrSerializer&) const { + throw Error(errLogic, "Unexpected call. 
JoinEntries actual only for JoinQuery"); } void Query::Serialize(WrSerializer& ser, uint8_t mode) const { @@ -473,13 +475,7 @@ void Query::Serialize(WrSerializer& ser, uint8_t mode) const { } if (mode & WithJoinEntries) { - for (const auto& qje : reinterpret_cast(this)->joinEntries_) { - ser.PutVarUint(QueryJoinOn); - ser.PutVarUint(qje.Operation()); - ser.PutVarUint(qje.Condition()); - ser.PutVString(qje.LeftFieldName()); - ser.PutVString(qje.RightFieldName()); - } + serializeJoinEntries(ser); } for (const auto& equalPoses : entries_.equalPositions) { @@ -747,4 +743,22 @@ bool Query::IsWALQuery() const noexcept { return false; } +void JoinedQuery::deserializeJoinOn(Serializer& ser) { + const OpType op = static_cast(ser.GetVarUint()); + const CondType condition = static_cast(ser.GetVarUint()); + std::string leftFieldName{ser.GetVString()}; + std::string rightFieldName{ser.GetVString()}; + joinEntries_.emplace_back(op, condition, std::move(leftFieldName), std::move(rightFieldName)); +} + +void JoinedQuery::serializeJoinEntries(WrSerializer& ser) const { + for (const auto& qje : joinEntries_) { + ser.PutVarUint(QueryJoinOn); + ser.PutVarUint(qje.Operation()); + ser.PutVarUint(qje.Condition()); + ser.PutVString(qje.LeftFieldName()); + ser.PutVString(qje.RightFieldName()); + } +} + } // namespace reindexer diff --git a/cpp_src/core/query/query.h b/cpp_src/core/query/query.h index 7e8f9bbd6..77945cbd4 100644 --- a/cpp_src/core/query/query.h +++ b/cpp_src/core/query/query.h @@ -34,6 +34,12 @@ class Query { : namespace_(std::forward(nsName)), start_(start), count_(count), calcTotal_(calcTotal) {} Query() = default; + virtual ~Query() = default; + + Query(Query&& other) noexcept = default; + Query& operator=(Query&& other) noexcept = default; + Query(const Query& other) = default; + Query& operator=(const Query& other) = delete; /// Allows to compare 2 Query objects. [[nodiscard]] bool operator==(const Query&) const; @@ -970,8 +976,10 @@ class Query { using OnHelperR = OnHelperTempl; void checkSetObjectValue(const Variant& value) const; + virtual void deserializeJoinOn(Serializer& ser); void deserialize(Serializer& ser, bool& hasJoinConditions); - VariantArray deserializeValues(Serializer&, CondType); + VariantArray deserializeValues(Serializer&, CondType) const; + virtual void serializeJoinEntries(WrSerializer& ser) const; void checkSubQueryNoData() const; void checkSubQueryWithData() const; void checkSubQuery() const; @@ -996,7 +1004,7 @@ class Query { OpType nextOp_ = OpAnd; /// Next operation constant. }; -class JoinedQuery : public Query { +class JoinedQuery final : public Query { public: JoinedQuery(JoinType jt, const Query& q) : Query(q), joinType{jt} {} JoinedQuery(JoinType jt, Query&& q) : Query(std::move(q)), joinType{jt} {} @@ -1005,6 +1013,10 @@ class JoinedQuery : public Query { JoinType joinType{JoinType::LeftJoin}; /// Default join type. h_vector joinEntries_; /// Condition for join. 
Filled in each subqueries, empty in root query + +private: + void deserializeJoinOn(Serializer& ser); + void serializeJoinEntries(WrSerializer& ser) const; }; template diff --git a/cpp_src/core/querycache.h b/cpp_src/core/querycache.h index 566015220..6f323b29b 100644 --- a/cpp_src/core/querycache.h +++ b/cpp_src/core/querycache.h @@ -10,11 +10,12 @@ namespace reindexer { struct QueryCountCacheVal { QueryCountCacheVal() = default; - QueryCountCacheVal(size_t total) noexcept : total_count(total) {} + QueryCountCacheVal(size_t total) noexcept : totalCount(total) {} size_t Size() const noexcept { return 0; } + bool IsInitialized() const noexcept { return totalCount >= 0; } - int total_count = -1; + int totalCount = -1; }; constexpr uint8_t kCountCachedKeyMode = @@ -67,8 +68,6 @@ struct HashQueryCacheKey { } }; -using QueryCountCache = LRUCache; - -; +using QueryCountCache = LRUCache, LRUWithAtomicPtr::No>; } // namespace reindexer diff --git a/cpp_src/core/reindexer_impl/reindexerimpl.cc b/cpp_src/core/reindexer_impl/reindexerimpl.cc index 278d5cb91..19963d913 100644 --- a/cpp_src/core/reindexer_impl/reindexerimpl.cc +++ b/cpp_src/core/reindexer_impl/reindexerimpl.cc @@ -8,15 +8,10 @@ #include "core/defnsconfigs.h" #include "core/iclientsstats.h" #include "core/index/index.h" -#include "core/itemimpl.h" -#include "core/nsselecter/nsselecter.h" #include "core/nsselecter/querypreprocessor.h" #include "core/query/sql/sqlsuggester.h" -#include "core/queryresults/joinresults.h" #include "core/selectfunc/selectfunc.h" -#include "core/type_consts_helpers.h" #include "debug/crashqueryreporter.h" -#include "estl/defines.h" #include "replicator/replicator.h" #include "rx_selector.h" #include "server/outputparameters.h" @@ -616,7 +611,7 @@ Error ReindexerImpl::renameNamespace(std::string_view srcNsName, const std::stri return {}; } -template +template Error ReindexerImpl::applyNsFunction(std::string_view nsName, const InternalRdxContext& ctx, const MakeCtxStrFn& makeCtxStr, Arg arg, Args... 
args) { Error err; @@ -1746,8 +1741,8 @@ Error ReindexerImpl::GetProtobufSchema(WrSerializer& ser, std::vector class elist { diff --git a/cpp_src/estl/suffix_map.h b/cpp_src/estl/suffix_map.h index d303b6b4e..c70513083 100644 --- a/cpp_src/estl/suffix_map.h +++ b/cpp_src/estl/suffix_map.h @@ -35,7 +35,7 @@ class suffix_map { return value_type(std::make_pair(p, m_->mapped_[m_->sa_[idx_]])); } - const value_type operator->() const { + value_type operator->() const { auto* p = &m_->text_[m_->sa_[idx_]]; return value_type(std::make_pair(p, m_->mapped_[m_->sa_[idx_]])); } @@ -136,25 +136,21 @@ class suffix_map { return end(); } - int insert(std::string_view word, const V& val, int virtual_len = -1) { - if (virtual_len == -1) { - virtual_len = word.length(); - } + int insert(std::string_view word, const V& val) { int wpos = text_.size(); size_t real_len = word.length(); text_.insert(text_.end(), word.begin(), word.end()); text_.emplace_back('\0'); mapped_.insert(mapped_.end(), real_len + 1, val); words_.emplace_back(wpos); - words_len_.emplace_back(real_len, virtual_len); + words_len_.emplace_back(real_len); built_ = false; return wpos; } const CharT* word_at(int idx) const noexcept { return &text_[words_[idx]]; } - int16_t word_len_at(int idx) const noexcept { return words_len_[idx].first; } - int16_t virtual_word_len(int idx) const noexcept { return words_len_[idx].second; } + int16_t word_len_at(int idx) const noexcept { return words_len_[idx]; } void build() { if (built_) { @@ -220,7 +216,7 @@ class suffix_map { std::vector sa_, words_; std::vector lcp_; - std::vector> words_len_; + std::vector words_len_; std::vector mapped_; std::vector text_; bool built_ = false; diff --git a/cpp_src/gtests/tests/API/base_tests.cc b/cpp_src/gtests/tests/API/base_tests.cc index 7ea486928..1582ee68a 100644 --- a/cpp_src/gtests/tests/API/base_tests.cc +++ b/cpp_src/gtests/tests/API/base_tests.cc @@ -2224,3 +2224,52 @@ TEST_F(ReindexerApi, QueryResultsLSNTest) { ASSERT_EQ(lsn, lsns[i]) << i; } } + +TEST_F(ReindexerApi, SelectNull) { + rt.OpenNamespace(default_namespace, StorageOpts().Enabled(false)); + rt.AddIndex(default_namespace, {"id", "hash", "int", IndexOpts().PK()}); + rt.AddIndex(default_namespace, {"value", "tree", "string", IndexOpts()}); + rt.AddIndex(default_namespace, {"store", "-", "string", IndexOpts()}); + rt.AddIndex(default_namespace, {"store_num", "-", "string", IndexOpts().Sparse()}); + rt.UpsertJSON(default_namespace, R"_({"id":1234, "value" : "value", "store": "store", "store_num": 10, "not_indexed": null})_"); + + for (unsigned i = 0; i < 3; ++i) { + QueryResults qr; + auto err = rt.reindexer->Select(Query(default_namespace).Not().Where("id", CondEq, Variant()), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.code(), errParams) << err.what(); + EXPECT_EQ(err.what(), "Can not use 'null'-value with operators '=' and 'IN()' (index: 'id'). Use 'IS NULL'/'IS NOT NULL' instead"); + + qr.Clear(); + err = rt.reindexer->Select(Query(default_namespace).Where("id", CondSet, VariantArray{Variant(1234), Variant()}), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.code(), errParams) << err.what(); + EXPECT_EQ(err.what(), "Can not use 'null'-value with operators '=' and 'IN()' (index: 'id'). 
Use 'IS NULL'/'IS NOT NULL' instead"); + + qr.Clear(); + err = rt.reindexer->Select(Query(default_namespace).Where("value", CondLt, Variant()), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.code(), errParams) << err.what(); + EXPECT_EQ(err.what(), "Can not use 'null'-value with operators '>','<','<=','>=' and 'RANGE()' (index: 'value')"); + + qr.Clear(); + err = rt.reindexer->Select(Query(default_namespace).Where("store", CondEq, Variant()), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.code(), errParams) << err.what(); + EXPECT_EQ(err.what(), "Can not use 'null'-value directly with 'CondEq' condition in comparator"); + + qr.Clear(); + err = rt.reindexer->Select(Query(default_namespace).Where("store_num", CondSet, Variant()), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.code(), errParams) << err.what(); + EXPECT_EQ(err.what(), "Can not use 'null'-value directly with 'CondEq' condition in comparator"); + + qr.Clear(); + err = rt.reindexer->Select(Query(default_namespace).Where("not_indexed", CondSet, VariantArray{Variant(1234), Variant()}), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.code(), errParams) << err.what(); + EXPECT_EQ(err.what(), "Can not use 'null'-value directly with 'CondSet' condition in comparator"); + + AwaitIndexOptimization(default_namespace); + } +} diff --git a/cpp_src/gtests/tests/fixtures/fuzzing/random_generator.cc b/cpp_src/gtests/tests/fixtures/fuzzing/random_generator.cc index fc429300e..a68d0e2fa 100644 --- a/cpp_src/gtests/tests/fixtures/fuzzing/random_generator.cc +++ b/cpp_src/gtests/tests/fixtures/fuzzing/random_generator.cc @@ -391,7 +391,7 @@ std::vector RandomGenerator::RndFieldsForCompositeIndex(const std::vecto const size_t count = compositeIndexSize(scalarIndexes.size()); result.reserve(count); const bool uniqueFields = count <= scalarIndexes.size() && !RndErr(); - // TODO unexisted and not indexed fields + // TODO non-existent and not indexed fields if (uniqueFields) { auto scalars = scalarIndexes; while (result.size() < count) { diff --git a/cpp_src/gtests/tests/fixtures/reindexertestapi.cc b/cpp_src/gtests/tests/fixtures/reindexertestapi.cc index 23c280b9c..652e8bb2d 100644 --- a/cpp_src/gtests/tests/fixtures/reindexertestapi.cc +++ b/cpp_src/gtests/tests/fixtures/reindexertestapi.cc @@ -266,5 +266,20 @@ typename ReindexerTestApi::QueryResultsType ReindexerTestApi::createQR() } } +template +std::vector ReindexerTestApi::GetSerializedQrItems(reindexer::QueryResults& qr) { + std::vector items; + items.reserve(qr.Count()); + reindexer::WrSerializer wrser; + for (auto it : qr) { + EXPECT_TRUE(it.Status().ok()) << it.Status().what(); + wrser.Reset(); + auto err = it.GetJSON(wrser, false); + EXPECT_TRUE(err.ok()) << err.what(); + items.emplace_back(wrser.Slice()); + } + return items; +} + template class ReindexerTestApi; template class ReindexerTestApi; diff --git a/cpp_src/gtests/tests/fixtures/reindexertestapi.h b/cpp_src/gtests/tests/fixtures/reindexertestapi.h index adee82d21..f5b4f9a70 100644 --- a/cpp_src/gtests/tests/fixtures/reindexertestapi.h +++ b/cpp_src/gtests/tests/fixtures/reindexertestapi.h @@ -95,6 +95,8 @@ class ReindexerTestApi { void SetVerbose(bool v) noexcept { verbose = v; } std::shared_ptr reindexer; + static std::vector GetSerializedQrItems(reindexer::QueryResults& qr); + private: QueryResultsType createQR(); diff --git a/cpp_src/gtests/tests/unit/composite_indexes_test.cc b/cpp_src/gtests/tests/unit/composite_indexes_test.cc index baefff01d..28ee5f794 100644 --- a/cpp_src/gtests/tests/unit/composite_indexes_test.cc +++ 
b/cpp_src/gtests/tests/unit/composite_indexes_test.cc @@ -1,4 +1,8 @@ #include "composite_indexes_api.h" +#include "gmock/gmock.h" +#include "yaml-cpp/node/node.h" +#include "yaml-cpp/node/parse.h" +#include "yaml-cpp/yaml.h" TEST_F(CompositeIndexesApi, CompositeIndexesAddTest) { addCompositeIndex({kFieldNameBookid, kFieldNameBookid2}, CompositeIndexHash, IndexOpts().PK()); @@ -285,3 +289,76 @@ TEST_F(CompositeIndexesApi, CompositeOverCompositeTest) { EXPECT_EQ(err.what(), fmt::sprintf(kExpectedErrorPattern, getCompositeIndexName({kComposite1, kComposite2}), kComposite1)); addData(); } + +TEST_F(CompositeIndexesApi, FastUpdateIndex) { + const std::vector kIndexTypes{"-", "hash", "tree"}; + const std::vector kIndexNames{"IntIndex", "Int64Index", "DoubleIndex", "StringIndex"}; + const std::vector kFieldTypes{"int", "int64", "double", "string"}; + + auto indexDef = [](const std::string& idxName, const std::string& fieldType, const std::string& type) { + return reindexer::IndexDef{idxName, {idxName}, type, fieldType, IndexOpts()}; + }; + + auto err = rt.reindexer->AddIndex(default_namespace, reindexer::IndexDef{"id", {"id"}, "hash", "int", IndexOpts().PK()}); + ASSERT_TRUE(err.ok()) << err.what(); + + for (size_t i = 0; i < kIndexNames.size(); ++i) { + err = rt.reindexer->AddIndex(default_namespace, indexDef(kIndexNames[i], kFieldTypes[i], kIndexTypes[2])); + ASSERT_TRUE(err.ok()) << err.what(); + } + + auto compParts = {kIndexNames[0], kIndexNames[1], kIndexNames[2], kIndexNames[3]}; + + addCompositeIndex(compParts, CompositeIndexHash, IndexOpts()); + + for (int i = 0; i < 100; ++i) { + Item item = NewItem(default_namespace); + item["id"] = i; + item[kIndexNames[0]] = i % 10 == 0 ? 0 : rand(); + item[kIndexNames[1]] = i % 10 == 0 ? 1 : rand(); + item[kIndexNames[2]] = i % 10 == 0 ? 2.0 : (rand() / 100.0); + item[kIndexNames[3]] = i % 10 == 0 ? 
"string" : RandString(); + Upsert(default_namespace, item); + }; + + auto query = Query(default_namespace) + .Explain() + .WhereComposite(getCompositeIndexName(compParts), CondEq, {{Variant{0}, Variant{1}, Variant{2.0}, Variant{"string"}}}); + + auto qrCheck = rt.Select(query); + auto checkItems = rt.GetSerializedQrItems(qrCheck); + auto checkCount = qrCheck.Count(); + for (size_t i = 0; i < kIndexNames.size(); ++i) { + for (size_t j = 0; j < kIndexTypes.size(); ++j) { + if (kFieldTypes[i] == "double" && kIndexTypes[j] == "hash") { + continue; + } + auto err = rt.reindexer->UpdateIndex(default_namespace, indexDef(kIndexNames[i], kFieldTypes[i], kIndexTypes[j])); + ASSERT_TRUE(err.ok()) << err.what(); + + auto qr = rt.Select(query); + + ASSERT_EQ(rt.GetSerializedQrItems(qr), checkItems); + ASSERT_EQ(qr.Count(), checkCount); + + YAML::Node root = YAML::Load(qr.explainResults); + auto selectors = root["selectors"]; + ASSERT_TRUE(selectors.IsSequence()) << qr.explainResults; + ASSERT_EQ(selectors.size(), 1) << qr.explainResults; + ASSERT_EQ(selectors[0]["field"].as(), getCompositeIndexName(compParts)) << qr.explainResults; + } + } + + for (size_t i = 0; i < kFieldTypes.size(); ++i) { + for (size_t j = 0; j < kFieldTypes.size(); ++j) { + if (i == j) { + continue; + } + auto err = rt.reindexer->UpdateIndex(default_namespace, indexDef(kIndexNames[i], kFieldTypes[j], "tree")); + ASSERT_FALSE(err.ok()) << err.what(); + auto err1Text = fmt::format("Cannot remove index {} : it's a part of a composite index .*", kIndexNames[i]); + auto err2Text = fmt::format("Cannot convert key from type {} to {}", kFieldTypes[i], kFieldTypes[j]); + ASSERT_THAT(err.what(), testing::MatchesRegex(fmt::format("({}|{})", err1Text, err2Text))); + } + } +} \ No newline at end of file diff --git a/cpp_src/gtests/tests/unit/ft/ft_generic.cc b/cpp_src/gtests/tests/unit/ft/ft_generic.cc index 6672b748c..069208e6b 100644 --- a/cpp_src/gtests/tests/unit/ft/ft_generic.cc +++ b/cpp_src/gtests/tests/unit/ft/ft_generic.cc @@ -835,9 +835,36 @@ TEST_P(FTGenericApi, SelectWithSeveralGroup) { TEST_P(FTGenericApi, NumberToWordsSelect) { Init(GetDefaultConfig()); - Add("оценка 5 майкл джордан 23"sv, ""sv); - - CheckAllPermutations("", {"пять", "+двадцать", "+три"}, "", {{"оценка !5! майкл джордан !23!", ""}}); + auto row1 = Add("оценка 52 майкл джордан 23 пятьдесят"sv); + auto row2 = Add("8"sv); + auto row3 = Add("41 цифра и еще цифра 241"sv); + auto row4 = Add("начало 120 цифра и еще цифра 9120 конец"sv); + auto row5 = Add("слово один пять два 5 семь 7 ещё пять слово"sv); + auto row6 = Add("слово один 5 два пять семь 7 ещё 5 слово"sv); + auto row7 = Add("1000000000000 1000000000 50000000055 1000000"sv); + auto row8 = Add("70 1 7 77 377 70 7"sv); + + auto select = [this](int id, const std::string& ftQuery, const std::string& result) { + auto q{reindexer::Query("nm1").Where("ft3", CondEq, std::string(ftQuery)).And().Where("id", CondEq, id).WithRank()}; + reindexer::QueryResults res; + q.AddFunction("ft3 = highlight(!,!)"); + auto err = rt.reindexer->Select(q, res); + EXPECT_TRUE(err.ok()) << err.what(); + ASSERT_EQ(res.Count(), 1); + auto item = res.begin().GetItem(); + std::string val = item["ft1"].As(); + ASSERT_EQ(val, result); + }; + select(row1.second, "52 +двадцать +три", "оценка !52! майкл джордан !23! пятьдесят"); + select(row2.second, "восемь", "!8!"); + select(row3.second, "сорок", "!41! цифра и еще цифра !241!"); + select(row3.second, "один", "!41! цифра и еще цифра !241!"); + select(row4.second, "сто конец", "начало !120! 
цифра и еще цифра !9120 конец!"); + select(row4.second, "тысяч", "начало 120 цифра и еще цифра !9120! конец"); + select(row5.second, "пять", "слово один !пять! два !5! семь 7 ещё !пять! слово"); + select(row6.second, "пять", "слово один !5! два !пять! семь 7 ещё !5! слово"); + select(row7.second, "миллиардов", "1000000000000 !1000000000 50000000055! 1000000"); + select(row8.second, "\"=семьдесят =семь\"", "70 1 7 !77 377 70 7!"); } // Make sure FT seeks by a huge number set by string in DSL @@ -853,6 +880,9 @@ TEST_P(FTGenericApi, HugeNumberToWordsSelect) { "+четыреста +сорок"); // Make sure it found this only string ASSERT_TRUE(qr.Count() == 1); + auto item = qr.begin().GetItem(); + std::string json = item["ft1"].As(); + ASSERT_EQ(json, "много !7343121521906522180408440! денег"); } // Make sure way too huge numbers are ignored in FT @@ -1893,7 +1923,7 @@ TEST_P(FTGenericApi, FrisoTest) { TEST_P(FTGenericApi, FrisoTestSelect) { reindexer::FtFastConfig cfg = GetDefaultConfig(); cfg.stopWords = {}; - cfg.splitterType = reindexer::FtFastConfig::Splitter::Friso; + cfg.splitterType = reindexer::FtFastConfig::Splitter::MMSegCN; Init(cfg); std::unordered_map> index; @@ -1948,7 +1978,7 @@ TEST_P(FTGenericApi, FrisoTestSelect) { TEST_P(FTGenericApi, FrisoTextPostprocess) { reindexer::FtFastConfig cfg = GetDefaultConfig(); - cfg.splitterType = reindexer::FtFastConfig::Splitter::Friso; + cfg.splitterType = reindexer::FtFastConfig::Splitter::MMSegCN; cfg.stopWords = {}; cfg.maxAreasInDoc = 10; Init(cfg); diff --git a/cpp_src/gtests/tests/unit/rpcclient_test.cc b/cpp_src/gtests/tests/unit/rpcclient_test.cc index 31d8b765f..a8c5b767e 100644 --- a/cpp_src/gtests/tests/unit/rpcclient_test.cc +++ b/cpp_src/gtests/tests/unit/rpcclient_test.cc @@ -1384,8 +1384,20 @@ TEST_F(RPCClientTestApi, QuerySetObjectUpdate) { insertFn(kNsName, kNsSize); + client::CoroQueryResults qr; + { + err = rx.Update(Query(kNsName).Where("id", CondGe, "0").SetObject("nested", Variant(std::string(R"([{"field": 1240}])"))), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.what(), "Error modifying field value: 'Unsupported JSON format. 
Unnamed field detected'"); + } + + { + err = rx.Update(Query(kNsName).Where("id", CondGe, "0").SetObject("nested", Variant(std::string(R"({{"field": 1240}})"))), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.what(), "Error modifying field value: 'JSONDecoder: Error parsing json: unquoted key, pos 15'"); + } + { - client::CoroQueryResults qr; // R"(UPDATE TestQuerySetObjectUpdate SET nested = {"field": 1240} where id >= 0)" auto query = Query(kNsName).Where("id", CondGe, "0") .SetObject("nested", Variant(std::string(R"({"field": 1240})"))); diff --git a/cpp_src/gtests/tests/unit/string_function_test.cc b/cpp_src/gtests/tests/unit/string_function_test.cc index 62ad2cea4..ed60eb25a 100644 --- a/cpp_src/gtests/tests/unit/string_function_test.cc +++ b/cpp_src/gtests/tests/unit/string_function_test.cc @@ -7,6 +7,7 @@ #include #endif // REINDEX_WITH_ASAN +#include "core/ft/numtotext.h" #include "gtest/gtest.h" #include "reindexer_api.h" #include "tools/customlocal.h" @@ -170,3 +171,63 @@ TEST_F(ReindexerApi, LikeWithFullTextIndex) { err = rt.reindexer->Select(Query(default_namespace).Where("name", CondLike, "%" + content[rand() % content.size()]), qr); ASSERT_TRUE(!err.ok()); } + +TEST_F(ReindexerApi, NumToText) { + auto out = [](const std::vector& resNum) { + std::stringstream s; + for (auto& v : resNum) { + s << "[" << v << "] "; + } + s << std::endl; + return s.str(); + }; + std::vector resNum; + bool r = reindexer::NumToText::convert("0", resNum) == std::vector{"ноль"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("00", resNum) == std::vector{"ноль", "ноль"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("000010", resNum) == std::vector{"ноль", "ноль", "ноль", "ноль", "десять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("01000000", resNum) == std::vector{"ноль", "один", "миллион"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("121", resNum) == std::vector{"сто", "двадцать", "один"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1", resNum) == std::vector{"один"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("9", resNum) == std::vector{"девять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("10", resNum) == std::vector{"десять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("13", resNum) == std::vector{"тринадцать"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("30", resNum) == std::vector{"тридцать"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("48", resNum) == std::vector{"сорок", "восемь"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("100", resNum) == std::vector{"сто"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("500", resNum) == std::vector{"пятьсот"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("999", resNum) == std::vector{"девятьсот", "девяносто", "девять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1000", resNum) == std::vector{"одна", "тысяча"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1001", resNum) == std::vector{"одна", "тысяча", "один"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("5111", resNum) == std::vector{"пять", "тысяч", "сто", "одиннадцать"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("777101", resNum) == + std::vector{"семьсот", "семьдесят", "семь", 
"тысяч", "сто", "один"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1000000000", resNum) == std::vector{"один", "миллиард"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1005000000", resNum) == std::vector{"один", "миллиард", "пять", "миллионов"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("50000000055", resNum) == + std::vector{"пятьдесят", "миллиардов", "пятьдесят", "пять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("100000000000000000000000000", resNum) == std::vector{"сто", "септиллионов"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1000000000000000000000000000", resNum) == std::vector{}; + ASSERT_TRUE(r) << out(resNum); +} diff --git a/cpp_src/gtests/tests/unit/tolal_lru_cache.cc b/cpp_src/gtests/tests/unit/tolal_lru_cache.cc index 80832acba..573956d4e 100644 --- a/cpp_src/gtests/tests/unit/tolal_lru_cache.cc +++ b/cpp_src/gtests/tests/unit/tolal_lru_cache.cc @@ -82,7 +82,7 @@ TEST(LruCache, SimpleTest) { QueryCountCache cache(reindexer::kDefaultCacheSizeLimit, reindexer::kDefaultHitCountToCache); PRINTF("checking query cache...\n"); - for (auto i = 0; i < kIterCount; i++) { + for (i = 0; i < kIterCount; i++) { auto idx = rand() % qs.size(); auto& qce = qs.at(idx); QueryCacheKey ckey{qce.q, kCountCachedKeyMode, qce.JoinedSelectorsPtr()}; @@ -91,7 +91,7 @@ TEST(LruCache, SimpleTest) { if (cached.valid) { ASSERT_TRUE(exist) << "query missing in query cache"; - ASSERT_EQ(cached.val.total_count, qce.expectedTotal) << "cached data are not valid"; + ASSERT_EQ(cached.val.totalCount, qce.expectedTotal) << "cached data are not valid"; } else { size_t total = static_cast(rand() % 10000); cache.Put(ckey, QueryCountCacheVal{total}); diff --git a/cpp_src/net/listener.cc b/cpp_src/net/listener.cc index aa6b3ce6a..1d0027080 100644 --- a/cpp_src/net/listener.cc +++ b/cpp_src/net/listener.cc @@ -32,8 +32,8 @@ Listener::Listener(ev::dynamic_loop& loop, std::shared_ptr shared) template Listener::Listener(ev::dynamic_loop& loop, ConnectionFactory&& connFactory, int maxListeners) - : Listener(loop, - std::make_shared(std::move(connFactory), (maxListeners ? maxListeners : std::thread::hardware_concurrency()) + 1)) {} + : Listener(loop, std::make_shared(std::move(connFactory), + (maxListeners ? 
maxListeners : (double(std::thread::hardware_concurrency()) * 1.2)) + 1)) {} template Listener::~Listener() { diff --git a/cpp_src/replicator/replicator.cc b/cpp_src/replicator/replicator.cc index e7f478b1e..905cde0bb 100644 --- a/cpp_src/replicator/replicator.cc +++ b/cpp_src/replicator/replicator.cc @@ -609,7 +609,7 @@ Error Replicator::syncNamespaceForced(const NamespaceDef& ns, std::string_view r err = syncMetaForced(tmpNs, ns.name); if (err.ok()) { - err = syncSchemaForced(tmpNs, ns.name); + err = syncSchemaForced(tmpNs, NamespaceDef(ns.name)); } // Make query to complete master's namespace data diff --git a/cpp_src/server/CMakeLists.txt b/cpp_src/server/CMakeLists.txt index d297bcc5b..d7ba57e3e 100644 --- a/cpp_src/server/CMakeLists.txt +++ b/cpp_src/server/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.10) project(reindexer_server_library) set (SWAGGER_VERSION "2.x") -set (GH_FACE_VERSION "3.30.0") +set (GH_FACE_VERSION "3.31.0") set (GH_FACE_TAG "v${GH_FACE_VERSION}") set (TARGET reindexer_server_library) set (SERVER_LIB_DIR ${PROJECT_BINARY_DIR} PARENT_SCOPE) diff --git a/cpp_src/server/contrib/server.md b/cpp_src/server/contrib/server.md index 28d7951b8..8667211b1 100644 --- a/cpp_src/server/contrib/server.md +++ b/cpp_src/server/contrib/server.md @@ -88,6 +88,7 @@ * [JoinCacheMemStats](#joincachememstats) * [JoinedDef](#joineddef) * [JsonObjectDef](#jsonobjectdef) + * [LRUCachePerfStats](#lrucacheperfstats) * [LongQueriesLogging](#longquerieslogging) * [MetaByKeyResponse](#metabykeyresponse) * [MetaInfo](#metainfo) @@ -133,7 +134,7 @@ Reindexer is compact, fast and it does not have heavy dependencies. ### Version information -*Version* : 3.30.0 +*Version* : 3.31.0 ### License information @@ -2364,7 +2365,7 @@ Fulltext Index configuration |**partial_match_decrease**
*optional*|Decrease of relevancy in case of partial match by value: partial_match_decrease * (non matched symbols) / (matched symbols)
**Minimum value** : `0`
**Maximum value** : `100`|integer| |**position_boost**
*optional*|Boost of search query term position
**Default** : `1.0`
**Minimum value** : `0`
**Maximum value** : `10`|number (float)| |**position_weight**
*optional*|Weight of search query term position in final rank. 0: term position will not change final rank. 1: term position will affect to final rank in 0 - 100% range
**Default** : `0.1`
**Minimum value** : `0`
**Maximum value** : `1`|number (float)| -|**splitter**
*optional*|Text tokenization algorithm. 'fast' - splits text by spaces, special characters and unsupported UTF-8 symbols. Each token is a combination of letters from supported UTF-8 subset, numbers and extra word symbols. 'friso' - algorithm based on mmseg for Chinese and English
**Default** : `"fast"`|enum (fast, friso)| +|**splitter**
*optional*|Text tokenization algorithm. 'fast' - splits text by spaces, special characters and unsupported UTF-8 symbols. Each token is a combination of letters from supported UTF-8 subset, numbers and extra word symbols. 'mmseg_cn' - algorithm based on friso implementation of mmseg for Chinese and English
**Default** : `"fast"`|enum (fast, mmseg_cn)| |**stemmers**
*optional*|List of stemmers to use|< string > array| |**stop_words**
*optional*|List of objects of stop words. Words from this list will be ignored when building indexes|< [FtStopWordObject](#ftstopwordobject) > array| |**sum_ranks_by_fields_ratio**
*optional*|Ratio to summation of ranks of match one term in several fields. For example, if value of this ratio is K, request is '@+f1,+f2,+f3 word', ranks of match in fields are R1, R2, R3 and R2 < R1 < R3, final rank will be R = R2 + K*R1 + K*K*R3
**Default** : `0.0`
**Minimum value** : `0`
**Maximum value** : `1`|number (float)| @@ -2568,6 +2569,18 @@ Join cache stats. Stores results of selects to right table by ON condition +### LRUCachePerfStats +Performance statistics for specific LRU-cache instance + + +|Name|Description|Schema| +|---|---|---| +|**cache_hit_rate**
*optional*|Cache hit rate (hits / total_queries)
**Minimum value** : `0`
**Maximum value** : `1`|number| +|**is_active**
*optional*|Determines if cache is currently in use. Usually it has 'false' value for uncommitted indexes|boolean| +|**total_queries**
*optional*|Queries total count
**Minimum value** : `0`|integer| + + + ### LongQueriesLogging Parameters for logging long queries and transactions @@ -2673,8 +2686,10 @@ List of meta info of the specified namespace |Name|Description|Schema| |---|---|---| -|**indexes**
*optional*|Memory consumption of each namespace index|< [indexes](#namespaceperfstats-indexes) > array| +|**indexes**
*optional*|Performance statistics for each namespace index|< [indexes](#namespaceperfstats-indexes) > array| +|**join_cache**
*optional*|Joins cache statistics|[LRUCachePerfStats](#lrucacheperfstats)| |**name**
*optional*|Name of namespace|string| +|**query_count_cache**
*optional*|Queries cache statistics (for the queries with COUNT_CACHED() aggregation)|[LRUCachePerfStats](#lrucacheperfstats)| |**selects**
*optional*||[SelectPerfStats](#selectperfstats)| |**transactions**
*optional*||[TransactionsPerfStats](#transactionsperfstats)| |**updates**
*optional*||[UpdatePerfStats](#updateperfstats)| @@ -2684,6 +2699,7 @@ List of meta info of the specified namespace |Name|Description|Schema| |---|---|---| +|**cache**
*optional*|If index does not use IDs cache at all, this struct won't be present in response|[LRUCachePerfStats](#lrucacheperfstats)| |**name**
*optional*|Name of index|string| |**selects**
*optional*||[SelectPerfStats](#selectperfstats)| |**updates**
*optional*||[UpdatePerfStats](#updateperfstats)| diff --git a/cpp_src/server/contrib/server.yml b/cpp_src/server/contrib/server.yml index 7e04c6f7a..bb21ff0ff 100644 --- a/cpp_src/server/contrib/server.yml +++ b/cpp_src/server/contrib/server.yml @@ -4,7 +4,7 @@ info: **Reindexer** is an embeddable, in-memory, document-oriented database with a high-level Query builder interface. Reindexer's goal is to provide fast search with complex queries. Reindexer is compact, fast and it does not have heavy dependencies. - version: "3.30.0" + version: "3.31.0" title: "Reindexer REST API" license: name: "Apache 2.0" @@ -3124,10 +3124,10 @@ definitions: maximum: 500 splitter: type: string - description: "Text tokenization algorithm. 'fast' - splits text by spaces, special characters and unsupported UTF-8 symbols. Each token is a combination of letters from supported UTF-8 subset, numbers and extra word symbols. 'friso' - algorithm based on mmseg for Chinese and English" + description: "Text tokenization algorithm. 'fast' - splits text by spaces, special characters and unsupported UTF-8 symbols. Each token is a combination of letters from supported UTF-8 subset, numbers and extra word symbols. 'mmseg_cn' - algorithm based on friso implementation of mmseg for Chinese and English" enum: - "fast" - - "friso" + - "mmseg_cn" default: "fast" FulltextFieldConfig: @@ -3811,9 +3811,15 @@ definitions: $ref: "#/definitions/SelectPerfStats" transactions: $ref: "#/definitions/TransactionsPerfStats" + join_cache: + description: "Joins cache statistics" + $ref: "#/definitions/LRUCachePerfStats" + query_count_cache: + description: "Queries cache statistics (for the queries with COUNT_CACHED() aggregation)" + $ref: "#/definitions/LRUCachePerfStats" indexes: type: array - description: "Memory consumption of each namespace index" + description: "Performance statistics for each namespace index" items: type: object properties: @@ -3824,6 +3830,9 @@ definitions: $ref: "#/definitions/UpdatePerfStats" selects: $ref: "#/definitions/SelectPerfStats" + cache: + description: "If index does not use IDs cache at all, this struct won't be present in response" + $ref: "#/definitions/LRUCachePerfStats" CommonPerfStats: type: object @@ -3938,6 +3947,22 @@ definitions: type: string description: "not normalized SQL representation of longest query" + LRUCachePerfStats: + description: "Performance statistics for specific LRU-cache instance" + properties: + total_queries: + description: "Queries total count" + type: integer + minimum: 0 + cache_hit_rate: + description: "Cache hit rate (hits / total_queries)" + type: number + minimum: 0.0 + maximum: 1.0 + is_active: + description: "Determines if cache is currently in use. Usually it has 'false' value for uncommited indexes" + type: boolean + SystemConfigItem: type: object required: diff --git a/describer.go b/describer.go index e2f36c183..117eaefdb 100644 --- a/describer.go +++ b/describer.go @@ -260,6 +260,29 @@ type TxPerfStat struct { MaxCopyTimeUs int64 `json:"max_copy_time_us"` } +// LRUCachePerfStat is information about LRU cache efficiency +type LRUCachePerfStat struct { + // Total queries to cache + TotalQueries uint64 `json:"total_queries"` + // Cache hit rate (CacheHits / TotalQueries) + CacheHitRate float64 `json:"cache_hit_rate"` + // Determines if cache is currently in use. 
Usually it has 'false' value for uncommited indexes + IsActive bool `json:"is_active"` +} + +// IndexPerfStat is information about specific index performance statistics +type IndexPerfStat struct { + // Name of index + Name string `json:"name"` + // Performance statistics for index commit operations + Commits PerfStat `json:"commits"` + // Performance statistics for index select operations + Selects PerfStat `json:"selects"` + // Performance statistics for LRU IdSets index cache (or fulltext cache for text indexes). + // Nil-value means, that index does not use cache at all + Cache *LRUCachePerfStat `json:"cache,omitempty"` +} + // NamespacePerfStat is information about namespace's performance statistics // and located in '#perfstats' system namespace type NamespacePerfStat struct { @@ -271,6 +294,12 @@ type NamespacePerfStat struct { Selects PerfStat `json:"selects"` // Performance statistics for transactions Transactions TxPerfStat `json:"transactions"` + // Performance statistics for JOINs cache + JoinCache LRUCachePerfStat `json:"join_cache"` + // Performance statistics for CountCached aggregation cache + QueryCountCache LRUCachePerfStat `json:"query_count_cache"` + // Performance statistics for each namespace index + Indexes IndexPerfStat `json:"indexes"` } // ClientConnectionStat is information about client connection diff --git a/ftfastconfig.go b/ftfastconfig.go index f2e3ab9be..e97a0f800 100644 --- a/ftfastconfig.go +++ b/ftfastconfig.go @@ -191,9 +191,9 @@ type FtFastConfig struct { // Config for document ranking Bm25Config *Bm25ConfigType `json:"bm25_config,omitempty"` // Text tokenization algorithm. Default 'fast'. - // 'fast' : splits text by spaces, special characters and unsupported UTF-8 symbols. - // Each token is a combination of letters from supported UTF-8 subset, numbers and extra word symbols. - // 'friso': algorithm based on mmseg for Chinese and English + // 'fast' : splits text by spaces, special characters and unsupported UTF-8 symbols. + // Each token is a combination of letters from supported UTF-8 subset, numbers and extra word symbols. + // 'mmseg_cn': algorithm based on friso mmseg for Chinese and English SplitterType string `json:"splitter,omitempty"` } diff --git a/fulltext.md b/fulltext.md index c5258e823..274ece7a7 100644 --- a/fulltext.md +++ b/fulltext.md @@ -385,7 +385,7 @@ Several parameters of full text search engine can be configured from application | | Optimization | string | Optimize the index by 'memory' or by 'cpu' | "memory" | | | FtBaseRanking | struct | Relevance of the word in different forms | | | | Bm25Config | struct | Document ranking function parameters [More...](#basic-document-ranking-algorithms) | | -| | SplitterType | string | Text breakdown algorithm. Available values: 'friso' and 'fast' | "fast" | +| | SplitterType | string | Text breakdown algorithm. Available values: 'mmseg_cn' and 'fast' | "fast" | ### Stopwords details The list item can be either a string or a structure containing a string (the stopword) and a bool attribute (`is_morpheme`) indicating whether the stopword can be part of a word that can be shown in query-results. @@ -420,7 +420,7 @@ If the list of stopwords looks like this: ``` and there are pair of documents containing this word: `{"...under the roof ..."}, {"... to understand and forgive..."}`. Then for the query 'under*' we will get as a result only document `{"... to understand and forgive..."}` and for the query 'under' we will get nothing as a result. 
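As an illustrative aside (not part of the patched file), the sketch below prints the JSON shape such a mixed stop-words list would take. The `word` key name and the plain Go helper types are assumptions used only for illustration; `is_morpheme` is the attribute described above.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// stopWord mirrors the structured stop-word form: the word itself plus the
// is_morpheme flag (the "word" key name is assumed here for illustration).
type stopWord struct {
	Word       string `json:"word"`
	IsMorpheme bool   `json:"is_morpheme"`
}

func main() {
	// A stop-words list may mix plain strings (fully ignored words) and
	// morpheme entries, which may still appear inside larger words in results.
	cfg := map[string]interface{}{
		"stop_words": []interface{}{
			"into",
			stopWord{Word: "under", IsMorpheme: true},
		},
	}
	b, _ := json.MarshalIndent(cfg, "", "  ")
	fmt.Println(string(b))
}
```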
-If the "StopWords" section is not specified in the config, then the [default](./cpp_src/core/ft/stopwords/stop_en.cc) stopwords list will be used, and if it is explicitly specified empty, it means that there are no stopwords. +If the "StopWords" section is not specified in the config, then the [default_en](./cpp_src/core/ft/stopwords/stop_en.cc) and [default_ru](./cpp_src/core/ft/stopwords/stop_ru.cc) stopwords list will be used, and if it is explicitly specified empty, it means that there are no stopwords. ### Detailed typos config @@ -451,7 +451,7 @@ FtBaseRanking: config for the base relevancy of the word in different forms. ### Text splitters -Reindexer supports two algorithms to break texts into words: `fast` and `friso`. +Reindexer supports two algorithms to break texts into words: `fast` and `mmseg_cn`. Default `fast` algorithm is based on the definition of a word in the form of a alpha (from supported Unicode subset), number and an extended character, everything else (whitespaces, special characters, unsopported Unicode subsets, etc) will be threated as a delimiters. @@ -487,7 +487,7 @@ Reindexer supports the following unicode block codes / extra symbols: This algorithm is simple and provides high performance, but it can not handle texts without delimiters (for example, Chinese language does not requires whitespaces between words, so `fast`-splitter will not be able to index it properly). -Alternative `friso`-splitter is based on [friso](https://github.com/lionsoul2014/friso) implementation of `mmseg` algorithm and uses dictionaries for tokenization. Currently this splitter supports only Chinese and English languages. +Alternative `mmseg_cn`-splitter is based on [friso](https://github.com/lionsoul2014/friso) implementation of `mmseg` algorithm and uses dictionaries for tokenization. Currently this splitter supports only Chinese and English languages. 
### Basic document ranking algorithms diff --git a/reflect.go b/reflect.go index e23197a69..36b957864 100644 --- a/reflect.go +++ b/reflect.go @@ -40,9 +40,13 @@ type indexOptions struct { } func parseRxTags(field reflect.StructField) (idxName string, idxType string, expireAfter string, idxSettings []string) { - tagsSlice := strings.SplitN(field.Tag.Get("reindex"), ",", 3) + tag, isSet := field.Tag.Lookup("reindex") + tagsSlice := strings.SplitN(tag, ",", 3) var idxOpts string idxName, idxType, expireAfter, idxOpts = tagsSlice[0], "", "", "" + if isSet && len(idxName) == 0 && !field.Anonymous && field.Name != "_" { + idxName = field.Name + } if len(tagsSlice) > 1 { idxType = tagsSlice[1] @@ -140,14 +144,14 @@ func parseIndexesImpl(indexDefs *[]bindings.IndexDef, st reflect.Type, subArray } if jsonTag == "-" && !opts.isComposite && !opts.isJoined { - if reindexTag := field.Tag.Get("reindex"); reindexTag != "" { - return fmt.Errorf("non-composite/non-joined field ('%s'), marked with `json:-` can not have explicit reindex tags, but it does ('%s')", field.Name, reindexTag) + if reindexTag, isSet := field.Tag.Lookup("reindex"); isSet { + return fmt.Errorf("non-composite/non-joined field ('%s'), marked with `json:-` can not have explicit reindex tags, but it does (reindex:\"%s\")", field.Name, reindexTag) } continue } if !opts.isComposite && !field.IsExported() { - if reindexTag := field.Tag.Get("reindex"); reindexTag != "" { - return fmt.Errorf("unexported non-composite field ('%s') can not have reindex tags, but it does ('%s')", field.Name, reindexTag) + if reindexTag, isSet := field.Tag.Lookup("reindex"); isSet { + return fmt.Errorf("unexported non-composite field ('%s') can not have reindex tags, but it does (reindex:\"%s\")", field.Name, reindexTag) } continue } diff --git a/test/compatibility_test/compatibility_test.sh b/test/compatibility_test/compatibility_test.sh index 23d5edbe2..4cf7fd816 100755 --- a/test/compatibility_test/compatibility_test.sh +++ b/test/compatibility_test/compatibility_test.sh @@ -39,19 +39,19 @@ test_outdated_instance() { echo "====Master: ${master_cmd}" echo "====Slave: ${slave_cmd}" init_storages - ${master_cmd} --db "${master_db_path}" -l0 --serverlog=\"reindexer_master_$3.1.log\" --corelog=\"reindexer_master_$3.1.log\" --httplog=\"\" --rpclog=\"\" & + ${master_cmd} --db "${master_db_path}" -l trace --serverlog=reindexer_master_$3.1.log --corelog=reindexer_master_$3.1.log --httplog=none --rpclog=none & master_pid=$! sleep 8 go run ${script_dir}/filler.go --dsn "${master_dsn}/${db_name}" --offset 0 echo "====Force sync" - ${slave_cmd} --db "${slave_db_path}" -p 9089 -r 6535 -l0 --serverlog=\"reindexer_slave_$3.1.log\" --corelog=\"reindexer_slave_$3.1.log\" --httplog=\"\" --rpclog=\"\" & + ${slave_cmd} --db "${slave_db_path}" -p 9089 -r 6535 -l trace --serverlog=reindexer_slave_$3.1.log --corelog=reindexer_slave_$3.1.log --httplog=none --rpclog=none & slave_pid=$! sleep 8 kill $slave_pid - wait $slave_pid + wait $slave_pid go run ${script_dir}/filler.go --dsn "${master_dsn}/${db_name}" --offset 100 echo "====Sync by WAL" - ${slave_cmd} --db "${slave_db_path}" -p 9089 -r 6535 -l0 --serverlog=\"reindexer_slave_$3.2.log\" --corelog=\"reindexer_slave_$3.2.log\" --httplog=\"\" --rpclog=\"\" & + ${slave_cmd} --db "${slave_db_path}" -p 9089 -r 6535 -l trace --serverlog=reindexer_slave_$3.2.log --corelog=reindexer_slave_$3.2.log --httplog=none --rpclog=none & slave_pid=$! 
 	sleep 12
 	echo "====Online sync"
@@ -60,11 +60,10 @@ test_outdated_instance() {
 	build/cpp_src/cmd/reindexer_tool/reindexer_tool --dsn "${master_dsn}/${db_name}" --command "\dump ${ns_name}" --output "${master_dump}"
 	build/cpp_src/cmd/reindexer_tool/reindexer_tool --dsn "${slave_dsn}/${db_name}" --command "\dump ${ns_name}" --output "${slave_dump}"
 	kill $slave_pid
-	wait $slave_pid
-	kill $master_pid
-	wait $master_pid
-	sed -i -E "s/(\\NAMESPACES ADD.*)(\"schema\":\"\{.*\}\")/\1\"schema\":\"\{\}\"/" "${master_dump}"
-	${script_dir}/compare_dumps.sh "${master_dump}" "${slave_dump}"
+	wait $slave_pid
+	kill $master_pid
+	wait $master_pid
+	${script_dir}/compare_dumps.sh "${master_dump}" "${slave_dump}"
 }
 
 echo "====Installing reindexer package===="
diff --git a/test/dsl_test.go b/test/dsl_test.go
index 5e0f44971..c1db5f010 100644
--- a/test/dsl_test.go
+++ b/test/dsl_test.go
@@ -622,7 +622,7 @@ func TestDSLQueries(t *testing.T) {
 
 			execDSLTwice(t, func(t *testing.T, q *reindexer.Query) {
 				_, err := q.Exec().FetchAll()
-				require.ErrorContains(t, err, fmt.Sprintf("The '%s' condition is suported only by 'sparse' or 'array' indexes", cond[1]))
+				require.ErrorContains(t, err, fmt.Sprintf("The '%s' condition is supported only by 'sparse' or 'array' indexes", cond[1]))
 			}, fmt.Sprintf(jsonDSL, cond[0]))
 		}
 	})
@@ -1296,7 +1296,7 @@ func TestDSLQueries(t *testing.T) {
 
 			execDSLTwice(t, func(t *testing.T, q *reindexer.Query) {
 				_, err := q.Exec().FetchAll()
-				require.ErrorContains(t, err, fmt.Sprintf("The '%s' condition is suported only by 'sparse' or 'array' indexes", cond[1]))
+				require.ErrorContains(t, err, fmt.Sprintf("The '%s' condition is supported only by 'sparse' or 'array' indexes", cond[1]))
 			}, fmt.Sprintf(jsonDSL, cond[0]))
 		}
 	})
diff --git a/test/encdec_test.go b/test/encdec_test.go
index 09f71e274..990133c9d 100644
--- a/test/encdec_test.go
+++ b/test/encdec_test.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/restream/reindexer/v3"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
@@ -46,63 +47,68 @@ type (
 type TestItemEncDec struct {
 	ID int `reindex:"id,-"`
 	*TestEmbedItem
-	Prices []*TestJoinItem `reindex:"prices,,joined"`
-	Pricesx []*TestJoinItem `reindex:"pricesx,,joined"`
-	Packages []int `reindex:"packages,hash"`
-	UPackages []uint `reindex:"upackages,hash"`
-	UPackages64 []uint64 `reindex:"upackages64,hash"`
-	FPackages []float32 `reindex:"fpackages,tree"`
-	FPackages64 []float64 `reindex:"fpackages64,tree"`
-	Bool bool `reindex:"bool"`
-	Bools []bool `reindex:"bools"`
-	Name string `reindex:"name,tree"`
-	Countries []string `reindex:"countries,tree"`
-	Description string `reindex:"description,fuzzytext"`
-	Rate float64 `reindex:"rate,tree"`
-	CustomStringsPtr TestCustomStringsPtrs `reindex:"custom_strings_ptrs"`
-	CustomStrings TestCustomStrings `reindex:"custom_strings"`
-	CustomInts64 TestCustomInts64 `reindex:"custom_ints64"`
-	CustomInts16 TestCustomInts16 `reindex:"custom_ints16"`
-	CustomFloats TestCustomFloats `reindex:"custom_floats"`
-	IsDeleted bool `reindex:"isdeleted,-"`
-	PNested *TestNest `reindex:"-"`
-	Nested TestNest
-	NestedA [1]TestNest `reindex:"-"`
-	NonIndexArr []int
-	NonIndexA [20]float64
-	NonIndexPA []*int
-	Actors []Actor `reindex:"-"`
-	PricesIDs []int `reindex:"price_id"`
-	LocationID string `reindex:"location"`
-	EndTime uint32 `reindex:"end_time,-"`
-	StartTime uint64 `reindex:"start_time,tree"`
-	PStrNull *string
-	PStr *string
-	Tmp string `reindex:"tmp,-"`
-	Map1 map[string]int
-	Map2 map[int64]Actor
-	Map3 map[int]*Actor
-	Map4 map[int]*int
-	Map5 map[int][]int
-	Map6 map[uint][]uint
-	Interface interface{}
-	Interface2 interface{}
-	InterfaceNull interface{}
-	MapNull map[string]int
-	SliceStrNull []string
-	SliceNull []int
-	SliceStr []string
-	SliceUInt []uint
-	SliceUInt64 []uint64
-	NegativeSliceInt64 []int64
-	SliceF64 []float64
-	SliceF32 []float32
-	SliceBool []bool
-	SliceIface []interface{}
-	SliceIface1 []interface{}
-	UInt64 uint64
-	UInt32 uint32
-	UInt uint
+	Prices []*TestJoinItem `reindex:"prices,,joined"`
+	Pricesx []*TestJoinItem `reindex:"pricesx,,joined"`
+	Packages []int `reindex:"packages,hash"`
+	UPackages []uint `reindex:"upackages,hash"`
+	UPackages64 []uint64 `reindex:"upackages64,hash"`
+	FPackages []float32 `reindex:"fpackages,tree"`
+	FPackages64 []float64 `reindex:"fpackages64,tree"`
+	Bool bool `reindex:"bool"`
+	Bools []bool `reindex:"bools"`
+	Name string `reindex:"name,tree"`
+	Countries []string `reindex:"countries,tree"`
+	Description string `reindex:"description,fuzzytext"`
+	Rate float64 `reindex:"rate,tree"`
+	CustomStringsPtr TestCustomStringsPtrs `reindex:"custom_strings_ptrs"`
+	CustomStrings TestCustomStrings `reindex:"custom_strings"`
+	CustomInts64 TestCustomInts64 `reindex:"custom_ints64"`
+	CustomInts16 TestCustomInts16 `reindex:"custom_ints16"`
+	CustomFloats TestCustomFloats `reindex:"custom_floats"`
+	IsDeleted bool `reindex:"isdeleted,-"`
+	EmptyReindexTagStr1 string `reindex:",-"`
+	EmptyReindexTagStr2 string `reindex:""`
+	EmptyJsonTagStr string `json:""`
+	TextLabel1 string `reindex:"TextLabel,text" json:",omitempty"`
+	TextLabel2 string `reindex:",text" json:",omitempty"`
+	PNested *TestNest `reindex:"-"`
+	Nested TestNest
+	NestedA [1]TestNest `reindex:"-"`
+	NonIndexArr []int
+	NonIndexA [20]float64
+	NonIndexPA []*int
+	Actors []Actor `reindex:"-"`
+	PricesIDs []int `reindex:"price_id"`
+	LocationID string `reindex:"location"`
+	EndTime uint32 `reindex:"end_time,-"`
+	StartTime uint64 `reindex:"start_time,tree"`
+	PStrNull *string
+	PStr *string
+	Tmp string `reindex:"tmp,-"`
+	Map1 map[string]int
+	Map2 map[int64]Actor
+	Map3 map[int]*Actor
+	Map4 map[int]*int
+	Map5 map[int][]int
+	Map6 map[uint][]uint
+	Interface interface{}
+	Interface2 interface{}
+	InterfaceNull interface{}
+	MapNull map[string]int
+	SliceStrNull []string
+	SliceNull []int
+	SliceStr []string
+	SliceUInt []uint
+	SliceUInt64 []uint64
+	NegativeSliceInt64 []int64
+	SliceF64 []float64
+	SliceF32 []float32
+	SliceBool []bool
+	SliceIface []interface{}
+	SliceIface1 []interface{}
+	UInt64 uint64
+	UInt32 uint32
+	UInt uint
 
 	Custom TestCustomBytes
 	Time time.Time
@@ -187,6 +193,11 @@ func FillTestItemsEncDec(start int, count int, pkgsCount int, asJson bool) {
 			Age: rand.Int() % 5,
 			Name: randString(),
 		},
+		EmptyReindexTagStr1: randString(),
+		EmptyReindexTagStr2: randString(),
+		EmptyJsonTagStr: randString(),
+		TextLabel1: randString(),
+		TextLabel2: randString(),
 		PNested: &TestNest{
 			Age: rand.Int() % 5,
 			Name: randString(),
@@ -202,24 +213,24 @@ func FillTestItemsEncDec(start int, count int, pkgsCount int, asJson bool) {
 			`xxxx`: int(rand.Int31()),
 		},
 		Map2: map[int64]Actor{
-			1: Actor{randString()},
-			100: Actor{randString()},
+			1: {randString()},
+			100: {randString()},
 		},
 		Map3: map[int]*Actor{
-			4: &Actor{randString()},
-			2: &Actor{randString()},
+			4: {randString()},
+			2: {randString()},
 		},
 		Map4: map[int]*int{
 			5: vint1,
 			120: vint2,
 		},
 		Map5: map[int][]int{
-			0: []int{1, 2, 3},
-			-1: []int{9, 8, 7},
+			0: {1, 2, 3},
+			-1: {9, 8, 7},
 		},
 		Map6: map[uint][]uint{
-			0: []uint{1, 2, 3},
-			4: []uint{9, 8, 7},
+			0: {1, 2, 3},
+			4: {9, 8, 7},
 		},
 		NonIndexPA: []*int{
 			vint1,
@@ -293,15 +304,32 @@ func TestHeterogeneusArrayEncDec(t *testing.T) {
 	defer it.Close()
 	require.NoError(t, it.Error())
 
-	items := make([]interface{}, 0, 1)
 	for it.Next() {
 		item := &TestItemEncDec{}
 		err := json.Unmarshal(it.JSON(), &item)
 		require.NoError(t, err, "error json was: %s\n", it.JSON())
-		items = append(items, item)
 	}
 }
 
+func checkIndexesWithEmptyTags(t *testing.T) {
+	expectedIndexes := map[string]string{
+		"EmptyReindexTagStr1": "-",
+		"EmptyReindexTagStr2": "hash",
+		"TextLabel": "text",
+		"TextLabel2": "text",
+	}
+
+	desc, err := DB.DescribeNamespace("test_items_encdec")
+	require.NoError(t, err)
+	for _, index := range desc.Indexes {
+		if typ, ok := expectedIndexes[index.Name]; ok {
+			assert.Equal(t, typ, index.IndexType)
+			delete(expectedIndexes, index.Name)
+		}
+	}
+	assert.Empty(t, expectedIndexes, "Some of the indexes are missing")
+}
+
 func TestEncDec(t *testing.T) {
 	t.Parallel()
 	// Fill items by cjson encoder
@@ -310,6 +338,8 @@ func TestEncDec(t *testing.T) {
 	// fill items in json format
 	FillTestItemsEncDec(5000, 10000, 20, true)
 
+	checkIndexesWithEmptyTags(t)
+
 	// get and decode all items by cjson decoder
 	newTestQuery(DB, "test_items_encdec").ExecAndVerify(t)
 
diff --git a/test/index_struct_test.go b/test/index_struct_test.go
index ffd37db08..54336ab19 100644
--- a/test/index_struct_test.go
+++ b/test/index_struct_test.go
@@ -120,15 +120,15 @@ func TestOpenNs(t *testing.T) {
 	err := DB.OpenNamespace(ns, reindexer.DefaultNamespaceOptions(), FailSimple{})
 	assert.ErrorContains(t, err,
-		"non-composite/non-joined field ('Age'), marked with `json:-` can not have explicit reindex tags, but it does ('age,hash')")
+		"non-composite/non-joined field ('Age'), marked with `json:-` can not have explicit reindex tags, but it does (reindex:\"age,hash\")")
 
 	err = DB.OpenNamespace(ns, reindexer.DefaultNamespaceOptions(), FailPrivate{})
 	assert.ErrorContains(t, err,
-		"unexported non-composite field ('private') can not have reindex tags, but it does ('private,hash')")
+		"unexported non-composite field ('private') can not have reindex tags, but it does (reindex:\"private,hash\")")
 
 	err = DB.OpenNamespace(ns, reindexer.DefaultNamespaceOptions(), FailPrivateJoin{})
 	assert.ErrorContains(t, err,
-		"unexported non-composite field ('privateAccounts') can not have reindex tags, but it does ('accounts,,joined')")
+		"unexported non-composite field ('privateAccounts') can not have reindex tags, but it does (reindex:\"accounts,,joined\")")
 
 	err = DB.OpenNamespace(ns, reindexer.DefaultNamespaceOptions(), FailJoinScalar{})
 	assert.ErrorContains(t, err,