From 6c0e792e50f82179429b5f43f07f6b04951bbfd2 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 4 Feb 2025 18:55:48 +0100 Subject: [PATCH] Don't use `cache-max-size-lazy-result` setting for the root operation, even when it's lazy (#1709) Since https://github.com/ad-freiburg/qlever/pull/1638, QLever has the runtime parameter `lazy-result-max-cache-size`, with a deliberately small default value of 5 MB. However, the final result (of the root operation) should be cached not depending on that size, but depending on the regular `cache-max-size`, `cache-max-size-single-entry`, and `cache-max-num-entries`, even if the last operation is (output-)lazy. This is now indeed so. Fixes #1701 (which provides a good example of a query, where the old behavior is weird and the new behavior makes much more sense). Use the occasion to give the runtime parameter a more consistent name, namely `cache-max-size-lazy-result`. --- src/ServerMain.cpp | 4 +-- src/engine/Operation.cpp | 12 ++++---- src/engine/Operation.h | 3 +- src/global/RuntimeParameters.h | 2 +- test/OperationTest.cpp | 54 +++++++++++++++++++++++++++++++--- test/engine/ValuesForTesting.h | 11 ++++++- 6 files changed, 72 insertions(+), 14 deletions(-) diff --git a/src/ServerMain.cpp b/src/ServerMain.cpp index f50d12cba2..e0808817b1 100644 --- a/src/ServerMain.cpp +++ b/src/ServerMain.cpp @@ -82,8 +82,8 @@ int main(int argc, char** argv) { optionFactory.getProgramOption<"cache-max-size-single-entry">(), "Maximum size for a single cache entry. That is, " "results larger than this will not be cached unless pinned."); - add("lazy-result-max-cache-size,E", - optionFactory.getProgramOption<"lazy-result-max-cache-size">(), + add("cache-max-size-lazy-result,E", + optionFactory.getProgramOption<"cache-max-size-lazy-result">(), "Maximum size up to which lazy results will be cached by aggregating " "partial results. Caching does cause significant overhead for this " "case."); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index ed5d9d3cc6..0f94ff7886 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -226,12 +226,13 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer, // _____________________________________________________________________________ CacheValue Operation::runComputationAndPrepareForCache( const ad_utility::Timer& timer, ComputationMode computationMode, - const QueryCacheKey& cacheKey, bool pinned) { + const QueryCacheKey& cacheKey, bool pinned, bool isRoot) { auto& cache = _executionContext->getQueryTreeCache(); auto result = runComputation(timer, computationMode); auto maxSize = - std::min(RuntimeParameters().get<"lazy-result-max-cache-size">(), - cache.getMaxSizeSingleEntry()); + isRoot ? cache.getMaxSizeSingleEntry() + : std::min(RuntimeParameters().get<"cache-max-size-lazy-result">(), + cache.getMaxSizeSingleEntry()); if (canResultBeCached() && !result.isFullyMaterialized() && !unlikelyToFitInCache(maxSize)) { AD_CONTRACT_CHECK(!pinned); @@ -306,9 +307,10 @@ std::shared_ptr Operation::getResult( updateRuntimeInformationOnFailure(timer.msecs()); } }); - auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult]() { + auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult, + isRoot]() { return runComputationAndPrepareForCache(timer, computationMode, cacheKey, - pinResult); + pinResult, isRoot); }; auto suitedForCache = [](const CacheValue& cacheValue) { diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 893e85ca22..1a3f68b83d 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -384,7 +384,7 @@ class Operation { CacheValue runComputationAndPrepareForCache(const ad_utility::Timer& timer, ComputationMode computationMode, const QueryCacheKey& cacheKey, - bool pinned); + bool pinned, bool isRoot); // Create and store the complete runtime information for this operation after // it has either been successfully computed or read from the cache. @@ -454,4 +454,5 @@ class Operation { FRIEND_TEST(Operation, ensureLazyOperationIsCachedIfSmallEnough); FRIEND_TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge); FRIEND_TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache); + FRIEND_TEST(Operation, checkMaxCacheSizeIsComputedCorrectly); }; diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index c082b14ca4..f61705e7fc 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -54,7 +54,7 @@ inline auto& RuntimeParameters() { Bool<"throw-on-unbound-variables">{false}, // Control up until which size lazy results should be cached. Caching // does cause significant overhead for this case. - MemorySizeParameter<"lazy-result-max-cache-size">{5_MB}, + MemorySizeParameter<"cache-max-size-lazy-result">{5_MB}, Bool<"websocket-updates-enabled">{true}, // When the result of an index scan is smaller than a single block, then // its size estimate will be the size of the block divided by this diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index e39b9312e0..96c6525221 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -9,6 +9,7 @@ #include "engine/NeutralElementOperation.h" #include "engine/ValuesForTesting.h" #include "global/RuntimeParameters.h" +#include "util/GTestHelpers.h" #include "util/IdTableHelpers.h" #include "util/IndexTestHelpers.h" #include "util/OperationTestHelpers.h" @@ -556,7 +557,7 @@ TEST(Operation, ensureLazyOperationIsCachedIfSmallEnough) { auto cacheValue = valuesForTesting.runComputationAndPrepareForCache( timer, ComputationMode::LAZY_IF_SUPPORTED, makeQueryCacheKey("test"), - false); + false, false); EXPECT_FALSE( qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test"))); @@ -610,11 +611,11 @@ TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge) { // generator to additionally assert sure it is not re-read on every // iteration. auto cleanup = - setRuntimeParameterForTest<"lazy-result-max-cache-size">(1_B); + setRuntimeParameterForTest<"cache-max-size-lazy-result">(1_B); cacheValue = valuesForTesting.runComputationAndPrepareForCache( timer, ComputationMode::LAZY_IF_SUPPORTED, makeQueryCacheKey("test"), - false); + false, false); EXPECT_FALSE( qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test"))); } @@ -641,7 +642,7 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) { auto cacheValue = valuesForTesting.runComputationAndPrepareForCache( timer, ComputationMode::LAZY_IF_SUPPORTED, makeQueryCacheKey("test"), - false); + false, false); EXPECT_FALSE( qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test"))); @@ -653,6 +654,51 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) { qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test"))); } +// _____________________________________________________________________________ +TEST(Operation, checkMaxCacheSizeIsComputedCorrectly) { + auto runTest = [](ad_utility::MemorySize cacheLimit, + ad_utility::MemorySize runtimeParameterLimit, bool isRoot, + ad_utility::MemorySize expectedSize, + ad_utility::source_location sourceLocation = + ad_utility::source_location::current()) { + auto loc = generateLocationTrace(sourceLocation); + auto qec = getQec(); + qec->getQueryTreeCache().clearAll(); + std::vector idTablesVector{}; + idTablesVector.push_back(makeIdTableFromVector({{3, 4}})); + ValuesForTesting valuesForTesting{ + qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}, true}; + + ad_utility::MemorySize actualCacheSize; + valuesForTesting.setCacheSizeStorage(&actualCacheSize); + + absl::Cleanup restoreOriginalSize{ + [qec, original = qec->getQueryTreeCache().getMaxSizeSingleEntry()]() { + qec->getQueryTreeCache().setMaxSizeSingleEntry(original); + }}; + qec->getQueryTreeCache().setMaxSizeSingleEntry(cacheLimit); + + auto cleanup = setRuntimeParameterForTest<"cache-max-size-lazy-result">( + runtimeParameterLimit); + + ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; + + auto cacheValue = valuesForTesting.runComputationAndPrepareForCache( + timer, ComputationMode::LAZY_IF_SUPPORTED, makeQueryCacheKey("test"), + false, isRoot); + + EXPECT_EQ(actualCacheSize, expectedSize); + }; + + runTest(10_B, 10_B, true, 10_B); + runTest(10_B, 10_B, false, 10_B); + runTest(10_B, 1_B, false, 1_B); + runTest(1_B, 10_B, false, 1_B); + runTest(10_B, 1_B, true, 10_B); + runTest(1_B, 10_B, true, 1_B); +} + +// _____________________________________________________________________________ TEST(OperationTest, disableCaching) { auto qec = getQec(); qec->getQueryTreeCache().clearAll(); diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 097ccd9c78..ec9b55cd30 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -22,6 +22,7 @@ class ValuesForTesting : public Operation { size_t sizeEstimate_; size_t costEstimate_; bool unlikelyToFitInCache_ = false; + ad_utility::MemorySize* cacheSizeStorage_ = nullptr; public: // Create an operation that has as its result the given `table` and the given @@ -115,9 +116,17 @@ class ValuesForTesting : public Operation { } return {std::move(table), resultSortedOn(), localVocab_.clone()}; } - bool unlikelyToFitInCache(ad_utility::MemorySize) const override { + bool unlikelyToFitInCache(ad_utility::MemorySize cacheSize) const override { + if (cacheSizeStorage_ != nullptr) { + *cacheSizeStorage_ = cacheSize; + } return unlikelyToFitInCache_; } + + void setCacheSizeStorage(ad_utility::MemorySize* cacheSizeStorage) { + cacheSizeStorage_ = cacheSizeStorage; + } + bool supportsLimit() const override { return supportsLimit_; } bool& forceFullyMaterialized() { return forceFullyMaterialized_; }