From 6c0e792e50f82179429b5f43f07f6b04951bbfd2 Mon Sep 17 00:00:00 2001
From: RobinTF <83676088+RobinTF@users.noreply.github.com>
Date: Tue, 4 Feb 2025 18:55:48 +0100
Subject: [PATCH] Don't use `cache-max-size-lazy-result` setting for the root
 operation, even when it's lazy (#1709)

Since https://github.com/ad-freiburg/qlever/pull/1638, QLever has the runtime parameter `lazy-result-max-cache-size`, with a deliberately small default value of 5 MB. However, the final result (of the root operation) should be cached not depending on that size, but depending on the regular `cache-max-size`, `cache-max-size-single-entry`, and `cache-max-num-entries`, even if the last operation is (output-)lazy.  This is now indeed so. Fixes #1701 (which provides a good example of a query, where the old behavior is weird and the new behavior makes much more sense).

Use the occasion to give the runtime parameter a more consistent name, namely `cache-max-size-lazy-result`.
---
 src/ServerMain.cpp             |  4 +--
 src/engine/Operation.cpp       | 12 ++++----
 src/engine/Operation.h         |  3 +-
 src/global/RuntimeParameters.h |  2 +-
 test/OperationTest.cpp         | 54 +++++++++++++++++++++++++++++++---
 test/engine/ValuesForTesting.h | 11 ++++++-
 6 files changed, 72 insertions(+), 14 deletions(-)
diff --git a/src/ServerMain.cpp b/src/ServerMain.cpp
index f50d12cba2..e0808817b1 100644
--- a/src/ServerMain.cpp
+++ b/src/ServerMain.cpp
@@ -82,8 +82,8 @@ int main(int argc, char** argv) {
       optionFactory.getProgramOption<"cache-max-size-single-entry">(),
       "Maximum size for a single cache entry. That is, "
       "results larger than this will not be cached unless pinned.");
-  add("lazy-result-max-cache-size,E",
-      optionFactory.getProgramOption<"lazy-result-max-cache-size">(),
+  add("cache-max-size-lazy-result,E",
+      optionFactory.getProgramOption<"cache-max-size-lazy-result">(),
       "Maximum size up to which lazy results will be cached by aggregating "
       "partial results. Caching does cause significant overhead for this "
       "case.");
diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp
index ed5d9d3cc6..0f94ff7886 100644
--- a/src/engine/Operation.cpp
+++ b/src/engine/Operation.cpp
@@ -226,12 +226,13 @@ ProtoResult Operation::runComputation(const ad_utility::Timer& timer,
 // _____________________________________________________________________________
 CacheValue Operation::runComputationAndPrepareForCache(
     const ad_utility::Timer& timer, ComputationMode computationMode,
-    const QueryCacheKey& cacheKey, bool pinned) {
+    const QueryCacheKey& cacheKey, bool pinned, bool isRoot) {
   auto& cache = _executionContext->getQueryTreeCache();
   auto result = runComputation(timer, computationMode);
   auto maxSize =
-      std::min(RuntimeParameters().get<"lazy-result-max-cache-size">(),
-               cache.getMaxSizeSingleEntry());
+      isRoot ? cache.getMaxSizeSingleEntry()
+             : std::min(RuntimeParameters().get<"cache-max-size-lazy-result">(),
+                        cache.getMaxSizeSingleEntry());
   if (canResultBeCached() && !result.isFullyMaterialized() &&
       !unlikelyToFitInCache(maxSize)) {
     AD_CONTRACT_CHECK(!pinned);
@@ -306,9 +307,10 @@ std::shared_ptr<const Result> Operation::getResult(
                 updateRuntimeInformationOnFailure(timer.msecs());
               }
             });
-    auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult]() {
+    auto cacheSetup = [this, &timer, computationMode, &cacheKey, pinResult,
+                       isRoot]() {
       return runComputationAndPrepareForCache(timer, computationMode, cacheKey,
-                                              pinResult);
+                                              pinResult, isRoot);
     };
 
     auto suitedForCache = [](const CacheValue& cacheValue) {
diff --git a/src/engine/Operation.h b/src/engine/Operation.h
index 893e85ca22..1a3f68b83d 100644
--- a/src/engine/Operation.h
+++ b/src/engine/Operation.h
@@ -384,7 +384,7 @@ class Operation {
   CacheValue runComputationAndPrepareForCache(const ad_utility::Timer& timer,
                                               ComputationMode computationMode,
                                               const QueryCacheKey& cacheKey,
-                                              bool pinned);
+                                              bool pinned, bool isRoot);
 
   // Create and store the complete runtime information for this operation after
   // it has either been successfully computed or read from the cache.
@@ -454,4 +454,5 @@ class Operation {
   FRIEND_TEST(Operation, ensureLazyOperationIsCachedIfSmallEnough);
   FRIEND_TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge);
   FRIEND_TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache);
+  FRIEND_TEST(Operation, checkMaxCacheSizeIsComputedCorrectly);
 };
diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h
index c082b14ca4..f61705e7fc 100644
--- a/src/global/RuntimeParameters.h
+++ b/src/global/RuntimeParameters.h
@@ -54,7 +54,7 @@ inline auto& RuntimeParameters() {
         Bool<"throw-on-unbound-variables">{false},
         // Control up until which size lazy results should be cached. Caching
         // does cause significant overhead for this case.
-        MemorySizeParameter<"lazy-result-max-cache-size">{5_MB},
+        MemorySizeParameter<"cache-max-size-lazy-result">{5_MB},
         Bool<"websocket-updates-enabled">{true},
         // When the result of an index scan is smaller than a single block, then
         // its size estimate will be the size of the block divided by this
diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp
index e39b9312e0..96c6525221 100644
--- a/test/OperationTest.cpp
+++ b/test/OperationTest.cpp
@@ -9,6 +9,7 @@
 #include "engine/NeutralElementOperation.h"
 #include "engine/ValuesForTesting.h"
 #include "global/RuntimeParameters.h"
+#include "util/GTestHelpers.h"
 #include "util/IdTableHelpers.h"
 #include "util/IndexTestHelpers.h"
 #include "util/OperationTestHelpers.h"
@@ -556,7 +557,7 @@ TEST(Operation, ensureLazyOperationIsCachedIfSmallEnough) {
 
   auto cacheValue = valuesForTesting.runComputationAndPrepareForCache(
       timer, ComputationMode::LAZY_IF_SUPPORTED, makeQueryCacheKey("test"),
-      false);
+      false, false);
   EXPECT_FALSE(
       qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test")));
 
@@ -610,11 +611,11 @@ TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge) {
     // generator to additionally assert sure it is not re-read on every
     // iteration.
     auto cleanup =
-        setRuntimeParameterForTest<"lazy-result-max-cache-size">(1_B);
+        setRuntimeParameterForTest<"cache-max-size-lazy-result">(1_B);
 
     cacheValue = valuesForTesting.runComputationAndPrepareForCache(
         timer, ComputationMode::LAZY_IF_SUPPORTED, makeQueryCacheKey("test"),
-        false);
+        false, false);
     EXPECT_FALSE(
         qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test")));
   }
@@ -641,7 +642,7 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) {
 
   auto cacheValue = valuesForTesting.runComputationAndPrepareForCache(
       timer, ComputationMode::LAZY_IF_SUPPORTED, makeQueryCacheKey("test"),
-      false);
+      false, false);
   EXPECT_FALSE(
       qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test")));
 
@@ -653,6 +654,51 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) {
       qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test")));
 }
 
+// _____________________________________________________________________________
+TEST(Operation, checkMaxCacheSizeIsComputedCorrectly) {
+  auto runTest = [](ad_utility::MemorySize cacheLimit,
+                    ad_utility::MemorySize runtimeParameterLimit, bool isRoot,
+                    ad_utility::MemorySize expectedSize,
+                    ad_utility::source_location sourceLocation =
+                        ad_utility::source_location::current()) {
+    auto loc = generateLocationTrace(sourceLocation);
+    auto qec = getQec();
+    qec->getQueryTreeCache().clearAll();
+    std::vector<IdTable> idTablesVector{};
+    idTablesVector.push_back(makeIdTableFromVector({{3, 4}}));
+    ValuesForTesting valuesForTesting{
+        qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}, true};
+
+    ad_utility::MemorySize actualCacheSize;
+    valuesForTesting.setCacheSizeStorage(&actualCacheSize);
+
+    absl::Cleanup restoreOriginalSize{
+        [qec, original = qec->getQueryTreeCache().getMaxSizeSingleEntry()]() {
+          qec->getQueryTreeCache().setMaxSizeSingleEntry(original);
+        }};
+    qec->getQueryTreeCache().setMaxSizeSingleEntry(cacheLimit);
+
+    auto cleanup = setRuntimeParameterForTest<"cache-max-size-lazy-result">(
+        runtimeParameterLimit);
+
+    ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started};
+
+    auto cacheValue = valuesForTesting.runComputationAndPrepareForCache(
+        timer, ComputationMode::LAZY_IF_SUPPORTED, makeQueryCacheKey("test"),
+        false, isRoot);
+
+    EXPECT_EQ(actualCacheSize, expectedSize);
+  };
+
+  runTest(10_B, 10_B, true, 10_B);
+  runTest(10_B, 10_B, false, 10_B);
+  runTest(10_B, 1_B, false, 1_B);
+  runTest(1_B, 10_B, false, 1_B);
+  runTest(10_B, 1_B, true, 10_B);
+  runTest(1_B, 10_B, true, 1_B);
+}
+
+// _____________________________________________________________________________
 TEST(OperationTest, disableCaching) {
   auto qec = getQec();
   qec->getQueryTreeCache().clearAll();
diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h
index 097ccd9c78..ec9b55cd30 100644
--- a/test/engine/ValuesForTesting.h
+++ b/test/engine/ValuesForTesting.h
@@ -22,6 +22,7 @@ class ValuesForTesting : public Operation {
   size_t sizeEstimate_;
   size_t costEstimate_;
   bool unlikelyToFitInCache_ = false;
+  ad_utility::MemorySize* cacheSizeStorage_ = nullptr;
 
  public:
   // Create an operation that has as its result the given `table` and the given
@@ -115,9 +116,17 @@ class ValuesForTesting : public Operation {
     }
     return {std::move(table), resultSortedOn(), localVocab_.clone()};
   }
-  bool unlikelyToFitInCache(ad_utility::MemorySize) const override {
+  bool unlikelyToFitInCache(ad_utility::MemorySize cacheSize) const override {
+    if (cacheSizeStorage_ != nullptr) {
+      *cacheSizeStorage_ = cacheSize;
+    }
     return unlikelyToFitInCache_;
   }
+
+  void setCacheSizeStorage(ad_utility::MemorySize* cacheSizeStorage) {
+    cacheSizeStorage_ = cacheSizeStorage;
+  }
+
   bool supportsLimit() const override { return supportsLimit_; }
 
   bool& forceFullyMaterialized() { return forceFullyMaterialized_; }