From b455626a14f115ba1061c5565c6fa2b5df760e8b Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Tue, 8 Oct 2024 14:59:34 +0200
Subject: [PATCH 01/30] Add the required workflow files...

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 .github/workflows/sparql-conformance.yml      | 91 +++++++++++++++++++
 .../workflows/upload-sparql-conformance.yml   | 61 +++++++++++++
 2 files changed, 152 insertions(+)
 create mode 100644 .github/workflows/sparql-conformance.yml
 create mode 100644 .github/workflows/upload-sparql-conformance.yml

diff --git a/.github/workflows/sparql-conformance.yml b/.github/workflows/sparql-conformance.yml
new file mode 100644
index 0000000000..53c747f49c
--- /dev/null
+++ b/.github/workflows/sparql-conformance.yml
@@ -0,0 +1,91 @@
+name: sparql-test-suite
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+  merge_group:
+
+jobs:
+  build:
+    env:
+      compiler: clang
+      compiler-version: 16
+      build-type: Release
+      cmake-flags: "-DCMAKE_C_COMPILER=clang-16 -DCMAKE_CXX_COMPILER=clang++-16"
+
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: "recursive"
+          path: qlever-code
+      - name: Checkout sparql-test-suite-files
+        uses: actions/checkout@v3
+        with:
+          repository: "w3c/rdf-tests"
+          path: sparql-test-suite
+      - name: Checkout qlever-test-suite
+        uses: actions/checkout@v3
+        with:
+          repository: "SIRDNARch/qlever-conformance-tests"
+          token: ${{ secrets.CONFORMANCE_REPO_ACCESS_TOKEN }}
+          path: qlever-test-suite
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: Install python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+          pip install rdflib
+      - name: Install dependencies
+        uses: ./qlever-code/.github/workflows/install-dependencies-ubuntu
+      - name: Install compiler
+        uses: ./qlever-code/.github/workflows/install-compiler-ubuntu
+        with:
+          compiler: "clang"
+          compiler-version: "16"
+      - name: Create build directory
+        run: mkdir ${{github.workspace}}/qlever-code/build
+      - name: Configure CMake
+        run: cmake -S ${{github.workspace}}/qlever-code/ -B ${{github.workspace}}/qlever-code/build ${{env.cmake-flags}} -DCMAKE_BUILD_TYPE=${{env.build-type}} -DLOGLEVEL=INFO -DUSE_PARALLEL=false
+      - name: Build
+        run: cmake --build ${{github.workspace}}/qlever-code/build --config ${{env.build-type}} -- -j $(nproc)
+      - name: Execute test suite
+        run: |
+          cd qlever-test-suite
+          python testsuite.py config http://0.0.0.0 7001 ${{github.workspace}}/sparql-test-suite/sparql/sparql11/ ${{github.workspace}}/qlever-code/build/ localhost sparql sparql
+          python testsuite.py extract
+          python testsuite.py ${{ github.sha }}
+          cd ..
+      # Only upload directly if this is not a pull request. In this
+      # case we are on the master branch and have access to the token.
+      - name: "Submit data to server"
+        if: github.event_name != 'pull_request'
+        env:
+          SERVER_URL: ${{ secrets.SPARQL_CONFORMANCE_SERVER_URL }}
+          API_KEY: ${{ secrets.SPARQL_CONFORMANCE_SERVER_KEY }}
+        run: |
+          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event_name}}" -H "sha: ${{github.sha}}" -F "file=@${{github.workspace}}/qlever-test-suite/results/${{ github.sha }}.json.bz2" $SERVER_URL/upload
+
+      # For a pull request we store the file as well as some information
+      # about this PR (number, how to check it out, etc.) and upload it as an artifact
+      - name: Save PR number and coverage file in same directory
+        if: github.event_name == 'pull_request'
+        # Note: If you change any of the filenames here, you also have to change them in `upload-sparql-conformance.yml`
+        run : |
+          mkdir -p conformance-report
+          echo ${{ github.event.number }} > ./conformance-report/pr
+          echo ${{ github.repository }} > ./conformance-report/github_repository
+          echo ${GITHUB_REF} > ./conformance-report/github_ref
+          echo ${{github.event.pull_request.head.sha}} > ./conformance-report/sha
+          mv ${{ github.workspace}}/qlever-test-suite/results/${{ github.sha }}.json.bz2 conformance-report/${{ github.event.pull_request.head.sha }}.json.bz2
+      - name: Upload coverage artifact
+        if: github.event_name == 'pull_request'
+        uses: actions/upload-artifact@v3
+        with:
+          name: conformance-report
+          path: conformance-report/
\ No newline at end of file
diff --git a/.github/workflows/upload-sparql-conformance.yml b/.github/workflows/upload-sparql-conformance.yml
new file mode 100644
index 0000000000..fe9be1580c
--- /dev/null
+++ b/.github/workflows/upload-sparql-conformance.yml
@@ -0,0 +1,61 @@
+name: Upload conformance tests result
+
+on:
+  workflow_run:
+    # This has to be the `name:` of the workflow in `sparql-conformance.yml`.
+    # Start when that workflow has completed; the job's `if:` requires success.
+    workflows: [sparql-test-suite]
+    types:
+      - completed
+
+jobs:
+  upload:
+    runs-on: ubuntu-latest
+    if: >
+      github.event.workflow_run.event == 'pull_request' &&
+      github.event.workflow_run.conclusion == 'success'
+    steps:
+      - name: 'Download artifact'
+        uses: actions/github-script@v6
+        # The following script manually downloads the `conformance-report`
+        # artifact from the workflow run that triggered this workflow and
+        # stores it as `conformance-report.zip` in the workspace.
+        with:
+          script: |
+            var artifacts = await github.rest.actions.listWorkflowRunArtifacts({
+               owner: context.repo.owner,
+               repo: context.repo.repo,
+               run_id: ${{github.event.workflow_run.id }},
+            });
+            var matchArtifact = artifacts.data.artifacts.filter((artifact) => {
+              return artifact.name == "conformance-report"
+            })[0];
+            var download = await github.rest.actions.downloadArtifact({
+               owner: context.repo.owner,
+               repo: context.repo.repo,
+               artifact_id: matchArtifact.id,
+               archive_format: 'zip',
+            });
+            var fs = require('fs');
+            fs.writeFileSync('${{github.workspace}}/conformance-report.zip', Buffer.from(download.data));
+      - run: unzip conformance-report.zip
+      # Read the metadata into environment variables. The artifact was produced by an untrusted PR run, so only the first line of each file, restricted to a safe character set, is used.
+      - name: "Read PR number"
+        run: echo "pr_number=$(head -n 1 pr | tr -cd '0-9')" >> "$GITHUB_ENV"
+      - name: "Read Github Ref"
+        run: echo "original_github_ref=$(head -n 1 github_ref | tr -cd 'A-Za-z0-9._/-')" >> "$GITHUB_ENV"
+      - name: "Read Github SHA"
+        run: echo "commit_sha=$(head -n 1 sha | tr -cd '0-9a-f')" >> "$GITHUB_ENV"
+      - name: "Read Github Repository"
+        run: echo "original_github_repository=$(head -n 1 github_repository | tr -cd 'A-Za-z0-9._/-')" >> "$GITHUB_ENV"
+        # No checkout of the PR's source code is needed here: the metadata
+        # read above (PR number, commit SHA, and repository) is sent to the
+        # conformance server as HTTP headers, and the report file from the
+        # downloaded artifact is uploaded together with them in the step
+        # below.
+      - name: "Submit data to server"
+        env:
+          SERVER_URL: ${{ secrets.SPARQL_CONFORMANCE_SERVER_URL }}
+          API_KEY: ${{ secrets.SPARQL_CONFORMANCE_SERVER_KEY }}
+        run: |
+          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event.workflow_run.event}}" -H "sha: ${{env.commit_sha}}" -H "pr-number: ${{env.pr_number}}" -H "repo: ${{env.original_github_repository}}" -F "file=@${{env.commit_sha}}.json.bz2" $SERVER_URL/upload
\ No newline at end of file

From 3205fa2da872a5fb861669009e5d0f4b7794c1d2 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Tue, 8 Oct 2024 15:13:37 +0200
Subject: [PATCH 02/30] A dummy file for the workflow run thingy.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 .github/workflows/sparql-conformance.yml      | 50 ++-----------------
 .../workflows/upload-sparql-conformance.yml   |  2 +-
 2 files changed, 5 insertions(+), 47 deletions(-)

diff --git a/.github/workflows/sparql-conformance.yml b/.github/workflows/sparql-conformance.yml
index 53c747f49c..cfb2552670 100644
--- a/.github/workflows/sparql-conformance.yml
+++ b/.github/workflows/sparql-conformance.yml
@@ -17,50 +17,8 @@ jobs:
 
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v3
-        with:
-          submodules: "recursive"
-          path: qlever-code
-      - name: Checkout sparql-test-suite-files
-        uses: actions/checkout@v3
-        with:
-          repository: "w3c/rdf-tests"
-          path: sparql-test-suite
-      - name: Checkout qlever-test-suite
-        uses: actions/checkout@v3
-        with:
-          repository: "SIRDNARch/qlever-conformance-tests"
-          token: ${{ secrets.CONFORMANCE_REPO_ACCESS_TOKEN }}
-          path: qlever-test-suite
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.10"
-      - name: Install python dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install requests
-          pip install rdflib
-      - name: Install dependencies
-        uses: ./qlever-code/.github/workflows/install-dependencies-ubuntu
-      - name: Install compiler
-        uses: ./qlever-code/.github/workflows/install-compiler-ubuntu
-        with:
-          compiler: "clang"
-          compiler-version: "16"
-      - name: Create build directory
-        run: mkdir ${{github.workspace}}/qlever-code/build
-      - name: Configure CMake
-        run: cmake -S ${{github.workspace}}/qlever-code/ -B ${{github.workspace}}/qlever-code/build ${{env.cmake-flags}} -DCMAKE_BUILD_TYPE=${{env.build-type}} -DLOGLEVEL=INFO -DUSE_PARALLEL=false
-      - name: Build
-        run: cmake --build ${{github.workspace}}/qlever-code/build --config ${{env.build-type}} -- -j $(nproc)
-      - name: Execute test suite
-        run: |
-          cd qlever-test-suite
-          python testsuite.py config http://0.0.0.0 7001 ${{github.workspace}}/sparql-test-suite/sparql/sparql11/ ${{github.workspace}}/qlever-code/build/ localhost sparql sparql
-          python testsuite.py extract
-          python testsuite.py ${{ github.sha }}
-          cd ..
+      - name: "spoof a json file for experimenting"
+        run: echo "{}" > dummyResults.json
       # Only upload directly if this is not a pull request. In this
       # case we are on the master branch and have access to the token.
       - name: "Submit data to server"
@@ -69,7 +27,7 @@ jobs:
           SERVER_URL: ${{ secrets.SPARQL_CONFORMANCE_SERVER_URL }}
           API_KEY: ${{ secrets.SPARQL_CONFORMANCE_SERVER_KEY }}
         run: |
-          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event_name}}" -H "sha: ${{github.sha}}" -F "file=@${{github.workspace}}/qlever-test-suite/results/${{ github.sha }}.json.bz2" $SERVER_URL/upload
+          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event_name}}" -H "sha: ${{github.sha}}" -F "file=@${{github.workspace}}/dummyResults.json" $SERVER_URL/upload
 
       # For a pull request we store the file as well as some information
       # about this PR (number, how to check it out, etc.) and upload it as an artifact
@@ -82,7 +40,7 @@ jobs:
           echo ${{ github.repository }} > ./conformance-report/github_repository
           echo ${GITHUB_REF} > ./conformance-report/github_ref
           echo ${{github.event.pull_request.head.sha}} > ./conformance-report/sha
-          mv ${{ github.workspace}}/qlever-test-suite/results/${{ github.sha }}.json.bz2 conformance-report/${{ github.event.pull_request.head.sha }}.json.bz2
+          mv ${{ github.workspace}}/dummyResults.json conformance-report/${{ github.event.pull_request.head.sha }}.json
       - name: Upload coverage artifact
         if: github.event_name == 'pull_request'
         uses: actions/upload-artifact@v3
diff --git a/.github/workflows/upload-sparql-conformance.yml b/.github/workflows/upload-sparql-conformance.yml
index fe9be1580c..3c249b7eee 100644
--- a/.github/workflows/upload-sparql-conformance.yml
+++ b/.github/workflows/upload-sparql-conformance.yml
@@ -58,4 +58,4 @@ jobs:
           SERVER_URL: ${{ secrets.SPARQL_CONFORMANCE_SERVER_URL }}
           API_KEY: ${{ secrets.SPARQL_CONFORMANCE_SERVER_KEY }}
         run: |
-          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event.workflow_run.event}}" -H "sha: ${{env.commit_sha}}" -H "pr-number: ${{env.pr_number}}" -H "repo: ${{env.original_github_repository}}" -F "file=@${{env.commit_sha}}.json.bz2" $SERVER_URL/upload
\ No newline at end of file
+          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event.workflow_run.event}}" -H "sha: ${{env.commit_sha}}" -H "pr-number: ${{env.pr_number}}" -H "repo: ${{env.original_github_repository}}" -F "file=@${{env.commit_sha}}.json" $SERVER_URL/upload
\ No newline at end of file

From b7bedba82609b89570fa730964c25011617f2970 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Tue, 8 Oct 2024 15:19:59 +0200
Subject: [PATCH 03/30] Another test...

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 .github/workflows/sparql-conformance.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/sparql-conformance.yml b/.github/workflows/sparql-conformance.yml
index cfb2552670..528abce449 100644
--- a/.github/workflows/sparql-conformance.yml
+++ b/.github/workflows/sparql-conformance.yml
@@ -27,6 +27,7 @@ jobs:
           SERVER_URL: ${{ secrets.SPARQL_CONFORMANCE_SERVER_URL }}
           API_KEY: ${{ secrets.SPARQL_CONFORMANCE_SERVER_KEY }}
         run: |
+          echo "x-api-key: $API_KEY" -H "event: ${{github.event_name}}" -H "sha: ${{github.sha}}" -F "file=@${{github.workspace}}/dummyResults.json" $SERVER_URL/upload
           curl -H "x-api-key: $API_KEY" -H "event: ${{github.event_name}}" -H "sha: ${{github.sha}}" -F "file=@${{github.workspace}}/dummyResults.json" $SERVER_URL/upload
 
       # For a pull request we store the file as well as some information

From 8bd62997d2d06740180fc8e11ce0737732b79132 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Tue, 8 Oct 2024 16:03:46 +0200
Subject: [PATCH 04/30] More sparql conformance stuff...

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 .github/workflows/sparql-conformance.yml      | 51 +++++++++++++++++--
 .../workflows/upload-sparql-conformance.yml   |  2 +-
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/sparql-conformance.yml b/.github/workflows/sparql-conformance.yml
index 528abce449..160de04ca1 100644
--- a/.github/workflows/sparql-conformance.yml
+++ b/.github/workflows/sparql-conformance.yml
@@ -17,8 +17,50 @@ jobs:
 
     runs-on: ubuntu-22.04
     steps:
-      - name: "spoof a json file for experimenting"
-        run: echo "{}" > dummyResults.json
+      - name: Checkout qlever-test-suite
+        uses: actions/checkout@v3
+        with:
+          repository: "SIRDNARch/qlever-conformance-tests"
+          token: ${{ secrets.CONFORMANCE_REPO_ACCESS_TOKEN }}
+          path: qlever-test-suite
+      - uses: actions/checkout@v3
+        with:
+          submodules: "recursive"
+          path: qlever-code
+      - name: Checkout sparql-test-suite-files
+        uses: actions/checkout@v3
+        with:
+          repository: "w3c/rdf-tests"
+          path: sparql-test-suite
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: Install python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+          pip install rdflib
+      - name: Install dependencies
+        uses: ./qlever-code/.github/workflows/install-dependencies-ubuntu
+      - name: Install compiler
+        uses: ./qlever-code/.github/workflows/install-compiler-ubuntu
+        with:
+          compiler: "clang"
+          compiler-version: "16"
+      - name: Create build directory
+        run: mkdir ${{github.workspace}}/qlever-code/build
+      - name: Configure CMake
+        run: cmake -S ${{github.workspace}}/qlever-code/ -B ${{github.workspace}}/qlever-code/build ${{env.cmake-flags}} -DCMAKE_BUILD_TYPE=${{env.build-type}} -DLOGLEVEL=INFO -DUSE_PARALLEL=false
+      - name: Build
+        run: cmake --build ${{github.workspace}}/qlever-code/build --config ${{env.build-type}} -- -j $(nproc)
+      - name: Execute test suite
+        run: |
+          cd qlever-test-suite
+          python testsuite.py config http://0.0.0.0 7001 ${{github.workspace}}/sparql-test-suite/sparql/sparql11/ ${{github.workspace}}/qlever-code/build/ localhost sparql sparql
+          python testsuite.py extract
+          python testsuite.py ${{ github.sha }}
+          cd ..
       # Only upload directly if this is not a pull request. In this
       # case we are on the master branch and have access to the token.
       - name: "Submit data to server"
@@ -27,8 +69,7 @@ jobs:
           SERVER_URL: ${{ secrets.SPARQL_CONFORMANCE_SERVER_URL }}
           API_KEY: ${{ secrets.SPARQL_CONFORMANCE_SERVER_KEY }}
         run: |
-          echo "x-api-key: $API_KEY" -H "event: ${{github.event_name}}" -H "sha: ${{github.sha}}" -F "file=@${{github.workspace}}/dummyResults.json" $SERVER_URL/upload
-          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event_name}}" -H "sha: ${{github.sha}}" -F "file=@${{github.workspace}}/dummyResults.json" $SERVER_URL/upload
+          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event_name}}" -H "sha: ${{github.sha}}" -F "file=@${{github.workspace}}/qlever-test-suite/results/${{ github.sha }}.json.bz2" $SERVER_URL/upload
 
       # For a pull request we store the file as well as some information
       # about this PR (number, how to check it out, etc.) and upload it as an artifact
@@ -41,7 +82,7 @@ jobs:
           echo ${{ github.repository }} > ./conformance-report/github_repository
           echo ${GITHUB_REF} > ./conformance-report/github_ref
           echo ${{github.event.pull_request.head.sha}} > ./conformance-report/sha
-          mv ${{ github.workspace}}/dummyResults.json conformance-report/${{ github.event.pull_request.head.sha }}.json
+          mv ${{ github.workspace}}/qlever-test-suite/results/${{ github.sha }}.json.bz2 conformance-report/${{ github.event.pull_request.head.sha }}.json.bz2
       - name: Upload coverage artifact
         if: github.event_name == 'pull_request'
         uses: actions/upload-artifact@v3
diff --git a/.github/workflows/upload-sparql-conformance.yml b/.github/workflows/upload-sparql-conformance.yml
index 3c249b7eee..fe9be1580c 100644
--- a/.github/workflows/upload-sparql-conformance.yml
+++ b/.github/workflows/upload-sparql-conformance.yml
@@ -58,4 +58,4 @@ jobs:
           SERVER_URL: ${{ secrets.SPARQL_CONFORMANCE_SERVER_URL }}
           API_KEY: ${{ secrets.SPARQL_CONFORMANCE_SERVER_KEY }}
         run: |
-          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event.workflow_run.event}}" -H "sha: ${{env.commit_sha}}" -H "pr-number: ${{env.pr_number}}" -H "repo: ${{env.original_github_repository}}" -F "file=@${{env.commit_sha}}.json" $SERVER_URL/upload
\ No newline at end of file
+          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event.workflow_run.event}}" -H "sha: ${{env.commit_sha}}" -H "pr-number: ${{env.pr_number}}" -H "repo: ${{env.original_github_repository}}" -F "file=@${{env.commit_sha}}.json.bz2" $SERVER_URL/upload
\ No newline at end of file

From 5cb6a0e048e0575c2d6e10f702ed24cf56d3a272 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Tue, 7 Jan 2025 12:35:36 +0100
Subject: [PATCH 05/30] Backup in the middle.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/CMakeLists.txt                     |   2 +-
 src/engine/ExistsScan.cpp                     | 118 ++++++++++++++++++
 src/engine/ExistsScan.h                       |  55 ++++++++
 src/engine/GroupBy.cpp                        |  22 +---
 .../sparqlExpressions/ExistsExpression.cpp    |   5 +
 .../sparqlExpressions/ExistsExpression.h      |  39 ++++++
 .../sparqlExpressions/SparqlExpression.cpp    |  14 +++
 .../sparqlExpressions/SparqlExpression.h      |  10 ++
 8 files changed, 248 insertions(+), 17 deletions(-)
 create mode 100644 src/engine/ExistsScan.cpp
 create mode 100644 src/engine/ExistsScan.h
 create mode 100644 src/engine/sparqlExpressions/ExistsExpression.cpp
 create mode 100644 src/engine/sparqlExpressions/ExistsExpression.h

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index be22a64d5d..c724a8fb39 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -14,5 +14,5 @@ add_library(engine
         CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
         TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
         CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp
-        Describe.cpp)
+        Describe.cpp ExistsScan.cpp)
 qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
diff --git a/src/engine/ExistsScan.cpp b/src/engine/ExistsScan.cpp
new file mode 100644
index 0000000000..f42da68f3d
--- /dev/null
+++ b/src/engine/ExistsScan.cpp
@@ -0,0 +1,118 @@
+//  Copyright 2023, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include "engine/ExistsScan.h"
+
+#include "util/JoinAlgorithms/JoinAlgorithms.h"
+
+// _____________________________________________________________________________
+ExistsScan::ExistsScan(QueryExecutionContext* qec,
+                       std::shared_ptr<QueryExecutionTree> left,
+                       std::shared_ptr<QueryExecutionTree> right,
+                       Variable existsVariable)
+    : Operation{qec},
+      left_{std::move(left)},
+      right_{std::move(right)},
+      existsVariable_{std::move(existsVariable)},
+      joinColumns_{QueryExecutionTree::getJoinColumns(*left_, *right_)} {}
+
+// _____________________________________________________________________________
+string ExistsScan::getCacheKeyImpl() const {
+  return absl::StrCat("EXISTS SCAN left: ", left_->getCacheKey(),
+                      " right: ", right_->getCacheKey());
+}
+
+// _____________________________________________________________________________
+string ExistsScan::getDescriptor() const { return "EXISTS scan"; }
+
+// ____________________________________________________________________________
+VariableToColumnMap ExistsScan::computeVariableToColumnMap() const {
+  auto res = left_->getVariableColumns();
+  AD_CONTRACT_CHECK(
+      !res.contains(existsVariable_),
+      "The target variable of an exists scan must be a new variable");
+  res[existsVariable_] = makeAlwaysDefinedColumn(getResultWidth() - 1);
+  return res;
+}
+
+// ____________________________________________________________________________
+size_t ExistsScan::getResultWidth() const {
+  // We add one column to the input.
+  return left_->getResultWidth() + 1;
+}
+
+// ____________________________________________________________________________
+vector<ColumnIndex> ExistsScan::resultSortedOn() const {
+  return left_->resultSortedOn();
+}
+
+// ____________________________________________________________________________
+float ExistsScan::getMultiplicity(size_t col) {
+  if (col < getResultWidth() - 1) {
+    return left_->getMultiplicity(col);
+  }
+  // The multiplicity of the boolean column can be a dummy value, as it should
+  // be never used for joins etc.
+  return 1;
+}
+
+// ____________________________________________________________________________
+uint64_t ExistsScan::getSizeEstimateBeforeLimit() {
+  return left_->getSizeEstimate();
+}
+
+// ____________________________________________________________________________
+size_t ExistsScan::getCostEstimate() {
+  return left_->getCostEstimate() + right_->getCostEstimate() +
+         left_->getSizeEstimate() + right_->getSizeEstimate();
+}
+
+// ____________________________________________________________________________
+ProtoResult ExistsScan::computeResult([[maybe_unused]] bool requestLaziness) {
+  auto leftRes = left_->getResult();
+  auto rightRes = right_->getResult();
+  const auto& left = leftRes->idTable();
+  const auto& right = rightRes->idTable();
+
+  ad_utility::JoinColumnMapping joinColumnData{joinColumns_, left.numColumns(),
+                                               right.numColumns()};
+
+  IdTableView<0> joinColumnsLeft =
+      left.asColumnSubsetView(joinColumnData.jcsLeft());
+  IdTableView<0> joinColumnsRight =
+      right.asColumnSubsetView(joinColumnData.jcsRight());
+
+  checkCancellation();
+
+  auto noopRowAdder = [](auto&&...) {};
+
+  // TODO<joka921> Memory limit.
+  std::vector<size_t> notExistsIndices;
+  auto actionForNotExisting =
+      [&notExistsIndices, begin = joinColumnsLeft.begin()](const auto& itLeft) {
+        notExistsIndices.push_back(itLeft - begin);
+      };
+
+  // TODO<joka921> Handle UNDEF values correctly (and efficiently)
+  auto findUndefDispatch = []<typename It>(const auto& row, It begin, auto end,
+                                           bool& outOfOrder) {
+    return std::array<It, 0>{};
+  };
+
+  auto checkCancellationLambda = [this] { checkCancellation(); };
+  [[maybe_unused]] auto numOutOfOrder = ad_utility::zipperJoinWithUndef(
+      joinColumnsLeft, joinColumnsRight, ql::ranges::lexicographical_compare,
+      noopRowAdder, findUndefDispatch, findUndefDispatch, actionForNotExisting,
+      checkCancellationLambda);
+
+  // Set up the result;
+  IdTable result = left.clone();
+  result.addEmptyColumn();
+  decltype(auto) existsCol = result.getColumn(getResultWidth() - 1);
+  ql::ranges::fill(existsCol, Id::makeFromBool(true));
+  for (size_t notExistsIndex : notExistsIndices) {
+    existsCol[notExistsIndex] = Id::makeFromBool(false);
+  }
+  return {std::move(result), resultSortedOn(), leftRes->getCopyOfLocalVocab()};
+}
diff --git a/src/engine/ExistsScan.h b/src/engine/ExistsScan.h
new file mode 100644
index 0000000000..b08e06c542
--- /dev/null
+++ b/src/engine/ExistsScan.h
@@ -0,0 +1,55 @@
+//  Copyright 2025, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#pragma once
+
+#include "engine/Operation.h"
+#include "engine/QueryExecutionTree.h"
+
+class ExistsScan : public Operation {
+ private:
+  std::shared_ptr<QueryExecutionTree> left_;
+  std::shared_ptr<QueryExecutionTree> right_;
+  std::vector<std::array<ColumnIndex, 2>> joinColumns_;
+
+  Variable existsVariable_;
+
+  vector<float> _multiplicities;
+  std::vector<std::array<ColumnIndex, 2>> _matchedColumns;
+
+ public:
+  ExistsScan(QueryExecutionContext* qec,
+             std::shared_ptr<QueryExecutionTree> left,
+             std::shared_ptr<QueryExecutionTree> right,
+             Variable existsVariable);
+
+ protected:
+  string getCacheKeyImpl() const override;
+
+ public:
+  string getDescriptor() const override;
+
+  size_t getResultWidth() const override;
+
+  vector<ColumnIndex> resultSortedOn() const override;
+
+  bool knownEmptyResult() override { return left_->knownEmptyResult(); }
+
+  float getMultiplicity(size_t col) override;
+
+ private:
+  uint64_t getSizeEstimateBeforeLimit() override;
+
+ public:
+  size_t getCostEstimate() override;
+
+  vector<QueryExecutionTree*> getChildren() override {
+    return {left_.get(), right_.get()};
+  }
+
+ private:
+  ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override;
+
+  VariableToColumnMap computeVariableToColumnMap() const override;
+};
diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
index 6fdeca1833..a6ff49bbe1 100644
--- a/src/engine/GroupBy.cpp
+++ b/src/engine/GroupBy.cpp
@@ -366,8 +366,6 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) {
   }
 
   if (useHashMapOptimization) {
-    // Helper lambda that calls `computeGroupByForHashMapOptimization` for the
-    // given `subresults`.
     auto computeWithHashMap = [this, &metadataForUnsequentialData,
                                &groupByCols](auto&& subresults) {
       auto doCompute = [&]<int NumCols> {
@@ -378,10 +376,9 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) {
       return ad_utility::callFixedSize(groupByCols.size(), doCompute);
     };
 
-    // Now call `computeWithHashMap` and return the result. It expects a range
-    // of results, so if the result is fully materialized, we create an array
-    // with a single element.
     if (subresult->isFullyMaterialized()) {
+      // `computeWithHashMap` takes a range, so we artificially create one with
+      // a single input.
       return computeWithHashMap(
           std::array{std::pair{std::cref(subresult->idTable()),
                                std::cref(subresult->localVocab())}});
@@ -1509,36 +1506,29 @@ Result GroupBy::computeGroupByForHashMapOptimization(
                        NUM_GROUP_COLUMNS == 0);
   LocalVocab localVocab;
 
-  // Initialize the data for the aggregates of the GROUP BY operation.
+  // Initialize aggregation data
   HashMapAggregationData<NUM_GROUP_COLUMNS> aggregationData(
       getExecutionContext()->getAllocator(), aggregateAliases,
       columnIndices.size());
 
-  // Process the input blocks (pairs of `IdTable` and `LocalVocab`) one after
-  // the other.
   ad_utility::Timer lookupTimer{ad_utility::Timer::Stopped};
   ad_utility::Timer aggregationTimer{ad_utility::Timer::Stopped};
   for (const auto& [inputTableRef, inputLocalVocabRef] : subresults) {
+    // Also support `std::reference_wrapper` as the input.
     const IdTable& inputTable = inputTableRef;
     const LocalVocab& inputLocalVocab = inputLocalVocabRef;
 
-    // Merge the local vocab of each input block.
-    //
-    // NOTE: If the input blocks have very similar or even identical non-empty
-    // local vocabs, no deduplication is performed.
     localVocab.mergeWith(std::span{&inputLocalVocab, 1});
-
-    // Setup the `EvaluationContext` for this input block.
+    // Initialize evaluation context
     sparqlExpression::EvaluationContext evaluationContext(
         *getExecutionContext(), _subtree->getVariableColumns(), inputTable,
         getExecutionContext()->getAllocator(), localVocab, cancellationHandle_,
         deadline_);
+
     evaluationContext._groupedVariables = ad_utility::HashSet<Variable>{
         _groupByVariables.begin(), _groupByVariables.end()};
     evaluationContext._isPartOfGroupBy = true;
 
-    // Iterate of the rows of this input block. Process (up to)
-    // `GROUP_BY_HASH_MAP_BLOCK_SIZE` rows at a time.
     for (size_t i = 0; i < inputTable.size();
          i += GROUP_BY_HASH_MAP_BLOCK_SIZE) {
       checkCancellation();
diff --git a/src/engine/sparqlExpressions/ExistsExpression.cpp b/src/engine/sparqlExpressions/ExistsExpression.cpp
new file mode 100644
index 0000000000..6737d3ed7b
--- /dev/null
+++ b/src/engine/sparqlExpressions/ExistsExpression.cpp
@@ -0,0 +1,5 @@
+//
+// Created by kalmbacj on 1/7/25.
+//
+
+#include "ExistsExpression.h"
diff --git a/src/engine/sparqlExpressions/ExistsExpression.h b/src/engine/sparqlExpressions/ExistsExpression.h
new file mode 100644
index 0000000000..5ec68acd61
--- /dev/null
+++ b/src/engine/sparqlExpressions/ExistsExpression.h
@@ -0,0 +1,39 @@
+//
+// Created by kalmbacj on 1/7/25.
+//
+
+#pragma once
+
+#include <variant>
+
+#include "engine/sparqlExpressions/SparqlExpression.h"
+#include "parser/ParsedQuery.h"
+
+namespace sparqlExpression {
+class ExistsExpression : public SparqlExpression {
+ private:
+  std::variant<Variable, ParsedQuery> argument_;
+
+ public:
+  auto& argument() { return argument_; }
+  ExistsExpression(ParsedQuery query) : argument_{std::move(query)} {}
+
+  ExpressionResult evaluate(EvaluationContext* context) const override {
+    AD_CONTRACT_CHECK(std::holds_alternative<Variable>(argument_));
+    return std::get<Variable>(argument_);
+  }
+
+  //_________________________________________________________________________
+  [[nodiscard]] string getCacheKey(
+      const VariableToColumnMap& varColMap) const override {
+    // TODO<joka921> get a proper cache key here
+    AD_CONTRACT_CHECK(std::holds_alternative<Variable>(argument_));
+    return absl::StrCat(
+        "EXISTS WITH COLUMN ",
+        varColMap.at(std::get<Variable>(argument_)).columnIndex_);
+  }
+
+ private:
+  std::span<SparqlExpression::Ptr> childrenImpl() override { return {}; }
+};
+}  // namespace sparqlExpression
diff --git a/src/engine/sparqlExpressions/SparqlExpression.cpp b/src/engine/sparqlExpressions/SparqlExpression.cpp
index b5ec3aa0f7..00864b998d 100644
--- a/src/engine/sparqlExpressions/SparqlExpression.cpp
+++ b/src/engine/sparqlExpressions/SparqlExpression.cpp
@@ -180,4 +180,18 @@ bool SparqlExpression::isInsideAggregate() const {
   }
   return isInsideAggregate_;
 }
+
+// ________________________________________________________________
+bool SparqlExpression::isExistsExpression() const { return false; }
+
+// ________________________________________________________________
+void SparqlExpression::getExistsExpressions(
+    std::vector<SparqlExpression*>& result) {
+  if (isExistsExpression()) {
+    result.push_back(this);
+  }
+  for (auto& child : children()) {
+    child->getExistsExpressions(result);
+  }
+}
 }  // namespace sparqlExpression
diff --git a/src/engine/sparqlExpressions/SparqlExpression.h b/src/engine/sparqlExpressions/SparqlExpression.h
index 1378f10520..d5f7248daf 100644
--- a/src/engine/sparqlExpressions/SparqlExpression.h
+++ b/src/engine/sparqlExpressions/SparqlExpression.h
@@ -123,6 +123,16 @@ class SparqlExpression {
   // implementation returns `false`.
   virtual bool isStrExpression() const;
 
+  // Returns true iff this expression is an EXISTS(...) expression.  Default
+  // implementation returns `false`.
+  virtual bool isExistsExpression() const;
+
+  // Return non-null pointers to all `EXISTS` expressions in the subtree.
+  // The result is passed in as a reference to simplify the recursive
+  // implementation.
+  virtual void getExistsExpressions(
+      std::vector<SparqlExpression*>& result) final;
+
   // __________________________________________________________________________
   virtual ~SparqlExpression() = default;
 

From e356ee1c831d00aeb74ac17094cffcec1f4b55d1 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Tue, 7 Jan 2025 12:57:41 +0100
Subject: [PATCH 06/30] Add some parsing and add some thoughts.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 .../sparqlParser/SparqlQleverVisitor.cpp       | 18 +++++++++++++++---
 src/parser/sparqlParser/SparqlQleverVisitor.h  |  4 ++--
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index f23530f820..99a943d350 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -15,6 +15,7 @@
 
 #include "absl/time/time.h"
 #include "engine/sparqlExpressions/CountStarExpression.h"
+#include "engine/sparqlExpressions/ExistsExpression.h"
 #include "engine/sparqlExpressions/GroupConcatExpression.h"
 #include "engine/sparqlExpressions/LiteralExpression.h"
 #include "engine/sparqlExpressions/NaryExpression.h"
@@ -1366,6 +1367,7 @@ SparqlFilter Visitor::visit(Parser::FilterRContext* ctx) {
   // expression contains unbound variables, because the variables of the FILTER
   // might be bound after the filter appears in the query (which is perfectly
   // legal).
+  auto pimpl = visitExpressionPimpl(ctx->constraint());
   return SparqlFilter{visitExpressionPimpl(ctx->constraint())};
 }
 
@@ -2229,6 +2231,10 @@ ExpressionPtr Visitor::visit([[maybe_unused]] Parser::BuiltInCallContext* ctx) {
     return visit(ctx->substringExpression());
   } else if (ctx->strReplaceExpression()) {
     return visit(ctx->strReplaceExpression());
+  } else if (ctx->existsFunc()) {
+    return visit(ctx->existsFunc());
+  } else if (ctx->notExistsFunc()) {
+    return visit(ctx->notExistsFunc());
   }
   // Get the function name and the arguments. Note that we do not have to check
   // the number of arguments like for `processIriFunctionCall`, since the number
@@ -2418,12 +2424,18 @@ SparqlExpression::Ptr Visitor::visit(Parser::StrReplaceExpressionContext* ctx) {
 }
 
 // ____________________________________________________________________________________
-void Visitor::visit(const Parser::ExistsFuncContext* ctx) {
-  reportNotSupported(ctx, "The EXISTS function is");
+ExpressionPtr Visitor::visit(Parser::ExistsFuncContext* ctx) {
+  auto queryBackup = std::exchange(parsedQuery_, ParsedQuery{});
+  auto group = visit(ctx->groupGraphPattern());
+  ParsedQuery query = std::exchange(parsedQuery_, std::move(queryBackup));
+  query.selectClause().setAsterisk();
+  query._rootGraphPattern = std::move(group);
+  return std::make_unique<sparqlExpression::ExistsExpression>(std::move(query));
 }
 
 // ____________________________________________________________________________________
-void Visitor::visit(const Parser::NotExistsFuncContext* ctx) {
+ExpressionPtr Visitor::visit(Parser::NotExistsFuncContext* ctx) {
+  // TODO<joka921> Implement this without duplicating the code for EXISTS.
   reportNotSupported(ctx, "The NOT EXISTS function is");
 }
 
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h
index fb1cb9c05c..5fb4c95a08 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.h
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -444,9 +444,9 @@ class SparqlQleverVisitor {
 
   ExpressionPtr visit(Parser::StrReplaceExpressionContext* ctx);
 
-  [[noreturn]] static void visit(const Parser::ExistsFuncContext* ctx);
+  ExpressionPtr visit(Parser::ExistsFuncContext* ctx);
 
-  [[noreturn]] static void visit(const Parser::NotExistsFuncContext* ctx);
+  ExpressionPtr visit(Parser::NotExistsFuncContext* ctx);
 
   ExpressionPtr visit(Parser::AggregateContext* ctx);
 

From fc2017479677c4bc9a04a4f5d5259c3fe1d6d0de Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Tue, 7 Jan 2025 15:49:29 +0100
Subject: [PATCH 07/30] Also implement NOT EXISTS

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/ExistsScan.cpp                     |  6 +++--
 src/engine/Filter.cpp                         | 18 +++++++++++++
 src/engine/QueryExecutionTree.h               |  3 +++
 .../sparqlExpressions/ExistsExpression.h      | 24 +++++++++++-------
 .../sparqlParser/SparqlQleverVisitor.cpp      | 25 +++++++++++++++----
 src/parser/sparqlParser/SparqlQleverVisitor.h |  5 ++++
 6 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/src/engine/ExistsScan.cpp b/src/engine/ExistsScan.cpp
index f42da68f3d..1604e353ad 100644
--- a/src/engine/ExistsScan.cpp
+++ b/src/engine/ExistsScan.cpp
@@ -95,8 +95,10 @@ ProtoResult ExistsScan::computeResult([[maybe_unused]] bool requestLaziness) {
       };
 
   // TODO<joka921> Handle UNDEF values correctly (and efficiently)
-  auto findUndefDispatch = []<typename It>(const auto& row, It begin, auto end,
-                                           bool& outOfOrder) {
+  auto findUndefDispatch = []<typename It>([[maybe_unused]] const auto& row,
+                                           [[maybe_unused]] It begin,
+                                           [[maybe_unused]] auto end,
+                                           [[maybe_unused]] bool& outOfOrder) {
     return std::array<It, 0>{};
   };
 
diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp
index 9ecdd85f7a..519c0d9da5 100644
--- a/src/engine/Filter.cpp
+++ b/src/engine/Filter.cpp
@@ -10,10 +10,13 @@
 
 #include "backports/algorithm.h"
 #include "engine/CallFixedSize.h"
+#include "engine/ExistsScan.h"
 #include "engine/QueryExecutionTree.h"
+#include "engine/QueryPlanner.h"
 #include "engine/sparqlExpressions/SparqlExpression.h"
 #include "engine/sparqlExpressions/SparqlExpressionGenerators.h"
 #include "engine/sparqlExpressions/SparqlExpressionValueGetters.h"
+#include "sparqlExpressions/ExistsExpression.h"
 
 using std::endl;
 using std::string;
@@ -28,6 +31,21 @@ Filter::Filter(QueryExecutionContext* qec,
     : Operation(qec),
       _subtree(std::move(subtree)),
       _expression{std::move(expression)} {
+  std::vector<sparqlExpression::SparqlExpression*> existsExpressions;
+  _expression.getPimpl()->getExistsExpressions(existsExpressions);
+  for (auto* expr : existsExpressions) {
+    const auto& exists =
+        dynamic_cast<const sparqlExpression::ExistsExpression&>(*expr);
+    QueryPlanner qp{getExecutionContext(), cancellationHandle_};
+    // TODO<joka921> This can be done by the expression itself, then it is
+    // automatically duplicated.
+    auto pq = exists.argument();
+    auto tree =
+        std::make_shared<QueryExecutionTree>(qp.createExecutionTree(pq));
+    _subtree = ad_utility::makeExecutionTree<ExistsScan>(
+        getExecutionContext(), std::move(_subtree), std::move(tree),
+        exists.variable());
+  }
   setPrefilterExpressionForChildren();
 }
 
diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h
index 0eac785f16..3c074d6c47 100644
--- a/src/engine/QueryExecutionTree.h
+++ b/src/engine/QueryExecutionTree.h
@@ -25,7 +25,10 @@ class QueryExecutionTree {
                      std::shared_ptr<Operation> operation)
       : QueryExecutionTree(qec) {
     rootOperation_ = std::move(operation);
+    // TODO<joka921> This currently fails for EXISTS but it is also unneeded.
+    /*
     readFromCache();
+    */
   }
 
   std::string getCacheKey() const;
diff --git a/src/engine/sparqlExpressions/ExistsExpression.h b/src/engine/sparqlExpressions/ExistsExpression.h
index 5ec68acd61..d5eff23ba8 100644
--- a/src/engine/sparqlExpressions/ExistsExpression.h
+++ b/src/engine/sparqlExpressions/ExistsExpression.h
@@ -12,27 +12,33 @@
 namespace sparqlExpression {
 class ExistsExpression : public SparqlExpression {
  private:
-  std::variant<Variable, ParsedQuery> argument_;
+  ParsedQuery argument_;
+  static inline std::atomic<size_t> indexCounter_ = 0;
+  size_t index_ = ++indexCounter_;
+  Variable variable_{absl::StrCat("?ql_internal_exists_", index_)};
 
  public:
-  auto& argument() { return argument_; }
+  const auto& argument() const { return argument_; }
+  const auto& variable() const { return variable_; }
   ExistsExpression(ParsedQuery query) : argument_{std::move(query)} {}
 
   ExpressionResult evaluate(EvaluationContext* context) const override {
-    AD_CONTRACT_CHECK(std::holds_alternative<Variable>(argument_));
-    return std::get<Variable>(argument_);
+    AD_CONTRACT_CHECK(context->_variableToColumnMap.contains(variable_));
+    return variable_;
   }
 
-  //_________________________________________________________________________
+  //____________________________________________________________________________
   [[nodiscard]] string getCacheKey(
       const VariableToColumnMap& varColMap) const override {
     // TODO<joka921> get a proper cache key here
-    AD_CONTRACT_CHECK(std::holds_alternative<Variable>(argument_));
-    return absl::StrCat(
-        "EXISTS WITH COLUMN ",
-        varColMap.at(std::get<Variable>(argument_)).columnIndex_);
+    AD_CONTRACT_CHECK(varColMap.contains(variable_));
+    return absl::StrCat("EXISTS WITH COL ",
+                        varColMap.at(variable_).columnIndex_);
   }
 
+  // ____________________________________________________________________________
+  bool isExistsExpression() const override { return true; }
+
  private:
   std::span<SparqlExpression::Ptr> childrenImpl() override { return {}; }
 };
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index 99a943d350..903544c96a 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -26,6 +26,7 @@
 #include "engine/sparqlExpressions/SampleExpression.h"
 #include "engine/sparqlExpressions/StdevExpression.h"
 #include "engine/sparqlExpressions/UuidExpressions.h"
+#include "generated/SparqlAutomaticParser.h"
 #include "global/Constants.h"
 #include "global/RuntimeParameters.h"
 #include "parser/GraphPatternOperation.h"
@@ -2424,19 +2425,33 @@ SparqlExpression::Ptr Visitor::visit(Parser::StrReplaceExpressionContext* ctx) {
 }
 
 // ____________________________________________________________________________________
-ExpressionPtr Visitor::visit(Parser::ExistsFuncContext* ctx) {
+ExpressionPtr Visitor::visitExists(Parser::GroupGraphPatternContext* pattern,
+                                   bool negate) {
   auto queryBackup = std::exchange(parsedQuery_, ParsedQuery{});
-  auto group = visit(ctx->groupGraphPattern());
+  auto visibleVariablesSoFar = std::move(visibleVariables_);
+  visibleVariables_.clear();
+  auto group = visit(pattern);
   ParsedQuery query = std::exchange(parsedQuery_, std::move(queryBackup));
   query.selectClause().setAsterisk();
   query._rootGraphPattern = std::move(group);
-  return std::make_unique<sparqlExpression::ExistsExpression>(std::move(query));
+  visibleVariables_ = std::move(visibleVariablesSoFar);
+  auto exists =
+      std::make_unique<sparqlExpression::ExistsExpression>(std::move(query));
+  if (negate) {
+    return sparqlExpression::makeUnaryNegateExpression(std::move(exists));
+  } else {
+    return exists;
+  }
+}
+
+// ____________________________________________________________________________________
+ExpressionPtr Visitor::visit(Parser::ExistsFuncContext* ctx) {
+  return visitExists(ctx->groupGraphPattern(), false);
 }
 
 // ____________________________________________________________________________________
 ExpressionPtr Visitor::visit(Parser::NotExistsFuncContext* ctx) {
-  // TODO<joka921> Implement this without duplicating the code for EXISTS.
-  reportNotSupported(ctx, "The NOT EXISTS function is");
+  return visitExists(ctx->groupGraphPattern(), true);
 }
 
 // ____________________________________________________________________________________
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h
index 5fb4c95a08..3e7b63c3ad 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.h
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -444,6 +444,11 @@ class SparqlQleverVisitor {
 
   ExpressionPtr visit(Parser::StrReplaceExpressionContext* ctx);
 
+  // The common implementation of the parsing of `EXISTS` and `NOT EXISTS`.
+  // The second argument is `true` for `NOT EXISTS`.
+  ExpressionPtr visitExists(Parser::GroupGraphPatternContext* pattern,
+                            bool negate);
+
   ExpressionPtr visit(Parser::ExistsFuncContext* ctx);
 
   ExpressionPtr visit(Parser::NotExistsFuncContext* ctx);

From dde296b052dee3c267acdeaec514a3e3b47e5cb9 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Tue, 7 Jan 2025 16:27:14 +0100
Subject: [PATCH 08/30] Fix a small warning, to feed this to the tool.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/ExistsScan.cpp                       |  4 ++--
 src/engine/sparqlExpressions/ExistsExpression.h | 13 +++++++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/engine/ExistsScan.cpp b/src/engine/ExistsScan.cpp
index 1604e353ad..651e8e61cb 100644
--- a/src/engine/ExistsScan.cpp
+++ b/src/engine/ExistsScan.cpp
@@ -14,8 +14,8 @@ ExistsScan::ExistsScan(QueryExecutionContext* qec,
     : Operation{qec},
       left_{std::move(left)},
       right_{std::move(right)},
-      existsVariable_{std::move(existsVariable)},
-      joinColumns_{QueryExecutionTree::getJoinColumns(*left_, *right_)} {}
+      joinColumns_{QueryExecutionTree::getJoinColumns(*left_, *right_)},
+      existsVariable_{std::move(existsVariable)} {}
 
 // _____________________________________________________________________________
 string ExistsScan::getCacheKeyImpl() const {
diff --git a/src/engine/sparqlExpressions/ExistsExpression.h b/src/engine/sparqlExpressions/ExistsExpression.h
index d5eff23ba8..343c195e82 100644
--- a/src/engine/sparqlExpressions/ExistsExpression.h
+++ b/src/engine/sparqlExpressions/ExistsExpression.h
@@ -30,10 +30,15 @@ class ExistsExpression : public SparqlExpression {
   //____________________________________________________________________________
   [[nodiscard]] string getCacheKey(
       const VariableToColumnMap& varColMap) const override {
-    // TODO<joka921> get a proper cache key here
-    AD_CONTRACT_CHECK(varColMap.contains(variable_));
-    return absl::StrCat("EXISTS WITH COL ",
-                        varColMap.at(variable_).columnIndex_);
+    if (varColMap.contains(variable_)) {
+      return absl::StrCat("EXISTS WITH COL ",
+                          varColMap.at(variable_).columnIndex_);
+    } else {
+      // This means that the necessary `ExistsScan` hasn't been set up yet.
+      // It is not possible to cache such incomplete operations, so we return
+      // a random cache key.
+      return std::to_string(ad_utility::FastRandomIntGenerator<size_t>{}());
+    }
   }
 
   // ____________________________________________________________________________

From 0d1c788e11f3a2d2b6bb2dfea6bbbc6fba7f1bc3 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 8 Jan 2025 09:07:04 +0100
Subject: [PATCH 09/30] Some cleanups and fixes.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/Bind.cpp                           | 10 +++
 src/engine/Bind.h                             |  6 +-
 src/engine/ExistsScan.cpp                     | 75 +++++++++++++++----
 src/engine/ExistsScan.h                       |  5 ++
 src/engine/Filter.cpp                         | 18 +----
 src/engine/GroupBy.cpp                        |  7 ++
 src/engine/MultiColumnJoin.cpp                | 14 ++--
 src/engine/QueryExecutionTree.h               |  3 -
 .../sparqlExpressions/SparqlExpression.cpp    |  2 +-
 .../sparqlExpressions/SparqlExpression.h      |  2 +-
 src/util/JoinAlgorithms/FindUndefRanges.h     | 33 ++++++++
 11 files changed, 128 insertions(+), 47 deletions(-)

diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp
index 95de8a4dfe..230ca1cb68 100644
--- a/src/engine/Bind.cpp
+++ b/src/engine/Bind.cpp
@@ -5,12 +5,22 @@
 #include "Bind.h"
 
 #include "engine/CallFixedSize.h"
+#include "engine/ExistsScan.h"
 #include "engine/QueryExecutionTree.h"
 #include "engine/sparqlExpressions/SparqlExpression.h"
 #include "engine/sparqlExpressions/SparqlExpressionGenerators.h"
 #include "util/ChunkedForLoop.h"
 #include "util/Exception.h"
 
+// _____________________________________________________________________________
+Bind::Bind(QueryExecutionContext* qec,
+           std::shared_ptr<QueryExecutionTree> subtree, parsedQuery::Bind b)
+    : Operation(qec), _subtree(std::move(subtree)), _bind(std::move(b)) {
+  _subtree = ExistsScan::addExistsScansToSubtree(
+      _bind._expression, std::move(_subtree), getExecutionContext(),
+      cancellationHandle_);  // `subtree` was already moved from above.
+}
+
 // BIND adds exactly one new column
 size_t Bind::getResultWidth() const { return _subtree->getResultWidth() + 1; }
 
diff --git a/src/engine/Bind.h b/src/engine/Bind.h
index 34c515fb54..3336e0ddbc 100644
--- a/src/engine/Bind.h
+++ b/src/engine/Bind.h
@@ -8,14 +8,14 @@
 #include "engine/sparqlExpressions/SparqlExpressionPimpl.h"
 #include "parser/ParsedQuery.h"
 
-/// BIND operation, currently only supports a very limited subset of expressions
+// BIND operation.
 class Bind : public Operation {
  public:
   static constexpr size_t CHUNK_SIZE = 10'000;
 
+  // ____________________________________________________________________________
   Bind(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> subtree,
-       parsedQuery::Bind b)
-      : Operation(qec), _subtree(std::move(subtree)), _bind(std::move(b)) {}
+       parsedQuery::Bind b);
 
  private:
   std::shared_ptr<QueryExecutionTree> _subtree;
diff --git a/src/engine/ExistsScan.cpp b/src/engine/ExistsScan.cpp
index 651e8e61cb..26fde12984 100644
--- a/src/engine/ExistsScan.cpp
+++ b/src/engine/ExistsScan.cpp
@@ -4,6 +4,9 @@
 
 #include "engine/ExistsScan.h"
 
+#include "engine/QueryPlanner.h"
+#include "engine/sparqlExpressions/ExistsExpression.h"
+#include "engine/sparqlExpressions/SparqlExpression.h"
 #include "util/JoinAlgorithms/JoinAlgorithms.h"
 
 // _____________________________________________________________________________
@@ -15,7 +18,10 @@ ExistsScan::ExistsScan(QueryExecutionContext* qec,
       left_{std::move(left)},
       right_{std::move(right)},
       joinColumns_{QueryExecutionTree::getJoinColumns(*left_, *right_)},
-      existsVariable_{std::move(existsVariable)} {}
+      existsVariable_{std::move(existsVariable)} {
+  std::tie(left_, right_) = QueryExecutionTree::createSortedTrees(
+      std::move(left_), std::move(right_), joinColumns_);
+}
 
 // _____________________________________________________________________________
 string ExistsScan::getCacheKeyImpl() const {
@@ -85,28 +91,41 @@ ProtoResult ExistsScan::computeResult([[maybe_unused]] bool requestLaziness) {
 
   checkCancellation();
 
+  // `isCheap` is true iff there are no UNDEF values in the join columns. In
+  // this case we can use a much cheaper algorithm.
+  // TODO<joka921> There are many other cases where a cheaper implementation can
+  // be chosen, but we leave those for another PR, this is the most common case.
+  namespace stdr = ql::ranges;
+  size_t numJoinColumns = joinColumnsLeft.size();
+  AD_CORRECTNESS_CHECK(numJoinColumns == joinColumnsRight.size());
+  bool isCheap = stdr::none_of(
+      ad_utility::integerRange(numJoinColumns), [&](const auto& col) {
+        return (stdr::any_of(joinColumnsRight.getColumn(col),
+                             &Id::isUndefined)) ||
+               (stdr::any_of(joinColumnsLeft.getColumn(col), &Id::isUndefined));
+      });
+
   auto noopRowAdder = [](auto&&...) {};
 
-  // TODO<joka921> Memory limit.
-  std::vector<size_t> notExistsIndices;
+  std::vector<size_t, ad_utility::AllocatorWithLimit<size_t>> notExistsIndices{
+      allocator()};
   auto actionForNotExisting =
       [&notExistsIndices, begin = joinColumnsLeft.begin()](const auto& itLeft) {
         notExistsIndices.push_back(itLeft - begin);
       };
 
-  // TODO<joka921> Handle UNDEF values correctly (and efficiently)
-  auto findUndefDispatch = []<typename It>([[maybe_unused]] const auto& row,
-                                           [[maybe_unused]] It begin,
-                                           [[maybe_unused]] auto end,
-                                           [[maybe_unused]] bool& outOfOrder) {
-    return std::array<It, 0>{};
-  };
-
   auto checkCancellationLambda = [this] { checkCancellation(); };
-  [[maybe_unused]] auto numOutOfOrder = ad_utility::zipperJoinWithUndef(
-      joinColumnsLeft, joinColumnsRight, ql::ranges::lexicographical_compare,
-      noopRowAdder, findUndefDispatch, findUndefDispatch, actionForNotExisting,
-      checkCancellationLambda);
+  auto runZipperJoin = [&](auto findUndef) {
+    [[maybe_unused]] auto numOutOfOrder = ad_utility::zipperJoinWithUndef(
+        joinColumnsLeft, joinColumnsRight, ql::ranges::lexicographical_compare,
+        noopRowAdder, findUndef, findUndef, actionForNotExisting,
+        checkCancellationLambda);
+  };
+  if (isCheap) {
+    runZipperJoin(ad_utility::noop);
+  } else {
+    runZipperJoin(ad_utility::findSmallerUndefRanges);
+  }
 
   // Set up the result;
   IdTable result = left.clone();
@@ -118,3 +137,29 @@ ProtoResult ExistsScan::computeResult([[maybe_unused]] bool requestLaziness) {
   }
   return {std::move(result), resultSortedOn(), leftRes->getCopyOfLocalVocab()};
 }
+
+// _____________________________________________________________________________
+std::shared_ptr<QueryExecutionTree> ExistsScan::addExistsScansToSubtree(
+    const sparqlExpression::SparqlExpressionPimpl& expression,
+    std::shared_ptr<QueryExecutionTree> subtree, QueryExecutionContext* qec,
+    const ad_utility::SharedCancellationHandle& cancellationHandle) {
+  std::vector<const sparqlExpression::SparqlExpression*> existsExpressions;
+  expression.getPimpl()->getExistsExpressions(existsExpressions);
+  for (auto* expr : existsExpressions) {
+    const auto& exists =
+        dynamic_cast<const sparqlExpression::ExistsExpression&>(*expr);
+    // Currently some FILTERs are applied multiple times especially when there
+    // are OPTIONAL joins in the query. In these cases we have to make sure that
+    // the `ExistsScan` is added only once.
+    if (subtree->isVariableCovered(exists.variable())) {
+      continue;
+    }
+    QueryPlanner qp{qec, cancellationHandle};
+    auto pq = exists.argument();
+    auto tree =
+        std::make_shared<QueryExecutionTree>(qp.createExecutionTree(pq));
+    subtree = ad_utility::makeExecutionTree<ExistsScan>(
+        qec, std::move(subtree), std::move(tree), exists.variable());
+  }
+  return subtree;
+}
diff --git a/src/engine/ExistsScan.h b/src/engine/ExistsScan.h
index b08e06c542..dbd947d302 100644
--- a/src/engine/ExistsScan.h
+++ b/src/engine/ExistsScan.h
@@ -24,6 +24,11 @@ class ExistsScan : public Operation {
              std::shared_ptr<QueryExecutionTree> right,
              Variable existsVariable);
 
+  static std::shared_ptr<QueryExecutionTree> addExistsScansToSubtree(
+      const sparqlExpression::SparqlExpressionPimpl& expression,
+      std::shared_ptr<QueryExecutionTree> subtree, QueryExecutionContext* qec,
+      const ad_utility::SharedCancellationHandle& cancellationHandle);
+
  protected:
   string getCacheKeyImpl() const override;
 
diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp
index 519c0d9da5..ff8edc1fc1 100644
--- a/src/engine/Filter.cpp
+++ b/src/engine/Filter.cpp
@@ -31,21 +31,9 @@ Filter::Filter(QueryExecutionContext* qec,
     : Operation(qec),
       _subtree(std::move(subtree)),
       _expression{std::move(expression)} {
-  std::vector<sparqlExpression::SparqlExpression*> existsExpressions;
-  _expression.getPimpl()->getExistsExpressions(existsExpressions);
-  for (auto* expr : existsExpressions) {
-    const auto& exists =
-        dynamic_cast<const sparqlExpression::ExistsExpression&>(*expr);
-    QueryPlanner qp{getExecutionContext(), cancellationHandle_};
-    // TODO<joka921> This can be done by the expression itself, then it is
-    // automatically duplicated.
-    auto pq = exists.argument();
-    auto tree =
-        std::make_shared<QueryExecutionTree>(qp.createExecutionTree(pq));
-    _subtree = ad_utility::makeExecutionTree<ExistsScan>(
-        getExecutionContext(), std::move(_subtree), std::move(tree),
-        exists.variable());
-  }
+  _subtree = ExistsScan::addExistsScansToSubtree(
+      _expression, std::move(_subtree), getExecutionContext(),
+      cancellationHandle_);
   setPrefilterExpressionForChildren();
 }
 
diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
index a6ff49bbe1..0fe65fd00e 100644
--- a/src/engine/GroupBy.cpp
+++ b/src/engine/GroupBy.cpp
@@ -9,6 +9,7 @@
 #include <absl/strings/str_join.h>
 
 #include "engine/CallFixedSize.h"
+#include "engine/ExistsScan.h"
 #include "engine/IndexScan.h"
 #include "engine/Join.h"
 #include "engine/LazyGroupBy.h"
@@ -52,6 +53,12 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector<Variable> groupByVariables,
   ql::ranges::sort(_groupByVariables, std::less<>{}, &Variable::name);
 
   auto sortColumns = computeSortColumns(subtree.get());
+
+  for (const auto& alias : _aliases) {  // Chain on `subtree`, it is used below.
+    subtree = ExistsScan::addExistsScansToSubtree(
+        alias._expression, std::move(subtree), getExecutionContext(),
+        cancellationHandle_);
+  }
   _subtree =
       QueryExecutionTree::createSortedTree(std::move(subtree), sortColumns);
 }
diff --git a/src/engine/MultiColumnJoin.cpp b/src/engine/MultiColumnJoin.cpp
index bb3e4e5995..b605616ecb 100644
--- a/src/engine/MultiColumnJoin.cpp
+++ b/src/engine/MultiColumnJoin.cpp
@@ -237,17 +237,11 @@ void MultiColumnJoin::computeMultiColumnJoin(
     rowAdder.addRow(itLeft - beginLeft, itRight - beginRight);
   };
 
-  auto findUndef = [](const auto& row, auto begin, auto end,
-                      bool& resultMightBeUnsorted) {
-    return ad_utility::findSmallerUndefRanges(row, begin, end,
-                                              resultMightBeUnsorted);
-  };
-
   // `isCheap` is true iff there are no UNDEF values in the join columns. In
   // this case we can use a much cheaper algorithm.
   // TODO<joka921> There are many other cases where a cheaper implementation can
   // be chosen, but we leave those for another PR, this is the most common case.
-  namespace stdr = std::ranges;
+  namespace stdr = ql::ranges;
   bool isCheap = stdr::none_of(joinColumns, [&](const auto& jcs) {
     auto [leftCol, rightCol] = jcs;
     return (stdr::any_of(right.getColumn(rightCol), &Id::isUndefined)) ||
@@ -265,8 +259,10 @@ void MultiColumnJoin::computeMultiColumnJoin(
     } else {
       return ad_utility::zipperJoinWithUndef(
           leftJoinColumns, rightJoinColumns,
-          ql::ranges::lexicographical_compare, addRow, findUndef, findUndef,
-          ad_utility::noop, checkCancellationLambda);
+          ql::ranges::lexicographical_compare, addRow,
+          ad_utility::findSmallerUndefRanges,
+          ad_utility::findSmallerUndefRanges, ad_utility::noop,
+          checkCancellationLambda);
     }
   }();
   *result = std::move(rowAdder).resultTable();
diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h
index 3c074d6c47..0eac785f16 100644
--- a/src/engine/QueryExecutionTree.h
+++ b/src/engine/QueryExecutionTree.h
@@ -25,10 +25,7 @@ class QueryExecutionTree {
                      std::shared_ptr<Operation> operation)
       : QueryExecutionTree(qec) {
     rootOperation_ = std::move(operation);
-    // TODO<joka921> This currently fails for EXISTS but it is also unneeded.
-    /*
     readFromCache();
-    */
   }
 
   std::string getCacheKey() const;
diff --git a/src/engine/sparqlExpressions/SparqlExpression.cpp b/src/engine/sparqlExpressions/SparqlExpression.cpp
index 00864b998d..099933020f 100644
--- a/src/engine/sparqlExpressions/SparqlExpression.cpp
+++ b/src/engine/sparqlExpressions/SparqlExpression.cpp
@@ -186,7 +186,7 @@ bool SparqlExpression::isExistsExpression() const { return false; }
 
 // ________________________________________________________________
 void SparqlExpression::getExistsExpressions(
-    std::vector<SparqlExpression*>& result) {
+    std::vector<const SparqlExpression*>& result) const {
   if (isExistsExpression()) {
     result.push_back(this);
   }
diff --git a/src/engine/sparqlExpressions/SparqlExpression.h b/src/engine/sparqlExpressions/SparqlExpression.h
index d5f7248daf..7f5c551127 100644
--- a/src/engine/sparqlExpressions/SparqlExpression.h
+++ b/src/engine/sparqlExpressions/SparqlExpression.h
@@ -131,7 +131,7 @@ class SparqlExpression {
   // The result is passed in as a reference to simplify the recursive
   // implementation.
   virtual void getExistsExpressions(
-      std::vector<SparqlExpression*>& result) final;
+      std::vector<const SparqlExpression*>& result) const final;
 
   // __________________________________________________________________________
   virtual ~SparqlExpression() = default;
diff --git a/src/util/JoinAlgorithms/FindUndefRanges.h b/src/util/JoinAlgorithms/FindUndefRanges.h
index 7b3f3296cb..cbdbc1b4fd 100644
--- a/src/util/JoinAlgorithms/FindUndefRanges.h
+++ b/src/util/JoinAlgorithms/FindUndefRanges.h
@@ -165,6 +165,38 @@ auto findSmallerUndefRangesArbitrary(const auto& row, It begin, It end,
 // have additional information about the input (most notably which of the join
 // columns contain no UNDEF at all) and therefore a more specialized routine
 // should be chosen.
+struct FindSmallerUndefRanges {
+  template <std::random_access_iterator It>
+  auto operator()(const auto& row, It begin, It end,
+                  bool& resultMightBeUnsorted) -> cppcoro::generator<It> {
+    size_t numLastUndefined = 0;
+    assert(row.size() > 0);
+    auto it = ql::ranges::rbegin(row);
+    auto rend = ql::ranges::rend(row);
+    for (; it < rend; ++it) {
+      if (*it != Id::makeUndefined()) {
+        break;
+      }
+      ++numLastUndefined;
+    }
+
+    for (; it < rend; ++it) {
+      if (*it == Id::makeUndefined()) {
+        return findSmallerUndefRangesArbitrary(row, begin, end,
+                                               resultMightBeUnsorted);
+      }
+    }
+    if (numLastUndefined == 0) {
+      return findSmallerUndefRangesForRowsWithoutUndef(row, begin, end,
+                                                       resultMightBeUnsorted);
+    } else {
+      return findSmallerUndefRangesForRowsWithUndefInLastColumns(
+          row, numLastUndefined, begin, end, resultMightBeUnsorted);
+    }
+  }
+};
+constexpr FindSmallerUndefRanges findSmallerUndefRanges;
+/*
 template <std::random_access_iterator It>
 auto findSmallerUndefRanges(const auto& row, It begin, It end,
                             bool& resultMightBeUnsorted)
@@ -194,4 +226,5 @@ auto findSmallerUndefRanges(const auto& row, It begin, It end,
         row, numLastUndefined, begin, end, resultMightBeUnsorted);
   }
 }
+*/
 }  // namespace ad_utility

From 7ff49c97404cd9604bc16fe0e775a61b8b0ef6b3 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 8 Jan 2025 09:10:10 +0100
Subject: [PATCH 10/30] Fix compilation.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/util/JoinAlgorithms/FindUndefRanges.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/util/JoinAlgorithms/FindUndefRanges.h b/src/util/JoinAlgorithms/FindUndefRanges.h
index cbdbc1b4fd..bf15685f37 100644
--- a/src/util/JoinAlgorithms/FindUndefRanges.h
+++ b/src/util/JoinAlgorithms/FindUndefRanges.h
@@ -168,7 +168,7 @@ auto findSmallerUndefRangesArbitrary(const auto& row, It begin, It end,
 struct FindSmallerUndefRanges {
   template <std::random_access_iterator It>
   auto operator()(const auto& row, It begin, It end,
-                  bool& resultMightBeUnsorted) -> cppcoro::generator<It> {
+                  bool& resultMightBeUnsorted) const -> cppcoro::generator<It> {
     size_t numLastUndefined = 0;
     assert(row.size() > 0);
     auto it = ql::ranges::rbegin(row);

From 7ec8947c759514efdbd0a533a5c00a545d5ecc4c Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 8 Jan 2025 09:24:02 +0100
Subject: [PATCH 11/30] Fix segfaults from moved-from subtrees in Bind and GroupBy.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/Bind.cpp    | 2 +-
 src/engine/GroupBy.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp
index 230ca1cb68..bdccf14488 100644
--- a/src/engine/Bind.cpp
+++ b/src/engine/Bind.cpp
@@ -17,7 +17,7 @@ Bind::Bind(QueryExecutionContext* qec,
            std::shared_ptr<QueryExecutionTree> subtree, parsedQuery::Bind b)
     : Operation(qec), _subtree(std::move(subtree)), _bind(std::move(b)) {
   _subtree = ExistsScan::addExistsScansToSubtree(
-      _bind._expression, std::move(subtree), getExecutionContext(),
+      _bind._expression, std::move(_subtree), getExecutionContext(),
       cancellationHandle_);
 }
 
diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
index 0fe65fd00e..cfa8621709 100644
--- a/src/engine/GroupBy.cpp
+++ b/src/engine/GroupBy.cpp
@@ -55,7 +55,7 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector<Variable> groupByVariables,
   auto sortColumns = computeSortColumns(subtree.get());
 
   for (const auto& alias : _aliases) {
-    _subtree = ExistsScan::addExistsScansToSubtree(
+    subtree = ExistsScan::addExistsScansToSubtree(
         alias._expression, std::move(subtree), getExecutionContext(),
         cancellationHandle_);
   }

From c03f3e59f2097c3f14bb9cb214eb6ddfadec2992 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 8 Jan 2025 09:34:56 +0100
Subject: [PATCH 12/30] Use numColumns() instead of size() for the join column count.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/ExistsScan.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/engine/ExistsScan.cpp b/src/engine/ExistsScan.cpp
index 26fde12984..c416d1dc41 100644
--- a/src/engine/ExistsScan.cpp
+++ b/src/engine/ExistsScan.cpp
@@ -96,8 +96,8 @@ ProtoResult ExistsScan::computeResult([[maybe_unused]] bool requestLaziness) {
   // TODO<joka921> There are many other cases where a cheaper implementation can
   // be chosen, but we leave those for another PR, this is the most common case.
   namespace stdr = ql::ranges;
-  size_t numJoinColumns = joinColumnsLeft.size();
-  AD_CORRECTNESS_CHECK(numJoinColumns == joinColumnsRight.size());
+  size_t numJoinColumns = joinColumnsLeft.numColumns();
+  AD_CORRECTNESS_CHECK(numJoinColumns == joinColumnsRight.numColumns());
   bool isCheap = stdr::none_of(
       ad_utility::integerRange(numJoinColumns), [&](const auto& col) {
         return (stdr::any_of(joinColumnsRight.getColumn(col),

From 2da52abc6aea83dac6eb55dc536ef1f4e184fb1b Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 8 Jan 2025 10:55:21 +0100
Subject: [PATCH 13/30] Rename ExistsScan to ExistsJoin and add EXISTS tests.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/Bind.cpp                           |  4 +-
 src/engine/CMakeLists.txt                     |  2 +-
 src/engine/{ExistsScan.cpp => ExistsJoin.cpp} | 26 ++++++-------
 src/engine/{ExistsScan.h => ExistsJoin.h}     |  4 +-
 src/engine/Filter.cpp                         |  4 +-
 src/engine/GroupBy.cpp                        |  4 +-
 src/util/JoinAlgorithms/FindUndefRanges.h     | 31 ---------------
 test/QueryPlannerTest.cpp                     | 13 ++++++-
 test/QueryPlannerTestHelpers.h                |  7 ++++
 test/SparqlAntlrParserTest.cpp                | 39 +++++++++++++++++++
 10 files changed, 80 insertions(+), 54 deletions(-)
 rename src/engine/{ExistsScan.cpp => ExistsJoin.cpp} (90%)
 rename src/engine/{ExistsScan.h => ExistsJoin.h} (95%)

diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp
index bdccf14488..276f04e9fc 100644
--- a/src/engine/Bind.cpp
+++ b/src/engine/Bind.cpp
@@ -5,7 +5,7 @@
 #include "Bind.h"
 
 #include "engine/CallFixedSize.h"
-#include "engine/ExistsScan.h"
+#include "engine/ExistsJoin.h"
 #include "engine/QueryExecutionTree.h"
 #include "engine/sparqlExpressions/SparqlExpression.h"
 #include "engine/sparqlExpressions/SparqlExpressionGenerators.h"
@@ -16,7 +16,7 @@
 Bind::Bind(QueryExecutionContext* qec,
            std::shared_ptr<QueryExecutionTree> subtree, parsedQuery::Bind b)
     : Operation(qec), _subtree(std::move(subtree)), _bind(std::move(b)) {
-  _subtree = ExistsScan::addExistsScansToSubtree(
+  _subtree = ExistsJoin::addExistsScansToSubtree(
       _bind._expression, std::move(_subtree), getExecutionContext(),
       cancellationHandle_);
 }
diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index c724a8fb39..a3750a07e5 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -14,5 +14,5 @@ add_library(engine
         CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
         TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
         CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp
-        Describe.cpp ExistsScan.cpp)
+        Describe.cpp ExistsJoin.cpp)
 qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
diff --git a/src/engine/ExistsScan.cpp b/src/engine/ExistsJoin.cpp
similarity index 90%
rename from src/engine/ExistsScan.cpp
rename to src/engine/ExistsJoin.cpp
index c416d1dc41..d8d3f564d1 100644
--- a/src/engine/ExistsScan.cpp
+++ b/src/engine/ExistsJoin.cpp
@@ -2,7 +2,7 @@
 //                  Chair of Algorithms and Data Structures.
 //  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
-#include "engine/ExistsScan.h"
+#include "engine/ExistsJoin.h"
 
 #include "engine/QueryPlanner.h"
 #include "engine/sparqlExpressions/ExistsExpression.h"
@@ -10,7 +10,7 @@
 #include "util/JoinAlgorithms/JoinAlgorithms.h"
 
 // _____________________________________________________________________________
-ExistsScan::ExistsScan(QueryExecutionContext* qec,
+ExistsJoin::ExistsJoin(QueryExecutionContext* qec,
                        std::shared_ptr<QueryExecutionTree> left,
                        std::shared_ptr<QueryExecutionTree> right,
                        Variable existsVariable)
@@ -24,16 +24,16 @@ ExistsScan::ExistsScan(QueryExecutionContext* qec,
 }
 
 // _____________________________________________________________________________
-string ExistsScan::getCacheKeyImpl() const {
+string ExistsJoin::getCacheKeyImpl() const {
   return absl::StrCat("EXISTS SCAN left: ", left_->getCacheKey(),
                       " right: ", right_->getCacheKey());
 }
 
 // _____________________________________________________________________________
-string ExistsScan::getDescriptor() const { return "EXISTS scan"; }
+string ExistsJoin::getDescriptor() const { return "EXISTS scan"; }
 
 // ____________________________________________________________________________
-VariableToColumnMap ExistsScan::computeVariableToColumnMap() const {
+VariableToColumnMap ExistsJoin::computeVariableToColumnMap() const {
   auto res = left_->getVariableColumns();
   AD_CONTRACT_CHECK(
       !res.contains(existsVariable_),
@@ -43,18 +43,18 @@ VariableToColumnMap ExistsScan::computeVariableToColumnMap() const {
 }
 
 // ____________________________________________________________________________
-size_t ExistsScan::getResultWidth() const {
+size_t ExistsJoin::getResultWidth() const {
   // We add one column to the input.
   return left_->getResultWidth() + 1;
 }
 
 // ____________________________________________________________________________
-vector<ColumnIndex> ExistsScan::resultSortedOn() const {
+vector<ColumnIndex> ExistsJoin::resultSortedOn() const {
   return left_->resultSortedOn();
 }
 
 // ____________________________________________________________________________
-float ExistsScan::getMultiplicity(size_t col) {
+float ExistsJoin::getMultiplicity(size_t col) {
   if (col < getResultWidth() - 1) {
     return left_->getMultiplicity(col);
   }
@@ -64,18 +64,18 @@ float ExistsScan::getMultiplicity(size_t col) {
 }
 
 // ____________________________________________________________________________
-uint64_t ExistsScan::getSizeEstimateBeforeLimit() {
+uint64_t ExistsJoin::getSizeEstimateBeforeLimit() {
   return left_->getSizeEstimate();
 }
 
 // ____________________________________________________________________________
-size_t ExistsScan::getCostEstimate() {
+size_t ExistsJoin::getCostEstimate() {
   return left_->getCostEstimate() + right_->getCostEstimate() +
          left_->getSizeEstimate() + right_->getSizeEstimate();
 }
 
 // ____________________________________________________________________________
-ProtoResult ExistsScan::computeResult([[maybe_unused]] bool requestLaziness) {
+ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   auto leftRes = left_->getResult();
   auto rightRes = right_->getResult();
   const auto& left = leftRes->idTable();
@@ -139,7 +139,7 @@ ProtoResult ExistsScan::computeResult([[maybe_unused]] bool requestLaziness) {
 }
 
 // _____________________________________________________________________________
-std::shared_ptr<QueryExecutionTree> ExistsScan::addExistsScansToSubtree(
+std::shared_ptr<QueryExecutionTree> ExistsJoin::addExistsScansToSubtree(
     const sparqlExpression::SparqlExpressionPimpl& expression,
     std::shared_ptr<QueryExecutionTree> subtree, QueryExecutionContext* qec,
     const ad_utility::SharedCancellationHandle& cancellationHandle) {
@@ -158,7 +158,7 @@ std::shared_ptr<QueryExecutionTree> ExistsScan::addExistsScansToSubtree(
     auto pq = exists.argument();
     auto tree =
         std::make_shared<QueryExecutionTree>(qp.createExecutionTree(pq));
-    subtree = ad_utility::makeExecutionTree<ExistsScan>(
+    subtree = ad_utility::makeExecutionTree<ExistsJoin>(
         qec, std::move(subtree), std::move(tree), exists.variable());
   }
   return subtree;
diff --git a/src/engine/ExistsScan.h b/src/engine/ExistsJoin.h
similarity index 95%
rename from src/engine/ExistsScan.h
rename to src/engine/ExistsJoin.h
index dbd947d302..9b9c7483ce 100644
--- a/src/engine/ExistsScan.h
+++ b/src/engine/ExistsJoin.h
@@ -7,7 +7,7 @@
 #include "engine/Operation.h"
 #include "engine/QueryExecutionTree.h"
 
-class ExistsScan : public Operation {
+class ExistsJoin : public Operation {
  private:
   std::shared_ptr<QueryExecutionTree> left_;
   std::shared_ptr<QueryExecutionTree> right_;
@@ -19,7 +19,7 @@ class ExistsScan : public Operation {
   std::vector<std::array<ColumnIndex, 2>> _matchedColumns;
 
  public:
-  ExistsScan(QueryExecutionContext* qec,
+  ExistsJoin(QueryExecutionContext* qec,
              std::shared_ptr<QueryExecutionTree> left,
              std::shared_ptr<QueryExecutionTree> right,
              Variable existsVariable);
diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp
index ff8edc1fc1..9da7c12724 100644
--- a/src/engine/Filter.cpp
+++ b/src/engine/Filter.cpp
@@ -10,7 +10,7 @@
 
 #include "backports/algorithm.h"
 #include "engine/CallFixedSize.h"
-#include "engine/ExistsScan.h"
+#include "engine/ExistsJoin.h"
 #include "engine/QueryExecutionTree.h"
 #include "engine/QueryPlanner.h"
 #include "engine/sparqlExpressions/SparqlExpression.h"
@@ -31,7 +31,7 @@ Filter::Filter(QueryExecutionContext* qec,
     : Operation(qec),
       _subtree(std::move(subtree)),
       _expression{std::move(expression)} {
-  _subtree = ExistsScan::addExistsScansToSubtree(
+  _subtree = ExistsJoin::addExistsScansToSubtree(
       _expression, std::move(_subtree), getExecutionContext(),
       cancellationHandle_);
   setPrefilterExpressionForChildren();
diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
index cfa8621709..3e8af1cb29 100644
--- a/src/engine/GroupBy.cpp
+++ b/src/engine/GroupBy.cpp
@@ -9,7 +9,7 @@
 #include <absl/strings/str_join.h>
 
 #include "engine/CallFixedSize.h"
-#include "engine/ExistsScan.h"
+#include "engine/ExistsJoin.h"
 #include "engine/IndexScan.h"
 #include "engine/Join.h"
 #include "engine/LazyGroupBy.h"
@@ -55,7 +55,7 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector<Variable> groupByVariables,
   auto sortColumns = computeSortColumns(subtree.get());
 
   for (const auto& alias : _aliases) {
-    subtree = ExistsScan::addExistsScansToSubtree(
+    subtree = ExistsJoin::addExistsScansToSubtree(
         alias._expression, std::move(subtree), getExecutionContext(),
         cancellationHandle_);
   }
diff --git a/src/util/JoinAlgorithms/FindUndefRanges.h b/src/util/JoinAlgorithms/FindUndefRanges.h
index bf15685f37..6313bea887 100644
--- a/src/util/JoinAlgorithms/FindUndefRanges.h
+++ b/src/util/JoinAlgorithms/FindUndefRanges.h
@@ -196,35 +196,4 @@ struct FindSmallerUndefRanges {
   }
 };
 constexpr FindSmallerUndefRanges findSmallerUndefRanges;
-/*
-template <std::random_access_iterator It>
-auto findSmallerUndefRanges(const auto& row, It begin, It end,
-                            bool& resultMightBeUnsorted)
-    -> cppcoro::generator<It> {
-  size_t numLastUndefined = 0;
-  assert(row.size() > 0);
-  auto it = ql::ranges::rbegin(row);
-  auto rend = ql::ranges::rend(row);
-  for (; it < rend; ++it) {
-    if (*it != Id::makeUndefined()) {
-      break;
-    }
-    ++numLastUndefined;
-  }
-
-  for (; it < rend; ++it) {
-    if (*it == Id::makeUndefined()) {
-      return findSmallerUndefRangesArbitrary(row, begin, end,
-                                             resultMightBeUnsorted);
-    }
-  }
-  if (numLastUndefined == 0) {
-    return findSmallerUndefRangesForRowsWithoutUndef(row, begin, end,
-                                                     resultMightBeUnsorted);
-  } else {
-    return findSmallerUndefRangesForRowsWithUndefInLastColumns(
-        row, numLastUndefined, begin, end, resultMightBeUnsorted);
-  }
-}
-*/
 }  // namespace ad_utility
diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 90462f3cc3..c7d806319e 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -2906,10 +2906,21 @@ TEST(QueryPlanner, Describe) {
 }
 
 // ____________________________________________________________________________
-TEST(QueryPlanner, GroupByRedundanteParensAndVariables) {
+TEST(QueryPlanner, GroupByRedundantParensAndVariables) {
   auto matcher = h::GroupBy({Variable{"?x"}}, {},
                             h::IndexScanFromStrings("?x", "?y", "?z"));
   h::expect("SELECT ?x { ?x ?y ?z} GROUP BY (?x)", matcher);
   h::expect("SELECT ?x { ?x ?y ?z} GROUP BY ?x ?x", matcher);
   h::expect("SELECT ?x { ?x ?y ?z} GROUP BY ?x ?x (?x)", matcher);
 }
+
+// ____________________________________________________________________________
+TEST(QueryPlanner, Exists) {
+  auto xyz = h::IndexScanFromStrings("?x", "?y", "?z");
+  auto a = h::IndexScanFromStrings("?x", "?y", "?z");
+  h::expect(
+      "SELECT * { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
+      h::Filter("EXISTS {?a ?b ?c}",
+                h::ExistsJoin(h::IndexScanFromStrings("?x", "?y", "?z"),
+                              h::IndexScanFromStrings("?a", "?b", "?c"))));
+}
diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h
index c300bf0d5f..f53f30c5bb 100644
--- a/test/QueryPlannerTestHelpers.h
+++ b/test/QueryPlannerTestHelpers.h
@@ -15,6 +15,7 @@
 #include "engine/CartesianProductJoin.h"
 #include "engine/CountAvailablePredicates.h"
 #include "engine/Describe.h"
+#include "engine/ExistsJoin.h"
 #include "engine/Filter.h"
 #include "engine/GroupBy.h"
 #include "engine/IndexScan.h"
@@ -405,6 +406,12 @@ inline QetMatcher Describe(
             AD_PROPERTY(::Describe, getDescribe, describeMatcher)));
 }
 
+// Match an `ExistsJoin`
+inline QetMatcher ExistsJoin(const QetMatcher& leftChild,
+                             const QetMatcher& rightChild) {
+  return RootOperation<::ExistsJoin>(AllOf(children(leftChild, rightChild)));
+}
+
 //
 inline QetMatcher QetWithWarnings(
     const std::vector<std::string>& warningSubstrings,
diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp
index 0803f96f03..f5a65169b2 100644
--- a/test/SparqlAntlrParserTest.cpp
+++ b/test/SparqlAntlrParserTest.cpp
@@ -4,6 +4,7 @@
 //          Julian Mundhahs <mundhahj@cs.uni-freiburg.de>
 //          Hannah Bast <bast@cs.uni-freiburg.de>
 
+#include <engine/sparqlExpressions/ExistsExpression.h>
 #include <gtest/gtest.h>
 
 #include <iostream>
@@ -14,6 +15,7 @@
 #include "./SparqlExpressionTestHelpers.h"
 #include "./util/GTestHelpers.h"
 #include "./util/TripleComponentTestHelpers.h"
+#include "QueryPlannerTestHelpers.h"
 #include "SparqlAntlrParserTestHelpers.h"
 #include "engine/sparqlExpressions/CountStarExpression.h"
 #include "engine/sparqlExpressions/GroupConcatExpression.h"
@@ -1860,6 +1862,43 @@ TEST(SparqlParser, binaryStringExpressions) {
   expectBuiltInCall("STRBEFORE(?x, ?y)", makeMatcher(&makeStrBeforeExpression));
 }
 
+// Matchers for EXISTS and NOT EXISTS functions.
+namespace existsTestHelpers {
+using namespace sparqlExpression;
+using namespace ::testing;
+
+// Match an EXISTS function
+auto existsMatcher(Matcher<const ParsedQuery&> pattern) {
+  return Pointee(WhenDynamicCastTo<const ExistsExpression&>(
+      AD_PROPERTY(ExistsExpression, argument, pattern)));
+}
+// Match a NOT EXISTS function
+auto notExistsMatcher(Matcher<const ParsedQuery&> pattern) {
+  return builtInCallTestHelpers::matchNaryWithChildrenMatchers(
+      &makeUnaryNegateExpression, existsMatcher(pattern));
+}
+}  // namespace existsTestHelpers
+
+// _____________________________________________________________________________
+TEST(SparqlParser, Exists) {
+  using namespace existsTestHelpers;
+  auto expectBuiltInCall = ExpectCompleteParse<&Parser::builtInCall>{};
+  // A matcher that matches the query `SELECT * { ?a <bar> ?foo}`, where the
+  // FROM and FROM NAMED clauses can still be specified via arguments.
+  using Graphs = ScanSpecificationAsTripleComponent::Graphs;
+  auto selectABarFooMatcher = [](Graphs defaultGraphs = std::nullopt,
+                                 Graphs namedGraphs = std::nullopt) {
+    return testing::AllOf(m::SelectQuery(
+        m::AsteriskSelect(),
+        m::GraphPattern(m::Triples({{Var{"?a"}, "<bar>", Var{"?foo"}}})),
+        defaultGraphs, namedGraphs));
+  };
+  expectBuiltInCall("EXISTS {?a <bar> ?foo}",
+                    existsMatcher(selectABarFooMatcher()));
+  expectBuiltInCall("NOT EXISTS {?a <bar> ?foo}",
+                    notExistsMatcher(selectABarFooMatcher()));
+}
+
 namespace aggregateTestHelpers {
 using namespace sparqlExpression;
 

From cbbc771c64251f3ec69b342bbcda02fc691a5c74 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 8 Jan 2025 11:00:59 +0100
Subject: [PATCH 14/30] Rename an unused variable in the Exists query planner test.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 test/QueryPlannerTest.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index c7d806319e..8d68a4b1e5 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -2917,7 +2917,7 @@ TEST(QueryPlanner, GroupByRedundantParensAndVariables) {
 // ____________________________________________________________________________
 TEST(QueryPlanner, Exists) {
   auto xyz = h::IndexScanFromStrings("?x", "?y", "?z");
-  auto a = h::IndexScanFromStrings("?x", "?y", "?z");
+  auto ab = h::IndexScanFromStrings("?x", "?y", "?z");
   h::expect(
       "SELECT * { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
       h::Filter("EXISTS {?a ?b ?c}",

From 91e5802c33d798e1b9cb49326079a9ddba1b902a Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 8 Jan 2025 11:03:12 +0100
Subject: [PATCH 15/30] Reuse the scan matchers in the Exists query planner test.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 test/QueryPlannerTest.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 8d68a4b1e5..6f8f40d47e 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -2917,10 +2917,7 @@ TEST(QueryPlanner, GroupByRedundantParensAndVariables) {
 // ____________________________________________________________________________
 TEST(QueryPlanner, Exists) {
   auto xyz = h::IndexScanFromStrings("?x", "?y", "?z");
-  auto ab = h::IndexScanFromStrings("?x", "?y", "?z");
-  h::expect(
-      "SELECT * { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
-      h::Filter("EXISTS {?a ?b ?c}",
-                h::ExistsJoin(h::IndexScanFromStrings("?x", "?y", "?z"),
-                              h::IndexScanFromStrings("?a", "?b", "?c"))));
+  auto abc = h::IndexScanFromStrings("?a", "?b", "?c");
+  h::expect("SELECT * { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
+            h::Filter("EXISTS {?a ?b ?c}", h::ExistsJoin(xyz, abc)));
 }

From c3a9a7df4b46ac5e0e720c2ca4a40e9d1f5a0b0e Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 8 Jan 2025 12:31:54 +0100
Subject: [PATCH 16/30] Propagate dataset clauses into EXISTS and add more tests.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 .../sparqlParser/SparqlQleverVisitor.cpp      | 14 +++++----
 src/parser/sparqlParser/SparqlQleverVisitor.h |  1 +
 test/QueryPlannerTest.cpp                     | 29 +++++++++++++++++++
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index 903544c96a..32b050db9b 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -268,6 +268,7 @@ ParsedQuery Visitor::visit(Parser::ConstructQueryContext* ctx) {
   ParsedQuery query;
   query.datasetClauses_ = parsedQuery::DatasetClauses::fromClauses(
       visitVector(ctx->datasetClause()));
+  activeDatasetClauses_ = query.datasetClauses_;
   if (ctx->constructTemplate()) {
     query._clause = visit(ctx->constructTemplate())
                         .value_or(parsedQuery::ConstructClause{});
@@ -303,9 +304,9 @@ ParsedQuery Visitor::visit(Parser::DescribeQueryContext* ctx) {
   }
 
   // Parse the FROM and FROM NAMED clauses.
-  auto datasetClauses = parsedQuery::DatasetClauses::fromClauses(
+  activeDatasetClauses_ = parsedQuery::DatasetClauses::fromClauses(
       visitVector(ctx->datasetClause()));
-  describeClause.datasetClauses_ = datasetClauses;
+  describeClause.datasetClauses_ = activeDatasetClauses_;
 
   // Parse the WHERE clause and construct a SELECT query from it. For `DESCRIBE
   // *`, add each visible variable as a resource to describe.
@@ -336,7 +337,7 @@ ParsedQuery Visitor::visit(Parser::DescribeQueryContext* ctx) {
   parsedQuery_.addSolutionModifiers(visit(ctx->solutionModifier()));
   parsedQuery_._rootGraphPattern._graphPatterns.emplace_back(
       std::move(describeClause));
-  parsedQuery_.datasetClauses_ = datasetClauses;
+  parsedQuery_.datasetClauses_ = activeDatasetClauses_;
   auto constructClause = ParsedQuery::ConstructClause{};
   using G = GraphTerm;
   using V = Variable;
@@ -352,6 +353,7 @@ ParsedQuery Visitor::visit(Parser::AskQueryContext* ctx) {
   parsedQuery_._clause = ParsedQuery::AskClause{};
   parsedQuery_.datasetClauses_ = parsedQuery::DatasetClauses::fromClauses(
       visitVector(ctx->datasetClause()));
+  activeDatasetClauses_ = parsedQuery_.datasetClauses_;
   visitWhereClause(ctx->whereClause(), parsedQuery_);
   // NOTE: It can make sense to have solution modifiers with an ASK query, for
   // example, a GROUP BY with a HAVING.
@@ -595,6 +597,8 @@ ParsedQuery Visitor::visit(Parser::ModifyContext* ctx) {
       };
   AD_CORRECTNESS_CHECK(visibleVariables_.empty());
   auto graphPattern = visit(ctx->groupGraphPattern());
+  parsedQuery_.datasetClauses_ =
+      parsedQuery::DatasetClauses::fromClauses(visitVector(ctx->usingClause()));
   parsedQuery_._rootGraphPattern = std::move(graphPattern);
   parsedQuery_.registerVariablesVisibleInQueryBody(visibleVariables_);
   visibleVariables_.clear();
@@ -605,8 +609,6 @@ ParsedQuery Visitor::visit(Parser::ModifyContext* ctx) {
   checkTriples(op.toDelete_);
   visitIf(&op.with_, ctx->iri());
   parsedQuery_._clause = parsedQuery::UpdateClause{op};
-  parsedQuery_.datasetClauses_ =
-      parsedQuery::DatasetClauses::fromClauses(visitVector(ctx->usingClause()));
 
   return parsedQuery_;
 }
@@ -1174,6 +1176,7 @@ ParsedQuery Visitor::visit(Parser::SelectQueryContext* ctx) {
   parsedQuery_._clause = visit(ctx->selectClause());
   parsedQuery_.datasetClauses_ = parsedQuery::DatasetClauses::fromClauses(
       visitVector(ctx->datasetClause()));
+  activeDatasetClauses_ = parsedQuery_.datasetClauses_;
   visitWhereClause(ctx->whereClause(), parsedQuery_);
   parsedQuery_.addSolutionModifiers(visit(ctx->solutionModifier()));
   return parsedQuery_;
@@ -2434,6 +2437,7 @@ ExpressionPtr Visitor::visitExists(Parser::GroupGraphPatternContext* pattern,
   ParsedQuery query = std::exchange(parsedQuery_, std::move(queryBackup));
   query.selectClause().setAsterisk();
   query._rootGraphPattern = std::move(group);
+  query.datasetClauses_ = activeDatasetClauses_;
   visibleVariables_ = std::move(visibleVariablesSoFar);
   auto exists =
       std::make_unique<sparqlExpression::ExistsExpression>(std::move(query));
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h
index 3e7b63c3ad..3d7aa0dd86 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.h
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -78,6 +78,7 @@ class SparqlQleverVisitor {
   // query. This may contain duplicates. A variable is added via
   // `addVisibleVariable`.
   std::vector<Variable> visibleVariables_{};
+  ParsedQuery::DatasetClauses activeDatasetClauses_;
   PrefixMap prefixMap_{};
   // We need to remember the prologue (prefix declarations) when we encounter it
   // because we need it when we encounter a SERVICE query. When there is no
diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 6f8f40d47e..518833bb02 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -2918,6 +2918,35 @@ TEST(QueryPlanner, GroupByRedundantParensAndVariables) {
 TEST(QueryPlanner, Exists) {
   auto xyz = h::IndexScanFromStrings("?x", "?y", "?z");
   auto abc = h::IndexScanFromStrings("?a", "?b", "?c");
+  using V = Variable;
+  // Simple tests for EXISTS with FILTER, BIND, and GROUP BY.
   h::expect("SELECT * { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
             h::Filter("EXISTS {?a ?b ?c}", h::ExistsJoin(xyz, abc)));
+  h::expect("SELECT * { ?x ?y ?z BIND(EXISTS {?a ?b ?c} as ?bound)}",
+            h::Bind(h::ExistsJoin(xyz, abc), "EXISTS {?a ?b ?c}",
+                    Variable("?bound")));
+  h::expect(
+      "SELECT ?x (SAMPLE(EXISTS{?a ?b ?c}) as ?s) { ?x ?y ?z } GROUP BY ?x",
+      h::GroupBy({V{"?x"}}, {"(SAMPLE(EXISTS{?a ?b ?c}) as ?s)"},
+                 h::ExistsJoin(xyz, abc)));
+
+  // Test the interaction of FROM [NAMED] with EXISTS.
+
+  using H = ad_utility::HashSet<std::string>;
+  auto xyzg = h::IndexScanFromStrings("?x", "?y", "?z", {}, H{"<g>"});
+  auto abcg = h::IndexScanFromStrings("?a", "?b", "?c", {}, H{"<g>"});
+
+  auto existsJoin = h::ExistsJoin(xyzg, abcg);
+  auto filter = h::Filter("EXISTS {?a ?b ?c}", existsJoin);
+
+  // Test all different kinds of queries.
+  // TODO<joka921> There is a more elegant way to reduce the code duplication
+  // (use a lambda that only changes the beginning of the query).
+  h::expect("SELECT * FROM <g> { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}", filter);
+  h::expect("ASK FROM <g> { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}", filter);
+  h::expect(
+      "CONSTRUCT {<a> <b> <c>} FROM <g> { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
+      filter);
+  h::expect("Describe ?x FROM <g> { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
+            h::Describe(::testing::_, filter));
 }

From 0adbfa609e5a22c799e7ec6c737a58637697c198 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 8 Jan 2025 17:25:35 +0100
Subject: [PATCH 17/30] Add some tests at least for the parser and query
 planner.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 .../sparqlParser/SparqlQleverVisitor.cpp       |  1 -
 test/QueryPlannerTest.cpp                      | 18 +++++++++++++-----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index 32b050db9b..41e297120c 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -7,7 +7,6 @@
 
 #include "parser/sparqlParser/SparqlQleverVisitor.h"
 
-#include <absl/strings/str_join.h>
 #include <absl/strings/str_split.h>
 
 #include <string>
diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 518833bb02..89601732e8 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -2930,17 +2930,13 @@ TEST(QueryPlanner, Exists) {
       h::GroupBy({V{"?x"}}, {"(SAMPLE(EXISTS{?a ?b ?c}) as ?s)"},
                  h::ExistsJoin(xyz, abc)));
 
-  // Test the interaction of FROM [NAMED] with EXISTS.
-
+  // Test the interaction of FROM with EXISTS.
   using H = ad_utility::HashSet<std::string>;
   auto xyzg = h::IndexScanFromStrings("?x", "?y", "?z", {}, H{"<g>"});
   auto abcg = h::IndexScanFromStrings("?a", "?b", "?c", {}, H{"<g>"});
 
   auto existsJoin = h::ExistsJoin(xyzg, abcg);
   auto filter = h::Filter("EXISTS {?a ?b ?c}", existsJoin);
-
-  // Test all different kinds of queries.
-  // TODO<joka921> There is a more elegant way to reduce the code duplication
   // (use a lambda that only changes the beginning of the query).
   h::expect("SELECT * FROM <g> { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}", filter);
   h::expect("ASK FROM <g> { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}", filter);
@@ -2949,4 +2945,16 @@ TEST(QueryPlanner, Exists) {
       filter);
   h::expect("Describe ?x FROM <g> { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
             h::Describe(::testing::_, filter));
+
+  // Test the interaction of FROM NAMED with EXISTS
+  auto varG = std::vector{Variable{"?g"}};
+  std::vector<ColumnIndex> graphCol{ADDITIONAL_COLUMN_GRAPH_ID};
+  auto uvcg =
+      h::IndexScanFromStrings("?u", "?v", "?c", {}, H{"<g2>"}, varG, graphCol);
+  existsJoin = h::ExistsJoin(xyzg, h::UnorderedJoins(abcg, uvcg));
+  filter = h::Filter("EXISTS {?a ?b ?c. GRAPH ?g { ?u ?v ?c}}", existsJoin);
+  h::expect(
+      "SELECT * FROM <g> FROM NAMED <g2> { ?x ?y ?z FILTER EXISTS {?a ?b ?c. "
+      "GRAPH ?g { ?u ?v ?c}}}",
+      filter);
 }

From babd2940a203258cd95fb9dc332c93e24476ebc1 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 9 Jan 2025 09:38:21 +0100
Subject: [PATCH 18/30] Some more tests. As a next step, I want to write some
 comments.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 test/engine/CMakeLists.txt     |  1 +
 test/engine/ExistsJoinTest.cpp | 94 ++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 test/engine/ExistsJoinTest.cpp

diff --git a/test/engine/CMakeLists.txt b/test/engine/CMakeLists.txt
index fef9ffed39..41b2b463ad 100644
--- a/test/engine/CMakeLists.txt
+++ b/test/engine/CMakeLists.txt
@@ -12,3 +12,4 @@ addLinkAndDiscoverTest(BindTest engine)
 addLinkAndRunAsSingleTest(SpatialJoinAlgorithmsTest engine)
 addLinkAndDiscoverTestSerial(QueryExecutionTreeTest engine)
 addLinkAndDiscoverTestSerial(DescribeTest engine)
+addLinkAndDiscoverTestSerial(ExistsJoinTest engine)
diff --git a/test/engine/ExistsJoinTest.cpp b/test/engine/ExistsJoinTest.cpp
new file mode 100644
index 0000000000..af72e5fbb6
--- /dev/null
+++ b/test/engine/ExistsJoinTest.cpp
@@ -0,0 +1,94 @@
+// Copyright 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include <gmock/gmock.h>
+
+#include "../util/GTestHelpers.h"
+#include "../util/IdTableHelpers.h"
+#include "../util/IndexTestHelpers.h"
+#include "engine/ExistsJoin.h"
+#include "engine/IndexScan.h"
+#include "engine/NeutralElementOperation.h"
+#include "engine/QueryExecutionTree.h"
+
+using namespace ad_utility::testing;
+
+namespace {
+// Run an `ExistsJoin` on `leftInput` and `rightInput`, where the first
+// `numJoinColumns` columns of both inputs are the join columns, and check that
+// the result is `leftInput` plus one column with the values `expectedAsBool`.
+void testExists(const VectorTable& leftInput, const VectorTable& rightInput,
+                std::vector<bool> expectedAsBool, size_t numJoinColumns) {
+  AD_CORRECTNESS_CHECK(leftInput.size() == expectedAsBool.size());
+  auto left = makeIdTableFromVector(leftInput);
+  auto right = makeIdTableFromVector(rightInput);
+  AD_CORRECTNESS_CHECK(left.numColumns() >= numJoinColumns);
+  AD_CORRECTNESS_CHECK(right.numColumns() >= numJoinColumns);
+
+  auto qec = getQec();
+  using V = Variable;
+  using Vars = std::vector<std::optional<Variable>>;
+  // TODO<joka921> Support more than one join column.
+  // TODO<joka921> also randomly permute the join columns.
+  auto joinCol = [](size_t i) { return V{absl::StrCat("?joinCol_", i)}; };
+  auto nonJoinCol = [i = 0]() mutable {
+    return V{absl::StrCat("?nonJoinCol_", i++)};
+  };
+
+  auto makeChild = [&](const IdTable& input) {
+    Vars vars;
+    for (size_t i : ad_utility::integerRange(numJoinColumns)) {
+      vars.push_back(joinCol(i));
+    }
+    for ([[maybe_unused]] size_t i :
+         ql::views::iota(numJoinColumns, input.numColumns())) {
+      vars.push_back(nonJoinCol());
+    }
+    return ad_utility::makeExecutionTree<ValuesForTesting>(qec, input.clone(),
+                                                           vars);
+  };
+
+  auto exists =
+      ExistsJoin{qec, makeChild(left), makeChild(right), V{"?exists"}};
+  EXPECT_EQ(exists.getResultWidth(), left.numColumns() + 1);
+  auto res = exists.computeResultOnlyForTesting();
+  const auto& table = res.idTable();
+  ASSERT_EQ(table.numRows(), left.size());
+  // The expected result is `left` with the EXISTS column appended at the end.
+  IdTable expected = left.clone();
+  expected.addEmptyColumn();
+  auto existsCol = expected.getColumn(left.numColumns());
+  ql::ranges::transform(expectedAsBool, existsCol.begin(), &Id::makeFromBool);
+  EXPECT_THAT(table, matchesIdTable(expected));
+}
+}  // namespace
+
+TEST(Exists, computeResult) {
+  // Single join column: `3` and `5` appear in the right input, `4` does not.
+  testExists({{3, 6}, {4, 7}, {5, 8}}, {{3, 15}, {3, 19}, {5, 37}},
+             {true, false, true}, 1);
+
+  // UNDEF matches everything, no matter on which side it appears.
+  auto U = Id::makeUndefined();
+  testExists({{U, 13}, {3, 6}, {4, 7}, {5, 8}}, {{3, 15}, {3, 19}, {5, 37}},
+             {true, true, false, true}, 1);
+  testExists({{3, 6}, {4, 7}, {5, 8}}, {{U, 15}}, {true, true, true}, 1);
+
+  // Two join columns: a row only matches if both columns agree.
+  testExists({{3, 6}, {4, 7}, {5, 8}}, {{3, 15}, {3, 19}, {5, 37}},
+             {false, false, false}, 2);
+  testExists({{3, 6}, {4, 7}, {5, 8}},
+             {{3, 6, 11}, {3, 19, 7}, {4, 8, 0}, {5, 8, 37}},
+             {true, false, true}, 2);
+
+  // Two join columns with UNDEF: UNDEF acts as a wildcard per column.
+  testExists({{2, 2}, {3, U}, {4, 8}, {5, 8}},
+             {{U, 8}, {3, 15}, {3, 19}, {5, U}, {5, 37}},
+             {false, true, true, true}, 2);
+  testExists({{U, U}}, {{13, 17}}, {true}, 2);
+  testExists({{13, 17}, {25, 38}}, {{U, U}}, {true, true}, 2);
+
+  // TODO<joka921> Add tests with unsorted inputs.
+  // TODO<joka921> Test empty inputs on one side.
+}

From 6766af39ca5e073d1669807dfa3e832a29fe964c Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 9 Jan 2025 10:35:08 +0100
Subject: [PATCH 19/30] Added some comments.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/Bind.cpp                           |  2 +-
 src/engine/ExistsJoin.cpp                     | 31 +++++++++++++++----
 src/engine/ExistsJoin.h                       | 22 ++++++++++---
 src/engine/Filter.cpp                         |  4 +--
 src/engine/GroupBy.cpp                        |  2 +-
 .../sparqlExpressions/ExistsExpression.cpp    |  5 ---
 .../sparqlExpressions/ExistsExpression.h      | 29 +++++++++++------
 7 files changed, 66 insertions(+), 29 deletions(-)
 delete mode 100644 src/engine/sparqlExpressions/ExistsExpression.cpp

diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp
index 276f04e9fc..ed98495d72 100644
--- a/src/engine/Bind.cpp
+++ b/src/engine/Bind.cpp
@@ -16,7 +16,7 @@
 Bind::Bind(QueryExecutionContext* qec,
            std::shared_ptr<QueryExecutionTree> subtree, parsedQuery::Bind b)
     : Operation(qec), _subtree(std::move(subtree)), _bind(std::move(b)) {
-  _subtree = ExistsJoin::addExistsScansToSubtree(
+  _subtree = ExistsJoin::addExistsJoinsToSubtree(
       _bind._expression, std::move(_subtree), getExecutionContext(),
       cancellationHandle_);
 }
diff --git a/src/engine/ExistsJoin.cpp b/src/engine/ExistsJoin.cpp
index d8d3f564d1..7ca230c799 100644
--- a/src/engine/ExistsJoin.cpp
+++ b/src/engine/ExistsJoin.cpp
@@ -1,4 +1,4 @@
-//  Copyright 2023, University of Freiburg,
+//  Copyright 2025, University of Freiburg,
 //                  Chair of Algorithms and Data Structures.
 //  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
@@ -25,12 +25,12 @@ ExistsJoin::ExistsJoin(QueryExecutionContext* qec,
 
 // _____________________________________________________________________________
 string ExistsJoin::getCacheKeyImpl() const {
-  return absl::StrCat("EXISTS SCAN left: ", left_->getCacheKey(),
+  return absl::StrCat("EXISTS JOIN left: ", left_->getCacheKey(),
                       " right: ", right_->getCacheKey());
 }
 
 // _____________________________________________________________________________
-string ExistsJoin::getDescriptor() const { return "EXISTS scan"; }
+string ExistsJoin::getDescriptor() const { return "Exists Join"; }
 
 // ____________________________________________________________________________
 VariableToColumnMap ExistsJoin::computeVariableToColumnMap() const {
@@ -70,6 +70,7 @@ uint64_t ExistsJoin::getSizeEstimateBeforeLimit() {
 
 // ____________________________________________________________________________
 size_t ExistsJoin::getCostEstimate() {
+  // The implementation is a linear zipper join.
   return left_->getCostEstimate() + right_->getCostEstimate() +
          left_->getSizeEstimate() + right_->getSizeEstimate();
 }
@@ -81,9 +82,16 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   const auto& left = leftRes->idTable();
   const auto& right = rightRes->idTable();
 
+  // We reuse the generic `zipperJoinWithUndef` utility in the following way:
+  // It has (among others) two callbacks: One for each matching pair of rows
+  // from left and right, and one for rows in the left input that have no
+  // matching counterpart in the right input. The first callback can be a noop,
+  // and the second callback gives us exactly `NOT EXISTS`.
+
+  // Only extract the join columns from both inputs to make the following code
+  // easier.
   ad_utility::JoinColumnMapping joinColumnData{joinColumns_, left.numColumns(),
                                                right.numColumns()};
-
   IdTableView<0> joinColumnsLeft =
       left.asColumnSubsetView(joinColumnData.jcsLeft());
   IdTableView<0> joinColumnsRight =
@@ -105,15 +113,20 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
                (stdr::any_of(joinColumnsLeft.getColumn(col), &Id::isUndefined));
       });
 
-  auto noopRowAdder = [](auto&&...) {};
+  // Nothing to do for the actual matches.
+  auto noopRowAdder = ad_utility::noop;
 
+  // Store the indices of rows for which `exists` is `false`.
   std::vector<size_t, ad_utility::AllocatorWithLimit<size_t>> notExistsIndices{
       allocator()};
+  // The callback is called with iterators, so we convert them back to indices.
   auto actionForNotExisting =
       [&notExistsIndices, begin = joinColumnsLeft.begin()](const auto& itLeft) {
         notExistsIndices.push_back(itLeft - begin);
       };
 
+  // Run the actual zipper join, with the possible optimization if we know, that
+  // there can be no UNDEF values.
   auto checkCancellationLambda = [this] { checkCancellation(); };
   auto runZipperJoin = [&](auto findUndef) {
     [[maybe_unused]] auto numOutOfOrder = ad_utility::zipperJoinWithUndef(
@@ -135,16 +148,22 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   for (size_t notExistsIndex : notExistsIndices) {
     existsCol[notExistsIndex] = Id::makeFromBool(false);
   }
+
+  // The result is a copy of the left input + and additional columns with only
+  // boolean values, so the local vocab of the left input is sufficient.
   return {std::move(result), resultSortedOn(), leftRes->getCopyOfLocalVocab()};
 }
 
 // _____________________________________________________________________________
-std::shared_ptr<QueryExecutionTree> ExistsJoin::addExistsScansToSubtree(
+std::shared_ptr<QueryExecutionTree> ExistsJoin::addExistsJoinsToSubtree(
     const sparqlExpression::SparqlExpressionPimpl& expression,
     std::shared_ptr<QueryExecutionTree> subtree, QueryExecutionContext* qec,
     const ad_utility::SharedCancellationHandle& cancellationHandle) {
+  // First extract all the `EXISTS` functions from the expression.
   std::vector<const sparqlExpression::SparqlExpression*> existsExpressions;
   expression.getPimpl()->getExistsExpressions(existsExpressions);
+
+  // For each of the EXISTS functions add one `ExistsJoin`
   for (auto* expr : existsExpressions) {
     const auto& exists =
         dynamic_cast<const sparqlExpression::ExistsExpression&>(*expr);
diff --git a/src/engine/ExistsJoin.h b/src/engine/ExistsJoin.h
index 9b9c7483ce..4ff44fe94c 100644
--- a/src/engine/ExistsJoin.h
+++ b/src/engine/ExistsJoin.h
@@ -7,28 +7,42 @@
 #include "engine/Operation.h"
 #include "engine/QueryExecutionTree.h"
 
+// The implementation of the SPARQL `EXISTS` function. It takes two subtrees,
+// and returns the left subtree with an additional boolean column that is `true`
+// iff at least one matching row is contained in the right subtree.
 class ExistsJoin : public Operation {
  private:
+  // The left and right child.
   std::shared_ptr<QueryExecutionTree> left_;
   std::shared_ptr<QueryExecutionTree> right_;
   std::vector<std::array<ColumnIndex, 2>> joinColumns_;
 
+  // The variable of the added result column.
   Variable existsVariable_;
 
-  vector<float> _multiplicities;
-  std::vector<std::array<ColumnIndex, 2>> _matchedColumns;
-
  public:
+  // Constructor. The `existsVariable` (the variable for the added boolean
+  // column) must not yet be bound by `left`.
   ExistsJoin(QueryExecutionContext* qec,
              std::shared_ptr<QueryExecutionTree> left,
              std::shared_ptr<QueryExecutionTree> right,
              Variable existsVariable);
 
-  static std::shared_ptr<QueryExecutionTree> addExistsScansToSubtree(
+  // For a given subtree and a given expression, extract all the
+  // `ExistsExpressions` from the expression and add one `ExistsJoin` per
+  // `ExistsExpression` to the subtree. The left side of the `ExistsJoin` is the
+  // input subtree, the right hand side of the `ExistsJoin` as well as the
+  // variable to which the result is bound are extracted from the
+  // `ExistsExpression`. The returned subtree can then be used to evaluate the
+  // `expression`. Note: `ExistsExpression` is a simple dummy that only reads
+  // the values of the column that is added by the `ExistsJoin`.
+  static std::shared_ptr<QueryExecutionTree> addExistsJoinsToSubtree(
       const sparqlExpression::SparqlExpressionPimpl& expression,
       std::shared_ptr<QueryExecutionTree> subtree, QueryExecutionContext* qec,
       const ad_utility::SharedCancellationHandle& cancellationHandle);
 
+  // All following functions are inherited from `Operation`, see there for
+  // comments.
  protected:
   string getCacheKeyImpl() const override;
 
diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp
index 9da7c12724..08393d9fb5 100644
--- a/src/engine/Filter.cpp
+++ b/src/engine/Filter.cpp
@@ -12,11 +12,9 @@
 #include "engine/CallFixedSize.h"
 #include "engine/ExistsJoin.h"
 #include "engine/QueryExecutionTree.h"
-#include "engine/QueryPlanner.h"
 #include "engine/sparqlExpressions/SparqlExpression.h"
 #include "engine/sparqlExpressions/SparqlExpressionGenerators.h"
 #include "engine/sparqlExpressions/SparqlExpressionValueGetters.h"
-#include "sparqlExpressions/ExistsExpression.h"
 
 using std::endl;
 using std::string;
@@ -31,7 +29,7 @@ Filter::Filter(QueryExecutionContext* qec,
     : Operation(qec),
       _subtree(std::move(subtree)),
       _expression{std::move(expression)} {
-  _subtree = ExistsJoin::addExistsScansToSubtree(
+  _subtree = ExistsJoin::addExistsJoinsToSubtree(
       _expression, std::move(_subtree), getExecutionContext(),
       cancellationHandle_);
   setPrefilterExpressionForChildren();
diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
index 3e8af1cb29..65c7b85d11 100644
--- a/src/engine/GroupBy.cpp
+++ b/src/engine/GroupBy.cpp
@@ -55,7 +55,7 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector<Variable> groupByVariables,
   auto sortColumns = computeSortColumns(subtree.get());
 
   for (const auto& alias : _aliases) {
-    subtree = ExistsJoin::addExistsScansToSubtree(
+    subtree = ExistsJoin::addExistsJoinsToSubtree(
         alias._expression, std::move(subtree), getExecutionContext(),
         cancellationHandle_);
   }
diff --git a/src/engine/sparqlExpressions/ExistsExpression.cpp b/src/engine/sparqlExpressions/ExistsExpression.cpp
deleted file mode 100644
index 6737d3ed7b..0000000000
--- a/src/engine/sparqlExpressions/ExistsExpression.cpp
+++ /dev/null
@@ -1,5 +0,0 @@
-//
-// Created by kalmbacj on 1/7/25.
-//
-
-#include "ExistsExpression.h"
diff --git a/src/engine/sparqlExpressions/ExistsExpression.h b/src/engine/sparqlExpressions/ExistsExpression.h
index 343c195e82..1313b342b0 100644
--- a/src/engine/sparqlExpressions/ExistsExpression.h
+++ b/src/engine/sparqlExpressions/ExistsExpression.h
@@ -1,6 +1,6 @@
-//
-// Created by kalmbacj on 1/7/25.
-//
+//  Copyright 2025, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #pragma once
 
@@ -9,19 +9,28 @@
 #include "engine/sparqlExpressions/SparqlExpression.h"
 #include "parser/ParsedQuery.h"
 
+// The expression that corresponds to the `EXISTS` function.
+// The implementation only reads the value of a precomputed variable. The actual
+// computation of EXISTS is done by the `ExistsJoin` class.
 namespace sparqlExpression {
 class ExistsExpression : public SparqlExpression {
  private:
+  // The argument (a group graph pattern) of the EXISTS. This is set during the
+  // parsing and is required and read by the `ExistsJoin` class.
   ParsedQuery argument_;
+
+  // Each `ExistsExpression` has a unique index and a unique variable name that
+  // is used to communicate between the `ExistsExpression` and the `ExistsJoin`.
   static inline std::atomic<size_t> indexCounter_ = 0;
   size_t index_ = ++indexCounter_;
   Variable variable_{absl::StrCat("?ql_internal_exists_", index_)};
 
  public:
+  explicit ExistsExpression(ParsedQuery query) : argument_{std::move(query)} {}
   const auto& argument() const { return argument_; }
   const auto& variable() const { return variable_; }
-  ExistsExpression(ParsedQuery query) : argument_{std::move(query)} {}
 
+  // Evaluate only reads the variable which is written by the `ExistsJoin`.
   ExpressionResult evaluate(EvaluationContext* context) const override {
     AD_CONTRACT_CHECK(context->_variableToColumnMap.contains(variable_));
     return variable_;
@@ -31,17 +40,19 @@ class ExistsExpression : public SparqlExpression {
   [[nodiscard]] string getCacheKey(
       const VariableToColumnMap& varColMap) const override {
     if (varColMap.contains(variable_)) {
-      return absl::StrCat("EXISTS WITH COL ",
+      return absl::StrCat("ExistsExpression col# ",
                           varColMap.at(variable_).columnIndex_);
     } else {
-      // This means that the necessary `ExistsScan` hasn't been set up yet.
-      // It is not possible to cache such incomplete operations, so we return
-      // a random cache key.
+      // This means that the necessary `ExistsJoin` hasn't been set up yet. This
+      // can for example happen if the parsing (which sets up the
+      // `ExistsExpression`) is completed, but the query planning (which sets up
+      // the `ExistsJoin`) is still in progress. It is not possible to cache
+      // such incomplete operations, so we return a random cache key.
       return std::to_string(ad_utility::FastRandomIntGenerator<size_t>{}());
     }
   }
 
-  // ____________________________________________________________________________
+  // This is in fact an `ExistsExpression`.
   bool isExistsExpression() const override { return true; }
 
  private:

From 3a574eab1a8ad78482ff2f781bb6ecad108abc7d Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 9 Jan 2025 10:55:08 +0100
Subject: [PATCH 20/30] This is commented and very clean. The only thing that
 is missing, is some corner case tests, and maybe cleaning up the parsing of
 the active dataset clauses.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/GroupBy.cpp                        | 21 +++++++++++++-----
 .../sparqlParser/SparqlQleverVisitor.cpp      | 22 ++++++++++++-------
 src/parser/sparqlParser/SparqlQleverVisitor.h |  3 +++
 3 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
index 65c7b85d11..46ff7a410a 100644
--- a/src/engine/GroupBy.cpp
+++ b/src/engine/GroupBy.cpp
@@ -373,6 +373,8 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) {
   }
 
   if (useHashMapOptimization) {
+    // Helper lambda that calls `computeGroupByForHashMapOptimization` for the
+    // given `subresults`.
     auto computeWithHashMap = [this, &metadataForUnsequentialData,
                                &groupByCols](auto&& subresults) {
       auto doCompute = [&]<int NumCols> {
@@ -383,9 +385,10 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) {
       return ad_utility::callFixedSize(groupByCols.size(), doCompute);
     };
 
+    // Now call `computeWithHashMap` and return the result. It expects a range
+    // of results, so if the result is fully materialized, we create an array
+    // with a single element.
     if (subresult->isFullyMaterialized()) {
-      // `computeWithHashMap` takes a range, so we artificially create one with
-      // a single input.
       return computeWithHashMap(
           std::array{std::pair{std::cref(subresult->idTable()),
                                std::cref(subresult->localVocab())}});
@@ -1513,29 +1516,35 @@ Result GroupBy::computeGroupByForHashMapOptimization(
                        NUM_GROUP_COLUMNS == 0);
   LocalVocab localVocab;
 
-  // Initialize aggregation data
+  // Initialize the data for the aggregates of the GROUP BY operation.
   HashMapAggregationData<NUM_GROUP_COLUMNS> aggregationData(
       getExecutionContext()->getAllocator(), aggregateAliases,
       columnIndices.size());
 
+  // Process the input blocks (pairs of `IdTable` and `LocalVocab`) one after
+  // the other.
   ad_utility::Timer lookupTimer{ad_utility::Timer::Stopped};
   ad_utility::Timer aggregationTimer{ad_utility::Timer::Stopped};
   for (const auto& [inputTableRef, inputLocalVocabRef] : subresults) {
-    // Also support `std::reference_wrapper` as the input.
     const IdTable& inputTable = inputTableRef;
     const LocalVocab& inputLocalVocab = inputLocalVocabRef;
 
+    // Merge the local vocab of each input block.
+    //
+    // NOTE: If the input blocks have very similar or even identical non-empty
+    // local vocabs, no deduplication is performed.
     localVocab.mergeWith(std::span{&inputLocalVocab, 1});
-    // Initialize evaluation context
+    // Setup the `EvaluationContext` for this input block.
     sparqlExpression::EvaluationContext evaluationContext(
         *getExecutionContext(), _subtree->getVariableColumns(), inputTable,
         getExecutionContext()->getAllocator(), localVocab, cancellationHandle_,
         deadline_);
-
     evaluationContext._groupedVariables = ad_utility::HashSet<Variable>{
         _groupByVariables.begin(), _groupByVariables.end()};
     evaluationContext._isPartOfGroupBy = true;
 
+    // Iterate over the rows of this input block. Process (up to)
+    // `GROUP_BY_HASH_MAP_BLOCK_SIZE` rows at a time.
     for (size_t i = 0; i < inputTable.size();
          i += GROUP_BY_HASH_MAP_BLOCK_SIZE) {
       checkCancellation();
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index 41e297120c..6c1bf6d7eb 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -25,7 +25,6 @@
 #include "engine/sparqlExpressions/SampleExpression.h"
 #include "engine/sparqlExpressions/StdevExpression.h"
 #include "engine/sparqlExpressions/UuidExpressions.h"
-#include "generated/SparqlAutomaticParser.h"
 #include "global/Constants.h"
 #include "global/RuntimeParameters.h"
 #include "parser/GraphPatternOperation.h"
@@ -1370,7 +1369,6 @@ SparqlFilter Visitor::visit(Parser::FilterRContext* ctx) {
   // expression contains unbound variables, because the variables of the FILTER
   // might be bound after the filter appears in the query (which is perfectly
   // legal).
-  auto pimpl = visitExpressionPimpl(ctx->constraint());
   return SparqlFilter{visitExpressionPimpl(ctx->constraint())};
 }
 
@@ -2429,17 +2427,25 @@ SparqlExpression::Ptr Visitor::visit(Parser::StrReplaceExpressionContext* ctx) {
 // ____________________________________________________________________________________
 ExpressionPtr Visitor::visitExists(Parser::GroupGraphPatternContext* pattern,
                                    bool negate) {
+  // The argument of the EXISTS is a completely independent GroupGraphPattern
+  // (except for the FROM [NAMED] clauses), so we have to back up and restore
+  // all global state when parsing EXISTS.
   auto queryBackup = std::exchange(parsedQuery_, ParsedQuery{});
   auto visibleVariablesSoFar = std::move(visibleVariables_);
   visibleVariables_.clear();
+
+  // Parse the argument of EXISTS.
   auto group = visit(pattern);
-  ParsedQuery query = std::exchange(parsedQuery_, std::move(queryBackup));
-  query.selectClause().setAsterisk();
-  query._rootGraphPattern = std::move(group);
-  query.datasetClauses_ = activeDatasetClauses_;
+  ParsedQuery argumentOfExists =
+      std::exchange(parsedQuery_, std::move(queryBackup));
+  argumentOfExists.selectClause().setAsterisk();
+  argumentOfExists._rootGraphPattern = std::move(group);
+
+  // EXISTS inherits the FROM [NAMED] clauses from the outer query.
+  argumentOfExists.datasetClauses_ = activeDatasetClauses_;
   visibleVariables_ = std::move(visibleVariablesSoFar);
-  auto exists =
-      std::make_unique<sparqlExpression::ExistsExpression>(std::move(query));
+  auto exists = std::make_unique<sparqlExpression::ExistsExpression>(
+      std::move(argumentOfExists));
   if (negate) {
     return sparqlExpression::makeUnaryNegateExpression(std::move(exists));
   } else {
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h
index 3d7aa0dd86..2fd0d6bc9b 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.h
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -78,6 +78,9 @@ class SparqlQleverVisitor {
   // query. This may contain duplicates. A variable is added via
   // `addVisibleVariable`.
   std::vector<Variable> visibleVariables_{};
+
+  // The FROM [NAMED] clauses of the query that is currently being parsed.
+  // Those are currently needed when parsing an EXISTS clause inside the query.
   ParsedQuery::DatasetClauses activeDatasetClauses_;
   PrefixMap prefixMap_{};
   // We need to remember the prologue (prefix declarations) when we encounter it

From 5809be2bdf69cc8b701496edb38874380ebf2b97 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 9 Jan 2025 16:04:45 +0100
Subject: [PATCH 21/30] better tests.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 test/QueryPlannerTest.cpp | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 89601732e8..c2927b29e9 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -2918,6 +2918,8 @@ TEST(QueryPlanner, GroupByRedundantParensAndVariables) {
 TEST(QueryPlanner, Exists) {
   auto xyz = h::IndexScanFromStrings("?x", "?y", "?z");
   auto abc = h::IndexScanFromStrings("?a", "?b", "?c");
+  auto def = h::IndexScanFromStrings("?d", "?e", "?f");
+  auto ghi = h::IndexScanFromStrings("?g", "?h", "?i");
   using V = Variable;
   // Simple tests for EXISTS with FILTER, BIND, and GROUP BY.
   h::expect("SELECT * { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
@@ -2930,6 +2932,26 @@ TEST(QueryPlanner, Exists) {
       h::GroupBy({V{"?x"}}, {"(SAMPLE(EXISTS{?a ?b ?c}) as ?s)"},
                  h::ExistsJoin(xyz, abc)));
 
+  // Similar tests, but with multiple EXISTS clauses
+  auto existsAbcDef = h::ExistsJoin(h::ExistsJoin(xyz, abc), def);
+  h::expect(
+      "SELECT * { ?x ?y ?z FILTER (EXISTS {?a ?b ?c} || EXISTS {?d ?e ?f})}",
+      h::Filter("EXISTS {?a ?b ?c} || EXISTS {?d ?e ?f}", existsAbcDef));
+  ;
+  h::expect(
+      "SELECT * { ?x ?y ?z BIND(EXISTS {?a ?b ?c} || EXISTS {?d ?e ?f} as "
+      "?bound)}",
+      h::Bind(existsAbcDef, "EXISTS {?a ?b ?c} || EXISTS {?d ?e ?f}",
+              Variable("?bound")));
+
+  h::expect(
+      "SELECT ?x (SAMPLE(EXISTS {?a ?b ?c} || EXISTS {?d ?e ?f}) as ?s) "
+      "(SAMPLE(EXISTS{?g ?h ?i}) as ?t) { ?x ?y ?z } GROUP BY ?x",
+      h::GroupBy({V{"?x"}},
+                 {"(SAMPLE(EXISTS {?a ?b ?c} || EXISTS {?d ?e ?f}) as ?s)",
+                  "(SAMPLE(EXISTS{?g ?h ?i}) as ?t)"},
+                 h::ExistsJoin(existsAbcDef, ghi)));
+
   // Test the interaction of FROM with EXISTS.
   using H = ad_utility::HashSet<std::string>;
   auto xyzg = h::IndexScanFromStrings("?x", "?y", "?z", {}, H{"<g>"});

From 52943570743cb6c9db292f6331e1b8304ab32379 Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Fri, 10 Jan 2025 03:59:43 +0100
Subject: [PATCH 22/30] Made a pass over `ExistsJoin.h` and `ExistsJoin.cpp`

---
 src/engine/ExistsJoin.cpp | 69 ++++++++++++++++++++++++---------------
 src/engine/ExistsJoin.h   | 28 +++++++++-------
 2 files changed, 58 insertions(+), 39 deletions(-)

diff --git a/src/engine/ExistsJoin.cpp b/src/engine/ExistsJoin.cpp
index 7ca230c799..4e0b3b5bde 100644
--- a/src/engine/ExistsJoin.cpp
+++ b/src/engine/ExistsJoin.cpp
@@ -1,6 +1,6 @@
-//  Copyright 2025, University of Freiburg,
-//                  Chair of Algorithms and Data Structures.
-//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+// Copyright 2025, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #include "engine/ExistsJoin.h"
 
@@ -19,6 +19,7 @@ ExistsJoin::ExistsJoin(QueryExecutionContext* qec,
       right_{std::move(right)},
       joinColumns_{QueryExecutionTree::getJoinColumns(*left_, *right_)},
       existsVariable_{std::move(existsVariable)} {
+  // Make sure that the left and right input are sorted on the join columns.
   std::tie(left_, right_) = QueryExecutionTree::createSortedTrees(
       std::move(left_), std::move(right_), joinColumns_);
 }
@@ -37,7 +38,7 @@ VariableToColumnMap ExistsJoin::computeVariableToColumnMap() const {
   auto res = left_->getVariableColumns();
   AD_CONTRACT_CHECK(
       !res.contains(existsVariable_),
-      "The target variable of an exists scan must be a new variable");
+      "The target variable of an EXISTS join must be a new variable");
   res[existsVariable_] = makeAlwaysDefinedColumn(getResultWidth() - 1);
   return res;
 }
@@ -50,16 +51,20 @@ size_t ExistsJoin::getResultWidth() const {
 
 // ____________________________________________________________________________
 vector<ColumnIndex> ExistsJoin::resultSortedOn() const {
+  // We add one column to `left_`, but do not change the order of the rows.
   return left_->resultSortedOn();
 }
 
 // ____________________________________________________________________________
 float ExistsJoin::getMultiplicity(size_t col) {
+  // The multiplicities of all columns except the last one are the same as in
+  // `left_`.
   if (col < getResultWidth() - 1) {
     return left_->getMultiplicity(col);
   }
-  // The multiplicity of the boolean column can be a dummy value, as it should
-  // be never used for joins etc.
+  // For the added (Boolean) column we take a dummy value, assuming that it
+  // will not be used for subsequent joins or other operations that make use of
+  // the multiplicities.
   return 1;
 }
 
@@ -82,13 +87,17 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   const auto& left = leftRes->idTable();
   const auto& right = rightRes->idTable();
 
-  // We reuse the generic `zipperJoinWithUndef` utility in the following way:
-  // It has (among others) two callbacks: One for each matching pair of rows
-  // from left and right, and one for rows in the left input that have no
-  // matching counterpart in the right input. The first callback can be a noop,
-  // and the second callback gives us exactly `NOT EXISTS`.
-
-  // Only extract the join columns from both inputs to make the following code
+  // We reuse the generic `zipperJoinWithUndef` function, which has two
+  // callbacks: one for each matching pair of rows from `left` and `right`, and
+  // one for rows in the left input that have no matching counterpart in the
+  // right input. The first callback can be a noop, and the second callback
+  // gives us exactly those rows, where the value in the to-be-added result
+  // column should be `false`.
+  //
+  // the inverse of the value needed for the added Boolean
+  // column.
+
+  // Extract the join columns from both inputs to make the following code
   // easier.
   ad_utility::JoinColumnMapping joinColumnData{joinColumns_, left.numColumns(),
                                                right.numColumns()};
@@ -96,11 +105,11 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
       left.asColumnSubsetView(joinColumnData.jcsLeft());
   IdTableView<0> joinColumnsRight =
       right.asColumnSubsetView(joinColumnData.jcsRight());
-
   checkCancellation();
 
-  // `isCheap` is true iff there are no UNDEF values in the join columns. In
-  // this case we can use a much cheaper algorithm.
+  // Compute `isCheap`, which is true iff there are no UNDEF values in the join
+  // columns (in which case we can use a simpler and cheaper join algorithm).
+  //
   // TODO<joka921> There are many other cases where a cheaper implementation can
   // be chosen, but we leave those for another PR, this is the most common case.
   namespace stdr = ql::ranges;
@@ -116,7 +125,8 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   // Nothing to do for the actual matches.
   auto noopRowAdder = ad_utility::noop;
 
-  // Store the indices of rows for which `exists` is `false`.
+  // Store the indices of rows for which the value of the `EXISTS` (in the added
+  // Boolean column) should be `false`.
   std::vector<size_t, ad_utility::AllocatorWithLimit<size_t>> notExistsIndices{
       allocator()};
   // The callback is called with iterators, so we convert them back to indices.
@@ -125,8 +135,9 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
         notExistsIndices.push_back(itLeft - begin);
       };
 
-  // Run the actual zipper join, with the possible optimization if we know, that
-  // there can be no UNDEF values.
+  // Run `zipperJoinWithUndef` with the described callbacks and the mentioned
+  // optimization in case we know that there are no UNDEF values in the join
+  // columns.
   auto checkCancellationLambda = [this] { checkCancellation(); };
   auto runZipperJoin = [&](auto findUndef) {
     [[maybe_unused]] auto numOutOfOrder = ad_utility::zipperJoinWithUndef(
@@ -140,7 +151,8 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
     runZipperJoin(ad_utility::findSmallerUndefRanges);
   }
 
-  // Set up the result;
+  // Add the result column from the computed `notExistsIndices` (which tell us
+  // where the value should be `false`).
   IdTable result = left.clone();
   result.addEmptyColumn();
   decltype(auto) existsCol = result.getColumn(getResultWidth() - 1);
@@ -149,8 +161,8 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
     existsCol[notExistsIndex] = Id::makeFromBool(false);
   }
 
-  // The result is a copy of the left input + and additional columns with only
-  // boolean values, so the local vocab of the left input is sufficient.
+  // The added column only contains Boolean values, and adds no new words to the
+  // local vocabulary, so we can simply copy the local vocab from `leftRes`.
   return {std::move(result), resultSortedOn(), leftRes->getCopyOfLocalVocab()};
 }
 
@@ -159,17 +171,20 @@ std::shared_ptr<QueryExecutionTree> ExistsJoin::addExistsJoinsToSubtree(
     const sparqlExpression::SparqlExpressionPimpl& expression,
     std::shared_ptr<QueryExecutionTree> subtree, QueryExecutionContext* qec,
     const ad_utility::SharedCancellationHandle& cancellationHandle) {
-  // First extract all the `EXISTS` functions from the expression.
+  // Extract all `EXISTS` functions from the given `expression`.
   std::vector<const sparqlExpression::SparqlExpression*> existsExpressions;
   expression.getPimpl()->getExistsExpressions(existsExpressions);
 
-  // For each of the EXISTS functions add one `ExistsJoin`
+  // For each `EXISTS` function, add the corresponding `ExistsJoin`.
   for (auto* expr : existsExpressions) {
     const auto& exists =
         dynamic_cast<const sparqlExpression::ExistsExpression&>(*expr);
-    // Currently some FILTERs are applied multiple times especially when there
-    // are OPTIONAL joins in the query. In these cases we have to make sure that
-    // the `ExistsScan` is added only once.
+    // Currently some FILTERs are applied multiple times (in particular, this
+    // happens when there are OPTIONAL joins in the query). In these cases we
+    // have to make sure that the `ExistsJoin` is added only once.
+    //
+    // TODO(question from Hannah's review): Why does the following implement
+    // what the preceding comment says?
     if (subtree->isVariableCovered(exists.variable())) {
       continue;
     }
diff --git a/src/engine/ExistsJoin.h b/src/engine/ExistsJoin.h
index 4ff44fe94c..b319c304c9 100644
--- a/src/engine/ExistsJoin.h
+++ b/src/engine/ExistsJoin.h
@@ -1,13 +1,14 @@
-//  Copyright 2025, University of Freiburg,
-//                  Chair of Algorithms and Data Structures.
-//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+// Copyright 2025, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #pragma once
 
 #include "engine/Operation.h"
 #include "engine/QueryExecutionTree.h"
 
-// The implementation of the SPARQL `EXISTS` function. It takes two subtrees,
+// The implementation of an "EXISTS join", which we use to realize the semantics
+// of the SPARQL `EXISTS` function. The join takes two subtrees as input, and
 // and returns the left subtree with an additional boolean column that is `true`
 // iff at least one matching row is contained in the right subtree.
 class ExistsJoin : public Operation {
@@ -17,25 +18,28 @@ class ExistsJoin : public Operation {
   std::shared_ptr<QueryExecutionTree> right_;
   std::vector<std::array<ColumnIndex, 2>> joinColumns_;
 
-  // The variable of the added result column.
+  // The variable of the added (Boolean) result column.
   Variable existsVariable_;
 
  public:
-  // Constructor. The `existsVariable` (the variable for the added boolean
-  // column) must not yet be bound by `left`.
+  // Constructor. The `existsVariable` (the variable for the added column) must
+  // not yet be bound by `left`.
   ExistsJoin(QueryExecutionContext* qec,
              std::shared_ptr<QueryExecutionTree> left,
              std::shared_ptr<QueryExecutionTree> right,
              Variable existsVariable);
 
   // For a given subtree and a given expression, extract all the
-  // `ExistsExpressions` from the expression and add one `ExistsJoin` per
-  // `ExistsExpression` to the subtree. The left side of the `ExistsJoin` is the
-  // input subtree, the right hand side of the `ExistsJoin` as well as the
+  // `ExistsExpression`s from the expression and add one `ExistsJoin` per
+  // `ExistsExpression` to the subtree. The left side of the `ExistsJoin` is
+  // the input subtree, the right hand side of the `ExistsJoin` as well as the
   // variable to which the result is bound are extracted from the
   // `ExistsExpression`. The returned subtree can then be used to evaluate the
-  // `expression`. Note: `ExistsExpression` is a simple dummy that only reads
-  // the values of the column that is added by the `ExistsJoin`.
+  // `expression`.
+  //
+  // NOTE: `ExistsExpression` is a dummy that only reads the values of the
+  // column that is added by the `ExistsJoin`. The main work is done by the
+  // latter and not by the former.
   static std::shared_ptr<QueryExecutionTree> addExistsJoinsToSubtree(
       const sparqlExpression::SparqlExpressionPimpl& expression,
       std::shared_ptr<QueryExecutionTree> subtree, QueryExecutionContext* qec,

From 2bc5bdff745edc9320f4e2012fd9451947b9f2de Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Wed, 5 Feb 2025 03:26:17 +0100
Subject: [PATCH 23/30] Changes by Hannah improving documentation and comments

---
 src/engine/ExistsJoin.cpp                     | 29 +++++++-------
 src/engine/ExistsJoin.h                       | 30 ++++++++-------
 src/engine/GroupBy.cpp                        | 11 +++---
 src/engine/MultiColumnJoin.cpp                | 24 ++++++------
 .../sparqlExpressions/ExistsExpression.h      | 38 ++++++++++---------
 .../sparqlExpressions/SparqlExpression.h      |  2 +-
 .../sparqlParser/SparqlQleverVisitor.cpp      | 30 ++++++++-------
 src/parser/sparqlParser/SparqlQleverVisitor.h | 10 +++--
 test/ExceptionTest.cpp                        |  6 +--
 test/QueryPlannerTest.cpp                     | 11 +++---
 test/SparqlAntlrParserTest.cpp                |  8 ++--
 test/engine/ExistsJoinTest.cpp                | 24 +++++++-----
 12 files changed, 122 insertions(+), 101 deletions(-)

diff --git a/src/engine/ExistsJoin.cpp b/src/engine/ExistsJoin.cpp
index 4e0b3b5bde..a58a22a47c 100644
--- a/src/engine/ExistsJoin.cpp
+++ b/src/engine/ExistsJoin.cpp
@@ -93,9 +93,6 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   // right input. The first callback can be a noop, and the second callback
   // gives us exactly those rows, where the value in the to-be-added result
   // column should be `false`.
-  //
-  // the inverse of the value needed for the added Boolean
-  // column.
 
   // Extract the join columns from both inputs to make the following code
   // easier.
@@ -110,16 +107,17 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   // Compute `isCheap`, which is true iff there are no UNDEF values in the join
   // columns (in which case we can use a simpler and cheaper join algorithm).
   //
-  // TODO<joka921> There are many other cases where a cheaper implementation can
-  // be chosen, but we leave those for another PR, this is the most common case.
-  namespace stdr = ql::ranges;
+  // TODO<joka921> This is the most common case. There are many other cases
+  // where the generic `zipperJoinWithUndef` can be optimized. We will leave
+  // those for a later PR.
   size_t numJoinColumns = joinColumnsLeft.numColumns();
   AD_CORRECTNESS_CHECK(numJoinColumns == joinColumnsRight.numColumns());
-  bool isCheap = stdr::none_of(
+  bool isCheap = ql::ranges::none_of(
       ad_utility::integerRange(numJoinColumns), [&](const auto& col) {
-        return (stdr::any_of(joinColumnsRight.getColumn(col),
-                             &Id::isUndefined)) ||
-               (stdr::any_of(joinColumnsLeft.getColumn(col), &Id::isUndefined));
+        return (ql::ranges::any_of(joinColumnsRight.getColumn(col),
+                                   &Id::isUndefined)) ||
+               (ql::ranges::any_of(joinColumnsLeft.getColumn(col),
+                                   &Id::isUndefined));
       });
 
   // Nothing to do for the actual matches.
@@ -179,15 +177,14 @@ std::shared_ptr<QueryExecutionTree> ExistsJoin::addExistsJoinsToSubtree(
   for (auto* expr : existsExpressions) {
     const auto& exists =
         dynamic_cast<const sparqlExpression::ExistsExpression&>(*expr);
-    // Currently some FILTERs are applied multiple times (in particular, this
-    // happens when there are OPTIONAL joins in the query). In these cases we
-    // have to make sure that the `ExistsJoin` is added only once.
-    //
-    // TODO(question from Hannah's review): Why does the following implement
-    // what the preceding comment says?
+    // If we have already considered this `EXISTS` (which we can detect by its
+    // variable), skip it. This can happen because some `FILTER`s (which may
+    // contain `EXISTS` functions) are applied multiple times (for example,
+    // when there are OPTIONAL joins in the query).
     if (subtree->isVariableCovered(exists.variable())) {
       continue;
     }
+
     QueryPlanner qp{qec, cancellationHandle};
     auto pq = exists.argument();
     auto tree =
diff --git a/src/engine/ExistsJoin.h b/src/engine/ExistsJoin.h
index b319c304c9..43dbbe074f 100644
--- a/src/engine/ExistsJoin.h
+++ b/src/engine/ExistsJoin.h
@@ -9,8 +9,8 @@
 
 // The implementation of an "EXISTS join", which we use to realize the semantics
 // of the SPARQL `EXISTS` function. The join takes two subtrees as input, and
-// and returns the left subtree with an additional boolean column that is `true`
-// iff at least one matching row is contained in the right subtree.
+// returns the left subtree with an additional boolean column that is `true` iff
+// at least one matching row is contained in the right subtree.
 class ExistsJoin : public Operation {
  private:
   // The left and right child.
@@ -23,23 +23,27 @@ class ExistsJoin : public Operation {
 
  public:
   // Constructor. The `existsVariable` (the variable for the added column) must
-  // not yet be bound by `left`.
+  // not yet be bound in `left`.
   ExistsJoin(QueryExecutionContext* qec,
              std::shared_ptr<QueryExecutionTree> left,
              std::shared_ptr<QueryExecutionTree> right,
              Variable existsVariable);
 
-  // For a given subtree and a given expression, extract all the
-  // `ExistsExpression`s from the expression and add one `ExistsJoin` per
-  // `ExistsExpression` to the subtree. The left side of the `ExistsJoin` is
-  // the input subtree, the right hand side of the `ExistsJoin` as well as the
-  // variable to which the result is bound are extracted from the
-  // `ExistsExpression`. The returned subtree can then be used to evaluate the
-  // `expression`.
+  // Extract all `ExistsExpression`s from the given `expression`. For each
+  // `ExistsExpression`, add an `ExistsJoin`. The left side of the first
+  // `ExistsJoin` is the input `subtree`. The left side of subsequent
+  // `ExistsJoin`s is the previous `ExistsJoin`. The right side of each
+  // `ExistsJoin` is the argument of the respective `ExistsExpression`. When
+  // there are no `ExistsExpression`s, return the input `subtree` unchanged.
   //
-  // NOTE: `ExistsExpression` is a dummy that only reads the values of the
-  // column that is added by the `ExistsJoin`. The main work is done by the
-  // latter and not by the former.
+  // The returned subtree will contain one additional column for each
+  // `ExistsExpression`, which contains the result of the respective
+  // `ExistsJoin`. The `ExistsExpression` just reads the values of this column.
+  // The main work is done by the `ExistsJoin`.
+  //
+  // This function should be called in the constructor of each `Operation`,
+  // where an `EXISTS` expression can occur. For example, in the constructor of
+  // `BIND` and `FILTER`.
   static std::shared_ptr<QueryExecutionTree> addExistsJoinsToSubtree(
       const sparqlExpression::SparqlExpressionPimpl& expression,
       std::shared_ptr<QueryExecutionTree> subtree, QueryExecutionContext* qec,
diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
index 46ff7a410a..95ad6a6e51 100644
--- a/src/engine/GroupBy.cpp
+++ b/src/engine/GroupBy.cpp
@@ -1,8 +1,7 @@
-// Copyright 2018, University of Freiburg,
-// Chair of Algorithms and Data Structures.
-// Author:
-//   2018      Florian Kramer (florian.kramer@mail.uni-freiburg.de)
-//   2020-     Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de)
+// Copyright 2018 - 2025, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Florian Kramer [2018 - 2020]
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #include "engine/GroupBy.h"
 
@@ -54,11 +53,13 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector<Variable> groupByVariables,
 
   auto sortColumns = computeSortColumns(subtree.get());
 
+  // Aliases are like `BIND`s, which may contain `EXISTS` expressions.
   for (const auto& alias : _aliases) {
     subtree = ExistsJoin::addExistsJoinsToSubtree(
         alias._expression, std::move(subtree), getExecutionContext(),
         cancellationHandle_);
   }
+
   _subtree =
       QueryExecutionTree::createSortedTree(std::move(subtree), sortColumns);
 }
diff --git a/src/engine/MultiColumnJoin.cpp b/src/engine/MultiColumnJoin.cpp
index b605616ecb..a831c4cd55 100644
--- a/src/engine/MultiColumnJoin.cpp
+++ b/src/engine/MultiColumnJoin.cpp
@@ -1,6 +1,7 @@
-// Copyright 2018, University of Freiburg,
-// Chair of Algorithms and Data Structures.
-// Author: Florian Kramer (florian.kramer@netpun.uni-freiburg.de)
+// Copyright 2018 - 2025, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Florian Kramer [2018 - 2020]
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #include "MultiColumnJoin.h"
 
@@ -237,15 +238,16 @@ void MultiColumnJoin::computeMultiColumnJoin(
     rowAdder.addRow(itLeft - beginLeft, itRight - beginRight);
   };
 
-  // `isCheap` is true iff there are no UNDEF values in the join columns. In
-  // this case we can use a much cheaper algorithm.
-  // TODO<joka921> There are many other cases where a cheaper implementation can
-  // be chosen, but we leave those for another PR, this is the most common case.
-  namespace stdr = ql::ranges;
-  bool isCheap = stdr::none_of(joinColumns, [&](const auto& jcs) {
+  // Compute `isCheap`, which is true iff there are no UNDEF values in the join
+  // columns (in which case we can use a simpler and cheaper join algorithm).
+  //
+  // TODO<joka921> This is the most common case. There are many other cases
+  // where the generic `zipperJoinWithUndef` can be optimized. We will leave
+  // those for a later PR.
+  bool isCheap = ql::ranges::none_of(joinColumns, [&](const auto& jcs) {
     auto [leftCol, rightCol] = jcs;
-    return (stdr::any_of(right.getColumn(rightCol), &Id::isUndefined)) ||
-           (stdr::any_of(left.getColumn(leftCol), &Id::isUndefined));
+    return (ql::ranges::any_of(right.getColumn(rightCol), &Id::isUndefined)) ||
+           (ql::ranges::any_of(left.getColumn(leftCol), &Id::isUndefined));
   });
 
   auto checkCancellationLambda = [this] { checkCancellation(); };
diff --git a/src/engine/sparqlExpressions/ExistsExpression.h b/src/engine/sparqlExpressions/ExistsExpression.h
index 1313b342b0..b13071b657 100644
--- a/src/engine/sparqlExpressions/ExistsExpression.h
+++ b/src/engine/sparqlExpressions/ExistsExpression.h
@@ -1,6 +1,6 @@
-//  Copyright 2025, University of Freiburg,
-//                  Chair of Algorithms and Data Structures.
-//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+// Copyright 2025, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #pragma once
 
@@ -9,18 +9,19 @@
 #include "engine/sparqlExpressions/SparqlExpression.h"
 #include "parser/ParsedQuery.h"
 
-// The expression that corresponds to the `EXISTS` function.
-// The implementation only reads the value of a precomputed variable. The actual
-// computation of EXISTS is done by the `ExistsJoin` class.
+// The `SparqlExpression` for `EXISTS`. The implementation is straightforward
+// because it only reads the value computed by the special `ExistsJoin`
+// operation, where the actual work is done (see the comments there).
 namespace sparqlExpression {
 class ExistsExpression : public SparqlExpression {
  private:
-  // The argument (a group graph pattern) of the EXISTS. This is set during the
-  // parsing and is required and read by the `ExistsJoin` class.
+  // The argument of the `EXISTS`, which is a group graph pattern. This is set
+  // during parsing and is used by the `ExistsJoin` operation.
   ParsedQuery argument_;
 
   // Each `ExistsExpression` has a unique index and a unique variable name that
-  // is used to communicate between the `ExistsExpression` and the `ExistsJoin`.
+  // is used to communicate the result computed by the `ExistsJoin` to this
+  // `ExistsExpression`.
   static inline std::atomic<size_t> indexCounter_ = 0;
   size_t index_ = ++indexCounter_;
   Variable variable_{absl::StrCat("?ql_internal_exists_", index_)};
@@ -30,29 +31,32 @@ class ExistsExpression : public SparqlExpression {
   const auto& argument() const { return argument_; }
   const auto& variable() const { return variable_; }
 
-  // Evaluate only reads the variable which is written by the `ExistsJoin`.
+  // To evaluate, just return the variable of the column computed by the
+  // `ExistsJoin`.
   ExpressionResult evaluate(EvaluationContext* context) const override {
     AD_CONTRACT_CHECK(context->_variableToColumnMap.contains(variable_));
     return variable_;
   }
 
-  //____________________________________________________________________________
+  // Return the cache key, which in the normal case depends on the column index
+  // of the variable computed by the `ExistsJoin`.
+  //
+  // There is a special case, where the corresponding `ExistsJoin` has not
+  // been set up yet (because the query planning is not yet complete). Since we
+  // cannot cache incomplete operations, we return a random cache key in this
+  // case.
   [[nodiscard]] string getCacheKey(
       const VariableToColumnMap& varColMap) const override {
     if (varColMap.contains(variable_)) {
       return absl::StrCat("ExistsExpression col# ",
                           varColMap.at(variable_).columnIndex_);
     } else {
-      // This means that the necessary `ExistsJoin` hasn't been set up yet. This
-      // can for example happen if the parsing (which sets up the
-      // `ExistsExpression`) is completed, but the query planning (which sets up
-      // the `ExistsJoin` is still in progress). It is not possible to cache
-      // such incomplete operations, so we return a random cache key.
       return std::to_string(ad_utility::FastRandomIntGenerator<size_t>{}());
     }
   }
 
-  // This is in fact an `ExistsExpression`.
+  // This is the one expression, where this function should return `true`.
+  // Used to extract `EXISTS` expressions from a general expression tree.
   bool isExistsExpression() const override { return true; }
 
  private:
diff --git a/src/engine/sparqlExpressions/SparqlExpression.h b/src/engine/sparqlExpressions/SparqlExpression.h
index 7f5c551127..f033f27edc 100644
--- a/src/engine/sparqlExpressions/SparqlExpression.h
+++ b/src/engine/sparqlExpressions/SparqlExpression.h
@@ -127,7 +127,7 @@ class SparqlExpression {
   // implementation returns `false`.
   virtual bool isExistsExpression() const;
 
-  // Return non-null pointers to all `EXISTS` expressions in the subtree.
+  // Return non-null pointers to all `EXISTS` expressions in expression tree.
   // The result is passed in as a reference to simplify the recursive
   // implementation.
   virtual void getExistsExpressions(
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index da375c9f40..1fc6729e0e 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -1,9 +1,8 @@
-// Copyright 2021 - 2024, University of Freiburg
+// Copyright 2021 - 2025, University of Freiburg
 // Chair of Algorithms and Data Structures
-// Authors:
-//   2021 -    Hannah Bast <bast@cs.uni-freiburg.de>
-//   2022      Julian Mundhahs <mundhahj@tf.uni-freiburg.de>
-//   2022 -    Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+// Authors: Julian Mundhahs <mundhahj@tf.uni-freiburg.de>
+//          Hannah Bast <bast@cs.uni-freiburg.de>
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #include "parser/sparqlParser/SparqlQleverVisitor.h"
 
@@ -2443,28 +2442,33 @@ SparqlExpression::Ptr Visitor::visit(Parser::StrReplaceExpressionContext* ctx) {
                                                  std::move(children.at(2)));
 }
 
-// ____________________________________________________________________________________
+// ____________________________________________________________________________
 ExpressionPtr Visitor::visitExists(Parser::GroupGraphPatternContext* pattern,
                                    bool negate) {
-  // The argument of the EXISTS is a completely independent GroupGraphPattern
-  // (except for the FROM [NAMED] clauses), so we have to back up and restore
-  // all global  state when parsing EXISTS.
+  // The argument of `EXISTS` is a `GroupGraphPattern` that is independent from
+  // the rest of the query (except for the `FROM` and `FROM NAMED` clauses,
+  // which also apply to the argument of `EXISTS`). We therefore have to back up
+  // and restore all global state when parsing `EXISTS`.
   auto queryBackup = std::exchange(parsedQuery_, ParsedQuery{});
-  auto visibleVariablesSoFar = std::move(visibleVariables_);
+  auto visibleVariablesBackup = std::move(visibleVariables_);
   visibleVariables_.clear();
 
-  // Parse the argument of EXISTS.
+  // Parse the argument of `EXISTS`.
   auto group = visit(pattern);
   ParsedQuery argumentOfExists =
       std::exchange(parsedQuery_, std::move(queryBackup));
   argumentOfExists.selectClause().setAsterisk();
   argumentOfExists._rootGraphPattern = std::move(group);
 
-  // EXISTS inherits the FROM [NAMED] clauses from the outer argumentOfExists.
+  // The argument of `EXISTS` inherits the `FROM` and `FROM NAMED` clauses from
+  // the outer query.
   argumentOfExists.datasetClauses_ = activeDatasetClauses_;
-  visibleVariables_ = std::move(visibleVariablesSoFar);
+  visibleVariables_ = std::move(visibleVariablesBackup);
   auto exists = std::make_unique<sparqlExpression::ExistsExpression>(
       std::move(argumentOfExists));
+
+  // Handle `NOT EXISTS` (which is syntactically distinct from `! EXISTS`) by
+  // simply negating the `ExistsExpression`.
   if (negate) {
     return sparqlExpression::makeUnaryNegateExpression(std::move(exists));
   } else {
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h
index 2fd0d6bc9b..205309a5fa 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.h
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -79,10 +79,14 @@ class SparqlQleverVisitor {
   // `addVisibleVariable`.
   std::vector<Variable> visibleVariables_{};
 
-  // The FROM [NAMED] clauses of the query that is currently being parsed.
-  // Those are currently needed when parsing an EXISTS clause inside the query.
+  // The `FROM` and `FROM NAMED` clauses of the query that is currently
+  // being parsed. Those are inherited by certain constructs, which are
+  // otherwise independent (in particular, `EXISTS` and `DESCRIBE`).
   ParsedQuery::DatasetClauses activeDatasetClauses_;
+
+  // The map from prefixes to their full IRIs.
   PrefixMap prefixMap_{};
+
   // We need to remember the prologue (prefix declarations) when we encounter it
   // because we need it when we encounter a SERVICE query. When there is no
   // prologue, this string simply remains empty.
@@ -448,8 +452,6 @@ class SparqlQleverVisitor {
 
   ExpressionPtr visit(Parser::StrReplaceExpressionContext* ctx);
 
-  // The common implementation of the parsing of `EXISTS` and `NOT EXISTS`.
-  // The second argument is `true` for `NOT EXISTS`.
   ExpressionPtr visitExists(Parser::GroupGraphPatternContext* pattern,
                             bool negate);
 
diff --git a/test/ExceptionTest.cpp b/test/ExceptionTest.cpp
index eaf0d0504d..4cc649ebc0 100644
--- a/test/ExceptionTest.cpp
+++ b/test/ExceptionTest.cpp
@@ -1,6 +1,6 @@
-//  Copyright 2023, University of Freiburg,
-//                  Chair of Algorithms and Data Structures.
-//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+// Copyright 2023 - 2025, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 89601732e8..688b4abaf3 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -1,7 +1,7 @@
-// Copyright 2015 - 2024, University of Freiburg
+// Copyright 2015 - 2025, University of Freiburg
 // Chair of Algorithms and Data Structures
 // Authors: Björn Buchhold <buchhold@cs.uni-freiburg.de> [2015 - 2017]
-//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de> [2018 - 2024]
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #include <gmock/gmock.h>
 
@@ -2919,10 +2919,11 @@ TEST(QueryPlanner, Exists) {
   auto xyz = h::IndexScanFromStrings("?x", "?y", "?z");
   auto abc = h::IndexScanFromStrings("?a", "?b", "?c");
   using V = Variable;
+
   // Simple tests for EXISTS with FILTER, BIND, and GROUP BY.
-  h::expect("SELECT * { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}",
+  h::expect("SELECT * { ?x ?y ?z FILTER EXISTS {?a ?b ?c} }",
             h::Filter("EXISTS {?a ?b ?c}", h::ExistsJoin(xyz, abc)));
-  h::expect("SELECT * { ?x ?y ?z BIND(EXISTS {?a ?b ?c} as ?bound)}",
+  h::expect("SELECT * { ?x ?y ?z BIND(EXISTS {?a ?b ?c} as ?bound) }",
             h::Bind(h::ExistsJoin(xyz, abc), "EXISTS {?a ?b ?c}",
                     Variable("?bound")));
   h::expect(
@@ -2935,9 +2936,9 @@ TEST(QueryPlanner, Exists) {
   auto xyzg = h::IndexScanFromStrings("?x", "?y", "?z", {}, H{"<g>"});
   auto abcg = h::IndexScanFromStrings("?a", "?b", "?c", {}, H{"<g>"});
 
+  // Various uses of FILTER EXISTS.
   auto existsJoin = h::ExistsJoin(xyzg, abcg);
   auto filter = h::Filter("EXISTS {?a ?b ?c}", existsJoin);
-  // (use a lambda that only changes the beginning of the query).
   h::expect("SELECT * FROM <g> { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}", filter);
   h::expect("ASK FROM <g> { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}", filter);
   h::expect(
diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp
index a5f00ba723..1515d3605a 100644
--- a/test/SparqlAntlrParserTest.cpp
+++ b/test/SparqlAntlrParserTest.cpp
@@ -1,4 +1,4 @@
-// Copyright 2021 - 2024, University of Freiburg
+// Copyright 2021 - 2025, University of Freiburg
 // Chair of Algorithms and Data Structures
 // Authors: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 //          Julian Mundhahs <mundhahj@cs.uni-freiburg.de>
@@ -1890,8 +1890,9 @@ auto notExistsMatcher(Matcher<const ParsedQuery&> pattern) {
 TEST(SparqlParser, Exists) {
   using namespace existsTestHelpers;
   auto expectBuiltInCall = ExpectCompleteParse<&Parser::builtInCall>{};
-  // A matcher that matches the query `SELECT * { ?x <bar> ?foo}`, where the
-  // FROM and FROM NAMED clauses can still be specified via arguments.
+
+  // A matcher that matches the query `SELECT * { ?x <bar> ?foo }`, where the
+  // FROM and FROM NAMED clauses can be specified as arguments.
   using Graphs = ScanSpecificationAsTripleComponent::Graphs;
   auto selectABarFooMatcher = [](Graphs defaultGraphs = std::nullopt,
                                  Graphs namedGraphs = std::nullopt) {
@@ -1900,6 +1901,7 @@ TEST(SparqlParser, Exists) {
         m::GraphPattern(m::Triples({{Var{"?a"}, "<bar>", Var{"?foo"}}})),
         defaultGraphs, namedGraphs));
   };
+
   expectBuiltInCall("EXISTS {?a <bar> ?foo}",
                     existsMatcher(selectABarFooMatcher()));
   expectBuiltInCall("NOT EXISTS {?a <bar> ?foo}",
diff --git a/test/engine/ExistsJoinTest.cpp b/test/engine/ExistsJoinTest.cpp
index af72e5fbb6..197fdeeba6 100644
--- a/test/engine/ExistsJoinTest.cpp
+++ b/test/engine/ExistsJoinTest.cpp
@@ -1,4 +1,4 @@
-// Copyright 2024, University of Freiburg
+// Copyright 2024 - 2025, University of Freiburg
 // Chair of Algorithms and Data Structures
 // Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
@@ -15,6 +15,13 @@
 using namespace ad_utility::testing;
 
 namespace {
+
+// Helper function that computes an `ExistsJoin` of the given `leftInput` and
+// `rightInput` and checks that the result column is equal to `expectedAsBool`.
+// The first `numJoinColumns` columns of both `leftInput` and `rightInput` are
+// used as join columns.
+//
+// TODO<joka921> Also test permutations of the join columns.
 void testExists(const VectorTable& leftInput, const VectorTable& rightInput,
                 std::vector<bool> expectedAsBool, size_t numJoinColumns) {
   AD_CORRECTNESS_CHECK(leftInput.size() == expectedAsBool.size());
@@ -27,14 +34,12 @@ void testExists(const VectorTable& leftInput, const VectorTable& rightInput,
   using V = Variable;
   using Vars = std::vector<std::optional<Variable>>;
 
-  // TODO<joka921> Support more than one join column.
-  // TODO<joka921> also randomly permute the join columns.
-
+  // Helper lambda `makeChild` that turns a `VectorTable` input into a
+  // `QueryExecutionTree` with a `ValuesForTesting` operation.
   auto joinCol = [](size_t i) { return V{absl::StrCat("?joinCol_", i)}; };
   auto nonJoinCol = [i = 0]() mutable {
     return V{absl::StrCat("?nonJoinCol_", i++)};
   };
-
   auto makeChild = [&](const IdTable& input) {
     Vars vars;
     for (size_t i : ad_utility::integerRange(numJoinColumns)) {
@@ -48,11 +53,10 @@ void testExists(const VectorTable& leftInput, const VectorTable& rightInput,
                                                            vars);
   };
 
+  // Compute the `ExistsJoin` and check the result.
   auto exists =
       ExistsJoin{qec, makeChild(left), makeChild(right), V{"?exists"}};
-
   EXPECT_EQ(exists.getResultWidth(), left.numColumns() + 1);
-
   auto res = exists.computeResultOnlyForTesting();
   const auto& table = res.idTable();
   ASSERT_EQ(table.numRows(), left.size());
@@ -69,20 +73,20 @@ TEST(Exists, computeResult) {
   testExists({{3, 6}, {4, 7}, {5, 8}}, {{3, 15}, {3, 19}, {5, 37}},
              {true, false, true}, 1);
 
-  // UNDEF matches everything
+  // Single join column with one UNDEF (which always matches).
   auto U = Id::makeUndefined();
   testExists({{U, 13}, {3, 6}, {4, 7}, {5, 8}}, {{3, 15}, {3, 19}, {5, 37}},
              {true, true, false, true}, 1);
   testExists({{3, 6}, {4, 7}, {5, 8}}, {{U, 15}}, {true, true, true}, 1);
 
-  // Two join columns
+  // Two join columns.
   testExists({{3, 6}, {4, 7}, {5, 8}}, {{3, 15}, {3, 19}, {5, 37}},
              {false, false, false}, 2);
   testExists({{3, 6}, {4, 7}, {5, 8}},
              {{3, 6, 11}, {3, 19, 7}, {4, 8, 0}, {5, 8, 37}},
              {true, false, true}, 2);
 
-  // Two join columns with UNDEF
+  // Two join columns with UNDEFs in each column.
   testExists({{2, 2}, {3, U}, {4, 8}, {5, 8}},
              {{U, 8}, {3, 15}, {3, 19}, {5, U}, {5, 37}},
              {false, true, true, true}, 2);

From c2abaddabd663f0d53339a880c491ca9e73452e5 Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Wed, 5 Feb 2025 03:39:49 +0100
Subject: [PATCH 24/30] Fix typo

---
 src/engine/sparqlExpressions/ExistsExpression.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/engine/sparqlExpressions/ExistsExpression.h b/src/engine/sparqlExpressions/ExistsExpression.h
index b13071b657..dd880bdb09 100644
--- a/src/engine/sparqlExpressions/ExistsExpression.h
+++ b/src/engine/sparqlExpressions/ExistsExpression.h
@@ -55,7 +55,7 @@ class ExistsExpression : public SparqlExpression {
     }
   }
 
-  // This is the one expresssion, where this function should return `true`.
+  // This is the one expression, where this function should return `true`.
   // Used to extract `EXISTS` expressions from a general expression tree.
   bool isExistsExpression() const override { return true; }
 

From ee495f41a29e153f0d972a79c47e9e468d27120d Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 14 Feb 2025 18:55:08 +0100
Subject: [PATCH 25/30] The test is currently not compiling, as we still have
 to apply several changes.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/ExistsJoin.cpp      | 49 +++++++++++++++++++++-------------
 test/SparqlAntlrParserTest.cpp | 16 +++++++++++
 test/engine/ExistsJoinTest.cpp | 48 ++++++++++++++++++++-------------
 3 files changed, 75 insertions(+), 38 deletions(-)

diff --git a/src/engine/ExistsJoin.cpp b/src/engine/ExistsJoin.cpp
index a58a22a47c..94ca355b4b 100644
--- a/src/engine/ExistsJoin.cpp
+++ b/src/engine/ExistsJoin.cpp
@@ -4,6 +4,7 @@
 
 #include "engine/ExistsJoin.h"
 
+#include "CallFixedSize.h"
 #include "engine/QueryPlanner.h"
 #include "engine/sparqlExpressions/ExistsExpression.h"
 #include "engine/sparqlExpressions/SparqlExpression.h"
@@ -127,27 +128,37 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   // Boolean column) should be `false`.
   std::vector<size_t, ad_utility::AllocatorWithLimit<size_t>> notExistsIndices{
       allocator()};
-  // The callback is called with iterators, so we convert them back to indices.
-  auto actionForNotExisting =
-      [&notExistsIndices, begin = joinColumnsLeft.begin()](const auto& itLeft) {
-        notExistsIndices.push_back(itLeft - begin);
-      };
-
-  // Run `zipperJoinWithUndef` with the described callbacks and the mentioned
-  // optimization in case we know that there are no UNDEF values in the join
+  // Run the actual exists join, but use `callFixedSize` for the number of join
   // columns.
-  auto checkCancellationLambda = [this] { checkCancellation(); };
-  auto runZipperJoin = [&](auto findUndef) {
-    [[maybe_unused]] auto numOutOfOrder = ad_utility::zipperJoinWithUndef(
-        joinColumnsLeft, joinColumnsRight, ql::ranges::lexicographical_compare,
-        noopRowAdder, findUndef, findUndef, actionForNotExisting,
-        checkCancellationLambda);
+  auto runForNumJoinCols = [&notExistsIndices, isCheap, &noopRowAdder,
+                            &colsLeftDynamic = joinColumnsLeft,
+                            &colsRightDynamic = joinColumnsRight,
+                            this]<int NumJoinCols>() {
+    // The callback is called with iterators, so we convert them back to
+    // indices.
+    auto joinColumnsLeft = colsLeftDynamic.asStaticView<NumJoinCols>();
+    auto joinColumnsRight = colsRightDynamic.asStaticView<NumJoinCols>();
+    auto actionForNotExisting =
+        [&notExistsIndices, begin = joinColumnsLeft.begin()](
+            const auto& itLeft) { notExistsIndices.push_back(itLeft - begin); };
+
+    // Run `zipperJoinWithUndef` with the described callbacks and the mentioned
+    // optimization in case we know that there are no UNDEF values in the join
+    // columns.
+    auto checkCancellationLambda = [this] { checkCancellation(); };
+    auto runZipperJoin = [&](auto findUndef) {
+      [[maybe_unused]] auto numOutOfOrder = ad_utility::zipperJoinWithUndef(
+          joinColumnsLeft, joinColumnsRight,
+          ql::ranges::lexicographical_compare, noopRowAdder, findUndef,
+          findUndef, actionForNotExisting, checkCancellationLambda);
+    };
+    if (isCheap) {
+      runZipperJoin(ad_utility::noop);
+    } else {
+      runZipperJoin(ad_utility::findSmallerUndefRanges);
+    }
   };
-  if (isCheap) {
-    runZipperJoin(ad_utility::noop);
-  } else {
-    runZipperJoin(ad_utility::findSmallerUndefRanges);
-  }
+  ad_utility::callFixedSize(numJoinColumns, runForNumJoinCols);
 
   // Add the result column from the computed `notExistsIndices` (which tell us
   // where the value should be `false`).
diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp
index 181d395446..49df3bfe98 100644
--- a/test/SparqlAntlrParserTest.cpp
+++ b/test/SparqlAntlrParserTest.cpp
@@ -52,9 +52,11 @@ const ad_utility::HashMap<std::string, std::string> defaultPrefixMap{
 template <auto F, bool testInsideConstructTemplate = false>
 auto parse =
     [](const string& input, SparqlQleverVisitor::PrefixMap prefixes = {},
+       ParsedQuery::DatasetClauses clauses = {},
        SparqlQleverVisitor::DisableSomeChecksOnlyForTesting disableSomeChecks =
            SparqlQleverVisitor::DisableSomeChecksOnlyForTesting::False) {
       ParserAndVisitor p{input, std::move(prefixes), disableSomeChecks};
+      // TODO<joka921> also propagate the active dataset clauses.
       if (testInsideConstructTemplate) {
         p.visitor_.setParseModeToInsideConstructTemplateForTesting();
       }
@@ -115,6 +117,20 @@ struct ExpectCompleteParse {
           matcher, l);
     });
   };
+
+  auto operator()(const string& input,
+                  const testing::Matcher<const Value&>& matcher,
+                  ParsedQuery::DatasetClauses activeDatasetClauses,
+                  ad_utility::source_location l =
+                      ad_utility::source_location::current()) const {
+    auto tr = generateLocationTrace(l, "successful parsing was expected here");
+    EXPECT_NO_THROW({
+      return expectCompleteParse(
+          parse<Clause, parseInsideConstructTemplate>(
+              input, std::move(prefixMap), disableSomeChecks),
+          matcher, l);
+    });
+  };
 };
 
 template <auto Clause>
diff --git a/test/engine/ExistsJoinTest.cpp b/test/engine/ExistsJoinTest.cpp
index 40233e9097..e16d9b3ba7 100644
--- a/test/engine/ExistsJoinTest.cpp
+++ b/test/engine/ExistsJoinTest.cpp
@@ -15,6 +15,12 @@
 using namespace ad_utility::testing;
 
 namespace {
+
+// Helper function that computes an `ExistsJoin` of the given `left` and
+// `right` and checks that the result column is equal to `expectedAsBool`.
+// The first `numJoinColumns` columns of both `left` and `right` are
+// used as join columns.
+//
 void testExistsFromIdTable(IdTable left, IdTable right,
                            std::vector<bool> expectedAsBool,
                            size_t numJoinColumns) {
@@ -22,27 +28,29 @@ void testExistsFromIdTable(IdTable left, IdTable right,
   AD_CORRECTNESS_CHECK(left.numColumns() >= numJoinColumns);
   AD_CORRECTNESS_CHECK(right.numColumns() >= numJoinColumns);
 
-  // Permute the join columns.
-  auto colsLeft = ad_utility::integerRange(left.numColumns());
-  std::vector<size_t> leftPermutation;
-  ql::ranges::copy(colsLeft, std::back_inserter(leftPermutation));
-  left.setColumnSubset(leftPermutation);
-
-  auto colsRight = ad_utility::integerRange(right.numColumns());
-  std::vector<size_t> rightPermutation;
-  ql::ranges::copy(colsRight, std::back_inserter(rightPermutation));
-  right.setColumnSubset(rightPermutation);
-
-  // The expected output depends on the (sorted) input, even if we shuffle it
-  // afterward.
+  // Permute the columns of `table` and return the permutation that was applied.
+  // NOTE: `permutation` is never shuffled, so it is currently the identity.
+  auto permuteColumns = [](auto& table) {
+    auto colsView = ad_utility::integerRange(table.numColumns());
+    std::vector<size_t> permutation;
+    ql::ranges::copy(colsView, std::back_inserter(permutation));
+    table.setColumnSubset(permutation);
+    return permutation;
+  };
+  // Permute the columns.
+  auto leftPermutation = permuteColumns(left);
+  auto rightPermutation = permuteColumns(right);
+
+  // We have to make the deep copy of `left` for the expected result at exactly
+  // this point: The permutation of the columns (above) also affects the
+  // expected result, while the permutation of the rows (which will be applied
+  // below) doesn't affect it, as the `ExistsJoin` internally sorts its inputs.
   IdTable expected = left.clone();
 
   // Randomly shuffle the inputs, to ensure that the `existsJoin` correctly
   // pre-sorts its inputs.
-  std::random_device rd;
-  std::mt19937 g(rd());
-  std::shuffle(left.begin(), left.end(), g);
-  std::shuffle(right.begin(), right.end(), g);
+  ad_utility::randomShuffle(left.begin(), left.end());
+  ad_utility::randomShuffle(right.begin(), right.end());
 
   auto qec = getQec();
   using V = Variable;
@@ -69,8 +77,8 @@ void testExistsFromIdTable(IdTable left, IdTable right,
   };
 
   // Compute the `ExistsJoin` and check the result.
-  auto exists =
-      ExistsJoin{qec, makeChild(left), makeChild(right), V{"?exists"}};
+  auto exists = ExistsJoin{qec, makeChild(left, leftPermutation),
+                           makeChild(right, rightPermutation), V{"?exists"}};
   EXPECT_EQ(exists.getResultWidth(), left.numColumns() + 1);
   auto res = exists.computeResultOnlyForTesting();
   const auto& table = res.idTable();
@@ -81,6 +89,8 @@ void testExistsFromIdTable(IdTable left, IdTable right,
   EXPECT_THAT(table, matchesIdTable(expected));
 }
 
+// Same as the function above, but conveniently takes `VectorTable`s instead of
+// `IdTable`s.
 void testExists(const VectorTable& leftInput, const VectorTable& rightInput,
                 std::vector<bool> expectedAsBool, size_t numJoinColumns) {
   auto left = makeIdTableFromVector(leftInput);

From ca30b5a217f384955213235b30e7d680d6a7ba30 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 14 Feb 2025 20:01:02 +0100
Subject: [PATCH 26/30] Also test different datasets.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/parser/sparqlParser/SparqlQleverVisitor.h |  5 +++
 test/SparqlAntlrParserTest.cpp                | 31 ++++++++++++++++---
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h
index aa5ab52197..4e761934d1 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.h
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -120,6 +120,11 @@ class SparqlQleverVisitor {
     isInsideConstructTriples_ = true;
   }
 
+  void setActiveDatasetClausesForTesting(
+      ParsedQuery::DatasetClauses datasetClauses) {
+    activeDatasetClauses_ = std::move(datasetClauses);
+  }
+
   // ___________________________________________________________________________
   ParsedQuery visit(Parser::QueryOrUpdateContext* ctx);
 
diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp
index 49df3bfe98..9deb2fc0ef 100644
--- a/test/SparqlAntlrParserTest.cpp
+++ b/test/SparqlAntlrParserTest.cpp
@@ -56,7 +56,7 @@ auto parse =
        SparqlQleverVisitor::DisableSomeChecksOnlyForTesting disableSomeChecks =
            SparqlQleverVisitor::DisableSomeChecksOnlyForTesting::False) {
       ParserAndVisitor p{input, std::move(prefixes), disableSomeChecks};
-      // TODO<joka921> also propagate the active dataset clauses.
+      p.visitor_.setActiveDatasetClausesForTesting(std::move(clauses));
       if (testInsideConstructTemplate) {
         p.visitor_.setParseModeToInsideConstructTemplateForTesting();
       }
@@ -113,7 +113,7 @@ struct ExpectCompleteParse {
     EXPECT_NO_THROW({
       return expectCompleteParse(
           parse<Clause, parseInsideConstructTemplate>(
-              input, std::move(prefixMap), disableSomeChecks),
+              input, std::move(prefixMap), {}, disableSomeChecks),
           matcher, l);
     });
   };
@@ -127,7 +127,7 @@ struct ExpectCompleteParse {
     EXPECT_NO_THROW({
       return expectCompleteParse(
           parse<Clause, parseInsideConstructTemplate>(
-              input, std::move(prefixMap), disableSomeChecks),
+              input, {}, std::move(activeDatasetClauses), disableSomeChecks),
           matcher, l);
     });
   };
@@ -152,7 +152,7 @@ struct ExpectParseFails {
       ad_utility::source_location l = ad_utility::source_location::current()) {
     auto trace = generateLocationTrace(l);
     AD_EXPECT_THROW_WITH_MESSAGE(
-        parse<Clause>(input, std::move(prefixMap), disableSomeChecks),
+        parse<Clause>(input, std::move(prefixMap), {}, disableSomeChecks),
         messageMatcher);
   }
 };
@@ -1954,6 +1954,29 @@ TEST(SparqlParser, Exists) {
                     existsMatcher(selectABarFooMatcher()));
   expectBuiltInCall("NOT EXISTS {?a <bar> ?foo}",
                     notExistsMatcher(selectABarFooMatcher()));
+
+  Graphs defaultGraphs{ad_utility::HashSet<TripleComponent>{iri("<blubb>")}};
+  Graphs namedGraphs{ad_utility::HashSet<TripleComponent>{iri("<blabb>")}};
+
+  // Now run the same tests, but with non-empty dataset clauses, that have to be
+  // propagated to the `ParsedQuery` stored inside the `ExistsExpression`.
+  ParsedQuery::DatasetClauses datasetClauses;
+  datasetClauses.defaultGraphs_ = defaultGraphs;
+  datasetClauses.namedGraphs_ = namedGraphs;
+  datasetClauses.defaultGraphs_.value().insert(iri("<blubb>"));
+  expectBuiltInCall("EXISTS {?a <bar> ?foo}",
+                    existsMatcher(selectABarFooMatcher()));
+  expectBuiltInCall("NOT EXISTS {?a <bar> ?foo}",
+                    notExistsMatcher(selectABarFooMatcher()));
+
+  expectBuiltInCall(
+      "EXISTS {?a <bar> ?foo}",
+      existsMatcher(selectABarFooMatcher(defaultGraphs, namedGraphs)),
+      datasetClauses);
+  expectBuiltInCall(
+      "NOT EXISTS {?a <bar> ?foo}",
+      notExistsMatcher(selectABarFooMatcher(defaultGraphs, namedGraphs)),
+      datasetClauses);
 }
 
 namespace aggregateTestHelpers {

From d48d76b0a12bbd49c222130d0e7a3681293c34bf Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 14 Feb 2025 20:06:54 +0100
Subject: [PATCH 27/30] Fix the name of the conformance test-suite

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 .github/workflows/sparql-conformance.yml        | 2 ++
 .github/workflows/upload-sparql-conformance.yml | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/sparql-conformance.yml b/.github/workflows/sparql-conformance.yml
index 0b496ccefc..8ecdbfebb6 100644
--- a/.github/workflows/sparql-conformance.yml
+++ b/.github/workflows/sparql-conformance.yml
@@ -1,3 +1,5 @@
+name: sparql-test-suite
+
 on:
   push:
     branches: [ master ]
diff --git a/.github/workflows/upload-sparql-conformance.yml b/.github/workflows/upload-sparql-conformance.yml
index fe9be1580c..390c4e446a 100644
--- a/.github/workflows/upload-sparql-conformance.yml
+++ b/.github/workflows/upload-sparql-conformance.yml
@@ -2,7 +2,7 @@ name: Upload conformance tests result
 
 on:
   workflow_run:
-    # This has to be the `name:` of the workflow in `code_coverage.yml`.
+    # This has to be the `name:` of the workflow in `sparql-conformance.yml`.
     # Start when this  workflow has finished successfully.
     workflows: [sparql-test-suite]
     types:

From cfe3c17ef53c1b67dee05d08ec63a49a54461ce8 Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Fri, 14 Feb 2025 21:26:44 +0100
Subject: [PATCH 28/30] Minor improvements from Hannah's review

---
 src/engine/ExistsJoin.cpp                       | 12 ++++++------
 src/engine/sparqlExpressions/ExistsExpression.h |  7 ++++---
 test/SparqlExpressionTest.cpp                   |  8 +++++---
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/engine/ExistsJoin.cpp b/src/engine/ExistsJoin.cpp
index 94ca355b4b..902f551ddb 100644
--- a/src/engine/ExistsJoin.cpp
+++ b/src/engine/ExistsJoin.cpp
@@ -109,8 +109,8 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   // columns (in which case we can use a simpler and cheaper join algorithm).
   //
   // TODO<joka921> This is the most common case. There are many other cases
-  // where the generic `zipperJoinWithUndef` can be optimized. We will those
-  // for a later PR.
+  // where the generic `zipperJoinWithUndef` can be optimized. This is work for
+  // a future PR.
   size_t numJoinColumns = joinColumnsLeft.numColumns();
   AD_CORRECTNESS_CHECK(numJoinColumns == joinColumnsRight.numColumns());
   bool isCheap = ql::ranges::none_of(
@@ -128,14 +128,14 @@ ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) {
   // Boolean column) should be `false`.
   std::vector<size_t, ad_utility::AllocatorWithLimit<size_t>> notExistsIndices{
       allocator()};
-  // Run the actual exists join, but use `callFixedSize` for the number of join
-  // columns.
+  // Helper lambda for computing the exists join with `callFixedSize`, which
+  // makes the number of join columns a template parameter.
   auto runForNumJoinCols = [&notExistsIndices, isCheap, &noopRowAdder,
                             &colsLeftDynamic = joinColumnsLeft,
                             &colsRightDynamic = joinColumnsRight,
                             this]<int NumJoinCols>() {
-    // The callback is called with iterators, so we convert them back to
-    // indices.
+    // The `actionForNotExisting` callback gets iterators as input, but should
+    // output indices, hence the iterator arithmetic.
     auto joinColumnsLeft = colsLeftDynamic.asStaticView<NumJoinCols>();
     auto joinColumnsRight = colsRightDynamic.asStaticView<NumJoinCols>();
     auto actionForNotExisting =
diff --git a/src/engine/sparqlExpressions/ExistsExpression.h b/src/engine/sparqlExpressions/ExistsExpression.h
index 675209dbcc..afaa026344 100644
--- a/src/engine/sparqlExpressions/ExistsExpression.h
+++ b/src/engine/sparqlExpressions/ExistsExpression.h
@@ -51,9 +51,10 @@ class ExistsExpression : public SparqlExpression {
       return absl::StrCat("ExistsExpression col# ",
                           varColMap.at(variable_).columnIndex_);
     } else {
-      // This means that the necessary `ExistsJoin` hasn't been set up yet. This
-      // can for example happen if `getCacheKey` is called during the query
-      // planning.
+      // This means that the necessary `ExistsJoin` hasn't been set up yet. For
+      // example, this can happen if `getCacheKey` is called during query
+      // planning (which is done to avoid redundant evaluation in the case of
+      // identical subtrees in the query plan).
       return absl::StrCat("Uninitialized Exists: ",
                           ad_utility::FastRandomIntGenerator<size_t>{}());
     }
diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp
index aa6e1fc909..d53f3ebc4f 100644
--- a/test/SparqlExpressionTest.cpp
+++ b/test/SparqlExpressionTest.cpp
@@ -1468,9 +1468,11 @@ TEST(SingleUseExpression, simpleMembersForTestCoverage) {
   EXPECT_ANY_THROW(expression.getCacheKey({}));
 }
 
-// The actual implementation of EXISTS is done in the ExistsJoin, which  is also
-// properly tested.
-TEST(ExistsExpression, dummyTests) {
+// This just tests basic functionality of the `ExistsExpression` class. Since
+// the actual implementation of the `EXISTS` operator is done in the
+// `ExistsJoin` class, most of the testing happens in
+// `test/engine/ExistsJoinTest.cpp`.
+TEST(ExistsExpression, basicFunctionality) {
   ExistsExpression exists{ParsedQuery{}};
   auto var = exists.variable();
   TestContext context;

From 608d0ea55ab442602671d2221f4151a7ce55a4ed Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Fri, 14 Feb 2025 21:58:01 +0100
Subject: [PATCH 29/30] Re-insert the `baseIri_` declaration in
 `SparqlQleverVisitor.h`

It got lost in the merge conflict resolution.
---
 src/parser/sparqlParser/SparqlQleverVisitor.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h
index 395303d2f5..412f2677f6 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.h
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -88,6 +88,9 @@ class SparqlQleverVisitor {
   // The map from prefixes to their full IRIs.
   PrefixMap prefixMap_{};
 
+  // The `BASE` IRI of the query, if any.
+  ad_utility::triple_component::Iri baseIri_{};
+
   // We need to remember the prologue (prefix declarations) when we encounter it
   // because we need it when we encounter a SERVICE query. When there is no
   // prologue, this string simply remains empty.

From 092e0d91d22d3e541253dfcce728fd6cd5d5065e Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Sat, 15 Feb 2025 15:26:08 +0100
Subject: [PATCH 30/30] Revert changes in .github/workflows

---
 .github/workflows/sparql-conformance.yml      |  2 +-
 .../workflows/upload-sparql-conformance.yml   | 61 -------------------
 2 files changed, 1 insertion(+), 62 deletions(-)
 delete mode 100644 .github/workflows/upload-sparql-conformance.yml

diff --git a/.github/workflows/sparql-conformance.yml b/.github/workflows/sparql-conformance.yml
index 8ecdbfebb6..3e4bdfd63d 100644
--- a/.github/workflows/sparql-conformance.yml
+++ b/.github/workflows/sparql-conformance.yml
@@ -1,4 +1,4 @@
-name: sparql-test-suite
+name: sparql-conformance
 
 on:
   push:
diff --git a/.github/workflows/upload-sparql-conformance.yml b/.github/workflows/upload-sparql-conformance.yml
deleted file mode 100644
index 390c4e446a..0000000000
--- a/.github/workflows/upload-sparql-conformance.yml
+++ /dev/null
@@ -1,61 +0,0 @@
-name: Upload conformance tests result
-
-on:
-  workflow_run:
-    # This has to be the `name:` of the workflow in `sparql-conformance.yml`.
-    # Start when this  workflow has finished successfully.
-    workflows: [sparql-test-suite]
-    types:
-      - completed
-
-jobs:
-  upload:
-    runs-on: ubuntu-latest
-    if: >
-      github.event.workflow_run.event == 'pull_request' &&
-      github.event.workflow_run.conclusion == 'success'
-    steps:
-      - name: 'Download artifact'
-        uses: actions/github-script@v6
-        # The following script is taken from the link stated at the
-        # beginning of this file. It manually downloads an artifact
-        # from another workflow.
-        with:
-          script: |
-            var artifacts = await github.rest.actions.listWorkflowRunArtifacts({
-               owner: context.repo.owner,
-               repo: context.repo.repo,
-               run_id: ${{github.event.workflow_run.id }},
-            });
-            var matchArtifact = artifacts.data.artifacts.filter((artifact) => {
-              return artifact.name == "conformance-report"
-            })[0];
-            var download = await github.rest.actions.downloadArtifact({
-               owner: context.repo.owner,
-               repo: context.repo.repo,
-               artifact_id: matchArtifact.id,
-               archive_format: 'zip',
-            });
-            var fs = require('fs');
-            fs.writeFileSync('${{github.workspace}}/conformance-report.zip', Buffer.from(download.data));
-      - run: unzip conformance-report.zip
-      # Read the metadata into environment variables.
-      - name: "Read PR number"
-        run: echo "pr_number=`cat pr`" >> $GITHUB_ENV
-      - name: "Read Github Ref"
-        run: echo "original_github_ref=`cat github_ref`" >> $GITHUB_ENV;
-      - name: "Read Github SHA"
-        run: echo "commit_sha=`cat sha`" >> $GITHUB_ENV;
-      - name: "Read Github Repository"
-        run: echo "original_github_repository=`cat github_repository`" >> $GITHUB_ENV;
-        # We have to check out the source code from the PR, otherwise Codecov
-        # won't process the upload properly. We first check it out into a
-        # subdirectory `qlever-source`, otherwise the coverage report will
-        # be overwritten. We then move all the files back into the working
-        # directory such that Codecov will pick them up properly.
-      - name: "Submit data to server"
-        env:
-          SERVER_URL: ${{ secrets.SPARQL_CONFORMANCE_SERVER_URL }}
-          API_KEY: ${{ secrets.SPARQL_CONFORMANCE_SERVER_KEY }}
-        run: |
-          curl -H "x-api-key: $API_KEY" -H "event: ${{github.event.workflow_run.event}}" -H "sha: ${{env.commit_sha}}" -H "pr-number: ${{env.pr_number}}" -H "repo: ${{env.original_github_repository}}" -F "file=@${{env.commit_sha}}.json.bz2" $SERVER_URL/upload
\ No newline at end of file