diff --git a/.github/workflows/sycl-benchmark-aggregate.yml b/.github/workflows/sycl-benchmark-aggregate.yml new file mode 100644 index 0000000000000..feaf803faa245 --- /dev/null +++ b/.github/workflows/sycl-benchmark-aggregate.yml @@ -0,0 +1,108 @@ +name: Aggregate compute-benchmark averages from historical data + +# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on +# how the benchmark results compare to a historical average: This historical +# average is calculated in this workflow, which aggregates historical data and +# produces measures of central tendency (median in this case) used for this +# purpose. + +on: + workflow_dispatch: + inputs: + cutoff_timestamp: + description: | + Timestamp indicating the age limit of data used in average calculation: + Any benchmark results created before this timestamp is excluded from + being aggregated. + + Any valid date string supported by GNU coreutils is valid here: + https://www.gnu.org/software/coreutils/manual/html_node/Date-input-formats.html + type: string + required: false + workflow_call: + inputs: + cutoff_timestamp: + type: string + required: false + +permissions: + contents: read + +jobs: + aggregate: + name: Aggregate average (median) value for all metrics + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + path: llvm + sparse-checkout: | + devops/scripts/benchmarking + devops/benchmarking + - name: Load benchmarking configuration + run: | + CONFIG_FILE="$PWD/llvm/devops/benchmarking/benchmark-ci.conf" + + # Load default values from configuration file + . "$PWD/llvm/devops/scripts/benchmarking/utils.sh" + # utils.sh contains functions to sanitize config file settings + load_config_constants "$CONFIG_FILE" + echo "PERF_RES_GIT_REPO=$PERF_RES_GIT_REPO" >> $GITHUB_ENV + echo "PERF_RES_BRANCH=$PERF_RES_BRANCH" >> $GITHUB_ENV + echo "PERF_RES_PATH=$PERF_RES_PATH" >> $GITHUB_ENV + + # Determine a "cutoff timestamp" used by the aggregator script + # + # This timestamp controls which historical results are used to compute + # measures of central tendency: Any files timestamped *before* this time + # will be *excluded* from the central tendency calculation. + + echo "TIMESTAMP_FORMAT=$TIMESTAMP_FORMAT" >> $GITHUB_ENV + if [ -z '${{ inputs.cutoff_timestamp }}' ]; then + # No time given, use default time period from config file: + echo "CUTOFF_TIMESTAMP=$(date --date="$AVERAGE_CUTOFF_RANGE" +"$TIMESTAMP_FORMAT")" >> $GITHUB_ENV + else + # If the provided time is a valid GNU coreutils date string, convert + # the time to our format: + _converted_timestamp="$(date --date '${{ inputs.cutoff_timestamp }}' +"$TIMESTAMP_FORMAT" 2> /dev/null)" + if [ -n "$_converted_timestamp" ]; then + echo "CUTOFF_TIMESTAMP=$_converted_timestamp" >> $GITHUB_ENV + else + # If not a valid GNU date string, it could be in our timestamp format already. + # aggregate.py will ensure the timestamp is in the proper format, so we can pass the + # time forward regardless: + echo 'CUTOFF_TIMESTAMP=${{ inputs.cutoff_timestamp }}' >> $GITHUB_ENV + fi + fi + - name: Checkout historical performance results repository + run: | + git clone -b $PERF_RES_BRANCH https://github.com/$PERF_RES_GIT_REPO $PERF_RES_PATH + - name: Run aggregator on historical results + run: | + # The current format of the historical results respository is: + # + # /// + # + # Thus, a min/max depth of 3 is used to enumerate all test cases in the + # repository. Test name is also derived from here. 
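+        # For example (hypothetical runner name), a test-case directory
+        # might look like:
+        #
+        #   ./llvm-ci-perf-res/level_zero-gpu/my_gen12_runner/api_overhead_benchmark_sycl
+        #
+        # where the basename, api_overhead_benchmark_sycl, is the test name
+        # passed to aggregate.py below.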
+ for dir in $(find "$PERF_RES_PATH" -mindepth 3 -maxdepth 3 -type d ! -path '*.git*'); do + test_name="$(basename $dir)" + python llvm/devops/scripts/benchmarking/aggregate.py "$test_name" "$dir" "$CUTOFF_TIMESTAMP" + done + - name: Upload average to the repo + env: + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} + run: | + # TODO -- waiting on security clearance + cd "$PERF_RES_PATH" + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git add . + git commit -m "[GHA] Aggregate median data from $CUTOFF_TIMESTAMP to $(date +"$TIMESTAMP_FORMAT")" + git push "https://$GITHUB_TOKEN@github.com/$PERF_RES_GIT_REPO.git" "$PERF_RES_BRANCH" + - name: Archive new medians + if: always() + uses: actions/upload-artifact@v4 + with: + name: llvm-ci-perf-results new medians + path: ${{ env.PERF_RES_PATH }}/**/*-median.csv \ No newline at end of file diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index b3b4f62e370db..1869e435afb79 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -25,7 +25,7 @@ on: required: False tests_selector: description: | - Two possible options: "e2e" and "cts". + Three possible options: "e2e", "cts", and "benchmark". type: string default: "e2e" @@ -150,6 +150,7 @@ on: options: - e2e - cts + - benchmark env: description: | @@ -314,3 +315,11 @@ jobs: sycl_cts_artifact: ${{ inputs.sycl_cts_artifact }} target_devices: ${{ inputs.target_devices }} retention-days: ${{ inputs.retention-days }} + + - name: Run compute-benchmarks on SYCL + if: inputs.tests_selector == 'benchmark' + uses: ./devops/actions/run-tests/benchmark + with: + target_devices: ${{ inputs.target_devices }} + env: + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml index 5485719d60141..569b90d277eb0 100644 --- a/.github/workflows/sycl-nightly.yml +++ b/.github/workflows/sycl-nightly.yml @@ -238,6 +238,38 @@ jobs: sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }} sycl_cts_artifact: sycl_cts_bin + aggregate_benchmark_results: + if: always() && !cancelled() + name: Aggregate benchmark results and produce historical averages + uses: ./.github/workflows/sycl-benchmark-aggregate.yml + + run-sycl-benchmarks: + needs: [ubuntu2204_build, aggregate_benchmark_results] + if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }} + strategy: + fail-fast: false + matrix: + include: + - name: Run compute-benchmarks on L0 Gen12 + runner: '["Linux", "gen12"]' + image: ghcr.io/intel/llvm/ubuntu2404_intel_drivers:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: level_zero:gpu + reset_intel_gpu: true + uses: ./.github/workflows/sycl-linux-run-tests.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + image_options: ${{ matrix.image_options }} + target_devices: ${{ matrix.target_devices }} + tests_selector: benchmark + reset_intel_gpu: ${{ matrix.reset_intel_gpu }} + ref: ${{ github.sha }} + sycl_toolchain_artifact: sycl_linux_default + sycl_toolchain_archive: ${{ needs.ubuntu2204_build.outputs.artifact_archive_name }} + sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }} + nightly_build_upload: name: Nightly 
Build Upload if: ${{ github.ref_name == 'sycl' }} diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml new file mode 100644 index 0000000000000..4dd4ca66d765f --- /dev/null +++ b/devops/actions/run-tests/benchmark/action.yml @@ -0,0 +1,63 @@ +name: 'Run compute-benchmarks' + +# Run compute-benchmarks on SYCL +# +# This action assumes SYCL is in $PWD/toolchain, and that /devops has been +# checked out in $PWD/devops. This action also assumes that GITHUB_TOKEN +# was properly set in env, because according to Github, that's apparently the +# recommended way to pass a secret into a github action: +# +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +# + +inputs: + target_devices: + required: true + +runs: + using: "composite" + steps: + - name: Run compute-benchmarks + shell: bash + run: | + cat << EOF + # + # NOTE TO DEVELOPERS: + # + + Check latter steps of the workflow: This job produces an artifact with: + - benchmark results from passing/failing tests + - log containing all failing (too slow) benchmarks + - log containing all erroring benchmarks + + While this step in the workflow provides debugging output describing this + information, it might be easier to inspect the logs from the artifact + instead. + + EOF + export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + export CMPLR_ROOT=$PWD/toolchain + sycl-ls + echo "-----" + ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s + - name: Push compute-benchmarks results + shell: bash + run: | + # TODO -- waiting on security clearance + # Load configuration values + . "$PWD/devops/scripts/benchmarking/utils.sh" + CONFIG_FILE="$PWD/devops/benchmarking/benchmark-ci.conf" + load_config_constants "$CONFIG_FILE" + + cd "$PERF_RES_PATH" + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git add . + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + git push "https://$GITHUB_TOKEN@github.com/$PERF_RES_GIT_REPO.git" "$PERF_RES_BRANCH" + - name: Archive compute-benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Compute-benchmark results (${{ runner.name }}) + path: ./artifact diff --git a/devops/benchmarking/benchmark-ci.conf b/devops/benchmarking/benchmark-ci.conf new file mode 100644 index 0000000000000..ba39b37cd1e92 --- /dev/null +++ b/devops/benchmarking/benchmark-ci.conf @@ -0,0 +1,75 @@ +# +# Configuration Options +# + +# Compile flags used to build compute-benchmarks +COMPUTE_BENCH_COMPILE_FLAGS="-j2" +# Number of iterations to run tests for +COMPUTE_BENCH_ITERATIONS="100" + +# Metrics to benchmark, and their allowed variance, as a Python dictionary +#METRICS_VARIANCE='{"Median": 0.5, "StdDev": 4.0}' +METRICS_VARIANCE='{"Median": 0.5}' +# Metrics to record using aggregate.py +METRICS_RECORDED='["Median", "StdDev"]' + +# Default period of time to aggregate for the average +AVERAGE_CUTOFF_RANGE="7-days-ago" +# Accepts all valid date strings accepted by GNU coreutils `date` extension: +# +# https://www.gnu.org/software/coreutils/manual/html_node/Date-input-formats.html +# +# Relative timestamps are okay, but replace ' ' with '-', as whitespace gets +# Threshold to store benchmark files before benchmarking +# TODO reconsider this +AVERAGE_THRESHOLD=3 +# removed when config file entries are sanitized. 
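+# For example (hypothetical value), AVERAGE_CUTOFF_RANGE="2-weeks-ago" would
+# aggregate only results produced within the last two weeks.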
+
+# Enabled ONEAPI_DEVICE_SELECTOR backends
+DEVICE_SELECTOR_ENABLED_BACKENDS="level_zero,opencl,cuda,hip"
+# Disabled backends: native_cpu
+
+# Enabled ONEAPI_DEVICE_SELECTOR devices
+DEVICE_SELECTOR_ENABLED_DEVICES="cpu,gpu"
+# Disabled devices: fpga
+
+
+#
+# Constants
+#
+
+# Constants used throughout the benchmarking workflow -- do not reconfigure
+# these arbitrarily
+
+# GitHub repo + branch settings for the repo storing benchmark results
+PERF_RES_GIT_REPO="ianayl/llvm-ci-perf-results"
+PERF_RES_BRANCH="test-compute-bench"
+
+# GitHub repo + branch settings for compute-benchmarks itself
+COMPUTE_BENCH_GIT_REPO="ianayl/compute-benchmarks"
+COMPUTE_BENCH_BRANCH="update-sycl"
+
+# Path to clone benchmark results repo
+PERF_RES_PATH="./llvm-ci-perf-res"
+
+# Path to clone and build compute-benchmarks
+COMPUTE_BENCH_PATH="./compute-benchmarks"
+
+# Format of timestamps used (unix `date` format string)
+TIMESTAMP_FORMAT="%Y%m%d_%H%M%S"
+
+# Path to root folder storing benchmark CI artifact
+ARTIFACT_PATH="./artifact"
+
+# Path to temporarily cache compute-benchmark results
+OUTPUT_CACHE="./artifact/failed_tests"
+# If a test result does not get moved out of this catch-all cache path, it is
+# considered to have failed
+
+# Path to cache passing compute-benchmark results
+PASSING_CACHE="./artifact/passing_tests"
+
+# Log file for test cases that perform over the allowed variance
+BENCHMARK_SLOW_LOG="./artifact/benchmarks_failed.log"
+# Log file for test cases that errored / failed to build
+BENCHMARK_ERROR_LOG="./artifact/benchmarks_errored.log"
diff --git a/devops/benchmarking/enabled_tests.conf b/devops/benchmarking/enabled_tests.conf
new file mode 100644
index 0000000000000..0f6e21f93f67b
--- /dev/null
+++ b/devops/benchmarking/enabled_tests.conf
@@ -0,0 +1,8 @@
+# Test cases to be enabled:
+api_overhead_benchmark_sycl
+memory_benchmark_sycl
+miscellaneous_benchmark_sycl
+ulls_benchmark_sycl
+
+# As of January 2025, these are all compute-benchmark tests with a SYCL
+# implementation.
\ No newline at end of file
diff --git a/devops/scripts/benchmarking/aggregate.py b/devops/scripts/benchmarking/aggregate.py
new file mode 100644
index 0000000000000..4b3918118af75
--- /dev/null
+++ b/devops/scripts/benchmarking/aggregate.py
@@ -0,0 +1,126 @@
+import csv
+import sys
+from pathlib import Path
+import heapq
+import statistics
+
+import common
+
+
+# Simple median calculation
+class SimpleMedian:
+
+    def __init__(self):
+        self.elements = []
+
+    def add(self, n: float):
+        self.elements.append(n)
+
+    def get_median(self) -> float:
+        return statistics.median(self.elements)
+
+
+# Calculate the median incrementally using two heaps: useful when dealing with
+# a large number of samples.
+#
+# TODO how many samples are we going to realistically get? I had written this
+# with precommit in mind, but if this only runs nightly, it would actually be
+# faster to do a normal median calculation.
+class StreamingMedian:
+
+    def __init__(self):
+        # Gist: we keep a max-heap of the smaller half of the samples and a
+        # min-heap of the larger half, and read the median off the top of the
+        # max-heap. Each new element is pushed onto one of the heaps depending
+        # on whether it is larger than the current median; the heaps are then
+        # rebalanced so their sizes never differ by more than one.
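+        # For example, after add(3), add(1), add(4): the smaller half {1, 3}
+        # sits in maxheap_smaller (stored negated), the larger half {4} sits
+        # in minheap_larger, and get_median() returns 3.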
+        self.minheap_larger = []
+        self.maxheap_smaller = []
+
+        # Note: numbers on maxheap should be negative, as heapq
+        # is a minheap by default
+
+    def add(self, n: float):
+        if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n:
+            heapq.heappush(self.maxheap_smaller, -n)
+        else:
+            heapq.heappush(self.minheap_larger, n)
+
+        # Rebalance: maxheap_smaller may hold at most one element more than
+        # minheap_larger
+        if len(self.maxheap_smaller) > len(self.minheap_larger) + 1:
+            heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller))
+        elif len(self.maxheap_smaller) < len(self.minheap_larger):
+            heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger))
+
+    def get_median(self) -> float:
+        if len(self.maxheap_smaller) == len(self.minheap_larger):
+            # Equal number of elements smaller and larger than the "median":
+            # there are two middle values, so the median is their average.
+            return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0
+        else:
+            # Otherwise the median is the top of maxheap_smaller, which holds
+            # the extra element.
+            return -self.maxheap_smaller[0]
+
+
+def aggregate_median(test_name: str, test_dir: str, cutoff: str):
+
+    # Get all .csv samples for the requested test folder
+    def csv_samples() -> list[str]:
+        # TODO check that the path below is a valid directory
+        cache_dir = Path(test_dir)
+        # TODO check for time range; What time range do I want?
+        return filter(
+            lambda f: f.is_file()
+            and common.valid_timestamp(str(f)[-19:-4])
+            and str(f)[-19:-4] > cutoff,
+            cache_dir.glob(f"{test_name}-*_*.csv"),
+        )
+
+    # Calculate median of every desired metric:
+    aggregate_s = dict()
+    for sample_path in csv_samples():
+        with open(sample_path, "r") as sample_file:
+            for s in csv.DictReader(sample_file):
+                test_case = s["TestCase"]
+                # Construct entry in aggregate_s for test case if it does not
+                # exist already:
+                if test_case not in aggregate_s:
+                    aggregate_s[test_case] = {
+                        metric: SimpleMedian() for metric in common.metrics_variance
+                    }
+
+                for metric in common.metrics_variance:
+                    aggregate_s[test_case][metric].add(common.sanitize(s[metric]))
+
+    # Write calculated median (aggregate_s) as a new .csv file:
+    with open(f"{test_dir}/{test_name}-median.csv", "w") as output_csv:
+        writer = csv.DictWriter(
+            output_csv, fieldnames=["TestCase", *common.metrics_variance.keys()]
+        )
+        writer.writeheader()
+        for test_case in aggregate_s:
+            writer.writerow(
+                {"TestCase": test_case}
+                | {
+                    metric: aggregate_s[test_case][metric].get_median()
+                    for metric in common.metrics_variance
+                }
+            )
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 4:
+        print(f"Usage: {sys.argv[0]} <test_name> <test_dir> <cutoff_timestamp>")
+        exit(1)
+    if not common.valid_timestamp(sys.argv[3]):
+        print(sys.argv)
+        print("Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.")
+        exit(1)
+    common.load_configs()
+
+    aggregate_median(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/devops/scripts/benchmarking/benchmark.sh b/devops/scripts/benchmarking/benchmark.sh
new file mode 100755
index 0000000000000..ca707af09ce04
--- /dev/null
+++ b/devops/scripts/benchmarking/benchmark.sh
@@ -0,0 +1,308 @@
+#!/bin/sh
+
+#
+# benchmark.sh: Benchmark dpcpp using compute-benchmarks
+#
+
+usage () {
+    >&2 echo "Usage: $0 -n <runner_name> [options]
+    -n <runner_name>      GitHub runner name -- Required
+    -p <path>             Path to clone and build compute-benchmarks in
+    -r <owner/repo>       GitHub repo to use for compute-benchmarks origin, in format <owner>/<repo>
+    -b <branch>           Git branch to use within compute-benchmarks
+    -f <compile_flags>    Compile flags
passed into building compute-benchmarks + -c Clean up working directory + -C Clean up working directory and exit + -s Cache results + +This script builds and runs benchmarks from compute-benchmarks." + exit 1 +} + +clone_perf_res() { + echo "### Cloning llvm-ci-perf-res ($PERF_RES_GIT_REPO:$PERF_RES_BRANCH) ###" + mkdir -p "$(dirname "$PERF_RES_PATH")" + git clone -b $PERF_RES_BRANCH https://github.com/$PERF_RES_GIT_REPO $PERF_RES_PATH + [ "$?" -ne 0 ] && exit $? +} + +clone_compute_bench() { + echo "### Cloning compute-benchmarks ($COMPUTE_BENCH_GIT_REPO:$COMPUTE_BENCH_BRANCH) ###" + mkdir -p "$(dirname "$COMPUTE_BENCH_PATH")" + git clone -b $COMPUTE_BENCH_BRANCH \ + --recurse-submodules https://github.com/$COMPUTE_BENCH_GIT_REPO \ + $COMPUTE_BENCH_PATH + [ "$?" -ne 0 ] && exit $? +} + +build_compute_bench() { + echo "### Building compute-benchmarks ($COMPUTE_BENCH_GIT_REPO:$COMPUTE_BENCH_BRANCH) ###" + mkdir $COMPUTE_BENCH_PATH/build && cd $COMPUTE_BENCH_PATH/build && + # No reason to turn on ccache, if this docker image will be disassembled later on + cmake .. -DBUILD_SYCL=ON -DBUILD_L0=OFF -DBUILD=OCL=OFF -DCCACHE_ALLOWED=FALSE # && cmake --build . $COMPUTE_BENCH_COMPILE_FLAGS + # TODO enable mechanism for opting into L0 and OCL -- the concept is to + # subtract OCL/L0 times from SYCL times in hopes of deriving SYCL runtime + # overhead, but this is mostly an idea that needs to be mulled upon. + + if [ "$?" -eq 0 ]; then + while IFS= read -r case; do + # Skip lines starting with '#' + [ "${case##\#*}" ] || continue + make $COMPUTE_BENCH_COMPILE_FLAGS "$case" + done < "$TESTS_CONFIG" + fi + #compute_bench_build_stat=$? + cd - + #[ "$compute_bench_build_stat" -ne 0 ] && exit $compute_bench_build_stat +} + +# print_bench_res() { +# # Usage: print_bench_res +# if [ ! -s $1 ]; then +# printf "NO OUTPUT! (Status $2)\n" | tee -a $3 +# return # Do not proceed if file is empty +# fi +# +# get_csv_col_index $1 run-time-mean +# tmp_run_time_mean_i=$tmp_csv_col_i +# get_csv_col_index $1 run-time-median +# tmp_run_time_median_i=$tmp_csv_col_i +# get_csv_col_index $1 run-time-throughput +# tmp_run_time_throughput_i=$tmp_csv_col_i +# +# # `sycl-bench` output seems to like inserting the header multiple times. +# # Here we cache the header to make sure it prints only once: +# tmp_header_title="$(cat $1 | head -n 1 | sed 's/^\# Benchmark name/benchmark/')" +# tmp_result="$(cat $1 | grep '^[^\#]')" +# +# printf "%s\n%s" "$tmp_header_title" "$tmp_result" \ +# | awk -F',' -v me="$tmp_run_time_mean_i" \ +# -v md="$tmp_run_time_median_i" \ +# -v th="$tmp_run_time_throughput_i" \ +# '{printf "%-57s %-13s %-15s %-20s\n", $1, $me, $md, $th }' \ +# | tee -a $3 # Print to summary file +# } + +# Check if the number of samples for a given test case is less than a threshold +# set in benchmark-ci.conf +# +# Usage: +samples_under_threshold () { + [ ! -d "$PERF_RES_PATH/$1" ] && return 1 # Directory doesn't exist + file_count="$(find "$PERF_RES_PATH/$1" -maxdepth 1 -type f | wc -l )" + [ "$file_count" -lt "$AVERAGE_THRESHOLD" ] +} + +# Check for a regression via compare.py +# +# Usage: check_regression +check_regression() { + csv_relpath="$(dirname "$1")" + csv_name="$(basename "$1")" + if samples_under_threshold "$csv_relpath"; then + echo "Not enough samples to construct a good average, performance\ + check skipped!" + return 0 # Success status + fi + DEVOPS_PATH="$DEVOPS_PATH" \ + python "$DEVOPS_PATH/scripts/benchmarking/compare.py" \ + "$csv_relpath" "$csv_name" + return $? 
+} + +# Move the results of our benchmark into the git repo, and save benchmark +# results to artifact archive +# +# Usage: cache +cache() { + mkdir -p "$(dirname "$PASSING_CACHE/$1")" "$(dirname "$PERF_RES_PATH/$1")" + cp "$OUTPUT_CACHE/$1" "$PASSING_CACHE/$1" + mv "$OUTPUT_CACHE/$1" "$PERF_RES_PATH/$1" +} + +# Check for a regression + cache if no regression found +# +# Usage: check_and_cache +check_and_cache() { + echo "Checking $1..." + if check_regression $1; then + if [ "$CACHE_RESULTS" -eq "1" ]; then + echo "Caching $1..." + cache $1 + fi + else + [ "$CACHE_RESULTS" -eq "1" ] && echo "Regression found -- Not caching!" + fi +} + +# Run and process the results of each enabled benchmark in enabled_tests.conf +process_benchmarks() { + mkdir -p "$PERF_RES_PATH" + + echo "### Running and processing selected benchmarks ###" + if [ -z "$TESTS_CONFIG" ]; then + echo "Setting tests to run via cli is not currently supported." + exit 1 + else + rm "$BENCHMARK_ERROR_LOG" "$BENCHMARK_SLOW_LOG" 2> /dev/null + mkdir -p "$(dirname "$BENCHMARK_ERROR_LOG")" "$(dirname "$BENCHMARK_SLOW_LOG")" + # Loop through each line of enabled_tests.conf, but ignore lines in the + # test config starting with #'s: + grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do + echo "# Running $testcase..." + + # The benchmark results git repo and this script's output both share + # the following directory structure: + # + # /// + # + # Instead of specifying 2 paths with a slightly different root + # folder name for every function we use, we can use a relative path + # to represent the file in both folders. + # + # Figure out the relative path of our testcase result: + test_dir_relpath="$DEVICE_SELECTOR_DIRNAME/$RUNNER/$testcase" + output_csv_relpath="$test_dir_relpath/$testcase-$TIMESTAMP.csv" + mkdir -p "$OUTPUT_CACHE/$test_dir_relpath" # Ensure directory exists + # TODO generate runner config txt if not exist + + output_csv="$OUTPUT_CACHE/$output_csv_relpath" + $COMPUTE_BENCH_PATH/build/bin/$testcase --csv \ + --iterations="$COMPUTE_BENCH_ITERATIONS" \ + | tail +8 > "$output_csv" + # The tail +8 filters out header lines not in csv format + + exit_status="$?" + if [ "$exit_status" -eq 0 ] && [ -s "$output_csv" ]; then + check_and_cache $output_csv_relpath + else + # TODO consider capturing stderr for logging + echo "[ERROR] $testcase returned exit status $exit_status" + echo "-- $testcase: error $exit_status" >> "$BENCHMARK_ERROR_LOG" + fi + done + fi +} + +# Handle failures + produce a report on what failed +process_results() { + fail=0 + if [ -s "$BENCHMARK_SLOW_LOG" ]; then + printf "\n### Tests performing over acceptable range of average: ###\n" + cat "$BENCHMARK_SLOW_LOG" + echo "" + fail=2 + fi + if [ -s "$BENCHMARK_ERROR_LOG" ]; then + printf "\n### Tests that failed to run: ###\n" + cat "$BENCHMARK_ERROR_LOG" + echo "" + fail=1 + fi + exit $fail +} + +cleanup() { + echo "### Cleaning up compute-benchmark builds from prior runs ###" + rm -rf $COMPUTE_BENCH_PATH + rm -rf $PERF_RES_PATH + [ ! -z "$_exit_after_cleanup" ] && exit +} + +load_configs() { + # This script needs to know where the intel/llvm "/devops" directory is, + # containing all the configuration files and the compare script. + # + # If this is not provided, this function tries to guess where the files + # are based on how the script is called, and verifies that all necessary + # configs and scripts are reachable. 
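+    #
+    # For example (hypothetical runner name), the benchmark CI action invokes
+    # this script roughly as:
+    #
+    #   CMPLR_ROOT=$PWD/toolchain ONEAPI_DEVICE_SELECTOR=level_zero:gpu \
+    #     ./devops/scripts/benchmarking/benchmark.sh -n my-runner -s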
+ + # This benchmarking script is usually at: + # + # /devops/scripts/benchmarking/benchmark.sh + # + # Derive /devops based on location of this script: + [ -z "$DEVOPS_PATH" ] && DEVOPS_PATH="$(dirname "$0")/../.." + + BENCHMARK_CI_CONFIG="$(realpath $DEVOPS_PATH/benchmarking/benchmark-ci.conf)" + TESTS_CONFIG="$(realpath $DEVOPS_PATH/benchmarking/enabled_tests.conf)" + COMPARE_PATH="$(realpath $DEVOPS_PATH/scripts/benchmarking/compare.py)" + UTILS_PATH="$(realpath $DEVOPS_PATH/scripts/benchmarking/utils.sh)" + + for file in \ + "$BENCHMARK_CI_CONFIG" "$TESTS_CONFIG" "$COMPARE_PATH" "$UTILS_PATH" + do + if [ ! -f "$file" ]; then + echo "Please provide path to DEVOPS_PATH." + exit -1 + fi + done + + . "$UTILS_PATH" + load_config_options "$BENCHMARK_CI_CONFIG" + load_config_constants "$BENCHMARK_CI_CONFIG" +} + +##### + +load_configs + +COMPUTE_BENCH_COMPILE_FLAGS="" +CACHE_RESULTS="0" +TIMESTAMP="$(date +"$TIMESTAMP_FORMAT")" + +# CLI flags + overrides to configuration options: +while getopts "p:b:r:f:n:cCs" opt; do + case $opt in + p) COMPUTE_BENCH_PATH=$OPTARG ;; + r) COMPUTE_BENCH_GIT_REPO=$OPTARG ;; + b) COMPUTE_BENCH_BRANCH=$OPTARG ;; + f) COMPUTE_BENCH_COMPILE_FLAGS=$OPTARG ;; + n) RUNNER=$OPTARG ;; + # Cleanup status is saved in a var to ensure all arguments are processed before + # performing cleanup + c) _cleanup=1 ;; + C) _cleanup=1 && _exit_after_cleanup=1 ;; + s) CACHE_RESULTS="1";; + \?) usage ;; + esac +done + +# Check all necessary variables exist: +if [ -z "$CMPLR_ROOT" ]; then + echo "Please set CMPLR_ROOT first; it is needed by compute-benchmarks to build." + exit 1 +elif [ -z "$ONEAPI_DEVICE_SELECTOR" ]; then + echo "Please set ONEAPI_DEVICE_SELECTOR first to specify which device to use." + exit 1 +elif [ -z "$RUNNER" ]; then + echo "Please specify runner name using -n first; it is needed for storing/comparing benchmark results." + exit 1 +fi + +# Make sure ONEAPI_DEVICE_SELECTOR doesn't try to enable multiple devices at the +# same time, or use specific device id's +_dev_sel_backend_re="$(echo "$DEVICE_SELECTOR_ENABLED_BACKENDS" | sed 's/,/|/g')" +_dev_sel_device_re="$(echo "$DEVICE_SELECTOR_ENABLED_DEVICES" | sed 's/,/|/g')" +_dev_sel_re="s/($_dev_sel_backend_re):($_dev_sel_device_re)//" +if [ -n "$(echo "$ONEAPI_DEVICE_SELECTOR" | sed -E "$_dev_sel_re")" ]; then + echo "Unsupported ONEAPI_DEVICE_SELECTOR value: please ensure only one \ +device is selected, and devices are not selected by indices." + echo "Enabled backends: $DEVICE_SELECTOR_ENABLED_BACKENDS" + echo "Enabled device types: $DEVICE_SELECTOR_ENABLED_DEVICES" + exit 1 +fi +# ONEAPI_DEVICE_SELECTOR values are not valid directory names in unix: this +# value lets us use ONEAPI_DEVICE_SELECTOR as actual directory names +DEVICE_SELECTOR_DIRNAME="$(echo "$ONEAPI_DEVICE_SELECTOR" | sed 's/:/-/')" + +# Clean up and delete all cached files if specified: +[ ! -z "$_cleanup" ] && cleanup +# Clone and build only if they aren't already cached/deleted: +[ ! -d "$PERF_RES_PATH" ] && clone_perf_res +[ ! -d "$COMPUTE_BENCH_PATH" ] && clone_compute_bench +[ ! 
-d "$COMPUTE_BENCH_PATH/build" ] && build_compute_bench +# Process benchmarks: +process_benchmarks +process_results diff --git a/devops/scripts/benchmarking/common.py b/devops/scripts/benchmarking/common.py new file mode 100644 index 0000000000000..75f236066fd98 --- /dev/null +++ b/devops/scripts/benchmarking/common.py @@ -0,0 +1,67 @@ +import os +import re +import ast + +# Globals definition +PERF_RES_PATH, metrics_variance, metrics_recorded = None, None, None +BENCHMARK_SLOW_LOG, BENCHMARK_ERROR_LOG = None, None + + +def sanitize(stat: str) -> float: + # Get rid of % + if stat[-1] == "%": + stat = stat[:-1] + return float(stat) + + +def load_configs(): + DEVOPS_PATH = os.getenv("DEVOPS_PATH") + if DEVOPS_PATH is None: + # Try to predict where /devops is based on executable + DEVOPS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) + + benchmarking_ci_conf_path = f"{DEVOPS_PATH}/benchmarking/benchmark-ci.conf" + if not os.path.isfile(benchmarking_ci_conf_path): + raise Exception(f"Please provide path to a valid DEVOPS_PATH.") + + global PERF_RES_PATH, OUTPUT_CACHE, metrics_variance, metrics_recorded + global BENCHMARK_ERROR_LOG, BENCHMARK_SLOW_LOG + perf_res_re = re.compile(r"^PERF_RES_PATH=(.*)$", re.M) + output_cache_re = re.compile(r"^OUTPUT_CACHE=(.*)$", re.M) + m_variance_re = re.compile(r"^METRICS_VARIANCE=(.*)$", re.M) + m_recorded_re = re.compile(r"^METRICS_RECORDED=(.*)$", re.M) + b_slow_re = re.compile(r"^BENCHMARK_SLOW_LOG=(.*)$", re.M) + b_error_re = re.compile(r"^BENCHMARK_ERROR_LOG=(.*)$", re.M) + + with open(benchmarking_ci_conf_path, "r") as configs_file: + configs_str = configs_file.read() + + for m_variance in m_variance_re.findall(configs_str): + metrics_variance = ast.literal_eval(m_variance.strip()[1:-1]) + if not isinstance(metrics_variance, dict): + raise TypeError("Error in benchmark-ci.conf: METRICS_VARIANCE is not a python dict.") + + for m_recorded in m_recorded_re.findall(configs_str): + metrics_recorded = ast.literal_eval(m_recorded.strip()[1:-1]) + if not isinstance(metrics_recorded, list): + raise TypeError("Error in benchmark-ci.conf: METRICS_RECORDED is not a python list.") + + for perf_res in perf_res_re.findall(configs_str): + PERF_RES_PATH = str(perf_res[1:-1]) + + for output_cache in output_cache_re.findall(configs_str): + OUTPUT_CACHE = str(output_cache[1:-1]) + + for b_slow_log in b_slow_re.findall(configs_str): + BENCHMARK_SLOW_LOG = str(b_slow_log[1:-1]) + + for b_error_log in b_error_re.findall(configs_str): + BENCHMARK_ERROR_LOG = str(b_error_log[1:-1]) + + +def valid_timestamp(timestamp: str) -> bool: + timestamp_re = re.compile( + # YYYYMMDD_HHMMSS + r"^\d{4}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])_(0[0-9]|1[0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" + ) + return timestamp_re.match(timestamp) is not None \ No newline at end of file diff --git a/devops/scripts/benchmarking/compare.py b/devops/scripts/benchmarking/compare.py new file mode 100644 index 0000000000000..7974fcc130397 --- /dev/null +++ b/devops/scripts/benchmarking/compare.py @@ -0,0 +1,73 @@ +import os +import csv +import sys +from pathlib import Path + +import common + +def compare_to_median(test_name: str, median_path: str, test_csv_path: str): + median = dict() # stores actual median of current testcase + with open(median_path, "r") as median_csv: + for stat in csv.DictReader(median_csv): + median[stat["TestCase"]] = { + metric: float(stat[metric]) for metric in common.metrics_variance + } + + # TODO read status codes from a config file instead? 
+ status = 0 + failure_counts = {metric: 0 for metric in common.metrics_variance} + with open(test_csv_path, "r") as sample_csv: + for sample in csv.DictReader(sample_csv): + test_case = sample["TestCase"] + + # Ignore test cases we haven't profiled before + if test_case not in median: + continue + hist_median = median[test_case] + for metric, threshold in common.metrics_variance.items(): + max_tolerated = hist_median[metric] * (1 + threshold) + sample_value = common.sanitize(sample[metric]) + if sample_value > max_tolerated: + print("vvv FAILED vvv") + print(test_case) + print( + f"{metric}: {sample_value} -- Historic avg. {hist_median[metric]} (max tolerance {threshold*100}%: {max_tolerated})" + ) + print("^^^^^^^^^^^^^^") + with open(common.BENCHMARK_SLOW_LOG, "a") as slow_log: + slow_log.write( + f"-- {test_name}::{test_case}\n" + f" {metric}: {sample_value} -- Historic avg. {hist_median[metric]} (max tol. {threshold*100}%: {max_tolerated})\n" + ) + status = 1 + failure_counts[metric] += 1 + if status != 0: + print(f"Failure counts: {failure_counts}") + return status + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print(f"Usage: {sys.argv[0]} ") + exit(1) + # Both benchmark results git repo and benchmark.sh output are structured + # like so: + # /// + # This relative path is sys.argv[1], while the name of the csv file we are + # comparing against is sys.argv[2]. + common.load_configs() + test_name = os.path.basename(sys.argv[1]) + test_csv_path = f"{common.OUTPUT_CACHE}/{sys.argv[1]}/{sys.argv[2]}" + median_path = f"{common.PERF_RES_PATH}/{sys.argv[1]}/{test_name}-median.csv" + + if not os.path.isfile(test_csv_path): + print("Invalid test file provided: " + test_csv_path) + exit(1) + if not os.path.isfile(median_path): + print( + f"Median file for test {test_name} not found at {median_path}.\n" + + "Please calculate the median using the aggregate workflow." + ) + exit(1) + + exit(compare_to_median(test_name, median_path, test_csv_path)) diff --git a/devops/scripts/benchmarking/utils.sh b/devops/scripts/benchmarking/utils.sh new file mode 100644 index 0000000000000..ccff01b572add --- /dev/null +++ b/devops/scripts/benchmarking/utils.sh @@ -0,0 +1,111 @@ +#!/bin/sh + +# +# utils.sh: Utilities for benchmarking scripts +# + +# Usage: _sanitize_configs +_sanitize_configs() { + # Trim quotes if any + trim_quotes="$(printf "%s" "$2" | tr -d "\n" | sed 's/^"//; s/"$//')" + check_illegal_chars="$(printf "%s" "$trim_quotes" | sed 's/[a-zA-Z0-9_.,:/%-]//g')" + + if [ -n "$check_illegal_chars" ]; then + # Throw if unallowed characters are spotted + printf "" + else + # Return the trimmed string + printf "%s" "$trim_quotes" + fi +} + +_preprocess_config() { + # Remove comments + _tmp1="$(mktemp)" + grep '^[^#]' "$1" > "$_tmp1" + # Skip values intended for python + _tmp2="$(mktemp)" + grep -E -v '^METRICS_(VARIANCE|RECORDED)' "$_tmp1" > "$_tmp2" + rm "$_tmp1" + # Return + echo "$_tmp2" +} + +# Sanitize + load all known configuration options +# Usage: load_config_options +load_config_options() { + processed_config="$(_preprocess_config $1)" + # Strict loading of configuration options by name: + while IFS='=' read -r key value; do + sanitized_value=$(_sanitize_configs "$key" "$value") + if [ -z "$sanitized_value" ]; then + echo "Bad configuration value for $key: $value" + echo "Ensure $value is within character range [a-zA-Z0-9_.,:/%-]." 
+ exit 1 + fi + + case "$key" in + 'COMPUTE_BENCH_COMPILE_FLAGS') + export COMPUTE_BENCH_COMPILE_FLAGS="$sanitized_value" ;; + 'COMPUTE_BENCH_ITERATIONS') + export COMPUTE_BENCH_ITERATIONS="$sanitized_value" ;; + 'AVERAGE_THRESHOLD') + export AVERAGE_THRESHOLD="$sanitized_value" ;; + 'AVERAGE_CUTOFF_RANGE') + export AVERAGE_CUTOFF_RANGE="$sanitized_value" ;; + 'DEVICE_SELECTOR_ENABLED_BACKENDS') + export DEVICE_SELECTOR_ENABLED_BACKENDS="$sanitized_value" ;; + 'DEVICE_SELECTOR_ENABLED_DEVICES') + export DEVICE_SELECTOR_ENABLED_DEVICES="$sanitized_value" ;; + esac + done < "$processed_config" +} + +# Sanitize + load all (known) constants from the configuration file +# Usage: load_config_constants +load_config_constants() { + processed_config="$(_preprocess_config $1)" + # Strict loading of configuration options by name: + while IFS='=' read -r key value; do + sanitized_value=$(_sanitize_configs "$key" "$value") + if [ -z "$sanitized_value" ]; then + echo "Bad configuration value for $key: $value" + echo "Ensure $value is within character range [a-zA-Z0-9_.,:/%-]." + exit 1 + fi + + case "$key" in + 'PERF_RES_GIT_REPO') + export PERF_RES_GIT_REPO="$sanitized_value" ;; + 'PERF_RES_BRANCH') + export PERF_RES_BRANCH="$sanitized_value" ;; + 'PERF_RES_PATH') + export PERF_RES_PATH="$sanitized_value" ;; + 'COMPUTE_BENCH_GIT_REPO') + export COMPUTE_BENCH_GIT_REPO="$sanitized_value" ;; + 'COMPUTE_BENCH_BRANCH') + export COMPUTE_BENCH_BRANCH="$sanitized_value" ;; + 'COMPUTE_BENCH_PATH') + export COMPUTE_BENCH_PATH="$sanitized_value" ;; + 'OUTPUT_CACHE') + export OUTPUT_CACHE="$sanitized_value" ;; + 'ARTIFACT_PATH') + export ARTIFACT_PATH="$sanitized_value" ;; + 'PASSING_CACHE') + export PASSING_CACHE="$sanitized_value" ;; + 'TIMESTAMP_FORMAT') + export TIMESTAMP_FORMAT="$sanitized_value" ;; + 'BENCHMARK_SLOW_LOG') + export BENCHMARK_SLOW_LOG="$sanitized_value" ;; + 'BENCHMARK_ERROR_LOG') + export BENCHMARK_ERROR_LOG="$sanitized_value" ;; + esac + done < "$processed_config" +} + +# # Sanitize + load a single configuration value +# # Usage: load_single_config +# load_single_config() { +# _val="$(_sanitize_configs "$(grep "^$2=" "$1" | sed "s/^$2=//")")" +# export "$2=$_val" +# }
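Both the aggregate workflow and the benchmark action consume these helpers the
same way: source utils.sh, then call a loader against benchmark-ci.conf, after
which the sanitized values are available as exported variables. A minimal
sketch of that pattern (assuming it is run from the root of an intel/llvm
checkout; the echo is purely illustrative):

    #!/bin/sh
    # Load the sanitized constants and show where benchmark results would be
    # cloned from and to.
    . "$PWD/devops/scripts/benchmarking/utils.sh"
    load_config_constants "$PWD/devops/benchmarking/benchmark-ci.conf"
    echo "Results repo: $PERF_RES_GIT_REPO ($PERF_RES_BRANCH) -> $PERF_RES_PATH"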