diff --git a/.github/workflows/performance-monitoring.yml b/.github/workflows/performance-monitoring.yml
new file mode 100644
index 00000000..d36c46f6
--- /dev/null
+++ b/.github/workflows/performance-monitoring.yml
@@ -0,0 +1,142 @@
+# Based on Gen private website workflow
+# https://github.com/probcomp/gen-website-private/blob/main/.github/workflows/publish_private_website_example.yml
+name: Performance monitoring
+
+on:
+  push:
+    branches:          [main, performance-monitoring]
+  # Disable on other branches for now, because I don't know how to publish artifacts elsewhere.
+  # pull_request:
+  #   branches:          [main]
+
+jobs:
+  prepare:
+    permissions:
+      id-token: write
+      contents: read
+    runs-on: ubuntu-latest
+    steps:
+      # Thanks to https://stackoverflow.com/a/58035262
+      - name: Determine branch
+        shell: bash
+        run: |
+          branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
+          echo "Running on branch ${branch:?}"
+          echo "branch=${branch:?}" >> $GITHUB_OUTPUT
+        id: extract_branch
+
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      # Based on https://github.com/probcomp/gen-website-private/blob/807a8d0a912dd24f4b4bb7da2f8dc44c7227d39c/README.md#authentication-notes
+      - name: Authenticate with Google Cloud
+        uses: 'google-github-actions/auth@v2.1.3'
+        with:
+          project_id: 'probcomp-caliban'
+          workload_identity_provider: 'projects/110275315150/locations/global/workloadIdentityPools/gen-website-private-publishers/providers/github'
+          service_account: 'gen-website-private-admin@probcomp-caliban.iam.gserviceaccount.com'
+          audience: '//iam.googleapis.com/projects/110275315150/locations/global/workloadIdentityPools/gen-website-private-publishers/providers/github'
+
+      - name: Run benchmark
+        run: |
+          python -m venv venv && source venv/bin/activate
+          # Refresh the package index first; the runner image's cached index can point at superseded packages.
+          sudo apt-get update && sudo apt-get -y install graphviz
+          pip install --upgrade pip "setuptools>=62.4"
+          make benchmark
+
+      - name: Fetch historical data
+        # Does not yet account for pull requests, where we might need to initialize the benchmark data from main.
+        env:
+          BRANCH: ${{ steps.extract_branch.outputs.branch }}
+          BUCKET_PATH: 'gs://gen-website-private/gen.dev/genparse-performance-monitoring'
+        run: |
+          # The benchmarking action should tolerate a nonexistent data file, but fetch historical data if
+          # available: this branch's own data first, then main's, then the top-level data.js.
+          if [[ "${BRANCH:?}" == main ]] || ! gcloud storage cp "${BUCKET_PATH:?}/${BRANCH:?}/data.js" data.js; then
+            [[ "${BRANCH:?}" == main ]] || echo "No historical data found for ${BRANCH:?}, attempting to get main branch data."
+            gcloud storage cp "${BUCKET_PATH:?}"/main/data.js data.js ||
+              gcloud storage cp "${BUCKET_PATH:?}"/data.js data.js ||
+              echo "No historical data found, skipping."
+          fi
+
+      - name: Translate from data.js to benchmarks.json
+        # Remove it if sed fails because sed failing indicates that data.js does not exist, hence we will want the
+        # benchmarking action to generate benchmarks.json from scratch.
+        run: sed -e 's/window.BENCHMARK_DATA = //' data.js > benchmarks.json || rm benchmarks.json
+
+      - name: Translate to benchmark-action data file format
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          # For a full list of inputs, see:
+          # https://github.com/benchmark-action/github-action-benchmark/tree/master?tab=readme-ov-file#action-inputs
+          tool: 'pytest'
+          output-file-path: output.json
+          external-data-json-path: benchmarks.json
+          # Unset because I think we don't need a GitHub token except to make comments.
+          # github-token:
+          comment-always: false
+          alert-threshold: '200%'
+          alert-comment-cc-users: '@timvieira,@benlebrun'
+          comment-on-alert: false
+          fail-on-alert: false
+          fail-threshold: '300%'
+          # If we find the chart gets too busy for us, we can set max-items-in-chart
+          # max-items-in-chart: ~
+
+      - name: Translate benchmark data back to data.js
+        run: sed '1s/\(.*\)/window.BENCHMARK_DATA = \1/' benchmarks.json > data.js
+
+      - name: Fetch preexisting data
+        run: |
+          BUCKET_PATH=gs://gen-website-private/gen.dev/genparse-performance-monitoring
+          # for pull requests, use the branch name so the data are visualized at
+          # genparse-performance-monitoring.gen.dev/BRANCH
+          ROOT_WEBSITE_DIR=website
+          mkdir -p "$ROOT_WEBSITE_DIR"
+          
+          gcloud storage rsync --recursive "${BUCKET_PATH:?}" "${ROOT_WEBSITE_DIR:?}" ||
+            echo "No preexisting website data, skipping..."
+
+      - name: Update relevant website data
+        run: |
+          # for non-main branches, use the branch name so the data are visualized at
+          # genparse-performance-monitoring.gen.dev/BRANCH
+          WEBSITE_DIR=website
+          if [[ "${{ steps.extract_branch.outputs.branch }}" != main ]]; then
+            WEBSITE_DIR="${WEBSITE_DIR:?}"/"${{ steps.extract_branch.outputs.branch }}"
+          fi
+          echo "website_dir=${WEBSITE_DIR:?}" >> "$GITHUB_OUTPUT"
+          echo "Generating website directory ${WEBSITE_DIR:?}"
+          mkdir -p "${WEBSITE_DIR:?}"
+          # Delete only this branch's own files; plain `rm -f DIR/*` exits nonzero on other branches' subdirectories.
+          find "${WEBSITE_DIR:?}" -maxdepth 1 -type f -delete
+          cp .github/workflows/performance-monitoring/index.html "${WEBSITE_DIR:?}"/index.html
+          cp data.js "${WEBSITE_DIR:?}"/data.js
+        id: update_website_data
+
+      - name: Add timestamp and repo URL to index.html
+        run: |
+          WEBSITE_DIR="${{ steps.update_website_data.outputs.website_dir }}"
+          echo "Generating website directory ${WEBSITE_DIR:?}"
+          sed -i -e 's/\$TIMESTAMP/'"$(date -Iseconds)"'/' "${WEBSITE_DIR:?}"/index.html
+          sed -i -e 's/\$REPO/'"${{ github.event.repository.name }}"'/' "${WEBSITE_DIR:?}"/index.html
+
+      - name: Create website artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: perfmon-website
+          path: ./website
+
+  # This should also update data.js in the appropriate bucket location, since data.js is part of the artifact.
+  publish:
+    permissions:
+      id-token: write
+      contents: read
+    needs: prepare
+    uses: probcomp/gen-website-private/.github/workflows/publish_private_website.yml@main
+    with:
+      artifact: perfmon-website
+      parent_domain: gen.dev
+      subdomain: genparse-performance-monitoring
diff --git a/.github/workflows/performance-monitoring/index.html b/.github/workflows/performance-monitoring/index.html
new file mode 100644
index 00000000..13226795
--- /dev/null
+++ b/.github/workflows/performance-monitoring/index.html
@@ -0,0 +1,283 @@
+<!DOCTYPE html>
+<!-- Based on https://github.com/benchmark-action/github-action-benchmark/blob/6bae118c112083251560ad8b3a1ff2e43aa23351/src/default_index_html.ts -->
+<html>
+<head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, minimum-scale=1.0, initial-scale=1, user-scalable=yes" />
+    <style>
+        html {
+          font-family: BlinkMacSystemFont,-apple-system,"Segoe UI",Roboto,Oxygen,Ubuntu,Cantarell,"Fira Sans","Droid Sans","Helvetica Neue",Helvetica,Arial,sans-serif;
+          -webkit-font-smoothing: antialiased;
+          background-color: #fff;
+          font-size: 16px;
+        }
+        body {
+          color: #4a4a4a;
+          margin: 8px;
+          font-size: 1em;
+          font-weight: 400;
+        }
+        header {
+          margin-bottom: 8px;
+          display: flex;
+          flex-direction: column;
+        }
+        main {
+          width: 100%;
+          display: flex;
+          flex-direction: column;
+        }
+        a {
+          color: #3273dc;
+          cursor: pointer;
+          text-decoration: none;
+        }
+        a:hover {
+          color: #000;
+        }
+        button {
+          color: #fff;
+          background-color: #3298dc;
+          border-color: transparent;
+          cursor: pointer;
+          text-align: center;
+        }
+        button:hover {
+          background-color: #2793da;
+          flex: none;
+        }
+        .spacer {
+          flex: auto;
+        }
+        .small {
+          font-size: 0.75rem;
+        }
+        footer {
+          margin-top: 16px;
+          display: flex;
+          align-items: center;
+        }
+        .header-label {
+          margin-right: 4px;
+        }
+        .benchmark-set {
+          margin: 8px 0;
+          width: 100%;
+          display: flex;
+          flex-direction: column;
+        }
+        .benchmark-title {
+          font-size: 3rem;
+          font-weight: 600;
+          word-break: break-word;
+          text-align: center;
+        }
+        .benchmark-graphs {
+          display: flex;
+          flex-direction: row;
+          justify-content: space-around;
+          align-items: center;
+          flex-wrap: wrap;
+          width: 100%;
+        }
+        .benchmark-chart {
+          max-width: 1000px;
+        }
+    </style>
+    <title>Benchmarks</title>
+</head>
+
+<body>
+<header id="header">
+    <div class="header-item">
+        <strong class="header-label">Last Update:</strong>
+        <span id="last-update">$TIMESTAMP</span>
+    </div>
+    <div class="header-item">
+        <strong class="header-label">Repository:</strong>
+        <a id="repository-link" rel="noopener">$REPO</a>
+    </div>
+</header>
+<main id="main"></main>
+<footer>
+    <button id="dl-button">Download data as JSON</button>
+    <div class="spacer"></div>
+    <div class="small">Powered by <a rel="noopener" href="https://github.com/marketplace/actions/continuous-benchmark">github-action-benchmark</a></div>
+</footer>
+
+<script src="https://cdn.jsdelivr.net/npm/chart.js@2.9.2/dist/Chart.min.js"></script>
+<script src="data.js"></script>
+<script id="main-script">
+    'use strict';
+    (function() {
+      // Colors from https://github.com/github/linguist/blob/master/lib/linguist/languages.yml
+      const toolColors = {
+        cargo: '#dea584',
+        go: '#00add8',
+        benchmarkjs: '#f1e05a',
+        benchmarkluau: '#000080',
+        pytest: '#3572a5',
+        googlecpp: '#f34b7d',
+        catch2: '#f34b7d',
+        julia: '#a270ba',
+        jmh: '#b07219',
+        benchmarkdotnet: '#178600',
+        customBiggerIsBetter: '#38ff38',
+        customSmallerIsBetter: '#ff3838',
+        _: '#333333'
+      };
+
+      function init() {
+        function collectBenchesPerTestCase(entries) {
+          const map = new Map();
+          for (const entry of entries) {
+            const {commit, date, tool, benches} = entry;
+            for (const bench of benches) {
+              const result = { commit, date, tool, bench };
+              const arr = map.get(bench.name);
+              if (arr === undefined) {
+                map.set(bench.name, [result]);
+              } else {
+                arr.push(result);
+              }
+            }
+          }
+          return map;
+        }
+
+        const data = window.BENCHMARK_DATA;
+
+        // Render header
+        document.getElementById('last-update').textContent = new Date(data.lastUpdate).toString();
+        const repoLink = document.getElementById('repository-link');
+        repoLink.href = data.repoUrl;
+        repoLink.textContent = data.repoUrl;
+
+        // Render footer
+        document.getElementById('dl-button').onclick = () => {
+          const dataUrl = 'data:,' + JSON.stringify(data, null, 2);
+          const a = document.createElement('a');
+          a.href = dataUrl;
+          a.download = 'benchmark_data.json';
+          a.click();
+        };
+
+        // Prepare data points for charts
+        return Object.keys(data.entries).map(name => ({
+          name,
+          dataSet: collectBenchesPerTestCase(data.entries[name]),
+        }));
+      }
+
+      function renderAllChars(dataSets) {
+
+        function renderGraph(parent, name, dataset) {
+          const canvas = document.createElement('canvas');
+          canvas.className = 'benchmark-chart';
+          parent.appendChild(canvas);
+
+          const color = toolColors[dataset.length > 0 ? dataset[0].tool : '_'];
+          const data = {
+            labels: dataset.map(d => d.commit.id.slice(0, 7)),
+            datasets: [
+              {
+                label: name,
+                data: dataset.map(d => d.bench.value),
+                borderColor: color,
+                backgroundColor: color + '60', // Add alpha for #rrggbbaa
+              }
+            ],
+          };
+          const options = {
+            scales: {
+              xAxes: [
+                {
+                  scaleLabel: {
+                    display: true,
+                    labelString: 'commit',
+                  },
+                }
+              ],
+              yAxes: [
+                {
+                  scaleLabel: {
+                    display: true,
+                    labelString: dataset.length > 0 ? dataset[0].bench.unit : '',
+                  },
+                  ticks: {
+                    beginAtZero: true,
+                  }
+                }
+              ],
+            },
+            tooltips: {
+              callbacks: {
+                afterTitle: items => {
+                  const {index} = items[0];
+                  const data = dataset[index];
+                  return '\n' + data.commit.message + '\n\n' + data.commit.timestamp + ' committed by @' + data.commit.committer.username + '\n';
+                },
+                label: item => {
+                  let label = item.value;
+                  const { range, unit } = dataset[item.index].bench;
+                  label += ' ' + unit;
+                  if (range) {
+                    label += ' (' + range + ')';
+                  }
+                  return label;
+                },
+                afterLabel: item => {
+                  const { extra } = dataset[item.index].bench;
+                  return extra ? '\n' + extra : '';
+                }
+              }
+            },
+            onClick: (_mouseEvent, activeElems) => {
+              if (activeElems.length === 0) {
+                return;
+              }
+              // XXX: Undocumented. How can we know the index?
+              const index = activeElems[0]._index;
+              const url = dataset[index].commit.url;
+              window.open(url, '_blank');
+            },
+          };
+
+          new Chart(canvas, {
+            type: 'line',
+            data,
+            options,
+          });
+        }
+
+        function renderBenchSet(name, benchSet, main) {
+          const setElem = document.createElement('div');
+          setElem.className = 'benchmark-set';
+          main.appendChild(setElem);
+
+          const nameElem = document.createElement('h1');
+          nameElem.className = 'benchmark-title';
+          nameElem.textContent = name;
+          setElem.appendChild(nameElem);
+
+          const graphsElem = document.createElement('div');
+          graphsElem.className = 'benchmark-graphs';
+          setElem.appendChild(graphsElem);
+
+          for (const [benchName, benches] of benchSet.entries()) {
+            renderGraph(graphsElem, benchName, benches)
+          }
+        }
+
+        const main = document.getElementById('main');
+        for (const {name, dataSet} of dataSets) {
+          renderBenchSet(name, dataSet, main);
+        }
+      }
+
+      renderAllChars(init()); // Start
+    })();
+</script>
+</body>
+</html>
+
diff --git a/Makefile b/Makefile
index 64a6d0d6..86d6900b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,10 +2,12 @@ SHELL := /usr/bin/env bash
 EXEC = python=3.10
 NAME = genparse
 TEST = tests
+PERF_TEST = perf_tests
 RUN = python -m
 INSTALL = $(RUN) pip install
 SRC_FILES := $(shell find $(NAME) -name '*.py')
 TEST_FILES := $(shell find $(TEST) -name '*.py')
+PERF_TEST_FILES := $(shell find $(PERF_TEST) -name '*.py')
 .DEFAULT_GOAL := help
 
 ## help      : print available commands.
@@ -100,3 +102,7 @@ html/coverage/index.html : html/pytest/report.html
 	@coverage html -d $(@D)
 html/pytest/report.html : $(SRC_FILES) $(TEST_FILES)
 	@coverage run --branch -m pytest --html=$@ --self-contained-html $(SRC_FILES) $(TEST_FILES)
+# The rule target is the file pytest-benchmark actually writes (the CI workflow reads output.json).
+benchmark : env output.json
+output.json : $(SRC_FILES) $(PERF_TEST_FILES)
+	@pytest $(PERF_TEST_FILES) --benchmark-json $@
diff --git a/perf_tests/test_tiny_example.py b/perf_tests/test_tiny_example.py
new file mode 100644
index 00000000..00b3a4ee
--- /dev/null
+++ b/perf_tests/test_tiny_example.py
@@ -0,0 +1,56 @@
+import gc
+
+import torch
+import pytest
+
+from genparse import InferenceSetup
+
+
+def get_inference_setup():
+    grammar = """
+    start: "Sequential Monte Carlo is " ( "good" | "bad" )
+    """
+    return InferenceSetup('gpt2', grammar, proposal_name='character')
+
+
+# Reproduce the free_vllm_memory logic here so that we can run this benchmark with GPU on old
+# commits for benchmark prototyping purposes.
+def cleanup(inference_setup):
+    try:
+        from vllm.distributed.parallel_state import (
+            destroy_model_parallel,
+            destroy_distributed_environment,
+        )
+
+        destroy_model_parallel()
+        destroy_distributed_environment()
+
+        try:
+            del inference_setup.llm.llm_engine.model_executor
+        except AttributeError:
+            pass
+        gc.collect()
+        torch.cuda.empty_cache()
+    except ImportError:
+        pass
+
+
+def get_and_clean_up_inference_setup():
+    setup = get_inference_setup()
+    cleanup(setup)
+
+
+def do_inference(inference_setup_):
+    return inference_setup_(' ', n_particles=5, verbosity=1)
+
+
+@pytest.mark.benchmark()
+def test_tiny_example_setup(benchmark):
+    benchmark(get_and_clean_up_inference_setup)
+
+
+@pytest.mark.benchmark()
+def test_tiny_example_inference(benchmark):
+    inference_setup = get_inference_setup()
+    benchmark(do_inference, inference_setup)
+    cleanup(inference_setup)
diff --git a/setup.py b/setup.py
index c087a3ef..9cc77ea2 100644
--- a/setup.py
+++ b/setup.py
@@ -32,6 +32,7 @@
     'pre-commit',
     'pytest',
     'pytest-html',
+    'pytest-benchmark',
     'ruff',
 ]