diff --git a/.github/workflows/performance-monitoring.yml b/.github/workflows/performance-monitoring.yml
new file mode 100644
index 00000000..d36c46f6
--- /dev/null
+++ b/.github/workflows/performance-monitoring.yml
@@ -0,0 +1,160 @@
+# Based on Gen private website workflow
+# https://github.com/probcomp/gen-website-private/blob/main/.github/workflows/publish_private_website_example.yml
+name: Performance monitoring
+
+on:
+  push:
+    branches: [main, performance-monitoring]
+  # Disable on other branches for now, because I don't know how to publish artifacts elsewhere.
+  # pull_request:
+  #   branches: [main]
+
+jobs:
+  # Runs the benchmarks, merges the results into the historical data, and
+  # uploads the resulting static website as an artifact.
+  prepare:
+    permissions:
+      id-token: write
+      contents: read
+    runs-on: ubuntu-latest
+    steps:
+      # Thanks to https://stackoverflow.com/a/58035262
+      - name: Determine branch
+        shell: bash
+        run: |
+          branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
+          echo "Running on branch ${branch:?}"
+          echo "branch=${branch:?}" >> "$GITHUB_OUTPUT"
+        id: extract_branch
+
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      # Based on https://github.com/probcomp/gen-website-private/blob/807a8d0a912dd24f4b4bb7da2f8dc44c7227d39c/README.md#authentication-notes
+      - name: Authenticate with Google Cloud
+        uses: 'google-github-actions/auth@v2.1.3'
+        with:
+          project_id: 'probcomp-caliban'
+          workload_identity_provider: 'projects/110275315150/locations/global/workloadIdentityPools/gen-website-private-publishers/providers/github'
+          service_account: 'gen-website-private-admin@probcomp-caliban.iam.gserviceaccount.com'
+          audience: '//iam.googleapis.com/projects/110275315150/locations/global/workloadIdentityPools/gen-website-private-publishers/providers/github'
+
+      - name: Run benchmark
+        run: |
+          python -m venv venv
+          source venv/bin/activate
+          # Refresh the package index first so the install does not use a stale package list.
+          sudo apt-get update
+          sudo apt-get -y install graphviz
+          pip install --upgrade pip "setuptools>=62.4"
+          make benchmark
+
+      - name: Fetch historical data
+        # Does not yet account for running on pull requests, where we might need to initialize the benchmark data
+        # from main
+        env:
+          # Passed through the environment rather than interpolated into the script text.
+          BRANCH: ${{ steps.extract_branch.outputs.branch }}
+        run: |
+          BUCKET_PATH=gs://gen-website-private/gen.dev/genparse-performance-monitoring
+          # The benchmarking action should tolerate a nonexistent data file,
+          # but fetch historical data if available. Sources are tried in order:
+          # this branch's data, main/data.js, then the top-level data.js (which is
+          # where the "Update relevant website data" step places main's data).
+          if [[ "${BRANCH:?}" != main ]] &&
+            gcloud storage cp "${BUCKET_PATH:?}"/"${BRANCH:?}"/data.js data.js; then
+            echo "Fetched historical data for ${BRANCH:?}."
+          else
+            if [[ "${BRANCH:?}" != main ]]; then
+              echo "No historical data found for ${BRANCH:?}, attempting to get main branch data."
+            fi
+            gcloud storage cp "${BUCKET_PATH:?}"/main/data.js data.js ||
+              gcloud storage cp "${BUCKET_PATH:?}"/data.js data.js ||
+              echo "No historical data found, skipping."
+          fi
+
+      - name: Translate from data.js to benchmarks.json
+        # Remove it if sed fails because sed failing indicates that data.js does not exist, hence we will want the
+        # benchmarking action to generate benchmarks.json from scratch.
+        run: sed -e 's/window.BENCHMARK_DATA = //' data.js > benchmarks.json || rm benchmarks.json
+
+      - name: Translate to benchmark-action data file format
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          # For a full list of inputs, see:
+          # https://github.com/benchmark-action/github-action-benchmark/tree/master?tab=readme-ov-file#action-inputs
+          tool: 'pytest'
+          output-file-path: output.json
+          external-data-json-path: benchmarks.json
+          # Unset because I think we don't need a GitHub token except to make comments.
+          # github-token:
+          comment-always: false
+          alert-threshold: '200%'
+          alert-comment-cc-users: '@timvieira,@benlebrun'
+          comment-on-alert: false
+          fail-on-alert: false
+          fail-threshold: '300%'
+          # If we find the chart gets too busy for us, we can set max-items-in-chart
+          # max-items-in-chart: ~
+
+      - name: Translate benchmark data back to data.js
+        run: sed '1s/\(.*\)/window.BENCHMARK_DATA = \1/' benchmarks.json > data.js
+
+      - name: Fetch preexisting data
+        run: |
+          BUCKET_PATH=gs://gen-website-private/gen.dev/genparse-performance-monitoring
+          # for pull requests, use the branch name so the data are visualized at
+          # genparse-performance-monitoring.gen.dev/BRANCH
+          ROOT_WEBSITE_DIR=website
+          mkdir -p "$ROOT_WEBSITE_DIR"
+
+          gcloud storage rsync --recursive "${BUCKET_PATH:?}" "${ROOT_WEBSITE_DIR:?}" ||
+            echo "No preexisting website data, skipping..."
+
+      - name: Update relevant website data
+        env:
+          BRANCH: ${{ steps.extract_branch.outputs.branch }}
+        run: |
+          # for pull requests, use the branch name so the data are visualized at
+          # genparse-performance-monitoring.gen.dev/BRANCH
+          WEBSITE_DIR=website
+          if [[ "${BRANCH:?}" != main ]]; then
+            WEBSITE_DIR="${WEBSITE_DIR:?}"/"${BRANCH:?}"
+          fi
+          echo "website_dir=${WEBSITE_DIR:?}" >> "$GITHUB_OUTPUT"
+
+          echo "Generating website directory ${WEBSITE_DIR:?}"
+          mkdir -p "${WEBSITE_DIR:?}"
+          # Delete any existing files for this branch. Only regular files are
+          # removed: on main, WEBSITE_DIR is the site root and may also contain
+          # other branches' subdirectories, on which `rm -f DIR/*` would fail.
+          find "${WEBSITE_DIR:?}" -maxdepth 1 -type f -delete
+          cp .github/workflows/performance-monitoring/index.html "${WEBSITE_DIR:?}"/index.html
+          cp data.js "${WEBSITE_DIR:?}"/data.js
+        id: update_website_data
+
+      - name: Add timestamp and repo URL to index.html
+        run: |
+          WEBSITE_DIR="${{ steps.update_website_data.outputs.website_dir }}"
+          echo "Generating website directory ${WEBSITE_DIR:?}"
+          sed -i -e 's/\$TIMESTAMP/'"$(date -Iseconds)"'/' "${WEBSITE_DIR:?}"/index.html
+          sed -i -e 's/\$REPO/'"${{ github.event.repository.name }}"'/' "${WEBSITE_DIR:?}"/index.html
+
+      - name: Create website artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: perfmon-website
+          path: ./website
+
+  # This should update data.js in the appropriate bucket location too.
+  publish:
+    permissions:
+      id-token: write
+      contents: read
+    needs: prepare
+    uses: probcomp/gen-website-private/.github/workflows/publish_private_website.yml@main
+    with:
+      artifact: perfmon-website
+      parent_domain: gen.dev
+      subdomain: genparse-performance-monitoring
diff --git a/.github/workflows/performance-monitoring/index.html b/.github/workflows/performance-monitoring/index.html
new file mode 100644
index 00000000..13226795
--- /dev/null
+++ b/.github/workflows/performance-monitoring/index.html
@@ -0,0 +1,283 @@
+ + + + + + + + Benchmarks + + + + +
+ + + + + + + +`;
diff --git a/Makefile b/Makefile
index 64a6d0d6..86d6900b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,10 +2,12 @@ SHELL := /usr/bin/env bash
 EXEC = python=3.10
 NAME = genparse
 TEST = tests
+PERF_TEST = perf_tests
 RUN = python -m
 INSTALL = $(RUN) pip install
 SRC_FILES := $(shell find $(NAME) -name '*.py')
 TEST_FILES := $(shell find $(TEST) -name '*.py')
+PERF_TEST_FILES := $(shell find $(PERF_TEST) -name '*.py')
 .DEFAULT_GOAL := help

 ## help : print available commands.
@@ -100,3 +102,8 @@ html/coverage/index.html : html/pytest/report.html
 	@coverage html -d $(@D)
 html/pytest/report.html : $(SRC_FILES) $(TEST_FILES)
 	@coverage run --branch -m pytest --html=$@ --self-contained-html $(SRC_FILES) $(TEST_FILES)
+## benchmark : run the performance benchmarks and write output.json.
+benchmark : env output.json
+output.json : $(SRC_FILES) $(PERF_TEST_FILES)
+	@pytest $(PERF_TEST_FILES) --benchmark-json $@
+
diff --git a/perf_tests/test_tiny_example.py b/perf_tests/test_tiny_example.py
new file mode 100644
index 00000000..00b3a4ee
--- /dev/null
+++ b/perf_tests/test_tiny_example.py
@@ -0,0 +1,68 @@
+import gc
+
+import torch
+import pytest
+
+from genparse import InferenceSetup
+
+
+def get_inference_setup():
+    """Build an ``InferenceSetup`` for gpt2 over a tiny two-outcome grammar."""
+    grammar = """
+    start: "Sequential Monte Carlo is " ( "good" | "bad" )
+    """
+    return InferenceSetup('gpt2', grammar, proposal_name='character')
+
+
+# Reproduce the free_vllm_memory logic here so that we can run this benchmark with GPU on old
+# commits for benchmark prototyping purposes.
+def cleanup(inference_setup):
+    """Tear down vLLM state held by ``inference_setup``; does nothing if vLLM is not installed."""
+    try:
+        from vllm.distributed.parallel_state import (
+            destroy_model_parallel,
+            destroy_distributed_environment,
+        )
+    except ImportError:
+        # Nothing to release without vLLM. Only the import is guarded so that an
+        # ImportError raised during the teardown itself is not silently swallowed.
+        return
+
+    destroy_model_parallel()
+    destroy_distributed_environment()
+
+    try:
+        del inference_setup.llm.llm_engine.model_executor
+    except AttributeError:
+        # Some attribute in the chain is missing, so there is nothing to delete.
+        pass
+    gc.collect()
+    torch.cuda.empty_cache()
+
+
+def get_and_clean_up_inference_setup():
+    """Benchmark target: construct an inference setup, then release it."""
+    setup = get_inference_setup()
+    cleanup(setup)
+
+
+def do_inference(inference_setup_):
+    """Benchmark target: call ``inference_setup_`` on ' ' with 5 particles."""
+    return inference_setup_(' ', n_particles=5, verbosity=1)
+
+
+@pytest.mark.benchmark()
+def test_tiny_example_setup(benchmark):
+    """Benchmark constructing and tearing down the inference setup."""
+    benchmark(get_and_clean_up_inference_setup)
+
+
+@pytest.mark.benchmark()
+def test_tiny_example_inference(benchmark):
+    """Benchmark inference on a setup that is built once, outside the benchmarked call."""
+    inference_setup = get_inference_setup()
+    try:
+        benchmark(do_inference, inference_setup)
+    finally:
+        # Release the model even when the benchmark raises.
+        cleanup(inference_setup)
diff --git a/setup.py b/setup.py
index c087a3ef..9cc77ea2 100644
--- a/setup.py
+++ b/setup.py
@@ -32,6 +32,7 @@
         'pre-commit',
         'pytest',
         'pytest-html',
+        'pytest-benchmark',
         'ruff',
     ]