diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml new file mode 100644 index 00000000..ffac4747 --- /dev/null +++ b/.github/workflows/benchmarks.yaml @@ -0,0 +1,63 @@ +name: Benchmarks + +on: + push: + branches: [ main ] + pull_request: + paths: + - src/torchcodec/* + - benchmarks/* + - .github/workflows/benchmarks.yaml + +defaults: + run: + shell: bash -l -eo pipefail {0} + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + steps: + - name: Check out repo + uses: actions/checkout@v3 + + - name: Setup conda env + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + miniconda-version: "latest" + activate-environment: test + python-version: '3.12' + + - name: Update pip + run: python -m pip install --upgrade pip + + - name: Install FFmpeg, PyTorch and TorchAudio + run: | + conda install "ffmpeg=4" pkg-config -c conda-forge + conda install pytorch torchvision torchaudio cpuonly -c pytorch-nightly + #python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu + ffmpeg -version + + #- name: Download, build and install TorchVision + # run: | + # git clone https://github.com/pytorch/vision.git + # cd vision + # python setup.py install + + - name: Install all other dependencies + run: | + python -m pip install decord matplotlib pandas numpy + + - name: Build and install torchcodec + run: | + python -m pip install -e ".[dev]" --no-build-isolation -vvv + + - name: Test generic decoder benchmark + run: | + python benchmarks/decoders/benchmark_decoders.py --bm_video_speed_min_run_seconds 1 + + - name: TEST README data generation benchmark + run: | + python benchmarks/decoders/generate_readme_data.py --test_run diff --git a/benchmarks/decoders/benchmark_decoders.py b/benchmarks/decoders/benchmark_decoders.py index 23f45dab..bda3d63d 100644 --- a/benchmarks/decoders/benchmark_decoders.py +++ b/benchmarks/decoders/benchmark_decoders.py @@ -13,7 
+13,7 @@ from benchmark_decoders_library import ( AbstractDecoder, - BatchParameters, + # BatchParameters, DecordAccurate, DecordAccurateBatch, plot_data, @@ -174,7 +174,7 @@ def main() -> None: num_sequential_frames_from_start=[1, 10, 100], min_runtime_seconds=args.bm_video_speed_min_run_seconds, benchmark_video_creation=args.bm_video_creation, - batch_parameters=BatchParameters(num_threads=8, batch_size=40), + batch_parameters=None, # BatchParameters(num_threads=8, batch_size=40), ) plot_data(df_data, args.plot_path) diff --git a/benchmarks/decoders/generate_readme_data.py b/benchmarks/decoders/generate_readme_data.py index 277cd375..f8e24538 100644 --- a/benchmarks/decoders/generate_readme_data.py +++ b/benchmarks/decoders/generate_readme_data.py @@ -4,6 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import argparse import json import os import platform @@ -26,18 +27,48 @@ def main() -> None: """Benchmarks the performance of a few video decoders on synthetic videos""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--test_run", + help="Test run only; use small values for experiments to ensure everything works. Does not overwrite the data file.", + action="store_true", + ) + args = parser.parse_args() + + # The logic is clearer internally if we invert the boolean. However, we want to + # maintain the external default that a test run is off by default. + data_generation_run = not args.test_run + + if data_generation_run: + resolutions = ["1280x720"] + encodings = ["libx264"] + patterns = ["mandelbrot"] + fpses = [60] + gop_sizes = [600] + durations = [120] + pix_fmts = ["yuv420p"] + ffmpeg_path = "ffmpeg" + min_runtime_seconds = 30 + + # These are the number of uniform seeks we do in the seek+decode benchmark. 
+ num_samples = 10 + else: + resolutions = ["640x480"] + encodings = ["libx264"] + patterns = ["mandelbrot"] + fpses = [30] + gop_sizes = [20] + durations = [10] # if this goes too low, we hit EOF errors in some decoders + pix_fmts = ["yuv420p"] + ffmpeg_path = "ffmpeg" + min_runtime_seconds = 1 + + num_samples = 4 + videos_dir_path = "/tmp/torchcodec_benchmarking_videos" shutil.rmtree(videos_dir_path, ignore_errors=True) os.makedirs(videos_dir_path) - resolutions = ["1280x720"] - encodings = ["libx264"] - patterns = ["mandelbrot"] - fpses = [60] - gop_sizes = [600] - durations = [120] - pix_fmts = ["yuv420p"] - ffmpeg_path = "ffmpeg" generate_videos( resolutions, encodings, @@ -61,15 +92,13 @@ def main() -> None: decoder_dict["TorchAudio"] = TorchAudioDecoder() decoder_dict["Decord"] = DecordAccurateBatch() - # These are the number of uniform seeks we do in the seek+decode benchmark. - num_samples = 10 video_files_paths = list(Path(videos_dir_path).glob("*.mp4")) df_data = run_benchmarks( decoder_dict, video_files_paths, num_samples, num_sequential_frames_from_start=[100], - min_runtime_seconds=30, + min_runtime_seconds=min_runtime_seconds, benchmark_video_creation=False, ) df_data.append( @@ -82,9 +111,10 @@ def main() -> None: } ) - data_json = Path(__file__).parent / "benchmark_readme_data.json" - with open(data_json, "w") as write_file: - json.dump(df_data, write_file, sort_keys=True, indent=4) + if data_generation_run: + data_json = Path(__file__).parent / "benchmark_readme_data.json" + with open(data_json, "w") as write_file: + json.dump(df_data, write_file, sort_keys=True, indent=4) if __name__ == "__main__":