diff --git a/.clang-tidy b/.clang-tidy
index 04689c330..9b3f844c9 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -64,4 +64,6 @@ CheckOptions:
value: '1'
- key: readability-magic-numbers.IgnorePowersOf2IntegerValues
value: '1'
+ - key: cppcoreguidelines-avoid-do-while.IgnoreMacros
+ value: 'true'
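+ # With IgnoreMacros enabled, do-while loops that come from macro expansions
+ # (e.g. the common `#define F(x) do { ... } while (0)` idiom) are not flagged.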
...
diff --git a/.clangd b/.clangd
new file mode 100644
index 000000000..7c4fe036d
--- /dev/null
+++ b/.clangd
@@ -0,0 +1,65 @@
+# https://clangd.llvm.org/config
+
+# Apply a config conditionally to all C files
+If:
+ PathMatch: .*\.(c|h)$
+
+---
+
+# Apply a config conditionally to all C++ files
+If:
+ PathMatch: .*\.(c|h)pp
+
+---
+
+# Apply a config conditionally to all CUDA files
+If:
+ PathMatch: .*\.cuh?
+CompileFlags:
+ Add:
+ - "-x"
+ - "cuda"
+ # No error on unknown CUDA versions
+ - "-Wno-unknown-cuda-version"
+ # Allow variadic CUDA functions
+ - "-Xclang=-fcuda-allow-variadic-functions"
+Diagnostics:
+ Suppress:
+ - "variadic_device_fn"
+ - "attributes_not_allowed"
+
+---
+
+# Tweak the clangd parse settings for all files
+CompileFlags:
+ Add:
+ # report all errors
+ - "-ferror-limit=0"
+ - "-fmacro-backtrace-limit=0"
+ - "-ftemplate-backtrace-limit=0"
+ # Skip the CUDA version check
+ - "--no-cuda-version-check"
+ Remove:
+ # remove gcc's -fcoroutines
+ - -fcoroutines
+ # remove nvc++ flags unknown to clang
+ - "-gpu=*"
+ - "-stdpar*"
+ # remove nvcc flags unknown to clang
+ - "-arch*"
+ - "-gencode*"
+ - "--generate-code*"
+ - "-ccbin*"
+ - "-t=*"
+ - "--threads*"
+ - "-Xptxas*"
+ - "-Xcudafe*"
+ - "-Xfatbin*"
+ - "-Xcompiler*"
+ - "--diag-suppress*"
+ - "--diag_suppress*"
+ - "--compiler-options*"
+ - "--expt-extended-lambda"
+ - "--expt-relaxed-constexpr"
+ - "-forward-unknown-to-host-compiler"
+ - "-Werror=cross-execution-space-call"
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 000000000..9d35e3f97
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,30 @@
+# syntax=docker/dockerfile:1.5
+
+ARG BASE
+ARG PYTHON_PACKAGE_MANAGER=conda
+
+FROM ${BASE} as pip-base
+
+ENV DEFAULT_VIRTUAL_ENV=rapids
+
+FROM ${BASE} as conda-base
+
+ENV DEFAULT_CONDA_ENV=rapids
+
+FROM ${PYTHON_PACKAGE_MANAGER}-base
+
+ARG CUDA
+ENV CUDAARCHS="RAPIDS"
+ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}"
+
+ARG PYTHON_PACKAGE_MANAGER
+ENV PYTHON_PACKAGE_MANAGER="${PYTHON_PACKAGE_MANAGER}"
+
+ENV PYTHONSAFEPATH="1"
+ENV PYTHONUNBUFFERED="1"
+ENV PYTHONDONTWRITEBYTECODE="1"
+
+ENV SCCACHE_REGION="us-east-2"
+ENV SCCACHE_BUCKET="rapids-sccache-devs"
+ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
+ENV HISTFILE="/home/coder/.cache/._bash_history"
diff --git a/.devcontainer/README.md b/.devcontainer/README.md
new file mode 100644
index 000000000..31ddab9aa
--- /dev/null
+++ b/.devcontainer/README.md
@@ -0,0 +1,35 @@
+# RMM Development Containers
+
+This directory contains [devcontainer configurations](https://containers.dev/implementors/json_reference/) for using VSCode to [develop in a container](https://code.visualstudio.com/docs/devcontainers/containers) via the `Remote Containers` [extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [GitHub Codespaces](https://github.com/codespaces).
+
+This container is a turnkey development environment for building and testing the RMM C++ and Python libraries.
+
+## Table of Contents
+
+* [Prerequisites](#prerequisites)
+* [Host bind mounts](#host-bind-mounts)
+* [Launch a Dev Container](#launch-a-dev-container)
+
+## Prerequisites
+
+* [VSCode](https://code.visualstudio.com/download)
+* [VSCode Remote Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
+
+## Host bind mounts
+
+By default, the following directories are bind-mounted into the devcontainer:
+
+* `${repo}:/home/coder/rmm`
+* `${repo}/../.aws:/home/coder/.aws`
+* `${repo}/../.local:/home/coder/.local`
+* `${repo}/../.cache:/home/coder/.cache`
+* `${repo}/../.conda:/home/coder/.conda`
+* `${repo}/../.config:/home/coder/.config`
+
+This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs.
+
+## Launch a Dev Container
+
+To launch a devcontainer from VSCode, open the RMM repo and select the "Reopen in Container" button in the bottom right:
+
+Alternatively, open the VSCode command palette (typically `cmd/ctrl + shift + P`) and run the "Rebuild and Reopen in Container" command.
diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
new file mode 100644
index 000000000..ba3555a26
--- /dev/null
+++ b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda11.8-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda11.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
new file mode 100644
index 000000000..a5b4290e9
--- /dev/null
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -0,0 +1,36 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json
new file mode 100644
index 000000000..03282635e
--- /dev/null
+++ b/.devcontainer/cuda12.0-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.0-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json
new file mode 100644
index 000000000..8dc607f28
--- /dev/null
+++ b/.devcontainer/cuda12.0-pip/devcontainer.json
@@ -0,0 +1,36 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda12.0-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
new file mode 100644
index 000000000..895ba83ee
--- /dev/null
+++ b/.github/copy-pr-bot.yaml
@@ -0,0 +1,4 @@
+# Configuration file for `copy-pr-bot` GitHub App
+# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
+
+enabled: true
diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml
index 57bdd6110..9a0b41550 100644
--- a/.github/ops-bot.yaml
+++ b/.github/ops-bot.yaml
@@ -5,6 +5,4 @@ auto_merger: true
branch_checker: true
label_checker: true
release_drafter: true
-copy_prs: true
-rerun_tests: true
recently_updated: true
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 67af4dafc..d80bf1d41 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
jobs:
cpp-build:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
python-build:
needs: [cpp-build]
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
upload-conda:
needs: [cpp-build, python-build]
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -56,7 +56,7 @@ jobs:
if: github.ref_type == 'branch'
needs: python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -64,11 +64,11 @@ jobs:
date: ${{ inputs.date }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
- container_image: "rapidsai/ci:latest"
+ container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
wheel-publish:
needs: wheel-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index f5b7e3bf5..95ac72d59 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -20,58 +20,67 @@ jobs:
- docs-build
- wheel-build
- wheel-tests
+ - devcontainer
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.10
checks:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.10
with:
enable_check_generated_files: false
conda-cpp-build:
needs: checks
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10
with:
build_type: pull-request
conda-cpp-tests:
needs: conda-cpp-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10
with:
build_type: pull-request
conda-python-build:
needs: conda-cpp-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10
with:
build_type: pull-request
conda-python-tests:
needs: conda-python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10
with:
build_type: pull-request
docs-build:
needs: conda-python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
- container_image: "rapidsai/ci:latest"
+ container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build:
needs: checks
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10
with:
build_type: pull-request
script: ci/build_wheel.sh
wheel-tests:
needs: wheel-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10
with:
build_type: pull-request
script: ci/test_wheel.sh
+ devcontainer:
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.10
+ with:
+ build_command: |
+ sccache -z;
+ build-all -DBUILD_BENCHMARKS=ON --verbose;
+ sccache -s;
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 9a26dac4a..81f1b791b 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
jobs:
cpp-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
sha: ${{ inputs.sha }}
python-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -32,7 +32,7 @@ jobs:
sha: ${{ inputs.sha }}
wheel-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10
with:
build_type: nightly
branch: ${{ inputs.branch }}
diff --git a/.gitignore b/.gitignore
index 1ab57e4d4..0f98d3e44 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ DartConfiguration.tcl
.DS_Store
*.manifest
*.spec
+compile_commands.json
## Python build directories & artifacts
dist/
@@ -153,3 +154,7 @@ rmm_log.txt
# cibuildwheel
/wheelhouse
+
+# clang tooling
+compile_commands.json
+.clangd/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index aa076e6be..7f095025a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
hooks:
- id: cython-lint
- repo: https://github.com/pre-commit/mirrors-clang-format
- rev: v16.0.1
+ rev: v16.0.6
hooks:
- id: clang-format
types_or: [c, c++, cuda]
@@ -69,6 +69,13 @@ repos:
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
+ - id: doxygen-check
+ name: doxygen-check
+ entry: ./scripts/doxygen.sh
+ types_or: [file]
+ language: system
+ pass_filenames: false
+ verbose: true
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.278
hooks:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 453ab54c2..901b3790a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,42 @@
+# RMM 23.10.00 (11 Oct 2023)
+
+## 🚨 Breaking Changes
+
+- Update to Cython 3.0.0 ([#1313](https://github.com/rapidsai/rmm/pull/1313)) [@vyasr](https://github.com/vyasr)
+
+## 🐛 Bug Fixes
+
+- Compile cdef public functions from torch_allocator with C ABI ([#1350](https://github.com/rapidsai/rmm/pull/1350)) [@wence-](https://github.com/wence-)
+- Make doxygen only a conda dependency. ([#1344](https://github.com/rapidsai/rmm/pull/1344)) [@bdice](https://github.com/bdice)
+- Use `conda mambabuild` not `mamba mambabuild` ([#1338](https://github.com/rapidsai/rmm/pull/1338)) [@wence-](https://github.com/wence-)
+- Fix stream_ordered_memory_resource attempt to record event in stream from another device ([#1333](https://github.com/rapidsai/rmm/pull/1333)) [@harrism](https://github.com/harrism)
+
+## 📖 Documentation
+
+- Clean up headers in CMakeLists.txt. ([#1341](https://github.com/rapidsai/rmm/pull/1341)) [@bdice](https://github.com/bdice)
+- Add pre-commit hook to validate doxygen ([#1334](https://github.com/rapidsai/rmm/pull/1334)) [@vyasr](https://github.com/vyasr)
+- Fix doxygen warnings ([#1317](https://github.com/rapidsai/rmm/pull/1317)) [@vyasr](https://github.com/vyasr)
+- Treat warnings as errors in Python documentation ([#1316](https://github.com/rapidsai/rmm/pull/1316)) [@vyasr](https://github.com/vyasr)
+
+## 🚀 New Features
+
+- Enable RMM Debug Logging via Python ([#1339](https://github.com/rapidsai/rmm/pull/1339)) [@harrism](https://github.com/harrism)
+
+## 🛠️ Improvements
+
+- Update image names ([#1346](https://github.com/rapidsai/rmm/pull/1346)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Update to clang 16.0.6. ([#1343](https://github.com/rapidsai/rmm/pull/1343)) [@bdice](https://github.com/bdice)
+- Update doxygen to 1.9.1 ([#1337](https://github.com/rapidsai/rmm/pull/1337)) [@vyasr](https://github.com/vyasr)
+- Simplify wheel build scripts and allow alphas of RAPIDS dependencies ([#1335](https://github.com/rapidsai/rmm/pull/1335)) [@divyegala](https://github.com/divyegala)
+- Use `copy-pr-bot` ([#1329](https://github.com/rapidsai/rmm/pull/1329)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Add RMM devcontainers ([#1328](https://github.com/rapidsai/rmm/pull/1328)) [@trxcllnt](https://github.com/trxcllnt)
+- Add Python bindings for `limiting_resource_adaptor` ([#1327](https://github.com/rapidsai/rmm/pull/1327)) [@pentschev](https://github.com/pentschev)
+- Fix missing jQuery error in docs ([#1321](https://github.com/rapidsai/rmm/pull/1321)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Use fetch_rapids.cmake. ([#1319](https://github.com/rapidsai/rmm/pull/1319)) [@bdice](https://github.com/bdice)
+- Update to Cython 3.0.0 ([#1313](https://github.com/rapidsai/rmm/pull/1313)) [@vyasr](https://github.com/vyasr)
+- Branch 23.10 merge 23.08 ([#1312](https://github.com/rapidsai/rmm/pull/1312)) [@vyasr](https://github.com/vyasr)
+- Branch 23.10 merge 23.08 ([#1309](https://github.com/rapidsai/rmm/pull/1309)) [@vyasr](https://github.com/vyasr)
+
# RMM 23.08.00 (9 Aug 2023)
## 🚨 Breaking Changes
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2c4453968..814f41be0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,11 +14,7 @@
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
-if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
- file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/RAPIDS.cmake
- ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
-endif()
-include(${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
+include(fetch_rapids.cmake)
include(rapids-cmake)
include(rapids-cpm)
@@ -27,16 +23,21 @@ include(rapids-find)
project(
RMM
- VERSION 23.08.00
+ VERSION 23.10.00
LANGUAGES CXX)
# Write the version header
rapids_cmake_write_version_file(include/rmm/version_config.hpp)
+# ##################################################################################################
+# * build type -------------------------------------------------------------------------------------
+
# Set a default build type if none was specified
rapids_cmake_build_type(Release)
-# build options
+# ##################################################################################################
+# * build options ----------------------------------------------------------------------------------
+
option(BUILD_TESTS "Configure CMake to build tests" ON)
option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)
set(RMM_LOGGING_LEVEL
@@ -52,17 +53,28 @@ message(STATUS "RMM: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'")
# cudart can be statically linked or dynamically linked the python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
+# ##################################################################################################
+# * compiler options -------------------------------------------------------------------------------
+
# find packages we depend on
rapids_find_package(
CUDAToolkit REQUIRED
BUILD_EXPORT_SET rmm-exports
INSTALL_EXPORT_SET rmm-exports)
+
+# ##################################################################################################
+# * dependencies -----------------------------------------------------------------------------------
+
+# add third party dependencies using CPM
rapids_cpm_init()
+
include(cmake/thirdparty/get_fmt.cmake)
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_thrust.cmake)
-# library targets
+# ##################################################################################################
+# * library targets --------------------------------------------------------------------------------
+
add_library(rmm INTERFACE)
add_library(rmm::rmm ALIAS rmm)
@@ -83,6 +95,9 @@ target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
target_link_libraries(rmm INTERFACE dl)
target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
+# ##################################################################################################
+# * tests and benchmarks ---------------------------------------------------------------------------
+
if((BUILD_TESTS OR BUILD_BENCHMARKS) AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
include(rapids-cuda)
rapids_cuda_init_architectures(RMM)
@@ -94,7 +109,9 @@ if((BUILD_TESTS OR BUILD_BENCHMARKS) AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAM
message(STATUS "RMM: Building benchmarks with GPU Architectures: ${CMAKE_CUDA_ARCHITECTURES}")
endif()
-# optionally build tests
+# ##################################################################################################
+# * add tests --------------------------------------------------------------------------------------
+
if(BUILD_TESTS AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
include(cmake/thirdparty/get_gtest.cmake)
include(CTest) # calls enable_testing()
@@ -102,19 +119,19 @@ if(BUILD_TESTS AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
add_subdirectory(tests)
endif()
-# optionally build benchmarks
+# ##################################################################################################
+# * add benchmarks ---------------------------------------------------------------------------------
+
if(BUILD_BENCHMARKS AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
include(${rapids-cmake-dir}/cpm/gbench.cmake)
rapids_cpm_gbench()
add_subdirectory(benchmarks)
endif()
-include(CPack)
+# ##################################################################################################
+# * install targets --------------------------------------------------------------------------------
-# optionally assemble Thrust pretty-printers
-if(Thrust_SOURCE_DIR)
- configure_file(scripts/load-pretty-printers.in load-pretty-printers @ONLY)
-endif()
+include(CPack)
# install export targets
install(TARGETS rmm EXPORT rmm-exports)
@@ -147,7 +164,9 @@ rapids_export(
DOCUMENTATION doc_string
FINAL_CODE_BLOCK code_string)
-# build export targets
+# ##################################################################################################
+# * build export -----------------------------------------------------------------------------------
+
rapids_export(
BUILD rmm
EXPORT_SET rmm-exports
@@ -156,7 +175,8 @@ rapids_export(
DOCUMENTATION doc_string
FINAL_CODE_BLOCK code_string)
-# make documentation
+# ##################################################################################################
+# * make documentation -----------------------------------------------------------------------------
add_custom_command(
OUTPUT RMM_DOXYGEN
@@ -169,3 +189,11 @@ add_custom_target(
rmm_doc
DEPENDS RMM_DOXYGEN
COMMENT "Target for the custom command to build the RMM doxygen docs")
+
+# ##################################################################################################
+# * make gdb helper scripts ------------------------------------------------------------------------
+
+# optionally assemble Thrust pretty-printers
+if(Thrust_SOURCE_DIR)
+ configure_file(scripts/load-pretty-printers.in load-pretty-printers @ONLY)
+endif()
diff --git a/README.md b/README.md
index 8d65b7d33..4059c9ce0 100644
--- a/README.md
+++ b/README.md
@@ -354,11 +354,32 @@ objects for each device and sets them as the per-device resource for that device
```c++
std::vector<unique_ptr<pool_memory_resource>> per_device_pools;
for(int i = 0; i < N; ++i) {
- cudaSetDevice(i); // set device i before creating MR
- // Use a vector of unique_ptr to maintain the lifetime of the MRs
- per_device_pools.push_back(std::make_unique<pool_memory_resource>());
- // Set the per-device resource for device i
- set_per_device_resource(cuda_device_id{i}, &per_device_pools.back());
+ cudaSetDevice(i); // set device i before creating MR
+ // Use a vector of unique_ptr to maintain the lifetime of the MRs
+ per_device_pools.push_back(std::make_unique<pool_memory_resource>());
+ // Set the per-device resource for device i
+ set_per_device_resource(cuda_device_id{i}, &per_device_pools.back());
+}
+```
+
+Note that the CUDA device that is current when creating a `device_memory_resource` must also be
+current any time that `device_memory_resource` is used to deallocate memory, including in a
+destructor. This affects RAII classes like `rmm::device_buffer` and `rmm::device_uvector`. Here's an
+(incorrect) example that assumes the above example loop has been run to create a
+`pool_memory_resource` for each device. A correct example adds a call to `cudaSetDevice(0)` on the
+line of the error comment.
+
+```c++
+{
+ RMM_CUDA_TRY(cudaSetDevice(0));
+ rmm::device_buffer buf_a(16);
+
+ {
+ RMM_CUDA_TRY(cudaSetDevice(1));
+ rmm::device_buffer buf_b(16);
+ }
+
+ // Error: when buf_a is destroyed, the current device must be 0, but it is 1
}
```
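+
+For reference, a corrected sketch of the same scope, with the `cudaSetDevice(0)` call added on
+the line of the error comment as described above:
+
+```c++
+{
+  RMM_CUDA_TRY(cudaSetDevice(0));
+  rmm::device_buffer buf_a(16);
+
+  {
+    RMM_CUDA_TRY(cudaSetDevice(1));
+    rmm::device_buffer buf_b(16);  // created and destroyed with device 1 current: OK
+  }
+
+  // Restore device 0 before buf_a is destroyed at the end of this scope
+  RMM_CUDA_TRY(cudaSetDevice(0));
+}
+```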
@@ -560,9 +581,12 @@ of more detailed logging. The default is `INFO`. Available levels are `TRACE`, `
The log relies on the [spdlog](https://github.com/gabime/spdlog.git) library.
-Note that to see logging below the `INFO` level, the C++ application must also call
-`rmm::logger().set_level()`, e.g. to enable all levels of logging down to `TRACE`, call
-`rmm::logger().set_level(spdlog::level::trace)` (and compile with `-DRMM_LOGGING_LEVEL=TRACE`).
+Note that to see logging below the `INFO` level, the application must also set the logging level at
+run time. C++ applications must call `rmm::logger().set_level()`; for example, to enable all
+levels of logging down to `TRACE`, call `rmm::logger().set_level(spdlog::level::trace)` (and compile
+librmm with `-DRMM_LOGGING_LEVEL=TRACE`). Python applications must call `rmm.set_logging_level()`;
+for example, to enable all levels of logging down to `TRACE`, call `rmm.set_logging_level("trace")`
+(and compile the RMM Python module with `-DRMM_LOGGING_LEVEL=TRACE`).
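+
+As a minimal C++ sketch (assuming librmm was compiled with `-DRMM_LOGGING_LEVEL=TRACE`; the
+`rmm/logger.hpp` include path is assumed here):
+
+```c++
+#include <rmm/logger.hpp>
+
+int main()
+{
+  // Lower the run-time filter so TRACE-level messages are emitted
+  rmm::logger().set_level(spdlog::level::trace);
+  // ... RMM allocations made after this point will log down to TRACE ...
+}
+```
+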
Note that debug logging is different from the CSV memory allocation logging provided by
`rmm::mr::logging_resource_adapter`. The latter is for logging a history of allocation /
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp
index 8c670525d..470442830 100644
--- a/benchmarks/random_allocations/random_allocations.cpp
+++ b/benchmarks/random_allocations/random_allocations.cpp
@@ -68,7 +68,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr,
{
std::default_random_engine generator;
- max_usage *= size_mb; // convert to bytes
+ max_usage *= size_mb; // convert to bytes
constexpr int allocation_probability{73}; // percent
constexpr int max_op_chance{99};
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index bc6e18021..0ae14a88f 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -11,6 +11,7 @@ rapids-print-env
rapids-logger "Begin cpp build"
-rapids-mamba-retry mambabuild conda/recipes/librmm
+# This calls mambabuild when boa is installed (as is the case in the CI images)
+rapids-conda-retry mambabuild conda/recipes/librmm
rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 7e1efef9a..d54947106 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -25,7 +25,7 @@ rapids-mamba-retry install \
--channel "${PYTHON_CHANNEL}" \
rmm librmm
-export RAPIDS_VERSION_NUMBER="23.08"
+export RAPIDS_VERSION_NUMBER="23.10"
export RAPIDS_DOCS_DIR="$(mktemp -d)"
rapids-logger "Build CPP docs"
@@ -37,11 +37,11 @@ popd
rapids-logger "Build Python docs"
pushd python/docs
-sphinx-build -b dirhtml . _html
-sphinx-build -b text . _text
+make dirhtml
+make text
mkdir -p "${RAPIDS_DOCS_DIR}/rmm/"{html,txt}
-mv _html/* "${RAPIDS_DOCS_DIR}/rmm/html"
-mv _text/* "${RAPIDS_DOCS_DIR}/rmm/txt"
+mv _build/dirhtml/* "${RAPIDS_DOCS_DIR}/rmm/html"
+mv _build/text/* "${RAPIDS_DOCS_DIR}/rmm/txt"
popd
rapids-upload-docs
diff --git a/ci/build_python.sh b/ci/build_python.sh
index b306d3e47..9da4025c7 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -13,6 +13,7 @@ rapids-logger "Begin py build"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
-rapids-mamba-retry mambabuild -c "${CPP_CHANNEL}" conda/recipes/rmm
+# This calls mambabuild when boa is installed (as is the case in the CI images)
+rapids-conda-retry mambabuild -c "${CPP_CHANNEL}" conda/recipes/rmm
rapids-upload-conda-to-s3 python
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 09c1e104e..660c88387 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -3,6 +3,9 @@
set -euo pipefail
+package_name="rmm"
+package_dir="python"
+
source rapids-configure-sccache
source rapids-date-string
@@ -12,15 +15,34 @@ version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-ci/release/apply_wheel_modifications.sh ${version_override} "-${RAPIDS_PY_CUDA_SUFFIX}"
-echo "The package name and/or version was modified in the package source. The git diff is:"
-git diff
+# This is the version of the suffix with a preceding hyphen. It's used
+# everywhere except in the final wheel name.
+PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
+
+# Patch project metadata files to include the CUDA version suffix and version override.
+pyproject_file="${package_dir}/pyproject.toml"
+
+sed -i "s/^version = .*/version = \"${version_override}\"/g" ${pyproject_file}
+sed -i "s/name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
+
+# For nightlies we want to ensure that we're pulling in alphas as well. The
+# easiest way to do so is to augment the spec with a constraint containing a
+# min alpha version that doesn't affect the version bounds but does allow usage
+# of alpha versions for that dependency without --pre
+alpha_spec=''
+if ! rapids-is-release-build; then
+ alpha_spec=',>=0.0.0a0'
+fi
+
+if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
+ sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file}
+fi
-cd python
+cd "${package_dir}"
SKBUILD_CONFIGURE_OPTIONS="-DRMM_BUILD_WHEELS=ON" python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
mkdir -p final_dist
python -m auditwheel repair -w final_dist dist/*
-RAPIDS_PY_WHEEL_NAME="rmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
diff --git a/ci/check_style.sh b/ci/check_style.sh
index 4beaca333..e96ad8bf1 100755
--- a/ci/check_style.sh
+++ b/ci/check_style.sh
@@ -14,7 +14,7 @@ rapids-dependency-file-generator \
rapids-mamba-retry env create --force -f env.yaml -n checks
conda activate checks
-FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/cmake-format-rapids-cmake.json
+FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.10/cmake-format-rapids-cmake.json
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
deleted file mode 100755
index cc71389ea..000000000
--- a/ci/release/apply_wheel_modifications.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
-#
-# Usage: bash apply_wheel_modifications.sh <new_version> <cuda_suffix>
-
-VERSION=${1}
-CUDA_SUFFIX=${2}
-
-sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/pyproject.toml
-
-sed -i "s/^name = \"rmm\"/name = \"rmm${CUDA_SUFFIX}\"/g" python/pyproject.toml
-
-if [[ $CUDA_SUFFIX == "-cu12" ]]; then
- sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" python/pyproject.toml
-fi
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 7314442ff..8b7f07167 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -22,6 +22,9 @@ NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
+# Need to distutils-normalize the original version
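+# (PEP 440 normalization drops leading zeros, e.g. 23.08 -> 23.8)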
+NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
+
echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"
# Inplace sed replace; workaround for Linux and Mac
@@ -31,14 +34,15 @@ function sed_runner() {
# cpp update
sed_runner 's/'" VERSION .*"'/'" VERSION ${NEXT_FULL_TAG}"'/g' CMakeLists.txt
-sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' CMakeLists.txt
# Python update
sed_runner 's/'"rmm_version .*)"'/'"rmm_version ${NEXT_FULL_TAG})"'/g' python/CMakeLists.txt
-sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' python/CMakeLists.txt
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/rmm/__init__.py
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pyproject.toml
+# rapids-cmake version
+sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake
+
# cmake-format rapids-cmake definitions
sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHORT_TAG}\/cmake-format-rapids-cmake.json"'/g' ci/check_style.sh
@@ -54,3 +58,9 @@ for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
done
sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh
+
+# .devcontainer files
+find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do
+ sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
+ sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
+done
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index f668c4a4c..8571a1928 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -4,17 +4,28 @@ channels:
- rapidsai
- conda-forge
dependencies:
+- c-compiler
+- clang-tools==16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-python>=11.7.1,<12.0a0
- cuda-version=11.8
- cudatoolkit
-- cython>=0.29,<0.30
+- cxx-compiler
+- cython>=3.0.0
+- doxygen=1.9.1
- fmt>=9.1.0,<10
+- gcc_linux-64=11.*
- gcovr>=5.0
+- graphviz
- identify>=2.5.20
+- ipython
+- make
+- nbsphinx
- ninja
- numba>=0.57
- numpy>=1.21
+- numpydoc
- nvcc_linux-64=11.8
- pre-commit
- pytest
@@ -22,5 +33,10 @@ dependencies:
- python>=3.9,<3.11
- scikit-build>=0.13.1
- spdlog>=1.11.0,<1.12
+- sphinx
+- sphinx-copybutton
+- sphinx-markdown-tables
+- sphinx_rtd_theme
+- sysroot_linux-64==2.17
- tomli
name: all_cuda-118_arch-x86_64
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index 40291ba33..aec31b3db 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -4,22 +4,38 @@ channels:
- rapidsai
- conda-forge
dependencies:
+- c-compiler
+- clang-tools==16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-nvcc
- cuda-python>=12.0,<13.0a0
- cuda-version=12.0
-- cython>=0.29,<0.30
+- cxx-compiler
+- cython>=3.0.0
+- doxygen=1.9.1
- fmt>=9.1.0,<10
+- gcc_linux-64=11.*
- gcovr>=5.0
+- graphviz
- identify>=2.5.20
+- ipython
+- make
+- nbsphinx
- ninja
- numba>=0.57
- numpy>=1.21
+- numpydoc
- pre-commit
- pytest
- pytest-cov
- python>=3.9,<3.11
- scikit-build>=0.13.1
- spdlog>=1.11.0,<1.12
+- sphinx
+- sphinx-copybutton
+- sphinx-markdown-tables
+- sphinx_rtd_theme
+- sysroot_linux-64==2.17
- tomli
name: all_cuda-120_arch-x86_64
diff --git a/conda/recipes/rmm/meta.yaml b/conda/recipes/rmm/meta.yaml
index 5d905b853..e13254d4c 100644
--- a/conda/recipes/rmm/meta.yaml
+++ b/conda/recipes/rmm/meta.yaml
@@ -58,7 +58,7 @@ requirements:
- cuda-cudart-dev
- cuda-python ==12.0.0
{% endif %}
- - cython >=0.29,<0.30
+ - cython >=3.0.0
- librmm ={{ version }}
- python
- scikit-build >=0.13.1
diff --git a/dependencies.yaml b/dependencies.yaml
index 91eed8280..7d9af48cf 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -10,6 +10,7 @@ files:
- checks
- cudatoolkit
- develop
+ - docs
- run
- test_python
test_python:
@@ -62,12 +63,14 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- &cmake_ver cmake>=3.26.4
- - cython>=0.29,<0.30
+ - cython>=3.0.0
- ninja
- scikit-build>=0.13.1
- tomli
- output_types: conda
packages:
+ - c-compiler
+ - cxx-compiler
- fmt>=9.1.0,<10
- spdlog>=1.11.0,<1.12
- python>=3.9,<3.11
@@ -76,6 +79,18 @@ dependencies:
- wheel
- setuptools>=61.0.0
specific:
+ - output_types: conda
+ matrices:
+ - matrix:
+ arch: x86_64
+ packages:
+ - gcc_linux-64=11.*
+ - sysroot_linux-64==2.17
+ - matrix:
+ arch: aarch64
+ packages:
+ - gcc_linux-aarch64=11.*
+ - sysroot_linux-aarch64==2.17
- output_types: conda
matrices:
- matrix:
@@ -110,6 +125,9 @@ dependencies:
# pre-commit requires identify minimum version 1.0, but clang-format requires textproto support and that was
# added in 2.5.20, so we need to call out the minimum version needed for our plugins
- identify>=2.5.20
+ - output_types: conda
+ packages:
+ - &doxygen doxygen=1.9.1
cudatoolkit:
specific:
- output_types: conda
@@ -148,13 +166,18 @@ dependencies:
- output_types: [conda, requirements]
packages:
- gcovr>=5.0
+ - output_types: conda
+ packages:
+ - clang==16.0.6
+ - clang-tools==16.0.6
docs:
common:
- - output_types: [conda]
+ - output_types: conda
packages:
- - doxygen=1.8.20
+ - *doxygen
- graphviz
- ipython
+ - make
- nbsphinx
- numpydoc
- sphinx
diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile
index 264d99df0..5d6153f81 100644
--- a/doxygen/Doxyfile
+++ b/doxygen/Doxyfile
@@ -1,4 +1,4 @@
-# Doxyfile 1.8.20
+# Doxyfile 1.9.1
# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project.
@@ -32,13 +32,13 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.
-PROJECT_NAME = "RMM"
+PROJECT_NAME = RMM
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
# control system is used.
-PROJECT_NUMBER = 23.08
+PROJECT_NUMBER = 23.10
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
@@ -259,7 +259,11 @@ TAB_SIZE = 4
# commands \{ and \} for these it is advised to use the version @{ and @} or use
# a double escape (\\{ and \\})
-ALIASES =
+ALIASES = "briefreturn{1}=@brief \1 @return \1" \
+ "default_copy_constructor=Default copy constructor" \
+ "default_move_constructor=Default move constructor" \
+ "default_copy_assignment{1}=@brief Default copy assignment operator @return \1& Reference to the assigned object" \
+ "default_move_assignment{1}=@brief Default move assignment operator @return \1& Reference to the assigned object"
# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
# only. Doxygen will then generate output that is more tailored for C. For
@@ -313,7 +317,10 @@ OPTIMIZE_OUTPUT_SLICE = NO
# Note: For files without extension you can use no_extension as a placeholder.
#
# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
+# the files are not read by doxygen. When specifying no_extension you should add
+# * to the FILE_PATTERNS.
+#
+# Note see also the list of default file extension mappings.
EXTENSION_MAPPING = cu=C++ \
cuh=C++
@@ -524,6 +531,13 @@ EXTRACT_LOCAL_METHODS = NO
EXTRACT_ANON_NSPACES = NO
+# If this flag is set to YES, the name of an unnamed parameter in a declaration
+# will be determined by the corresponding definition. By default unnamed
+# parameters remain unnamed in the output.
+# The default value is: YES.
+
+RESOLVE_UNNAMED_PARAMS = YES
+
# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
# undocumented members inside documented classes or files. If set to NO these
# members will be included in the various overviews, but no documentation
@@ -561,11 +575,18 @@ HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# (including Cygwin) and Mac users are advised to set this option to NO.
+# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
+# able to match the capabilities of the underlying filesystem. In case the
+# filesystem is case sensitive (i.e. it supports files in the same directory
+# whose names only differ in casing), the option must be set to YES to properly
+# deal with such files in case they appear in the input. For filesystems that
+# are not case sensitive the option should be set to NO to properly deal with
+# output files written for symbols that only differ in casing, such as for two
+# classes, one named CLASS and the other named Class, and to also support
+# references to files without having to specify the exact matching casing. On
+# Windows (including Cygwin) and MacOS, users should typically set this option
+# to NO, whereas on Linux or other Unix flavors it should typically be set to
+# YES.
# The default value is: system dependent.
CASE_SENSE_NAMES = YES
@@ -804,7 +825,10 @@ WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = YES
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
-# a warning is encountered.
+# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
+# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
+# at the end of the doxygen process doxygen will return with a non-zero status.
+# Possible values are: NO, YES and FAIL_ON_WARNINGS.
# The default value is: NO.
WARN_AS_ERROR = NO
@@ -840,8 +864,8 @@ INPUT = ../include
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
-# possible encodings.
+# documentation (see:
+# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
# The default value is: UTF-8.
INPUT_ENCODING = UTF-8
@@ -854,13 +878,15 @@ INPUT_ENCODING = UTF-8
# need to set EXTENSION_MAPPING for the extension otherwise the files are not
# read by doxygen.
#
+# Note the list of default checked file patterns might differ from the list of
+# default file extension mappings.
+#
# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
-# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
-# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd,
-# *.vhdl, *.ucf, *.qsf and *.ice.
+# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,
+# *.ucf, *.qsf and *.ice.
FILE_PATTERNS = *.c \
*.cpp \
@@ -898,7 +924,7 @@ EXCLUDE_SYMLINKS = NO
# Note that the wildcards are matched against the file with absolute path, so to
# exclude all test directories for example use the pattern */test/*
-EXCLUDE_PATTERNS =
+EXCLUDE_PATTERNS = */detail/*
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the
@@ -1090,13 +1116,6 @@ VERBATIM_HEADERS = YES
ALPHABETICAL_INDEX = YES
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX = 5
-
# In case all classes in a project start with a common prefix, all classes will
# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
# can be used to specify a prefix (or a list of prefixes) that should be ignored
@@ -1267,10 +1286,11 @@ HTML_INDEX_NUM_ENTRIES = 100
# If the GENERATE_DOCSET tag is set to YES, additional index files will be
# generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: https://developer.apple.com/xcode/), introduced with OSX
-# 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
+# environment (see:
+# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
+# create a documentation set, doxygen will generate a Makefile in the HTML
+# output directory. Running make will produce the docset in that directory and
+# running make install will install the docset in
# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
# genXcode/_index.html for more information.
@@ -1312,8 +1332,8 @@ DOCSET_PUBLISHER_NAME = Publisher
# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
+# (see:
+# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
#
# The HTML Help Workshop contains a compiler that can convert all HTML output
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
@@ -1388,7 +1408,8 @@ QCH_FILE =
# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
# Project output. For more information please see Qt Help Project / Namespace
-# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
+# (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_QHP is set to YES.
@@ -1396,8 +1417,8 @@ QHP_NAMESPACE = org.doxygen.Project
# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
# Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
-# folders).
+# Folders (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
# The default value is: doc.
# This tag requires that the tag GENERATE_QHP is set to YES.
@@ -1405,16 +1426,16 @@ QHP_VIRTUAL_FOLDER = doc
# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
# filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.
QHP_CUST_FILTER_NAME =
# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
# custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.
QHP_CUST_FILTER_ATTRS =
@@ -1426,9 +1447,9 @@ QHP_CUST_FILTER_ATTRS =
QHP_SECT_FILTER_ATTRS =
-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
+# The QHG_LOCATION tag can be used to specify the location (absolute path
+# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
+# run qhelpgenerator on the generated .qhp file.
# This tag requires that the tag GENERATE_QHP is set to YES.
QHG_LOCATION =
@@ -1555,7 +1576,7 @@ USE_MATHJAX = NO
# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.
# Possible values are: HTML-CSS (which is slower, but has the best
# compatibility), NativeMML (i.e. MathML) and SVG.
# The default value is: HTML-CSS.
@@ -1585,7 +1606,8 @@ MATHJAX_EXTENSIONS =
# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
# of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
# example see the documentation.
# This tag requires that the tag USE_MATHJAX is set to YES.
@@ -1632,7 +1654,8 @@ SERVER_BASED_SEARCH = NO
#
# Doxygen ships with an example indexer (doxyindexer) and search engine
# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/).
+# Xapian (see:
+# https://xapian.org/).
#
# See the section "External Indexing and Searching" for details.
# The default value is: NO.
@@ -1645,8 +1668,9 @@ EXTERNAL_SEARCH = NO
#
# Doxygen ships with an example indexer (doxyindexer) and search engine
# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/). See the section "External Indexing and
-# Searching" for details.
+# Xapian (see:
+# https://xapian.org/). See the section "External Indexing and Searching" for
+# details.
# This tag requires that the tag SEARCHENGINE is set to YES.
SEARCHENGINE_URL =
@@ -2325,10 +2349,32 @@ UML_LOOK = NO
# but if the number exceeds 15, the total amount of fields shown is limited to
# 10.
# Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
+# This tag requires that the tag UML_LOOK is set to YES.
UML_LIMIT_NUM_FIELDS = 10
+# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
+# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
+# tag is set to YES, doxygen will add type and arguments for attributes and
+# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
+# will not generate fields with class member information in the UML graphs. The
+# class diagrams will look similar to the default class diagrams but using UML
+# notation for the relationships.
+# Possible values are: NO, YES and NONE.
+# The default value is: NO.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+DOT_UML_DETAILS = NO
+
+# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
+# to display on a single line. If the actual line length exceeds this threshold
+# significantly it will be wrapped across multiple lines. Some heuristics are applied
+# to avoid ugly line breaks.
+# Minimum value: 0, maximum value: 1000, default value: 17.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_WRAP_THRESHOLD = 17
+
# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
# collaboration graphs will show the relations between templates and their
# instances.
@@ -2518,9 +2564,11 @@ DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
-# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
# files that are used to generate the various graphs.
+#
+# Note: This setting is not only used for dot files but also for msc and
+# plantuml temporary files.
# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
DOT_CLEANUP = YES
diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake
new file mode 100644
index 000000000..1ff8119ba
--- /dev/null
+++ b/fetch_rapids.cmake
@@ -0,0 +1,18 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
+ file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.10/RAPIDS.cmake
+ ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
+endif()
+include(${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp
index ab225490e..8d355ee23 100644
--- a/include/rmm/cuda_device.hpp
+++ b/include/rmm/cuda_device.hpp
@@ -26,23 +26,22 @@ namespace rmm {
*
*/
struct cuda_device_id {
- using value_type = int;
+ using value_type = int; ///< Integer type used for device identifier
/**
* @brief Construct a `cuda_device_id` from the specified integer value
*
- * @param id The device's integer identifier
+ * @param dev_id The device's integer identifier
*/
explicit constexpr cuda_device_id(value_type dev_id) noexcept : id_{dev_id} {}
- /// Returns the wrapped integer value
+ /// @briefreturn{The wrapped integer value}
[[nodiscard]] constexpr value_type value() const noexcept { return id_; }
private:
value_type id_;
};
-namespace detail {
/**
* @brief Returns a `cuda_device_id` for the current device
*
@@ -50,11 +49,56 @@ namespace detail {
*
* @return `cuda_device_id` for the current device
*/
-inline cuda_device_id current_device()
+inline cuda_device_id get_current_cuda_device()
{
- int dev_id{};
- RMM_CUDA_TRY(cudaGetDevice(&dev_id));
+ cuda_device_id::value_type dev_id{-1};
+ RMM_ASSERT_CUDA_SUCCESS(cudaGetDevice(&dev_id));
return cuda_device_id{dev_id};
}
-} // namespace detail
+
+/**
+ * @brief Returns the number of CUDA devices in the system
+ *
+ * @return Number of CUDA devices in the system
+ */
+inline int get_num_cuda_devices()
+{
+ cuda_device_id::value_type num_dev{-1};
+ RMM_ASSERT_CUDA_SUCCESS(cudaGetDeviceCount(&num_dev));
+ return num_dev;
+}
+
+/**
+ * @brief RAII class that sets the current CUDA device to the specified device on construction
+ * and restores the previous device on destruction.
+ */
+struct cuda_set_device_raii {
+ /**
+ * @brief Construct a new cuda_set_device_raii object and set the current CUDA device to `dev_id`
+ *
+ * @param dev_id The device to set as the current CUDA device
+ */
+ explicit cuda_set_device_raii(cuda_device_id dev_id)
+ : old_device_{get_current_cuda_device()}, needs_reset_{old_device_.value() != dev_id.value()}
+ {
+ if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value()));
+ }
+ /**
+ * @brief Reactivates the previous CUDA device
+ */
+ ~cuda_set_device_raii() noexcept
+ {
+ if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value()));
+ }
+
+ cuda_set_device_raii(cuda_set_device_raii const&) = delete;
+ cuda_set_device_raii& operator=(cuda_set_device_raii const&) = delete;
+ cuda_set_device_raii(cuda_set_device_raii&&) = delete;
+ cuda_set_device_raii& operator=(cuda_set_device_raii&&) = delete;
+
+ private:
+ cuda_device_id old_device_;
+ bool needs_reset_;
+};
+
} // namespace rmm
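
For orientation, a minimal sketch of how the utilities added above compose (illustrative only; the multi-GPU loop assumes a CUDA-capable build with this header on the include path):

```cpp
#include <rmm/cuda_device.hpp>

void visit_all_devices()
{
  // Iterate over every device, making each current in turn
  for (int i = 0; i < rmm::get_num_cuda_devices(); ++i) {
    rmm::cuda_set_device_raii scope{rmm::cuda_device_id{i}};
    // ... per-device work here; device i is current ...
  }  // `scope` restores the previous device at the end of each iteration
}
```
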
diff --git a/include/rmm/cuda_stream.hpp b/include/rmm/cuda_stream.hpp
index 9d2dab4f0..b778cf7bd 100644
--- a/include/rmm/cuda_stream.hpp
+++ b/include/rmm/cuda_stream.hpp
@@ -46,6 +46,8 @@ class cuda_stream {
*
* A moved-from cuda_stream is invalid and it is Undefined Behavior to call methods that access
* the owned stream.
+ *
+ * @return A reference to this cuda_stream
*/
cuda_stream& operator=(cuda_stream&&) = default;
~cuda_stream() = default;
diff --git a/include/rmm/cuda_stream_view.hpp b/include/rmm/cuda_stream_view.hpp
index bc30f26d8..150fe3957 100644
--- a/include/rmm/cuda_stream_view.hpp
+++ b/include/rmm/cuda_stream_view.hpp
@@ -33,41 +33,47 @@ namespace rmm {
*/
class cuda_stream_view {
public:
- constexpr cuda_stream_view() = default;
- constexpr cuda_stream_view(cuda_stream_view const&) = default;
- constexpr cuda_stream_view(cuda_stream_view&&) = default;
- constexpr cuda_stream_view& operator=(cuda_stream_view const&) = default;
- constexpr cuda_stream_view& operator=(cuda_stream_view&&) = default;
- ~cuda_stream_view() = default;
+ constexpr cuda_stream_view() = default;
+ ~cuda_stream_view() = default;
+ constexpr cuda_stream_view(cuda_stream_view const&) = default; ///< @default_copy_constructor
+ constexpr cuda_stream_view(cuda_stream_view&&) = default; ///< @default_move_constructor
+ constexpr cuda_stream_view& operator=(cuda_stream_view const&) =
+ default; ///< @default_copy_assignment{cuda_stream_view}
+ constexpr cuda_stream_view& operator=(cuda_stream_view&&) =
+ default; ///< @default_move_assignment{cuda_stream_view}
// Disable construction from literal 0
constexpr cuda_stream_view(int) = delete; //< Prevent cast from 0
constexpr cuda_stream_view(std::nullptr_t) = delete; //< Prevent cast from nullptr
/**
- * @brief Implicit conversion from cudaStream_t.
+ * @brief Constructor from a cudaStream_t
+ *
+ * @param stream The underlying stream for this view
*/
constexpr cuda_stream_view(cudaStream_t stream) noexcept : stream_{stream} {}
/**
* @brief Get the wrapped stream.
*
- * @return cudaStream_t The wrapped stream.
+ * @return cudaStream_t The underlying stream referenced by this cuda_stream_view
*/
[[nodiscard]] constexpr cudaStream_t value() const noexcept { return stream_; }
/**
* @brief Implicit conversion to cudaStream_t.
+ *
+ * @return cudaStream_t The underlying stream referenced by this cuda_stream_view
*/
constexpr operator cudaStream_t() const noexcept { return value(); }
/**
- * @brief Return true if the wrapped stream is the CUDA per-thread default stream.
+ * @briefreturn{true if the wrapped stream is the CUDA per-thread default stream}
*/
[[nodiscard]] inline bool is_per_thread_default() const noexcept;
/**
- * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream.
+ * @briefreturn{true if the wrapped stream is explicitly the CUDA legacy default stream}
*/
[[nodiscard]] inline bool is_default() const noexcept;
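
With the literal-`0` and `nullptr` constructors deleted, a view must be built from an actual `cudaStream_t`. A small sketch (error handling omitted):

```cpp
#include <rmm/cuda_stream_view.hpp>

#include <cuda_runtime_api.h>

void stream_view_example()
{
  cudaStream_t raw{};
  cudaStreamCreate(&raw);

  rmm::cuda_stream_view view{raw};  // OK: constructed from a cudaStream_t
  // rmm::cuda_stream_view bad{0};        // ill-formed: deleted constructor
  // rmm::cuda_stream_view bad2{nullptr}; // ill-formed: deleted constructor

  cudaStreamDestroy(raw);
}
```
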
diff --git a/include/rmm/detail/dynamic_load_runtime.hpp b/include/rmm/detail/dynamic_load_runtime.hpp
index 28121e6a8..b45dbae25 100644
--- a/include/rmm/detail/dynamic_load_runtime.hpp
+++ b/include/rmm/detail/dynamic_load_runtime.hpp
@@ -115,7 +115,7 @@ struct async_alloc {
int cuda_pool_supported{};
auto result = cudaDeviceGetAttribute(&cuda_pool_supported,
cudaDevAttrMemoryPoolsSupported,
- rmm::detail::current_device().value());
+ rmm::get_current_cuda_device().value());
return result == cudaSuccess and cuda_pool_supported == 1;
}()};
return runtime_supports_pool and driver_supports_pool;
@@ -139,7 +139,7 @@ struct async_alloc {
if (cudaMemHandleTypeNone != handle_type) {
auto const result = cudaDeviceGetAttribute(&supported_handle_types_bitmask,
cudaDevAttrMemoryPoolSupportedHandleTypes,
- rmm::detail::current_device().value());
+ rmm::get_current_cuda_device().value());
// Don't throw on cudaErrorInvalidValue
auto const unsupported_runtime = (result == cudaErrorInvalidValue);
diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp
index bd8a8211a..84638ee89 100644
--- a/include/rmm/device_buffer.hpp
+++ b/include/rmm/device_buffer.hpp
@@ -29,7 +29,6 @@
namespace rmm {
/**
- * @file device_buffer.hpp
* @brief RAII construct for device memory allocation
*
* This class allocates untyped and *uninitialized* device memory using a
@@ -202,6 +201,8 @@ class device_buffer {
* replaced by the `other.stream()`.
*
* @param other The `device_buffer` whose contents will be moved.
+ *
+ * @return A reference to this `device_buffer`
*/
device_buffer& operator=(device_buffer&& other) noexcept
{
@@ -331,22 +332,22 @@ class device_buffer {
}
/**
- * @brief Returns raw pointer to underlying device memory allocation
+ * @briefreturn{Const pointer to the device memory allocation}
*/
[[nodiscard]] void const* data() const noexcept { return _data; }
/**
- * @brief Returns raw pointer to underlying device memory allocation
+ * @briefreturn{Pointer to the device memory allocation}
*/
void* data() noexcept { return _data; }
/**
- * @brief Returns the number of bytes.
+ * @briefreturn{The number of bytes}
*/
[[nodiscard]] std::size_t size() const noexcept { return _size; }
/**
- * @brief Returns the signed number of bytes.
+ * @briefreturn{The signed number of bytes}
*/
[[nodiscard]] std::int64_t ssize() const noexcept
{
@@ -356,11 +357,10 @@ class device_buffer {
}
/**
- * @brief returns the number of bytes that can be held in currently allocated storage.
+ * @briefreturn{Whether or not the buffer currently holds any data}
*
* If `is_empty() == true`, the `device_buffer` may still hold an allocation
* if `capacity() > 0`.
- *
*/
[[nodiscard]] bool is_empty() const noexcept { return 0 == size(); }
@@ -368,11 +368,13 @@ class device_buffer {
* @brief Returns actual size in bytes of device memory allocation.
*
* The invariant `size() <= capacity()` holds.
+ *
+ * @return The actual size in bytes of the device memory allocation
*/
[[nodiscard]] std::size_t capacity() const noexcept { return _capacity; }
/**
- * @brief Returns stream most recently specified for allocation/deallocation
+ * @briefreturn{The stream most recently specified for allocation/deallocation}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; }
@@ -384,20 +386,21 @@ class device_buffer {
* will be used for deallocation in the `rmm::device_buffer` destructor.
* However, if either of `resize()` or `shrink_to_fit()` is called after this,
* the later stream parameter will be stored and used in the destructor.
+ *
+ * @param stream The stream to use for deallocation
*/
void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }
/**
- * @brief Returns pointer to the memory resource used to allocate and
- * deallocate the device memory
+ * @briefreturn{Pointer to the memory resource used to allocate and deallocate}
*/
[[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _mr; }
private:
- void* _data{nullptr}; ///< Pointer to device memory allocation
- std::size_t _size{}; ///< Requested size of the device memory allocation
- std::size_t _capacity{}; ///< The actual size of the device memory allocation
- cuda_stream_view _stream{}; ///< Stream to use for device memory deallocation
+ void* _data{nullptr}; ///< Pointer to device memory allocation
+ std::size_t _size{}; ///< Requested size of the device memory allocation
+ std::size_t _capacity{}; ///< The actual size of the device memory allocation
+ cuda_stream_view _stream{}; ///< Stream to use for device memory deallocation
mr::device_memory_resource* _mr{
mr::get_current_device_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory
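
A sketch of the stream semantics documented above, assuming the default memory resource (constructor and method names are as in this header):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_buffer.hpp>

void buffer_example()
{
  rmm::cuda_stream stream;

  // 1 MiB of uninitialized device memory, allocated on `stream`
  rmm::device_buffer buf{1 << 20, stream.view()};

  // Unless set_stream() is called, `stream` is also used for deallocation
  buf.set_stream(rmm::cuda_stream_default);
}  // deallocated on the default stream here
```
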
diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp
index e476b2d87..83fcdda09 100644
--- a/include/rmm/device_scalar.hpp
+++ b/include/rmm/device_scalar.hpp
@@ -37,18 +37,25 @@ class device_scalar {
public:
static_assert(std::is_trivially_copyable<T>::value, "Scalar type must be trivially copyable");
- using value_type = typename device_uvector<T>::value_type;
- using reference = typename device_uvector<T>::reference;
- using const_reference = typename device_uvector<T>::const_reference;
- using pointer = typename device_uvector<T>::pointer;
- using const_pointer = typename device_uvector<T>::const_pointer;
+ using value_type = typename device_uvector<T>::value_type; ///< T, the type of the scalar element
+ using reference = typename device_uvector<T>::reference; ///< value_type&
+ using const_reference = typename device_uvector<T>::const_reference; ///< const value_type&
+ using pointer =
+ typename device_uvector<T>::pointer; ///< The type of the pointer returned by data()
+ using const_pointer = typename device_uvector<T>::const_pointer; ///< The type of the pointer
+ ///< returned by data() const
RMM_EXEC_CHECK_DISABLE
~device_scalar() = default;
RMM_EXEC_CHECK_DISABLE
- device_scalar(device_scalar&&) noexcept = default;
+ device_scalar(device_scalar&&) noexcept = default; ///< Default move constructor
+ /**
+ * @brief Default move assignment operator
+ *
+ * @return device_scalar& A reference to the assigned-to object
+ */
device_scalar& operator=(device_scalar&&) noexcept = default;
/**
@@ -224,6 +231,8 @@ class device_scalar {
* specified to the constructor, then appropriate dependencies must be inserted between the
* streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may
* be a race condition.
+ *
+ * @return Pointer to underlying device memory
*/
[[nodiscard]] pointer data() noexcept { return static_cast<pointer>(_storage.data()); }
@@ -234,6 +243,8 @@ class device_scalar {
* specified to the constructor, then appropriate dependencies must be inserted between the
* streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may
* be a race condition.
+ *
+ * @return Const pointer to underlying device memory
*/
[[nodiscard]] const_pointer data() const noexcept
{
@@ -241,12 +252,14 @@ class device_scalar {
}
/**
- * @brief Returns stream most recently specified for allocation/deallocation
+ * @briefreturn{Stream associated with the device memory allocation}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return _storage.stream(); }
/**
* @brief Sets the stream to be used for deallocation
+ *
+ * @param stream Stream to be used for deallocation
*/
void set_stream(cuda_stream_view stream) noexcept { _storage.set_stream(stream); }
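
To illustrate the stream-consistency requirement, a sketch that keeps all accesses on one stream (assuming `set_value_async`/`value` as in RMM's `device_scalar` API):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_scalar.hpp>

void scalar_example()
{
  rmm::cuda_stream stream;
  rmm::device_scalar<int> scalar{stream.view()};

  int const host_val{42};
  scalar.set_value_async(host_val, stream.view());  // async H->D copy on `stream`
  int const result = scalar.value(stream.view());   // D->H copy; synchronizes `stream`
}
```
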
diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp
index 40f5d8c5d..47ed1adff 100644
--- a/include/rmm/device_uvector.hpp
+++ b/include/rmm/device_uvector.hpp
@@ -71,22 +71,24 @@ class device_uvector {
"device_uvector only supports types that are trivially copyable.");
public:
- using value_type = T;
- using size_type = std::size_t;
- using reference = value_type&;
- using const_reference = value_type const&;
- using pointer = value_type*;
- using const_pointer = value_type const*;
- using iterator = pointer;
- using const_iterator = const_pointer;
+ using value_type = T; ///< T; stored value type
+ using size_type = std::size_t; ///< The type used for the size of the vector
+ using reference = value_type&; ///< value_type&; reference type returned by operator[](size_type)
+ using const_reference = value_type const&; ///< value_type const&; constant reference type
+ ///< returned by operator[](size_type) const
+ using pointer = value_type*; ///< The type of the pointer returned by data()
+ using const_pointer = value_type const*; ///< The type of the pointer returned by data() const
+ using iterator = pointer; ///< The type of the iterator returned by begin()
+ using const_iterator = const_pointer; ///< The type of the const iterator returned by cbegin()
RMM_EXEC_CHECK_DISABLE
~device_uvector() = default;
RMM_EXEC_CHECK_DISABLE
- device_uvector(device_uvector&&) noexcept = default;
+ device_uvector(device_uvector&&) noexcept = default; ///< @default_move_constructor
- device_uvector& operator=(device_uvector&&) noexcept = default;
+ device_uvector& operator=(device_uvector&&) noexcept =
+ default; ///< @default_move_assignment{device_uvector}
/**
* @brief Copy ctor is deleted as it doesn't allow a stream argument
@@ -497,12 +499,12 @@ class device_uvector {
[[nodiscard]] const_iterator end() const noexcept { return cend(); }
/**
- * @brief Returns the number of elements.
+ * @briefreturn{The number of elements in the vector}
*/
[[nodiscard]] std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); }
/**
- * @brief Returns the signed number of elements.
+ * @briefreturn{The signed number of elements in the vector}
*/
[[nodiscard]] std::int64_t ssize() const noexcept
{
@@ -512,17 +514,12 @@ class device_uvector {
}
/**
- * @brief Returns true if the vector contains no elements, i.e., `size() == 0`.
- *
- * @return true The vector is empty
- * @return false The vector is not empty
+ * @briefreturn{true if the vector contains no elements, i.e. `size() == 0`}
*/
[[nodiscard]] bool is_empty() const noexcept { return size() == 0; }
/**
- * @brief Returns pointer to the resource used to allocate and deallocate the device storage.
- *
- * @return Pointer to underlying resource
+ * @briefreturn{Pointer to underlying resource used to allocate and deallocate the device storage}
*/
[[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept
{
@@ -530,7 +527,7 @@ class device_uvector {
}
/**
- * @brief Returns stream most recently specified for allocation/deallocation
+ * @briefreturn{Stream most recently specified for allocation/deallocation}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return _storage.stream(); }
@@ -542,6 +539,8 @@ class device_uvector {
* will be used for deallocation in the `rmm::device_uvector` destructor.
* However, if either of `resize()` or `shrink_to_fit()` is called after this,
* the later stream parameter will be stored and used in the destructor.
+ *
+ * @param stream The stream to use for deallocation
*/
void set_stream(cuda_stream_view stream) noexcept { _storage.set_stream(stream); }
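
A short sketch exercising the accessors documented above (the element setter is the synchronous `set_element` overload from this class):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_uvector.hpp>

void uvector_example()
{
  rmm::cuda_stream stream;

  rmm::device_uvector<int> vec(100, stream.view());  // 100 uninitialized ints

  vec.set_element(0, 7, stream.view());  // copy 7 into device element 0
  auto const n     = vec.size();         // 100
  auto const empty = vec.is_empty();     // false
}
```
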
diff --git a/include/rmm/exec_policy.hpp b/include/rmm/exec_policy.hpp
index 015c3646d..78c747652 100644
--- a/include/rmm/exec_policy.hpp
+++ b/include/rmm/exec_policy.hpp
@@ -30,6 +30,9 @@
namespace rmm {
+/**
+ * @brief Synchronous execution policy for allocations using thrust
+ */
using thrust_exec_policy_t =
thrust::detail::execute_with_allocator<rmm::mr::thrust_allocator<char>,
thrust::cuda_cub::execute_on_stream_base>;
@@ -40,6 +43,12 @@ using thrust_exec_policy_t =
*/
class exec_policy : public thrust_exec_policy_t {
public:
+ /**
+ * @brief Construct a new execution policy object
+ *
+ * @param stream The stream on which to allocate temporary memory
+ * @param mr The resource to use for allocating temporary memory
+ */
explicit exec_policy(cuda_stream_view stream = cuda_stream_default,
rmm::mr::device_memory_resource* mr = mr::get_current_device_resource())
: thrust_exec_policy_t(
@@ -50,6 +59,9 @@ class exec_policy : public thrust_exec_policy_t {
#if THRUST_VERSION >= 101600
+/**
+ * @brief Asynchronous execution policy for allocations using thrust
+ */
using thrust_exec_policy_nosync_t =
+ thrust::detail::execute_with_allocator<rmm::mr::thrust_allocator<char>,
thrust::cuda_cub::execute_on_stream_nosync_base>;
@@ -72,9 +84,11 @@ class exec_policy_nosync : public thrust_exec_policy_nosync_t {
#else
-using thrust_exec_policy_nosync_t = thrust_exec_policy_t;
-using exec_policy_nosync = exec_policy;
-
+using thrust_exec_policy_nosync_t =
+ thrust_exec_policy_t; ///< When used with Thrust < 1.16.0, thrust_exec_policy_nosync_t is an
+ ///< alias for thrust_exec_policy_t
+using exec_policy_nosync =
+ exec_policy; ///< When used with Thrust < 1.16.0, exec_policy_nosync is an alias for exec_policy
#endif
} // namespace rmm
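
The policies above are how RMM routes Thrust's temporary allocations through its resources. A typical call site (a sketch; any Thrust algorithm that allocates scratch memory works the same way):

```cpp
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/reduce.h>

int sum_on_stream(rmm::device_uvector<int> const& vec, rmm::cuda_stream_view stream)
{
  // Scratch allocations made by thrust::reduce go through the current
  // device resource and are stream-ordered on `stream`
  return thrust::reduce(rmm::exec_policy(stream), vec.begin(), vec.end(), 0);
}
```
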
diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp
index d0d6f8fa4..4e17878bd 100644
--- a/include/rmm/logger.hpp
+++ b/include/rmm/logger.hpp
@@ -43,9 +43,11 @@ inline std::string default_log_filename()
return (filename == nullptr) ? std::string{"rmm_log.txt"} : std::string{filename};
}
-// Simple wrapper around a spdlog::logger that performs RMM-specific initialization
+/**
+ * @brief Simple wrapper around a spdlog::logger that performs RMM-specific initialization
+ */
struct logger_wrapper {
- spdlog::logger logger_;
+ spdlog::logger logger_; ///< The underlying logger
logger_wrapper()
: logger_{"RMM",
@@ -70,8 +72,14 @@ struct logger_wrapper {
* @brief Represent a size in number of bytes.
*/
struct bytes {
- std::size_t value;
-
+ std::size_t value; ///< The size in bytes
+
+ /**
+ * @brief Output stream operator for bytes
+ *
+ * @param os The output stream
+ * @param value The size in bytes
+ * @return The output stream
+ */
friend std::ostream& operator<<(std::ostream& os, bytes const& value)
{
static std::array units{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"};
@@ -113,5 +121,8 @@ inline spdlog::logger& logger()
} // namespace rmm
+// Doxygen doesn't like this because we're overloading something from fmt
+//! @cond Doxygen_Suppress
template <>
struct fmt::formatter<rmm::detail::bytes> : fmt::ostream_formatter {};
+//! @endcond
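
For reference, the `bytes` helper above is what gives RMM's log messages human-readable sizes; it can be streamed directly (a sketch, assuming `bytes` lives in `rmm::detail` as in this header):

```cpp
#include <rmm/logger.hpp>

#include <cstddef>
#include <iostream>

void bytes_example()
{
  // Prints the size scaled to binary units (B, KiB, MiB, ...), e.g. "1.5 GiB"
  std::cout << rmm::detail::bytes{std::size_t{3} << 29} << '\n';
}
```
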
diff --git a/include/rmm/mr/device/callback_memory_resource.hpp b/include/rmm/mr/device/callback_memory_resource.hpp
index 1937e7ef0..2d9695be2 100644
--- a/include/rmm/mr/device/callback_memory_resource.hpp
+++ b/include/rmm/mr/device/callback_memory_resource.hpp
@@ -90,12 +90,14 @@ class callback_memory_resource final : public device_memory_resource {
{
}
- callback_memory_resource() = delete;
- ~callback_memory_resource() override = default;
- callback_memory_resource(callback_memory_resource const&) = delete;
- callback_memory_resource& operator=(callback_memory_resource const&) = delete;
- callback_memory_resource(callback_memory_resource&&) noexcept = default;
- callback_memory_resource& operator=(callback_memory_resource&&) noexcept = default;
+ callback_memory_resource() = delete;
+ ~callback_memory_resource() override = default;
+ callback_memory_resource(callback_memory_resource const&) = delete;
+ callback_memory_resource& operator=(callback_memory_resource const&) = delete;
+ callback_memory_resource(callback_memory_resource&&) noexcept =
+ default; ///< @default_move_constructor
+ callback_memory_resource& operator=(callback_memory_resource&&) noexcept =
+ default; ///< @default_move_assignment{callback_memory_resource}
private:
void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
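
A sketch of constructing a `callback_memory_resource`, here simply forwarding to another resource through the `void*` user arguments (callback signatures as declared in this header):

```cpp
#include <rmm/mr/device/callback_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

void callback_mr_example()
{
  auto* upstream = rmm::mr::get_current_device_resource();

  auto allocate = [](std::size_t bytes, rmm::cuda_stream_view stream, void* arg) -> void* {
    return static_cast<rmm::mr::device_memory_resource*>(arg)->allocate(bytes, stream);
  };
  auto deallocate = [](void* ptr, std::size_t bytes, rmm::cuda_stream_view stream, void* arg) {
    static_cast<rmm::mr::device_memory_resource*>(arg)->deallocate(ptr, bytes, stream);
  };

  rmm::mr::callback_memory_resource mr{allocate, deallocate, upstream, upstream};
}
```
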
diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp
index d41eae63e..329d8f29a 100644
--- a/include/rmm/mr/device/cuda_async_memory_resource.hpp
+++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp
@@ -98,7 +98,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
RMM_EXPECTS(rmm::detail::async_alloc::is_export_handle_type_supported(pool_props.handleTypes),
"Requested IPC memory handle type not supported");
pool_props.location.type = cudaMemLocationTypeDevice;
- pool_props.location.id = rmm::detail::current_device().value();
+ pool_props.location.id = rmm::get_current_cuda_device().value();
cudaMemPool_t cuda_pool_handle{};
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
pool_ = cuda_async_view_memory_resource{cuda_pool_handle};
diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp
index 191e4741d..c685cd75f 100644
--- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp
+++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp
@@ -60,7 +60,7 @@ class cuda_async_view_memory_resource final : public device_memory_resource {
}()}
{
// Check if cudaMallocAsync Memory pool supported
- auto const device = rmm::detail::current_device();
+ auto const device = rmm::get_current_cuda_device();
int cuda_pool_supported{};
auto result =
cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value());
@@ -77,11 +77,15 @@ class cuda_async_view_memory_resource final : public device_memory_resource {
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
#endif
- cuda_async_view_memory_resource() = default;
- cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) = default;
- cuda_async_view_memory_resource(cuda_async_view_memory_resource&&) = default;
- cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource const&) = default;
- cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) = default;
+ cuda_async_view_memory_resource() = default;
+ cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) =
+ default; ///< @default_copy_constructor
+ cuda_async_view_memory_resource(cuda_async_view_memory_resource&&) =
+ default; ///< @default_move_constructor
+ cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource const&) =
+ default; ///< @default_copy_assignment{cuda_async_view_memory_resource}
+ cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) =
+ default; ///< @default_move_assignment{cuda_async_view_memory_resource}
/**
* @brief Query whether the resource supports use of non-null CUDA streams for
diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp
index 7a5385f4e..b0bf9ae09 100644
--- a/include/rmm/mr/device/cuda_memory_resource.hpp
+++ b/include/rmm/mr/device/cuda_memory_resource.hpp
@@ -29,12 +29,14 @@ namespace rmm::mr {
*/
class cuda_memory_resource final : public device_memory_resource {
public:
- cuda_memory_resource() = default;
- ~cuda_memory_resource() override = default;
- cuda_memory_resource(cuda_memory_resource const&) = default;
- cuda_memory_resource(cuda_memory_resource&&) = default;
- cuda_memory_resource& operator=(cuda_memory_resource const&) = default;
- cuda_memory_resource& operator=(cuda_memory_resource&&) = default;
+ cuda_memory_resource() = default;
+ ~cuda_memory_resource() override = default;
+ cuda_memory_resource(cuda_memory_resource const&) = default; ///< @default_copy_constructor
+ cuda_memory_resource(cuda_memory_resource&&) = default; ///< @default_move_constructor
+ cuda_memory_resource& operator=(cuda_memory_resource const&) =
+ default; ///< @default_copy_assignment{cuda_memory_resource}
+ cuda_memory_resource& operator=(cuda_memory_resource&&) =
+ default; ///< @default_move_assignment{cuda_memory_resource}
/**
* @brief Query whether the resource supports use of non-null CUDA streams for
diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
index 53575e5ce..f071717c0 100644
--- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
+++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
@@ -15,15 +15,16 @@
*/
#pragma once
+#include <rmm/cuda_device.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/aligned.hpp>
 #include <rmm/detail/cuda_util.hpp>
 #include <rmm/detail/error.hpp>
-#include <cuda_runtime_api.h>
-
 #include <rmm/mr/device/device_memory_resource.hpp>
+#include <cuda_runtime_api.h>
+
 #include <cstddef>
 #include <map>
 #include <mutex>
@@ -288,17 +289,25 @@ class stream_ordered_memory_resource : public crtp, public device_
stream_event_pair get_event(cuda_stream_view stream)
{
if (stream.is_per_thread_default()) {
- // Create a thread-local shared event wrapper. Shared pointers in the thread and in each MR
- // instance ensures it is destroyed cleaned up only after all are finished with it.
- thread_local auto event_tls = std::make_shared<event_wrapper>();
- default_stream_events.insert(event_tls);
- return stream_event_pair{stream.value(), event_tls->event};
+ // Create a thread-local shared event wrapper for each device. Shared pointers in the thread
+ // and in each MR instance ensure the wrappers are destroyed only after all are finished
+ // with them.
+ thread_local std::vector<std::shared_ptr<event_wrapper>> events_tls(
+ rmm::get_num_cuda_devices());
+ auto event = [&, device_id = this->device_id_]() {
+ if (events_tls[device_id.value()]) { return events_tls[device_id.value()]->event; }
+
+ auto event = std::make_shared<event_wrapper>();
+ this->default_stream_events.insert(event);
+ return (events_tls[device_id.value()] = std::move(event))->event;
+ }();
+ return stream_event_pair{stream.value(), event};
}
// We use cudaStreamLegacy as the event map key for the default stream for consistency between
// PTDS and non-PTDS mode. In PTDS mode, the cudaStreamLegacy map key will only exist if the
// user explicitly passes it, so it is used as the default location for the free list
- // at construction. For consistency, the same key is used for null stream free lists in non-PTDS
- // mode.
+ // at construction. For consistency, the same key is used for null stream free lists in
+ // non-PTDS mode.
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
auto* const stream_to_store = stream.is_default() ? cudaStreamLegacy : stream.value();
@@ -496,11 +505,13 @@ class stream_ordered_memory_resource : public crtp, public device_
// bidirectional mapping between non-default streams and events
std::unordered_map<cudaStream_t, stream_event_pair> stream_events_;
- // shared pointers to events keeps the events alive as long as either the thread that created them
- // or the MR that is using them exists.
+ // shared pointers to events keeps the events alive as long as either the thread that created
+ // them or the MR that is using them exists.
std::set<std::shared_ptr<event_wrapper>> default_stream_events;
std::mutex mtx_; // mutex for thread-safe access
-}; // namespace detail
+
+ rmm::cuda_device_id device_id_{rmm::get_current_cuda_device()};
+}; // namespace detail
} // namespace rmm::mr::detail
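
Because the resource now records `device_id_` at construction, each instance must be created while its device is current; the per-device thread-local events above are keyed the same way. A sketch of setting up one pool per device (types and helpers as introduced elsewhere in this diff):

```cpp
#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <memory>
#include <vector>

void per_device_pools()
{
  using pool_mr = rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>;
  static rmm::mr::cuda_memory_resource upstream;
  static std::vector<std::unique_ptr<pool_mr>> pools;

  for (int i = 0; i < rmm::get_num_cuda_devices(); ++i) {
    rmm::cuda_device_id const id{i};
    rmm::cuda_set_device_raii scope{id};  // pool captures this device on construction
    pools.push_back(std::make_unique<pool_mr>(&upstream));
    rmm::mr::set_per_device_resource(id, pools.back().get());
  }
}
```
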
diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp
index 4778d9bda..5f511e393 100644
--- a/include/rmm/mr/device/device_memory_resource.hpp
+++ b/include/rmm/mr/device/device_memory_resource.hpp
@@ -81,12 +81,15 @@ namespace rmm::mr {
*/
class device_memory_resource {
public:
- device_memory_resource() = default;
- virtual ~device_memory_resource() = default;
- device_memory_resource(device_memory_resource const&) = default;
- device_memory_resource& operator=(device_memory_resource const&) = default;
- device_memory_resource(device_memory_resource&&) noexcept = default;
- device_memory_resource& operator=(device_memory_resource&&) noexcept = default;
+ device_memory_resource() = default;
+ virtual ~device_memory_resource() = default;
+ device_memory_resource(device_memory_resource const&) = default; ///< @default_copy_constructor
+ device_memory_resource(device_memory_resource&&) noexcept =
+ default; ///< @default_move_constructor
+ device_memory_resource& operator=(device_memory_resource const&) =
+ default; ///< @default_copy_assignment{device_memory_resource}
+ device_memory_resource& operator=(device_memory_resource&&) noexcept =
+ default; ///< @default_move_assignment{device_memory_resource}
/**
* @brief Allocates memory of size at least \p bytes.
@@ -121,7 +124,7 @@ class device_memory_resource {
*
* @throws Nothing.
*
- * @param p Pointer to be deallocated
+ * @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the `allocate` call that returned `ptr`.
* @param stream Stream on which to perform deallocation
diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
index 4a46cb660..48f0513d8 100644
--- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
+++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
@@ -109,14 +109,13 @@ class failure_callback_resource_adaptor final : public device_memory_resource {
~failure_callback_resource_adaptor() override = default;
failure_callback_resource_adaptor(failure_callback_resource_adaptor const&) = delete;
failure_callback_resource_adaptor& operator=(failure_callback_resource_adaptor const&) = delete;
- failure_callback_resource_adaptor(failure_callback_resource_adaptor&&) noexcept = default;
+ failure_callback_resource_adaptor(failure_callback_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
failure_callback_resource_adaptor& operator=(failure_callback_resource_adaptor&&) noexcept =
- default;
+ default; ///< @default_move_assignment{failure_callback_resource_adaptor}
/**
- * @brief Return pointer to the upstream resource.
- *
- * @return Upstream* Pointer to the upstream resource.
+ * @briefreturn{Pointer to the upstream resource}
*/
Upstream* get_upstream() const noexcept { return upstream_; }
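
A sketch of the retry pattern this adaptor enables: the callback returns `true` to ask for one more attempt and `false` to let the exception propagate (callback signature as declared in this header):

```cpp
#include <rmm/mr/device/failure_callback_resource_adaptor.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

bool retry_once(std::size_t /*bytes*/, void* arg)
{
  bool& retried = *static_cast<bool*>(arg);
  if (!retried) {
    retried = true;
    return true;  // e.g. free cached memory here, then retry the allocation
  }
  return false;  // give up; the original exception propagates
}

void failure_callback_example()
{
  bool retried{false};
  rmm::mr::failure_callback_resource_adaptor<rmm::mr::device_memory_resource> mr{
    rmm::mr::get_current_device_resource(), retry_once, &retried};
}
```
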
diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp
index 4954415c3..a24ac6ddd 100644
--- a/include/rmm/mr/device/fixed_size_memory_resource.hpp
+++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp
@@ -50,10 +50,10 @@ class fixed_size_memory_resource
friend class detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
detail::fixed_size_free_list>;
- // A block is the fixed size this resource alloates
- static constexpr std::size_t default_block_size = 1 << 20; // 1 MiB
- // This is the number of blocks that the pool starts out with, and also the number of
- // blocks by which the pool grows when all of its current blocks are allocated
+ static constexpr std::size_t default_block_size = 1 << 20; ///< Default allocation block size
+
+ /// The number of blocks that the pool starts out with, and also the number of
+ /// blocks by which the pool grows when all of its current blocks are allocated
static constexpr std::size_t default_blocks_to_preallocate = 128;
/**
@@ -121,11 +121,11 @@ class fixed_size_memory_resource
[[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; }
protected:
- using free_list = detail::fixed_size_free_list;
- using block_type = free_list::block_type;
+ using free_list = detail::fixed_size_free_list; ///< The free list type
+ using block_type = free_list::block_type; ///< The type of block managed by the free list
using typename detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
detail::fixed_size_free_list>::split_block;
- using lock_guard = std::lock_guard<std::mutex>;
+ using lock_guard = std::lock_guard<std::mutex>; ///< Type of lock used to synchronize access
/**
* @brief Get the (fixed) size of allocations supported by this memory resource
@@ -142,6 +142,7 @@ class fixed_size_memory_resource
* strategy (see `size_to_grow()`).
*
* @param size The minimum size to allocate
+ * @param blocks The set of blocks from which to allocate
* @param stream The stream on which the memory is to be used.
* @return block_type The allocated block
*/
@@ -154,7 +155,6 @@ class fixed_size_memory_resource
/**
* @brief Allocate blocks from upstream to expand the suballocation pool.
*
- * @param size The minimum size to allocate
* @param stream The stream on which the memory is to be used.
* @return block_type The allocated block
*/
@@ -182,7 +182,6 @@ class fixed_size_memory_resource
*
* @param block The block to allocate from.
* @param size The size in bytes of the requested allocation.
- * @param stream_event The stream and associated event on which the allocation will be used.
* @return A pair comprising the allocated pointer and any unallocated remainder of the input
* block.
*/
@@ -196,7 +195,6 @@ class fixed_size_memory_resource
*
* @param ptr The pointer to the memory to free.
* @param size The size of the memory to free. Must be equal to the original allocation size.
- * @param stream The stream-event pair for the stream on which the memory was last used.
* @return The (now freed) block associated with `ptr`. The caller is expected to return the block
* to the pool.
*/
@@ -217,7 +215,8 @@ class fixed_size_memory_resource
* @param stream the stream being executed on
* @return std::pair with available and free memory for resource
*/
- [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view) const override
+ [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
+ [[maybe_unused]] cuda_stream_view stream) const override
{
return std::make_pair(0, 0);
}
@@ -272,7 +271,7 @@ class fixed_size_memory_resource
}
private:
- Upstream* upstream_mr_; // The resource from which to allocate new blocks
+ Upstream* upstream_mr_; // The resource from which to allocate new blocks
std::size_t const block_size_; // size of blocks this MR allocates
std::size_t const upstream_chunk_size_; // size of chunks allocated from heap MR
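
A sketch showing the fixed-block behavior documented above (default block size and preallocation count as defined in this header):

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/fixed_size_memory_resource.hpp>

void fixed_size_example()
{
  rmm::mr::cuda_memory_resource upstream;

  // Every allocation is served from identical 1 MiB blocks; 128 are preallocated
  rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream};

  void* ptr = mr.allocate(4096);  // still consumes one full block
  mr.deallocate(ptr, 4096);
}
```
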
diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp
index 6d1843e82..895c404b0 100644
--- a/include/rmm/mr/device/limiting_resource_adaptor.hpp
+++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp
@@ -44,7 +44,8 @@ class limiting_resource_adaptor final : public device_memory_resource {
* @throws `rmm::logic_error` if `upstream == nullptr`
*
* @param upstream The resource used for allocating/deallocating device memory
- * @param allocation_limit Maximum memory allowed for this allocator.
+ * @param allocation_limit Maximum memory allowed for this allocator
+ * @param alignment Alignment in bytes for the start of each allocated buffer
*/
limiting_resource_adaptor(Upstream* upstream,
std::size_t allocation_limit,
@@ -57,17 +58,17 @@ class limiting_resource_adaptor final : public device_memory_resource {
RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
}
- limiting_resource_adaptor() = delete;
- ~limiting_resource_adaptor() override = default;
- limiting_resource_adaptor(limiting_resource_adaptor const&) = delete;
- limiting_resource_adaptor(limiting_resource_adaptor&&) noexcept = default;
- limiting_resource_adaptor& operator=(limiting_resource_adaptor const&) = delete;
- limiting_resource_adaptor& operator=(limiting_resource_adaptor&&) noexcept = default;
+ limiting_resource_adaptor() = delete;
+ ~limiting_resource_adaptor() override = default;
+ limiting_resource_adaptor(limiting_resource_adaptor const&) = delete;
+ limiting_resource_adaptor(limiting_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
+ limiting_resource_adaptor& operator=(limiting_resource_adaptor const&) = delete;
+ limiting_resource_adaptor& operator=(limiting_resource_adaptor&&) noexcept =
+ default; ///< @default_move_assignment{limiting_resource_adaptor}
/**
- * @brief Return pointer to the upstream resource.
- *
- * @return Upstream* Pointer to the upstream resource.
+ * @briefreturn{Pointer to the upstream resource}
*/
[[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; }
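
Usage is a thin wrapper around the upstream resource; a sketch with a 1 GiB cap (allocations past the limit fail with an exception):

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/limiting_resource_adaptor.hpp>

void limiting_example()
{
  rmm::mr::cuda_memory_resource upstream;

  // At most 1 GiB of outstanding allocations through this resource
  rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource> mr{&upstream,
                                                                       std::size_t{1} << 30};

  void* ptr = mr.allocate(std::size_t{1} << 20);
  mr.deallocate(ptr, std::size_t{1} << 20);
}
```
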
diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp
index 71487705a..0ff9e950b 100644
--- a/include/rmm/mr/device/logging_resource_adaptor.hpp
+++ b/include/rmm/mr/device/logging_resource_adaptor.hpp
@@ -98,6 +98,20 @@ class logging_resource_adaptor final : public device_memory_resource {
init_logger(auto_flush);
}
+ /**
+ * @brief Construct a new logging resource adaptor using `upstream` to satisfy
+ * allocation requests and logging information about each allocation/free to
+ * the given logging sinks.
+ *
+ * The log output will be written using CSV formatting.
+ *
+ * @throws `rmm::logic_error` if `upstream == nullptr`
+ *
+ * @param upstream The resource used for allocating/deallocating device memory
+ * @param sinks A list of logging sinks to which log output will be written.
+ * @param auto_flush If true, flushes the log for every (de)allocation. Warning, this will degrade
+ * performance.
+ */
logging_resource_adaptor(Upstream* upstream,
spdlog::sinks_init_list sinks,
bool auto_flush = false)
@@ -108,12 +122,14 @@ class logging_resource_adaptor final : public device_memory_resource {
init_logger(auto_flush);
}
- logging_resource_adaptor() = delete;
- ~logging_resource_adaptor() override = default;
- logging_resource_adaptor(logging_resource_adaptor const&) = delete;
- logging_resource_adaptor& operator=(logging_resource_adaptor const&) = delete;
- logging_resource_adaptor(logging_resource_adaptor&&) noexcept = default;
- logging_resource_adaptor& operator=(logging_resource_adaptor&&) noexcept = default;
+ logging_resource_adaptor() = delete;
+ ~logging_resource_adaptor() override = default;
+ logging_resource_adaptor(logging_resource_adaptor const&) = delete;
+ logging_resource_adaptor& operator=(logging_resource_adaptor const&) = delete;
+ logging_resource_adaptor(logging_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
+ logging_resource_adaptor& operator=(logging_resource_adaptor&&) noexcept =
+ default; ///< @default_move_assignment{logging_resource_adaptor}
/**
* @brief Return pointer to the upstream resource.
@@ -301,8 +317,8 @@ class logging_resource_adaptor final : public device_memory_resource {
std::shared_ptr<spdlog::logger> logger_; ///< spdlog logger object
- Upstream* upstream_; ///< The upstream resource used for satisfying
- ///< allocation requests
+ Upstream* upstream_; ///< The upstream resource used for satisfying
+ ///< allocation requests
};
/**
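
A sketch of the sink-based constructor documented above, writing CSV records to a file through a standard spdlog sink (assumes spdlog's `basic_file_sink_mt`, which RMM already depends on):

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/logging_resource_adaptor.hpp>

#include <spdlog/sinks/basic_file_sink.h>

#include <memory>

void logging_example()
{
  rmm::mr::cuda_memory_resource upstream;
  auto sink = std::make_shared<spdlog::sinks::basic_file_sink_mt>("rmm_log.csv");

  rmm::mr::logging_resource_adaptor<rmm::mr::cuda_memory_resource> mr{
    &upstream, {sink}, /*auto_flush=*/true};

  void* ptr = mr.allocate(256);
  mr.deallocate(ptr, 256);  // both events are logged as CSV rows
}
```
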
diff --git a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp
index 50317720a..4a0f7701a 100644
--- a/include/rmm/mr/device/managed_memory_resource.hpp
+++ b/include/rmm/mr/device/managed_memory_resource.hpp
@@ -29,12 +29,14 @@ namespace rmm::mr {
*/
class managed_memory_resource final : public device_memory_resource {
public:
- managed_memory_resource() = default;
- ~managed_memory_resource() override = default;
- managed_memory_resource(managed_memory_resource const&) = default;
- managed_memory_resource(managed_memory_resource&&) = default;
- managed_memory_resource& operator=(managed_memory_resource const&) = default;
- managed_memory_resource& operator=(managed_memory_resource&&) = default;
+ managed_memory_resource() = default;
+ ~managed_memory_resource() override = default;
+ managed_memory_resource(managed_memory_resource const&) = default; ///< @default_copy_constructor
+ managed_memory_resource(managed_memory_resource&&) = default; ///< @default_move_constructor
+ managed_memory_resource& operator=(managed_memory_resource const&) =
+ default; ///< @default_copy_assignment{managed_memory_resource}
+ managed_memory_resource& operator=(managed_memory_resource&&) =
+ default; ///< @default_move_assignment{managed_memory_resource}
/**
* @brief Query whether the resource supports use of non-null streams for
diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp
index d7c7b8c46..eca0b3207 100644
--- a/include/rmm/mr/device/owning_wrapper.hpp
+++ b/include/rmm/mr/device/owning_wrapper.hpp
@@ -73,7 +73,8 @@ auto make_resource(std::tuple...> const& upstreams, A
template <typename Resource, typename... Upstreams>
class owning_wrapper : public device_memory_resource {
public:
- using upstream_tuple = std::tuple<std::shared_ptr<Upstreams>...>;
+ using upstream_tuple =
+ std::tuple<std::shared_ptr<Upstreams>...>; ///< Tuple of upstream memory resources
/**
* @brief Constructs the wrapped resource using the provided upstreams and any additional
@@ -116,14 +117,12 @@ class owning_wrapper : public device_memory_resource {
}
/**
- * @brief Returns a constant reference to the wrapped resource.
- *
+ * @briefreturn{A constant reference to the wrapped resource}
*/
[[nodiscard]] Resource const& wrapped() const noexcept { return *wrapped_; }
/**
- * @brief Returns reference to the wrapped resource.
- *
+ * @briefreturn{A reference to the wrapped resource}
*/
[[nodiscard]] Resource& wrapped() noexcept { return *wrapped_; }
@@ -136,9 +135,7 @@ class owning_wrapper : public device_memory_resource {
}
/**
- * @brief Query whether the resource supports the get_mem_info API.
- *
- * @return true if the wrapped resource supports get_mem_info, false otherwise.
+ * @briefreturn{true if the wrapped resource supports get_mem_info, false otherwise}
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override
{
diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp
index 4ddbd874a..aa7217758 100644
--- a/include/rmm/mr/device/per_device_resource.hpp
+++ b/include/rmm/mr/device/per_device_resource.hpp
@@ -88,13 +88,19 @@ inline device_memory_resource* initial_resource()
return &mr;
}
+/**
+ * @briefreturn{Reference to the mutex guarding the device id -> resource map}
+ */
inline std::mutex& map_lock()
{
static std::mutex map_lock;
return map_lock;
}
-// Must have default visibility, see: https://github.com/rapidsai/rmm/issues/826
+// This symbol must have default visibility, see: https://github.com/rapidsai/rmm/issues/826
+/**
+ * @briefreturn{Reference to the map from device id -> resource}
+ */
RMM_EXPORT inline auto& get_map()
{
static std::map<cuda_device_id::value_type, device_memory_resource*> device_id_to_resource;
@@ -121,7 +127,7 @@ RMM_EXPORT inline auto& get_map()
* undefined if used while the active CUDA device is a different device from the one that was active
* when the device_memory_resource was created.
*
- * @param id The id of the target device
+ * @param device_id The id of the target device
* @return Pointer to the current `device_memory_resource` for device `device_id`
*/
inline device_memory_resource* get_per_device_resource(cuda_device_id device_id)
@@ -156,7 +162,7 @@ inline device_memory_resource* get_per_device_resource(cuda_device_id device_id)
* undefined if used while the active CUDA device is a different device from the one that was active
* when the device_memory_resource was created.
*
- * @param id The id of the target device
+ * @param device_id The id of the target device
* @param new_mr If not `nullptr`, pointer to new `device_memory_resource` to use as new resource
* for `device_id`
* @return Pointer to the previous memory resource for `device_id`
@@ -196,7 +202,7 @@ inline device_memory_resource* set_per_device_resource(cuda_device_id device_id,
*/
inline device_memory_resource* get_current_device_resource()
{
- return get_per_device_resource(rmm::detail::current_device());
+ return get_per_device_resource(rmm::get_current_cuda_device());
}
/**
@@ -225,6 +231,6 @@ inline device_memory_resource* get_current_device_resource()
*/
inline device_memory_resource* set_current_device_resource(device_memory_resource* new_mr)
{
- return set_per_device_resource(rmm::detail::current_device(), new_mr);
+ return set_per_device_resource(rmm::get_current_cuda_device(), new_mr);
}
} // namespace rmm::mr
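
Putting the accessors above together, the usual pattern for installing a custom default resource (a sketch; the statics keep the resources alive for the lifetime of the program, as required):

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

void install_pool_as_default()
{
  static rmm::mr::cuda_memory_resource upstream;
  static rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream};

  // All default-resource allocations on the active device now use the pool
  auto* previous = rmm::mr::set_current_device_resource(&pool);
  // ... work ...
  rmm::mr::set_current_device_resource(previous);  // restore on teardown
}
```
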
diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp
index 5c87ef7f7..a52ec14d1 100644
--- a/include/rmm/mr/device/polymorphic_allocator.hpp
+++ b/include/rmm/mr/device/polymorphic_allocator.hpp
@@ -43,7 +43,7 @@ namespace rmm::mr {
template <typename T>
class polymorphic_allocator {
public:
- using value_type = T;
+ using value_type = T; ///< T, the value type of objects allocated by this allocator
/**
* @brief Construct a `polymorphic_allocator` using the return value of
* `rmm::mr::get_current_device_resource()` as the underlying memory resource.
@@ -148,7 +148,9 @@ bool operator!=(polymorphic_allocator const& lhs, polymorphic_allocator co
template <typename Allocator>
class stream_allocator_adaptor {
public:
- using value_type = typename std::allocator_traits<Allocator>::value_type;
+ using value_type =
+ typename std::allocator_traits<Allocator>::value_type; ///< The value type of objects allocated
+ ///< by this allocator
stream_allocator_adaptor() = delete;
@@ -187,8 +189,8 @@ class stream_allocator_adaptor {
*/
template <typename T>
struct rebind {
- using other =
- stream_allocator_adaptor<typename std::allocator_traits<Allocator>::template rebind_alloc<T>>;
+ using other = stream_allocator_adaptor<typename std::allocator_traits<Allocator>::template rebind_alloc<T>>; ///< The type to bind to
};
/**
@@ -212,14 +214,12 @@ class stream_allocator_adaptor {
void deallocate(value_type* ptr, std::size_t num) { alloc_.deallocate(ptr, num, stream()); }
/**
- * @brief Returns the underlying stream on which calls to the underlying allocator are made.
- *
+ * @briefreturn{The stream on which calls to the underlying allocator are made}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return stream_; }
/**
- * @brief Returns the underlying stream-ordered allocator
- *
+ * @briefreturn{The underlying allocator}
*/
[[nodiscard]] Allocator underlying_allocator() const noexcept { return alloc_; }
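
A sketch binding a stream to a stream-ordered allocator so it satisfies the standard `Allocator` requirements (using the `make_stream_allocator_adaptor` helper from this header):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

void adaptor_example()
{
  rmm::cuda_stream stream;
  rmm::mr::polymorphic_allocator<int> alloc;  // uses the current device resource

  auto adapted = rmm::mr::make_stream_allocator_adaptor(alloc, stream.view());

  int* ptr = adapted.allocate(100);  // allocation happens on `stream`
  adapted.deallocate(ptr, 100);
}
```
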
diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp
index 8955fd958..703ce8ea7 100644
--- a/include/rmm/mr/device/pool_memory_resource.hpp
+++ b/include/rmm/mr/device/pool_memory_resource.hpp
@@ -143,11 +143,11 @@ class pool_memory_resource final
[[nodiscard]] std::size_t pool_size() const noexcept { return current_pool_size_; }
protected:
- using free_list = detail::coalescing_free_list;
- using block_type = free_list::block_type;
+ using free_list = detail::coalescing_free_list; ///< The free list implementation
+ using block_type = free_list::block_type; ///< The type of block returned by the free list
using typename detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
detail::coalescing_free_list>::split_block;
- using lock_guard = std::lock_guard<std::mutex>;
+ using lock_guard = std::lock_guard<std::mutex>; ///< Type of lock used to synchronize access
/**
* @brief Get the maximum size of allocations supported by this memory resource
@@ -306,7 +306,6 @@ class pool_memory_resource final
*
* @param block The block to allocate from.
* @param size The size in bytes of the requested allocation.
- * @param stream_event The stream and associated event on which the allocation will be used.
* @return A pair comprising the allocated pointer and any unallocated remainder of the input
* block.
*/
@@ -329,7 +328,6 @@ class pool_memory_resource final
*
* @param ptr The pointer to the memory to free.
* @param size The size of the memory to free. Must be equal to the original allocation size.
- * @param stream The stream-event pair for the stream on which the memory was last used.
* @return The (now freed) block associated with `ptr`. The caller is expected to return the block
* to the pool.
*/
diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp
index c3fe3010f..7af75593d 100644
--- a/include/rmm/mr/device/statistics_resource_adaptor.hpp
+++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp
@@ -40,17 +40,24 @@ template
class statistics_resource_adaptor final : public device_memory_resource {
public:
// can be a std::shared_mutex once C++17 is adopted
- using read_lock_t = std::shared_lock<std::shared_timed_mutex>;
- using write_lock_t = std::unique_lock<std::shared_timed_mutex>;
-
+ using read_lock_t =
+ std::shared_lock<std::shared_timed_mutex>; ///< Type of lock used to synchronize read access
+ using write_lock_t =
+ std::unique_lock<std::shared_timed_mutex>; ///< Type of lock used to synchronize write access
/**
* @brief Utility struct for counting the current, peak, and total value of a number
*/
struct counter {
- int64_t value{0}; // Current value
- int64_t peak{0}; // Max value of `value`
- int64_t total{0}; // Sum of all added values
+ int64_t value{0}; ///< Current value
+ int64_t peak{0}; ///< Max value of `value`
+ int64_t total{0}; ///< Sum of all added values
+ /**
+ * @brief Add `val` to the current value and update the peak value if necessary
+ *
+ * @param val Value to add
+ * @return Reference to this object
+ */
counter& operator+=(int64_t val)
{
value += val;
@@ -59,6 +66,12 @@ class statistics_resource_adaptor final : public device_memory_resource {
return *this;
}
+ /**
+ * @brief Subtract `val` from the current value and update the peak value if necessary
+ *
+ * @param val Value to subtract
+ * @return Reference to this object
+ */
counter& operator-=(int64_t val)
{
value -= val;
@@ -79,17 +92,17 @@ class statistics_resource_adaptor final : public device_memory_resource {
RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
}
- statistics_resource_adaptor() = delete;
- ~statistics_resource_adaptor() override = default;
- statistics_resource_adaptor(statistics_resource_adaptor const&) = delete;
- statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete;
- statistics_resource_adaptor(statistics_resource_adaptor&&) noexcept = default;
- statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) noexcept = default;
+ statistics_resource_adaptor() = delete;
+ ~statistics_resource_adaptor() override = default;
+ statistics_resource_adaptor(statistics_resource_adaptor const&) = delete;
+ statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete;
+ statistics_resource_adaptor(statistics_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
+ statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) noexcept =
+ default; ///< @default_move_assignment{statistics_resource_adaptor}
/**
- * @brief Return pointer to the upstream resource.
- *
- * @return Upstream* Pointer to the upstream resource.
+ * @briefreturn{Pointer to the upstream resource}
*/
Upstream* get_upstream() const noexcept { return upstream_; }
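
The counters above are queried through the adaptor's accessors; a sketch (assuming `get_bytes_counter()` as in RMM's statistics adaptor API):

```cpp
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/statistics_resource_adaptor.hpp>

void statistics_example()
{
  auto* upstream = rmm::mr::get_current_device_resource();
  rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource> mr{upstream};

  void* ptr = mr.allocate(1 << 20);
  auto const bytes = mr.get_bytes_counter();  // value == peak == total == 1 MiB here
  mr.deallocate(ptr, 1 << 20);                // value returns to 0; peak and total remain
}
```
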
diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
index 37053cd07..0b8570985 100644
--- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
+++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
@@ -35,7 +35,7 @@ namespace rmm::mr {
template <typename Upstream>
class thread_safe_resource_adaptor final : public device_memory_resource {
public:
- using lock_t = std::lock_guard<std::mutex>;
+ using lock_t = std::lock_guard<std::mutex>; ///< Type of lock used to synchronize access
/**
* @brief Construct a new thread safe resource adaptor using `upstream` to satisfy
diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
index 2b2aee0dd..de2f25043 100644
--- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp
+++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
@@ -38,9 +38,9 @@ namespace rmm::mr {
template <typename T>
class thrust_allocator : public thrust::device_malloc_allocator<T> {
public:
- using Base = thrust::device_malloc_allocator<T>;
- using pointer = typename Base::pointer;
- using size_type = typename Base::size_type;
+ using Base = thrust::device_malloc_allocator<T>; ///< The base type of this allocator
+ using pointer = typename Base::pointer; ///< The pointer type
+ using size_type = typename Base::size_type; ///< The size type
/**
* @brief Provides the type of a `thrust_allocator` instantiated with another
@@ -50,7 +50,7 @@ class thrust_allocator : public thrust::device_malloc_allocator {
*/
template <typename U>
struct rebind {
- using other = thrust_allocator<U>;
+ using other = thrust_allocator<U>; ///< The type to bind to
};
/**
@@ -113,12 +113,12 @@ class thrust_allocator : public thrust::device_malloc_allocator {
}
/**
- * @brief Returns the device memory resource used by this allocator.
+ * @briefreturn{The device memory resource used by this}
*/
[[nodiscard]] device_memory_resource* resource() const noexcept { return _mr; }
/**
- * @brief Returns the stream used by this allocator.
+ * @briefreturn{The stream used by this allocator}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; }
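
A sketch plugging the allocator into a Thrust container so its storage comes from RMM (Thrust's allocator-taking `device_vector` constructor is assumed):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/mr/device/thrust_allocator_adaptor.hpp>

#include <thrust/device_vector.h>

void thrust_allocator_example()
{
  rmm::cuda_stream stream;

  // 100 zero-initialized ints allocated via the current RMM device resource
  thrust::device_vector<int, rmm::mr::thrust_allocator<int>> vec(
    100, 0, rmm::mr::thrust_allocator<int>(stream.view()));
}
```
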
diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp
index 0bbdc7468..b87218d12 100644
--- a/include/rmm/mr/device/tracking_resource_adaptor.hpp
+++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp
@@ -48,9 +48,10 @@ template <typename Upstream>
class tracking_resource_adaptor final : public device_memory_resource {
public:
// can be a std::shared_mutex once C++17 is adopted
- using read_lock_t = std::shared_lock<std::shared_timed_mutex>;
- using write_lock_t = std::unique_lock<std::shared_timed_mutex>;
-
+ using read_lock_t =
+ std::shared_lock<std::shared_timed_mutex>; ///< Type of lock used to synchronize read access
+ using write_lock_t =
+ std::unique_lock<std::shared_timed_mutex>; ///< Type of lock used to synchronize write access
/**
* @brief Information stored about an allocation. Includes the size
* and a stack trace if the `tracking_resource_adaptor` was initialized
@@ -58,10 +59,16 @@ class tracking_resource_adaptor final : public device_memory_resource {
*
*/
struct allocation_info {
- std::unique_ptr<rmm::detail::stack_trace> strace;
- std::size_t allocation_size;
+ std::unique_ptr<rmm::detail::stack_trace> strace; ///< Stack trace of the allocation
+ std::size_t allocation_size; ///< Size of the allocation
allocation_info() = delete;
+ /**
+ * @brief Construct a new allocation info object
+ *
+ * @param size Size of the allocation
+ * @param capture_stack If true, capture the stack trace for the allocation
+ */
allocation_info(std::size_t size, bool capture_stack)
: strace{[&]() {
return capture_stack ? std::make_unique<rmm::detail::stack_trace>() : nullptr;
@@ -84,17 +91,17 @@ class tracking_resource_adaptor final : public device_memory_resource {
RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
}
- tracking_resource_adaptor() = delete;
- ~tracking_resource_adaptor() override = default;
- tracking_resource_adaptor(tracking_resource_adaptor const&) = delete;
- tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete;
- tracking_resource_adaptor(tracking_resource_adaptor&&) noexcept = default;
- tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) noexcept = default;
+ tracking_resource_adaptor() = delete;
+ ~tracking_resource_adaptor() override = default;
+ tracking_resource_adaptor(tracking_resource_adaptor const&) = delete;
+ tracking_resource_adaptor(tracking_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
+ tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete;
+ tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) noexcept =
+ default; ///< @default_move_assignment{tracking_resource_adaptor}
/**
- * @brief Return pointer to the upstream resource.
- *
- * @return Upstream* Pointer to the upstream resource.
+ * @briefreturn{Pointer to the upstream resource}
*/
Upstream* get_upstream() const noexcept { return upstream_; }
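The tracking adaptor above has a Python counterpart, rmm.mr.TrackingResourceAdaptor. A rough sketch of its use, assuming the capture_stacks flag and the two query methods provided by the bindings:

    import rmm
    import rmm.mr

    # Record every live allocation, with stack traces for leak hunting
    track_mr = rmm.mr.TrackingResourceAdaptor(
        rmm.mr.CudaMemoryResource(), capture_stacks=True
    )
    rmm.mr.set_current_device_resource(track_mr)

    buf = rmm.DeviceBuffer(size=2048)
    print(track_mr.get_allocated_bytes())              # outstanding bytes
    print(track_mr.get_outstanding_allocations_str())  # one entry per live allocation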
diff --git a/include/rmm/mr/host/host_memory_resource.hpp b/include/rmm/mr/host/host_memory_resource.hpp
index c0fe85594..3279bfb3f 100644
--- a/include/rmm/mr/host/host_memory_resource.hpp
+++ b/include/rmm/mr/host/host_memory_resource.hpp
@@ -46,12 +46,14 @@ namespace rmm::mr {
*/
class host_memory_resource {
public:
- host_memory_resource() = default;
- virtual ~host_memory_resource() = default;
- host_memory_resource(host_memory_resource const&) = default;
- host_memory_resource& operator=(host_memory_resource const&) = default;
- host_memory_resource(host_memory_resource&&) noexcept = default;
- host_memory_resource& operator=(host_memory_resource&&) noexcept = default;
+ host_memory_resource() = default;
+ virtual ~host_memory_resource() = default;
+ host_memory_resource(host_memory_resource const&) = default; ///< @default_copy_constructor
+ host_memory_resource(host_memory_resource&&) noexcept = default; ///< @default_move_constructor
+ host_memory_resource& operator=(host_memory_resource const&) =
+ default; ///< @default_copy_assignment{host_memory_resource}
+ host_memory_resource& operator=(host_memory_resource&&) noexcept =
+ default; ///< @default_move_assignment{host_memory_resource}
/**
* @brief Allocates memory on the host of size at least `bytes` bytes.
@@ -84,7 +86,6 @@ class host_memory_resource {
* that was passed to the `allocate` call that returned `ptr`.
* @param alignment Alignment of the allocation. This must be equal to the value of `alignment`
* that was passed to the `allocate` call that returned `ptr`.
- * @param stream Stream on which to perform deallocation
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp
index ffd3345bb..68cccc320 100644
--- a/include/rmm/mr/host/new_delete_resource.hpp
+++ b/include/rmm/mr/host/new_delete_resource.hpp
@@ -30,12 +30,14 @@ namespace rmm::mr {
*/
class new_delete_resource final : public host_memory_resource {
public:
- new_delete_resource() = default;
- ~new_delete_resource() override = default;
- new_delete_resource(new_delete_resource const&) = default;
- new_delete_resource(new_delete_resource&&) = default;
- new_delete_resource& operator=(new_delete_resource const&) = default;
- new_delete_resource& operator=(new_delete_resource&&) = default;
+ new_delete_resource() = default;
+ ~new_delete_resource() override = default;
+ new_delete_resource(new_delete_resource const&) = default; ///< @default_copy_constructor
+ new_delete_resource(new_delete_resource&&) = default; ///< @default_move_constructor
+ new_delete_resource& operator=(new_delete_resource const&) =
+ default; ///< @default_copy_assignment{new_delete_resource}
+ new_delete_resource& operator=(new_delete_resource&&) =
+ default; ///< @default_move_assignment{new_delete_resource}
private:
/**
diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp
index 3e0c5e3e6..fb28ebfb7 100644
--- a/include/rmm/mr/host/pinned_memory_resource.hpp
+++ b/include/rmm/mr/host/pinned_memory_resource.hpp
@@ -24,7 +24,7 @@
namespace rmm::mr {
-/*
+/**
* @brief A `host_memory_resource` that uses `cudaMallocHost` to allocate
* pinned/page-locked host memory.
*
@@ -32,12 +32,14 @@ namespace rmm::mr {
*/
class pinned_memory_resource final : public host_memory_resource {
public:
- pinned_memory_resource() = default;
- ~pinned_memory_resource() override = default;
- pinned_memory_resource(pinned_memory_resource const&) = default;
- pinned_memory_resource(pinned_memory_resource&&) = default;
- pinned_memory_resource& operator=(pinned_memory_resource const&) = default;
- pinned_memory_resource& operator=(pinned_memory_resource&&) = default;
+ pinned_memory_resource() = default;
+ ~pinned_memory_resource() override = default;
+ pinned_memory_resource(pinned_memory_resource const&) = default; ///< @default_copy_constructor
+ pinned_memory_resource(pinned_memory_resource&&) = default; ///< @default_move_constructor
+ pinned_memory_resource& operator=(pinned_memory_resource const&) =
+ default; ///< @default_copy_assignment{pinned_memory_resource}
+ pinned_memory_resource& operator=(pinned_memory_resource&&) =
+ default; ///< @default_move_assignment{pinned_memory_resource}
private:
/**
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index b26a6dd43..3506bf9a9 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -14,11 +14,9 @@
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
-set(rmm_version 23.08.00)
+set(rmm_version 23.10.00)
-file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/RAPIDS.cmake
- ${CMAKE_BINARY_DIR}/RAPIDS.cmake)
-include(${CMAKE_BINARY_DIR}/RAPIDS.cmake)
+include(../fetch_rapids.cmake)
project(
rmm-python
@@ -58,5 +56,7 @@ endif()
include(rapids-cython)
rapids_cython_init()
+add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}")
+
add_subdirectory(rmm/_cuda)
add_subdirectory(rmm/_lib)
diff --git a/python/docs/Makefile b/python/docs/Makefile
index d4bb2cbb9..72d1dcd85 100644
--- a/python/docs/Makefile
+++ b/python/docs/Makefile
@@ -3,7 +3,7 @@
# You can set these variables from the command line, and also
# from the environment for the first two.
-SPHINXOPTS ?=
+SPHINXOPTS = -n -v -W --keep-going
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
diff --git a/python/docs/api.rst b/python/docs/api.rst
index 73cd5dd81..b229d8214 100644
--- a/python/docs/api.rst
+++ b/python/docs/api.rst
@@ -15,6 +15,7 @@ Memory Resources
.. automodule:: rmm.mr
:members:
+ :inherited-members:
:undoc-members:
:show-inheritance:
@@ -28,6 +29,7 @@ Memory Allocators
.. automodule:: rmm.allocators.numba
:members:
+ :inherited-members:
:undoc-members:
:show-inheritance:
diff --git a/python/docs/basics.md b/python/docs/basics.md
index 0c47073c1..997745f00 100644
--- a/python/docs/basics.md
+++ b/python/docs/basics.md
@@ -91,7 +91,7 @@ example, enabling the `ManagedMemoryResource` tells RMM to use
> :warning: The default resource must be set for any device **before**
> allocating any device memory on that device. Setting or changing the
> resource after device allocations have been made can lead to unexpected
-> behaviour or crashes. See [Multiple Devices](#multiple-devices)
+> behaviour or crashes.
As another example, `PoolMemoryResource` allows you to allocate a
large "pool" of device memory up-front. Subsequent allocations will
diff --git a/python/docs/conf.py b/python/docs/conf.py
index f6f3adf43..17c012194 100644
--- a/python/docs/conf.py
+++ b/python/docs/conf.py
@@ -23,9 +23,9 @@
# built documents.
#
# The short X.Y version.
-version = "23.08"
+version = "23.10"
# The full version, including alpha/beta/rc tags.
-release = "23.08.00"
+release = "23.10.00"
# -- General configuration ---------------------------------------------------
@@ -35,6 +35,7 @@
# ones.
extensions = [
+ "sphinxcontrib.jquery",
"sphinx.ext.intersphinx",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
@@ -113,7 +114,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
+html_static_path = []
# -- Options for HTMLHelp output ------------------------------------------
@@ -179,7 +180,10 @@
# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {"https://docs.python.org/": None}
+intersphinx_mapping = {
+ "python": ("https://docs.python.org/3", None),
+ "numba": ("https://numba.readthedocs.io/en/stable", None),
+}
# Config numpydoc
numpydoc_show_inherited_class_members = True
@@ -187,6 +191,11 @@
autoclass_content = "init"
+nitpick_ignore = [
+ ("py:class", "size_t"),
+ ("py:class", "void"),
+]
+
def setup(app):
app.add_js_file("copybutton_pydocs.js")
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 58ac1d5ad..f83e4fec4 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -17,7 +17,7 @@ build-backend = "setuptools.build_meta"
requires = [
"cmake>=3.26.4",
"cuda-python>=11.7.1,<12.0a0",
- "cython>=0.29,<0.30",
+ "cython>=3.0.0",
"ninja",
"scikit-build>=0.13.1",
"setuptools>=61.0.0",
@@ -27,7 +27,7 @@ requires = [
[project]
name = "rmm"
-version = "23.08.00"
+version = "23.10.00"
description = "rmm - RAPIDS Memory Manager"
readme = { file = "README.md", content-type = "text/markdown" }
authors = [
diff --git a/python/rmm/__init__.py b/python/rmm/__init__.py
index a5467251e..52d232dc2 100644
--- a/python/rmm/__init__.py
+++ b/python/rmm/__init__.py
@@ -14,6 +14,15 @@
from rmm import mr
from rmm._lib.device_buffer import DeviceBuffer
+from rmm._lib.logger import (
+ flush_logger,
+ get_flush_level,
+ get_logging_level,
+ logging_level,
+ set_flush_level,
+ set_logging_level,
+ should_log,
+)
from rmm.mr import disable_logging, enable_logging, get_log_filenames
from rmm.rmm import (
RMMError,
@@ -25,15 +34,22 @@
__all__ = [
"DeviceBuffer",
- "RMMError",
"disable_logging",
+ "RMMError",
"enable_logging",
+ "flush_logger",
+ "get_flush_level",
"get_log_filenames",
+ "get_logging_level",
"is_initialized",
+ "logging_level",
"mr",
"register_reinitialize_hook",
"reinitialize",
+ "set_flush_level",
+ "set_logging_level",
+ "should_log",
"unregister_reinitialize_hook",
]
-__version__ = "23.08.00"
+__version__ = "23.10.00"
diff --git a/python/rmm/_cuda/stream.pxd b/python/rmm/_cuda/stream.pxd
index 6aa4e0b24..3c3d3aa6f 100644
--- a/python/rmm/_cuda/stream.pxd
+++ b/python/rmm/_cuda/stream.pxd
@@ -26,8 +26,8 @@ cdef class Stream:
@staticmethod
cdef Stream _from_cudaStream_t(cudaStream_t s, object owner=*)
- cdef cuda_stream_view view(self) nogil except *
- cdef void c_synchronize(self) nogil except *
- cdef bool c_is_default(self) nogil except *
+ cdef cuda_stream_view view(self) except * nogil
+ cdef void c_synchronize(self) except * nogil
+ cdef bool c_is_default(self) except * nogil
cdef void _init_with_new_cuda_stream(self) except *
cdef void _init_from_stream(self, Stream stream) except *
diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx
index d60dde4e1..4795cbb9f 100644
--- a/python/rmm/_cuda/stream.pyx
+++ b/python/rmm/_cuda/stream.pyx
@@ -48,7 +48,7 @@ cdef class Stream:
self._init_from_cupy_stream(obj)
@staticmethod
- cdef Stream _from_cudaStream_t(cudaStream_t s, object owner=None):
+ cdef Stream _from_cudaStream_t(cudaStream_t s, object owner=None) except *:
"""
Construct a Stream from a cudaStream_t.
"""
@@ -57,13 +57,13 @@ cdef class Stream:
obj._owner = owner
return obj
- cdef cuda_stream_view view(self) nogil except *:
+ cdef cuda_stream_view view(self) except * nogil:
"""
Generate a rmm::cuda_stream_view from this Stream instance
"""
return cuda_stream_view(<cudaStream_t>(self._cuda_stream))
- cdef void c_synchronize(self) nogil except *:
+ cdef void c_synchronize(self) except * nogil:
"""
Synchronize the CUDA stream.
This function *must* be called in a `with nogil` block
@@ -77,7 +77,7 @@ cdef class Stream:
with nogil:
self.c_synchronize()
- cdef bool c_is_default(self) nogil except *:
+ cdef bool c_is_default(self) except * nogil:
"""
Check if we are the default CUDA stream
"""
diff --git a/python/rmm/_lib/CMakeLists.txt b/python/rmm/_lib/CMakeLists.txt
index 9e90d7e99..852dd87c4 100644
--- a/python/rmm/_lib/CMakeLists.txt
+++ b/python/rmm/_lib/CMakeLists.txt
@@ -12,10 +12,12 @@
# the License.
# =============================================================================
-set(cython_sources device_buffer.pyx lib.pyx memory_resource.pyx cuda_stream.pyx
+set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx
torch_allocator.pyx)
set(linked_libraries rmm::rmm)
# Build all of the Cython targets
rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}"
CXX)
+# The cdef public functions in this file need to have a C ABI
+target_compile_definitions(torch_allocator PRIVATE CYTHON_EXTERN_C=extern\ "C")
diff --git a/python/rmm/_lib/cuda_stream.pxd b/python/rmm/_lib/cuda_stream.pxd
index 1eed1cefb..e224cf9af 100644
--- a/python/rmm/_lib/cuda_stream.pxd
+++ b/python/rmm/_lib/cuda_stream.pxd
@@ -33,5 +33,5 @@ cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil:
@cython.final
cdef class CudaStream:
cdef unique_ptr[cuda_stream] c_obj
- cdef cudaStream_t value(self) nogil except *
- cdef bool is_valid(self) nogil except *
+ cdef cudaStream_t value(self) except * nogil
+ cdef bool is_valid(self) except * nogil
diff --git a/python/rmm/_lib/cuda_stream.pyx b/python/rmm/_lib/cuda_stream.pyx
index fb35ec11f..0861f0663 100644
--- a/python/rmm/_lib/cuda_stream.pyx
+++ b/python/rmm/_lib/cuda_stream.pyx
@@ -27,8 +27,8 @@ cdef class CudaStream:
def __cinit__(self):
self.c_obj.reset(new cuda_stream())
- cdef cudaStream_t value(self) nogil except *:
+ cdef cudaStream_t value(self) except * nogil:
return self.c_obj.get()[0].value()
- cdef bool is_valid(self) nogil except *:
+ cdef bool is_valid(self) except * nogil:
return self.c_obj.get()[0].is_valid()
diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd
index 364dbb2c0..3d5f29f9a 100644
--- a/python/rmm/_lib/device_buffer.pxd
+++ b/python/rmm/_lib/device_buffer.pxd
@@ -56,7 +56,7 @@ cdef class DeviceBuffer:
@staticmethod
cdef DeviceBuffer c_to_device(const unsigned char[::1] b,
- Stream stream=*)
+ Stream stream=*) except *
cpdef copy_to_host(self, ary=*, Stream stream=*)
cpdef copy_from_host(self, ary, Stream stream=*)
cpdef copy_from_device(self, cuda_ary, Stream stream=*)
diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx
index c2bfd1459..d248d01ab 100644
--- a/python/rmm/_lib/device_buffer.pyx
+++ b/python/rmm/_lib/device_buffer.pyx
@@ -172,7 +172,7 @@ cdef class DeviceBuffer:
@staticmethod
cdef DeviceBuffer c_to_device(const unsigned char[::1] b,
- Stream stream=DEFAULT_STREAM):
+ Stream stream=DEFAULT_STREAM) except *:
"""Calls ``to_device`` function on arguments provided"""
return to_device(b, stream)
@@ -382,7 +382,7 @@ cdef void _copy_async(const void* src,
void* dst,
size_t count,
ccudart.cudaMemcpyKind kind,
- cuda_stream_view stream) nogil except *:
+ cuda_stream_view stream) except * nogil:
"""
Asynchronously copy data between host and/or device pointers.
diff --git a/python/rmm/_lib/logger.pyx b/python/rmm/_lib/logger.pyx
new file mode 100644
index 000000000..029bbdd79
--- /dev/null
+++ b/python/rmm/_lib/logger.pyx
@@ -0,0 +1,260 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+
+from libcpp cimport bool
+
+
+cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil:
+ cpdef enum logging_level "spdlog::level::level_enum":
+ """
+ The debug logging level for RMM.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Valid levels, in decreasing order of verbosity, are TRACE, DEBUG,
+ INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.logging_level.DEBUG
+ <logging_level.DEBUG: 1>
+ >>> rmm.logging_level.DEBUG.value
+ 1
+ >>> rmm.logging_level.DEBUG.name
+ 'DEBUG'
+
+ See Also
+ --------
+ set_logging_level : Set the debug logging level
+ get_logging_level : Get the current debug logging level
+ """
+ TRACE "spdlog::level::trace"
+ DEBUG "spdlog::level::debug"
+ INFO "spdlog::level::info"
+ WARN "spdlog::level::warn"
+ ERR "spdlog::level::err"
+ CRITICAL "spdlog::level::critical"
+ OFF "spdlog::level::off"
+
+
+cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil:
+ cdef cppclass spdlog_logger "spdlog::logger":
+ spdlog_logger() except +
+ void set_level(logging_level level)
+ logging_level level()
+ void flush() except +
+ void flush_on(logging_level level)
+ logging_level flush_level()
+ bool should_log(logging_level msg_level)
+
+
+cdef extern from "rmm/logger.hpp" namespace "rmm" nogil:
+ cdef spdlog_logger& logger() except +
+
+
+def _validate_level_type(level):
+ if not isinstance(level, logging_level):
+ raise TypeError("level must be an instance of the logging_level enum")
+
+
+def should_log(level):
+ """
+ Check if a message at the given level would be logged.
+
+ A message at the given level would be logged if the current debug logging
+ level is set to a level that is at least as verbose as the given level,
+ *and* the RMM module is compiled for a logging level at least as verbose.
+ If these conditions are not both met, this function returns False.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Parameters
+ ----------
+ level : logging_level
+ The debug logging level. Valid values are instances of the
+ ``logging_level`` enum.
+
+ Returns
+ -------
+ should_log : bool
+ True if a message at the given level would be logged, False otherwise.
+
+ Raises
+ ------
+ TypeError
+ If the logging level is not an instance of the ``logging_level`` enum.
+ """
+ _validate_level_type(level)
+ return logger().should_log(level)
+
+
+def set_logging_level(level):
+ """
+ Set the debug logging level.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Parameters
+ ----------
+ level : logging_level
+ The debug logging level. Valid values are instances of the
+ ``logging_level`` enum.
+
+ Raises
+ ------
+ TypeError
+ If the logging level is not an instance of the ``logging_level`` enum.
+
+ See Also
+ --------
+ get_logging_level : Get the current debug logging level.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.set_logging_level(rmm.logging_level.WARN) # set logging level to warn
+ """
+ _validate_level_type(level)
+ logger().set_level(level)
+
+ if not should_log(level):
+ warnings.warn(f"RMM will not log logging_level.{level.name}. This "
+ "may be because the C++ library is compiled for a "
+ "less-verbose logging level.")
+
+
+def get_logging_level():
+ """
+ Get the current debug logging level.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Returns
+ -------
+ level : logging_level
+ The current debug logging level, an instance of the ``logging_level``
+ enum.
+
+ See Also
+ --------
+ set_logging_level : Set the debug logging level.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.get_logging_level() # get current logging level
+ <logging_level.INFO: 2>
+ """
+ return logging_level(logger().level())
+
+
+def flush_logger():
+ """
+ Flush the debug logger. This will cause any buffered log messages to
+ be written to the log file.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ See Also
+ --------
+ set_flush_level : Set the flush level for the debug logger.
+ get_flush_level : Get the current debug logging flush level.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.flush_logger() # flush the logger
+ """
+ logger().flush()
+
+
+def set_flush_level(level):
+ """
+ Set the flush level for the debug logger. Messages of this level or higher
+ will automatically flush to the file.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Parameters
+ ----------
+ level : logging_level
+ The debug logging level. Valid values are instances of the
+ ``logging_level`` enum.
+
+ Raises
+ ------
+ TypeError
+ If the logging level is not an instance of the ``logging_level`` enum.
+
+ See Also
+ --------
+ get_flush_level : Get the current debug logging flush level.
+ flush_logger : Flush the logger.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.set_flush_level(rmm.logging_level.WARN) # set flush level to warn
+ """
+ _validate_level_type(level)
+ logger().flush_on(level)
+
+ if not should_log(level):
+ warnings.warn(f"RMM will not log logging_level.{level.name}. This "
+ "may be because the C++ library is compiled for a "
+ "less-verbose logging level.")
+
+
+def get_flush_level():
+ """
+ Get the current debug logging flush level for the RMM logger. Messages of
+ this level or higher will automatically flush to the file.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Returns
+ -------
+ logging_level
+ The current flush level, an instance of the ``logging_level``
+ enum.
+
+ See Also
+ --------
+ set_flush_level : Set the flush level for the logger.
+ flush_logger : Flush the logger.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.get_flush_level() # get current flush level
+ <logging_level.INFO: 2>
+ """
+ return logging_level(logger().flush_level())
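Taken together, the new logger module gives Python callers runtime control over the RMM spdlog logger. A short end-to-end sketch using only the functions defined above:

    import rmm

    rmm.set_logging_level(rmm.logging_level.DEBUG)
    assert rmm.get_logging_level() == rmm.logging_level.DEBUG

    # Whether DEBUG messages are actually emitted also depends on the
    # compile-time logging level the C++ library was built with, which is
    # why set_logging_level can warn (see should_log).
    if rmm.should_log(rmm.logging_level.DEBUG):
        print("DEBUG messages will reach the log file")

    rmm.set_flush_level(rmm.logging_level.WARN)  # auto-flush WARN and above
    rmm.flush_logger()                           # write out anything buffered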
diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd
index 5bb3746bc..0770fb8ed 100644
--- a/python/rmm/_lib/memory_resource.pxd
+++ b/python/rmm/_lib/memory_resource.pxd
@@ -69,6 +69,9 @@ cdef class CallbackMemoryResource(DeviceMemoryResource):
cdef object _allocate_func
cdef object _deallocate_func
+cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor):
+ pass
+
cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor):
cdef object _log_file_name
cpdef get_file_name(self)
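LimitingResourceAdaptor caps how many bytes the wrapped resource may hand out. A sketch, assuming the Python class mirrors the C++ get_allocated_bytes/get_allocation_limit declared in the next hunk:

    import rmm
    import rmm.mr

    # Fail allocations once 1 MiB is outstanding (the limit is illustrative)
    limited = rmm.mr.LimitingResourceAdaptor(
        rmm.mr.CudaMemoryResource(), allocation_limit=2**20
    )
    rmm.mr.set_current_device_resource(limited)

    buf = rmm.DeviceBuffer(size=512 * 1024)
    print(limited.get_allocated_bytes(), "of", limited.get_allocation_limit())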
diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx
index 774db374a..ce7f45e19 100644
--- a/python/rmm/_lib/memory_resource.pyx
+++ b/python/rmm/_lib/memory_resource.pyx
@@ -14,10 +14,14 @@
import os
import warnings
+# This import is needed for Cython typing in translate_python_except_to_cpp
+# See https://github.com/cython/cython/issues/5589
+from builtins import BaseException
from collections import defaultdict
cimport cython
from cython.operator cimport dereference as deref
+from libc.stddef cimport size_t
from libc.stdint cimport int8_t, int64_t, uintptr_t
from libcpp cimport bool
from libcpp.memory cimport make_unique, unique_ptr
@@ -37,7 +41,7 @@ from rmm._lib.per_device_resource cimport (
# Transparent handle of a C++ exception
ctypedef pair[int, string] CppExcept
-cdef CppExcept translate_python_except_to_cpp(err: BaseException):
+cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept:
"""Translate a Python exception into a C++ exception handle
The returned exception handle can then be thrown by `throw_cpp_except()`,
@@ -160,6 +164,16 @@ cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \
size_t allocation_size,
device_memory_resource* bin_resource) except +
+cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \
+ namespace "rmm::mr" nogil:
+ cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource):
+ limiting_resource_adaptor(
+ Upstream* upstream_mr,
+ size_t allocation_limit) except +
+
+ size_t get_allocated_bytes() except +
+ size_t get_allocation_limit() except +
+
cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \
namespace "rmm::mr" nogil:
cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource):
@@ -213,18 +227,41 @@ cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \
cdef class DeviceMemoryResource:
cdef device_memory_resource* get_mr(self):
+ """Get the underlying C++ memory resource object."""
return self.c_obj.get()
def allocate(self, size_t nbytes):
+ """Allocate ``nbytes`` bytes of memory.
+
+ Parameters
+ ----------
+ nbytes : size_t
+ The size of the allocation in bytes
+ """
return self.c_obj.get().allocate(nbytes)
def deallocate(self, uintptr_t ptr, size_t nbytes):
+ """Deallocate memory pointed to by ``ptr`` of size ``nbytes``.
+
+ Parameters
+ ----------
+ ptr : uintptr_t
+ Pointer to be deallocated
+ nbytes : size_t
+ Size of the allocation in bytes
+ """
self.c_obj.get().deallocate(<void*>(ptr), nbytes)
# See the note about `no_gc_clear` in `device_buffer.pyx`.
@cython.no_gc_clear
cdef class UpstreamResourceAdaptor(DeviceMemoryResource):
+ """Parent class for all memory resources that track an upstream.
+
+ Upstream resource tracking requires maintaining a reference to the upstream
+ mr so that it is kept alive and may be accessed by any downstream resource
+ adaptors.
+ """
def __cinit__(self, DeviceMemoryResource upstream_mr, *args, **kwargs):
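The newly documented allocate/deallocate pair can also be exercised directly from Python; a minimal sketch (the size and resource choice are illustrative):

    import rmm.mr

    mr = rmm.mr.CudaMemoryResource()
    ptr = mr.allocate(256)    # address returned as an integer
    mr.deallocate(ptr, 256)   # size must match the original request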
@@ -526,7 +563,10 @@ cdef void* _allocate_callback_wrapper(
size_t nbytes,
cuda_stream_view stream,
void* ctx
-) nogil:
+ # Note that this function is specifically designed to rethrow Python
+ # exceptions as C++ exceptions when called as a callback from C++, so it is
+ # noexcept from Cython's perspective.
+) noexcept nogil:
cdef CppExcept err
with gil:
try:
@@ -540,7 +580,7 @@ cdef void _deallocate_callback_wrapper(
size_t nbytes,
cuda_stream_view stream,
void* ctx
-) with gil:
+) except * with gil:
(