diff --git a/.clang-tidy b/.clang-tidy
index 04689c330..9b3f844c9 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -64,4 +64,6 @@ CheckOptions:
value: '1'
- key: readability-magic-numbers.IgnorePowersOf2IntegerValues
value: '1'
+ - key: cppcoreguidelines-avoid-do-while.IgnoreMacros
+ value: 'true'
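+ # With IgnoreMacros enabled, do-while loops that come from macro expansions
+ # (e.g. the common `#define F(x) do { ... } while (0)` idiom) are not flagged.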
...
diff --git a/.clangd b/.clangd
new file mode 100644
index 000000000..7c4fe036d
--- /dev/null
+++ b/.clangd
@@ -0,0 +1,65 @@
+# https://clangd.llvm.org/config
+
+# Apply a config conditionally to all C files
+If:
+ PathMatch: .*\.(c|h)$
+
+---
+
+# Apply a config conditionally to all C++ files
+If:
+ PathMatch: .*\.(c|h)pp
+
+---
+
+# Apply a config conditionally to all CUDA files
+If:
+ PathMatch: .*\.cuh?
+CompileFlags:
+ Add:
+ - "-x"
+ - "cuda"
+ # No error on unknown CUDA versions
+ - "-Wno-unknown-cuda-version"
+ # Allow variadic CUDA functions
+ - "-Xclang=-fcuda-allow-variadic-functions"
+Diagnostics:
+ Suppress:
+ - "variadic_device_fn"
+ - "attributes_not_allowed"
+
+---
+
+# Tweak the clangd parse settings for all files
+CompileFlags:
+ Add:
+ # report all errors
+ - "-ferror-limit=0"
+ - "-fmacro-backtrace-limit=0"
+ - "-ftemplate-backtrace-limit=0"
+ # Skip the CUDA version check
+ - "--no-cuda-version-check"
+ Remove:
+ # remove gcc's -fcoroutines
+ - -fcoroutines
+ # remove nvc++ flags unknown to clang
+ - "-gpu=*"
+ - "-stdpar*"
+ # remove nvcc flags unknown to clang
+ - "-arch*"
+ - "-gencode*"
+ - "--generate-code*"
+ - "-ccbin*"
+ - "-t=*"
+ - "--threads*"
+ - "-Xptxas*"
+ - "-Xcudafe*"
+ - "-Xfatbin*"
+ - "-Xcompiler*"
+ - "--diag-suppress*"
+ - "--diag_suppress*"
+ - "--compiler-options*"
+ - "--expt-extended-lambda"
+ - "--expt-relaxed-constexpr"
+ - "-forward-unknown-to-host-compiler"
+ - "-Werror=cross-execution-space-call"
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 000000000..9d35e3f97
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,30 @@
+# syntax=docker/dockerfile:1.5
+
+ARG BASE
+ARG PYTHON_PACKAGE_MANAGER=conda
+
+FROM ${BASE} as pip-base
+
+ENV DEFAULT_VIRTUAL_ENV=rapids
+
+FROM ${BASE} as conda-base
+
+ENV DEFAULT_CONDA_ENV=rapids
+
+FROM ${PYTHON_PACKAGE_MANAGER}-base
+
+ARG CUDA
+ENV CUDAARCHS="RAPIDS"
+ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}"
+
+ARG PYTHON_PACKAGE_MANAGER
+ENV PYTHON_PACKAGE_MANAGER="${PYTHON_PACKAGE_MANAGER}"
+
+ENV PYTHONSAFEPATH="1"
+ENV PYTHONUNBUFFERED="1"
+ENV PYTHONDONTWRITEBYTECODE="1"
+
+ENV SCCACHE_REGION="us-east-2"
+ENV SCCACHE_BUCKET="rapids-sccache-devs"
+ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
+ENV HISTFILE="/home/coder/.cache/._bash_history"
diff --git a/.devcontainer/README.md b/.devcontainer/README.md
new file mode 100644
index 000000000..31ddab9aa
--- /dev/null
+++ b/.devcontainer/README.md
@@ -0,0 +1,35 @@
+# RMM Development Containers
+
+This directory contains [devcontainer configurations](https://containers.dev/implementors/json_reference/) for using VSCode to [develop in a container](https://code.visualstudio.com/docs/devcontainers/containers) via the `Remote Containers` [extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [GitHub Codespaces](https://github.com/codespaces).
+
+This container is a turnkey development environment for building and testing the RMM C++ and Python libraries.
+
+## Table of Contents
+
+* [Prerequisites](#prerequisites)
+* [Host bind mounts](#host-bind-mounts)
+* [Launch a Dev Container](#launch-a-dev-container)
+
+## Prerequisites
+
+* [VSCode](https://code.visualstudio.com/download)
+* [VSCode Remote Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
+
+## Host bind mounts
+
+By default, the following directories are bind-mounted into the devcontainer:
+
+* `${repo}:/home/coder/rmm`
+* `${repo}/../.aws:/home/coder/.aws`
+* `${repo}/../.local:/home/coder/.local`
+* `${repo}/../.cache:/home/coder/.cache`
+* `${repo}/../.conda:/home/coder/.conda`
+* `${repo}/../.config:/home/coder/.config`
+
+This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs.
+
+## Launch a Dev Container
+
+To launch a devcontainer from VSCode, open the RMM repo and select the "Reopen in Container" button in the bottom right:
+
+Alternatively, open the VSCode command palette (typically `cmd/ctrl + shift + P`) and run the "Rebuild and Reopen in Container" command.
diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
new file mode 100644
index 000000000..ba3555a26
--- /dev/null
+++ b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda11.8-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda11.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
new file mode 100644
index 000000000..a5b4290e9
--- /dev/null
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -0,0 +1,36 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json
new file mode 100644
index 000000000..03282635e
--- /dev/null
+++ b/.devcontainer/cuda12.0-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.0-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json
new file mode 100644
index 000000000..8dc607f28
--- /dev/null
+++ b/.devcontainer/cuda12.0-pip/devcontainer.json
@@ -0,0 +1,36 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda12.0-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/rmm,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
new file mode 100644
index 000000000..895ba83ee
--- /dev/null
+++ b/.github/copy-pr-bot.yaml
@@ -0,0 +1,4 @@
+# Configuration file for `copy-pr-bot` GitHub App
+# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
+
+enabled: true
diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml
index 57bdd6110..9a0b41550 100644
--- a/.github/ops-bot.yaml
+++ b/.github/ops-bot.yaml
@@ -5,6 +5,4 @@ auto_merger: true
branch_checker: true
label_checker: true
release_drafter: true
-copy_prs: true
-rerun_tests: true
recently_updated: true
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 67af4dafc..d80bf1d41 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
jobs:
cpp-build:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
python-build:
needs: [cpp-build]
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
upload-conda:
needs: [cpp-build, python-build]
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -56,7 +56,7 @@ jobs:
if: github.ref_type == 'branch'
needs: python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -64,11 +64,11 @@ jobs:
date: ${{ inputs.date }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
- container_image: "rapidsai/ci:latest"
+ container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
wheel-publish:
needs: wheel-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.10
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index f5b7e3bf5..95ac72d59 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -20,58 +20,67 @@ jobs:
- docs-build
- wheel-build
- wheel-tests
+ - devcontainer
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.10
checks:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.10
with:
enable_check_generated_files: false
conda-cpp-build:
needs: checks
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10
with:
build_type: pull-request
conda-cpp-tests:
needs: conda-cpp-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10
with:
build_type: pull-request
conda-python-build:
needs: conda-cpp-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10
with:
build_type: pull-request
conda-python-tests:
needs: conda-python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10
with:
build_type: pull-request
docs-build:
needs: conda-python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
- container_image: "rapidsai/ci:latest"
+ container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build:
needs: checks
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10
with:
build_type: pull-request
script: ci/build_wheel.sh
wheel-tests:
needs: wheel-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10
with:
build_type: pull-request
script: ci/test_wheel.sh
+ devcontainer:
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.10
+ with:
+ build_command: |
+ sccache -z;
+ build-all -DBUILD_BENCHMARKS=ON --verbose;
+ sccache -s;
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 9a26dac4a..81f1b791b 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
jobs:
cpp-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
sha: ${{ inputs.sha }}
python-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -32,7 +32,7 @@ jobs:
sha: ${{ inputs.sha }}
wheel-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.08
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10
with:
build_type: nightly
branch: ${{ inputs.branch }}
diff --git a/.gitignore b/.gitignore
index 1ab57e4d4..0f98d3e44 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ DartConfiguration.tcl
.DS_Store
*.manifest
*.spec
+compile_commands.json
## Python build directories & artifacts
dist/
@@ -153,3 +154,7 @@ rmm_log.txt
# cibuildwheel
/wheelhouse
+
+# clang tooling
+compile_commands.json
+.clangd/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index aa076e6be..7f095025a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
hooks:
- id: cython-lint
- repo: https://github.com/pre-commit/mirrors-clang-format
- rev: v16.0.1
+ rev: v16.0.6
hooks:
- id: clang-format
types_or: [c, c++, cuda]
@@ -69,6 +69,13 @@ repos:
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
+ - id: doxygen-check
+ name: doxygen-check
+ entry: ./scripts/doxygen.sh
+ types_or: [file]
+ language: system
+ pass_filenames: false
+ verbose: true
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.278
hooks:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 453ab54c2..901b3790a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,42 @@
+# RMM 23.10.00 (11 Oct 2023)
+
+## 🚨 Breaking Changes
+
+- Update to Cython 3.0.0 ([#1313](https://github.com/rapidsai/rmm/pull/1313)) [@vyasr](https://github.com/vyasr)
+
+## 🐛 Bug Fixes
+
+- Compile cdef public functions from torch_allocator with C ABI ([#1350](https://github.com/rapidsai/rmm/pull/1350)) [@wence-](https://github.com/wence-)
+- Make doxygen only a conda dependency. ([#1344](https://github.com/rapidsai/rmm/pull/1344)) [@bdice](https://github.com/bdice)
+- Use `conda mambabuild` not `mamba mambabuild` ([#1338](https://github.com/rapidsai/rmm/pull/1338)) [@wence-](https://github.com/wence-)
+- Fix stream_ordered_memory_resource attempt to record event in stream from another device ([#1333](https://github.com/rapidsai/rmm/pull/1333)) [@harrism](https://github.com/harrism)
+
+## 📖 Documentation
+
+- Clean up headers in CMakeLists.txt. ([#1341](https://github.com/rapidsai/rmm/pull/1341)) [@bdice](https://github.com/bdice)
+- Add pre-commit hook to validate doxygen ([#1334](https://github.com/rapidsai/rmm/pull/1334)) [@vyasr](https://github.com/vyasr)
+- Fix doxygen warnings ([#1317](https://github.com/rapidsai/rmm/pull/1317)) [@vyasr](https://github.com/vyasr)
+- Treat warnings as errors in Python documentation ([#1316](https://github.com/rapidsai/rmm/pull/1316)) [@vyasr](https://github.com/vyasr)
+
+## 🚀 New Features
+
+- Enable RMM Debug Logging via Python ([#1339](https://github.com/rapidsai/rmm/pull/1339)) [@harrism](https://github.com/harrism)
+
+## 🛠️ Improvements
+
+- Update image names ([#1346](https://github.com/rapidsai/rmm/pull/1346)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Update to clang 16.0.6. ([#1343](https://github.com/rapidsai/rmm/pull/1343)) [@bdice](https://github.com/bdice)
+- Update doxygen to 1.9.1 ([#1337](https://github.com/rapidsai/rmm/pull/1337)) [@vyasr](https://github.com/vyasr)
+- Simplify wheel build scripts and allow alphas of RAPIDS dependencies ([#1335](https://github.com/rapidsai/rmm/pull/1335)) [@divyegala](https://github.com/divyegala)
+- Use `copy-pr-bot` ([#1329](https://github.com/rapidsai/rmm/pull/1329)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Add RMM devcontainers ([#1328](https://github.com/rapidsai/rmm/pull/1328)) [@trxcllnt](https://github.com/trxcllnt)
+- Add Python bindings for `limiting_resource_adaptor` ([#1327](https://github.com/rapidsai/rmm/pull/1327)) [@pentschev](https://github.com/pentschev)
+- Fix missing jQuery error in docs ([#1321](https://github.com/rapidsai/rmm/pull/1321)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Use fetch_rapids.cmake. ([#1319](https://github.com/rapidsai/rmm/pull/1319)) [@bdice](https://github.com/bdice)
+- Update to Cython 3.0.0 ([#1313](https://github.com/rapidsai/rmm/pull/1313)) [@vyasr](https://github.com/vyasr)
+- Branch 23.10 merge 23.08 ([#1312](https://github.com/rapidsai/rmm/pull/1312)) [@vyasr](https://github.com/vyasr)
+- Branch 23.10 merge 23.08 ([#1309](https://github.com/rapidsai/rmm/pull/1309)) [@vyasr](https://github.com/vyasr)
+
# RMM 23.08.00 (9 Aug 2023)
## 🚨 Breaking Changes
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2c4453968..814f41be0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,11 +14,7 @@
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
-if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
- file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/RAPIDS.cmake
- ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
-endif()
-include(${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
+include(fetch_rapids.cmake)
include(rapids-cmake)
include(rapids-cpm)
@@ -27,16 +23,21 @@ include(rapids-find)
project(
RMM
- VERSION 23.08.00
+ VERSION 23.10.00
LANGUAGES CXX)
# Write the version header
rapids_cmake_write_version_file(include/rmm/version_config.hpp)
+# ##################################################################################################
+# * build type -------------------------------------------------------------------------------------
+
# Set a default build type if none was specified
rapids_cmake_build_type(Release)
-# build options
+# ##################################################################################################
+# * build options ----------------------------------------------------------------------------------
+
option(BUILD_TESTS "Configure CMake to build tests" ON)
option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)
set(RMM_LOGGING_LEVEL
@@ -52,17 +53,28 @@ message(STATUS "RMM: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'")
# cudart can be statically linked or dynamically linked the python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
+# ##################################################################################################
+# * compiler options -------------------------------------------------------------------------------
+
# find packages we depend on
rapids_find_package(
CUDAToolkit REQUIRED
BUILD_EXPORT_SET rmm-exports
INSTALL_EXPORT_SET rmm-exports)
+
+# ##################################################################################################
+# * dependencies -----------------------------------------------------------------------------------
+
+# add third party dependencies using CPM
rapids_cpm_init()
+
include(cmake/thirdparty/get_fmt.cmake)
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_thrust.cmake)
-# library targets
+# ##################################################################################################
+# * library targets --------------------------------------------------------------------------------
+
add_library(rmm INTERFACE)
add_library(rmm::rmm ALIAS rmm)
@@ -83,6 +95,9 @@ target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
target_link_libraries(rmm INTERFACE dl)
target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
+# ##################################################################################################
+# * tests and benchmarks ---------------------------------------------------------------------------
+
if((BUILD_TESTS OR BUILD_BENCHMARKS) AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
include(rapids-cuda)
rapids_cuda_init_architectures(RMM)
@@ -94,7 +109,9 @@ if((BUILD_TESTS OR BUILD_BENCHMARKS) AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAM
message(STATUS "RMM: Building benchmarks with GPU Architectures: ${CMAKE_CUDA_ARCHITECTURES}")
endif()
-# optionally build tests
+# ##################################################################################################
+# * add tests --------------------------------------------------------------------------------------
+
if(BUILD_TESTS AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
include(cmake/thirdparty/get_gtest.cmake)
include(CTest) # calls enable_testing()
@@ -102,19 +119,19 @@ if(BUILD_TESTS AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
add_subdirectory(tests)
endif()
-# optionally build benchmarks
+# ##################################################################################################
+# * add benchmarks ---------------------------------------------------------------------------------
+
if(BUILD_BENCHMARKS AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
include(${rapids-cmake-dir}/cpm/gbench.cmake)
rapids_cpm_gbench()
add_subdirectory(benchmarks)
endif()
-include(CPack)
+# ##################################################################################################
+# * install targets --------------------------------------------------------------------------------
-# optionally assemble Thrust pretty-printers
-if(Thrust_SOURCE_DIR)
- configure_file(scripts/load-pretty-printers.in load-pretty-printers @ONLY)
-endif()
+include(CPack)
# install export targets
install(TARGETS rmm EXPORT rmm-exports)
@@ -147,7 +164,9 @@ rapids_export(
DOCUMENTATION doc_string
FINAL_CODE_BLOCK code_string)
-# build export targets
+# ##################################################################################################
+# * build export -----------------------------------------------------------------------------------
+
rapids_export(
BUILD rmm
EXPORT_SET rmm-exports
@@ -156,7 +175,8 @@ rapids_export(
DOCUMENTATION doc_string
FINAL_CODE_BLOCK code_string)
-# make documentation
+# ##################################################################################################
+# * make documentation -----------------------------------------------------------------------------
add_custom_command(
OUTPUT RMM_DOXYGEN
@@ -169,3 +189,11 @@ add_custom_target(
rmm_doc
DEPENDS RMM_DOXYGEN
COMMENT "Target for the custom command to build the RMM doxygen docs")
+
+# ##################################################################################################
+# * make gdb helper scripts ------------------------------------------------------------------------
+
+# optionally assemble Thrust pretty-printers
+if(Thrust_SOURCE_DIR)
+ configure_file(scripts/load-pretty-printers.in load-pretty-printers @ONLY)
+endif()
diff --git a/README.md b/README.md
index 8d65b7d33..4059c9ce0 100644
--- a/README.md
+++ b/README.md
@@ -354,11 +354,32 @@ objects for each device and sets them as the per-device resource for that device
```c++
std::vector<unique_ptr<pool_memory_resource>> per_device_pools;
for(int i = 0; i < N; ++i) {
- cudaSetDevice(i); // set device i before creating MR
- // Use a vector of unique_ptr to maintain the lifetime of the MRs
- per_device_pools.push_back(std::make_unique<pool_memory_resource>());
- // Set the per-device resource for device i
- set_per_device_resource(cuda_device_id{i}, &per_device_pools.back());
+ cudaSetDevice(i); // set device i before creating MR
+ // Use a vector of unique_ptr to maintain the lifetime of the MRs
+ per_device_pools.push_back(std::make_unique<pool_memory_resource>());
+ // Set the per-device resource for device i
+ set_per_device_resource(cuda_device_id{i}, &per_device_pools.back());
+}
+```
+
+Note that the CUDA device that is current when creating a `device_memory_resource` must also be
+current any time that `device_memory_resource` is used to deallocate memory, including in a
+destructor. This affects RAII classes like `rmm::device_buffer` and `rmm::device_uvector`. Here's an
+(incorrect) example that assumes the above example loop has been run to create a
+`pool_memory_resource` for each device. A correct example adds a call to `cudaSetDevice(0)` on the
+line of the error comment.
+
+```c++
+{
+ RMM_CUDA_TRY(cudaSetDevice(0));
+ rmm::device_buffer buf_a(16);
+
+ {
+ RMM_CUDA_TRY(cudaSetDevice(1));
+ rmm::device_buffer buf_b(16);
+ }
+
+ // Error: when buf_a is destroyed, the current device must be 0, but it is 1
}
```
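+
+For reference, a corrected sketch of the same scope, with the `cudaSetDevice(0)` call added on
+the line of the error comment as described above:
+
+```c++
+{
+  RMM_CUDA_TRY(cudaSetDevice(0));
+  rmm::device_buffer buf_a(16);
+
+  {
+    RMM_CUDA_TRY(cudaSetDevice(1));
+    rmm::device_buffer buf_b(16);  // created and destroyed with device 1 current: OK
+  }
+
+  // Restore device 0 before buf_a is destroyed at the end of this scope
+  RMM_CUDA_TRY(cudaSetDevice(0));
+}
+```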
@@ -560,9 +581,12 @@ of more detailed logging. The default is `INFO`. Available levels are `TRACE`, `
The log relies on the [spdlog](https://github.com/gabime/spdlog.git) library.
-Note that to see logging below the `INFO` level, the C++ application must also call
-`rmm::logger().set_level()`, e.g. to enable all levels of logging down to `TRACE`, call
-`rmm::logger().set_level(spdlog::level::trace)` (and compile with `-DRMM_LOGGING_LEVEL=TRACE`).
+Note that to see logging below the `INFO` level, the application must also set the logging level at
+run time. C++ applications must call `rmm::logger().set_level()`; for example, to enable all
+levels of logging down to `TRACE`, call `rmm::logger().set_level(spdlog::level::trace)` (and compile
+librmm with `-DRMM_LOGGING_LEVEL=TRACE`). Python applications must call `rmm.set_logging_level()`;
+for example, to enable all levels of logging down to `TRACE`, call `rmm.set_logging_level("trace")`
+(and compile the RMM Python module with `-DRMM_LOGGING_LEVEL=TRACE`).
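+
+As a minimal C++ sketch (assuming librmm was compiled with `-DRMM_LOGGING_LEVEL=TRACE`; the
+`rmm/logger.hpp` include path is assumed here):
+
+```c++
+#include <rmm/logger.hpp>
+
+int main()
+{
+  // Lower the run-time filter so TRACE-level messages are emitted
+  rmm::logger().set_level(spdlog::level::trace);
+  // ... RMM allocations made after this point will log down to TRACE ...
+}
+```
+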
Note that debug logging is different from the CSV memory allocation logging provided by
`rmm::mr::logging_resource_adapter`. The latter is for logging a history of allocation /
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp
index 8c670525d..470442830 100644
--- a/benchmarks/random_allocations/random_allocations.cpp
+++ b/benchmarks/random_allocations/random_allocations.cpp
@@ -68,7 +68,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr,
{
std::default_random_engine generator;
- max_usage *= size_mb; // convert to bytes
+ max_usage *= size_mb; // convert to bytes
constexpr int allocation_probability{73}; // percent
constexpr int max_op_chance{99};
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index bc6e18021..0ae14a88f 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -11,6 +11,7 @@ rapids-print-env
rapids-logger "Begin cpp build"
-rapids-mamba-retry mambabuild conda/recipes/librmm
+# This calls mambabuild when boa is installed (as is the case in the CI images)
+rapids-conda-retry mambabuild conda/recipes/librmm
rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 7e1efef9a..d54947106 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -25,7 +25,7 @@ rapids-mamba-retry install \
--channel "${PYTHON_CHANNEL}" \
rmm librmm
-export RAPIDS_VERSION_NUMBER="23.08"
+export RAPIDS_VERSION_NUMBER="23.10"
export RAPIDS_DOCS_DIR="$(mktemp -d)"
rapids-logger "Build CPP docs"
@@ -37,11 +37,11 @@ popd
rapids-logger "Build Python docs"
pushd python/docs
-sphinx-build -b dirhtml . _html
-sphinx-build -b text . _text
+make dirhtml
+make text
mkdir -p "${RAPIDS_DOCS_DIR}/rmm/"{html,txt}
-mv _html/* "${RAPIDS_DOCS_DIR}/rmm/html"
-mv _text/* "${RAPIDS_DOCS_DIR}/rmm/txt"
+mv _build/dirhtml/* "${RAPIDS_DOCS_DIR}/rmm/html"
+mv _build/text/* "${RAPIDS_DOCS_DIR}/rmm/txt"
popd
rapids-upload-docs
diff --git a/ci/build_python.sh b/ci/build_python.sh
index b306d3e47..9da4025c7 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -13,6 +13,7 @@ rapids-logger "Begin py build"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
-rapids-mamba-retry mambabuild -c "${CPP_CHANNEL}" conda/recipes/rmm
+# This calls mambabuild when boa is installed (as is the case in the CI images)
+rapids-conda-retry mambabuild -c "${CPP_CHANNEL}" conda/recipes/rmm
rapids-upload-conda-to-s3 python
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 09c1e104e..660c88387 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -3,6 +3,9 @@
set -euo pipefail
+package_name="rmm"
+package_dir="python"
+
source rapids-configure-sccache
source rapids-date-string
@@ -12,15 +15,34 @@ version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-ci/release/apply_wheel_modifications.sh ${version_override} "-${RAPIDS_PY_CUDA_SUFFIX}"
-echo "The package name and/or version was modified in the package source. The git diff is:"
-git diff
+# This is the version of the suffix with a preceding hyphen. It's used
+# everywhere except in the final wheel name.
+PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
+
+# Patch project metadata files to include the CUDA version suffix and version override.
+pyproject_file="${package_dir}/pyproject.toml"
+
+sed -i "s/^version = .*/version = \"${version_override}\"/g" ${pyproject_file}
+sed -i "s/name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
+
+# For nightlies we want to ensure that we're pulling in alphas as well. The
+# easiest way to do so is to augment the spec with a constraint containing a
+# min alpha version that doesn't affect the version bounds but does allow usage
+# of alpha versions for that dependency without --pre
+alpha_spec=''
+if ! rapids-is-release-build; then
+ alpha_spec=',>=0.0.0a0'
+fi
+
+if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
+ sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file}
+fi
-cd python
+cd "${package_dir}"
SKBUILD_CONFIGURE_OPTIONS="-DRMM_BUILD_WHEELS=ON" python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
mkdir -p final_dist
python -m auditwheel repair -w final_dist dist/*
-RAPIDS_PY_WHEEL_NAME="rmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
diff --git a/ci/check_style.sh b/ci/check_style.sh
index 4beaca333..e96ad8bf1 100755
--- a/ci/check_style.sh
+++ b/ci/check_style.sh
@@ -14,7 +14,7 @@ rapids-dependency-file-generator \
rapids-mamba-retry env create --force -f env.yaml -n checks
conda activate checks
-FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/cmake-format-rapids-cmake.json
+FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.10/cmake-format-rapids-cmake.json
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
deleted file mode 100755
index cc71389ea..000000000
--- a/ci/release/apply_wheel_modifications.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
-#
-# Usage: bash apply_wheel_modifications.sh <new_version> <cuda_suffix>
-
-VERSION=${1}
-CUDA_SUFFIX=${2}
-
-sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/pyproject.toml
-
-sed -i "s/^name = \"rmm\"/name = \"rmm${CUDA_SUFFIX}\"/g" python/pyproject.toml
-
-if [[ $CUDA_SUFFIX == "-cu12" ]]; then
- sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" python/pyproject.toml
-fi
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 7314442ff..8b7f07167 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -22,6 +22,9 @@ NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
+# Need to distutils-normalize the original version
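+# (PEP 440 normalization drops leading zeros, e.g. 23.08 -> 23.8)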
+NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
+
echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"
# Inplace sed replace; workaround for Linux and Mac
@@ -31,14 +34,15 @@ function sed_runner() {
# cpp update
sed_runner 's/'" VERSION .*"'/'" VERSION ${NEXT_FULL_TAG}"'/g' CMakeLists.txt
-sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' CMakeLists.txt
# Python update
sed_runner 's/'"rmm_version .*)"'/'"rmm_version ${NEXT_FULL_TAG})"'/g' python/CMakeLists.txt
-sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' python/CMakeLists.txt
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/rmm/__init__.py
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pyproject.toml
+# rapids-cmake version
+sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake
+
# cmake-format rapids-cmake definitions
sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHORT_TAG}\/cmake-format-rapids-cmake.json"'/g' ci/check_style.sh
@@ -54,3 +58,9 @@ for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
done
sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh
+
+# .devcontainer files
+find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do
+ sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
+ sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
+done
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index f668c4a4c..8571a1928 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -4,17 +4,28 @@ channels:
- rapidsai
- conda-forge
dependencies:
+- c-compiler
+- clang-tools==16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-python>=11.7.1,<12.0a0
- cuda-version=11.8
- cudatoolkit
-- cython>=0.29,<0.30
+- cxx-compiler
+- cython>=3.0.0
+- doxygen=1.9.1
- fmt>=9.1.0,<10
+- gcc_linux-64=11.*
- gcovr>=5.0
+- graphviz
- identify>=2.5.20
+- ipython
+- make
+- nbsphinx
- ninja
- numba>=0.57
- numpy>=1.21
+- numpydoc
- nvcc_linux-64=11.8
- pre-commit
- pytest
@@ -22,5 +33,10 @@ dependencies:
- python>=3.9,<3.11
- scikit-build>=0.13.1
- spdlog>=1.11.0,<1.12
+- sphinx
+- sphinx-copybutton
+- sphinx-markdown-tables
+- sphinx_rtd_theme
+- sysroot_linux-64==2.17
- tomli
name: all_cuda-118_arch-x86_64
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index 40291ba33..aec31b3db 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -4,22 +4,38 @@ channels:
- rapidsai
- conda-forge
dependencies:
+- c-compiler
+- clang-tools==16.0.6
+- clang==16.0.6
- cmake>=3.26.4
- cuda-nvcc
- cuda-python>=12.0,<13.0a0
- cuda-version=12.0
-- cython>=0.29,<0.30
+- cxx-compiler
+- cython>=3.0.0
+- doxygen=1.9.1
- fmt>=9.1.0,<10
+- gcc_linux-64=11.*
- gcovr>=5.0
+- graphviz
- identify>=2.5.20
+- ipython
+- make
+- nbsphinx
- ninja
- numba>=0.57
- numpy>=1.21
+- numpydoc
- pre-commit
- pytest
- pytest-cov
- python>=3.9,<3.11
- scikit-build>=0.13.1
- spdlog>=1.11.0,<1.12
+- sphinx
+- sphinx-copybutton
+- sphinx-markdown-tables
+- sphinx_rtd_theme
+- sysroot_linux-64==2.17
- tomli
name: all_cuda-120_arch-x86_64
diff --git a/conda/recipes/rmm/meta.yaml b/conda/recipes/rmm/meta.yaml
index 5d905b853..e13254d4c 100644
--- a/conda/recipes/rmm/meta.yaml
+++ b/conda/recipes/rmm/meta.yaml
@@ -58,7 +58,7 @@ requirements:
- cuda-cudart-dev
- cuda-python ==12.0.0
{% endif %}
- - cython >=0.29,<0.30
+ - cython >=3.0.0
- librmm ={{ version }}
- python
- scikit-build >=0.13.1
diff --git a/dependencies.yaml b/dependencies.yaml
index 91eed8280..7d9af48cf 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -10,6 +10,7 @@ files:
- checks
- cudatoolkit
- develop
+ - docs
- run
- test_python
test_python:
@@ -62,12 +63,14 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- &cmake_ver cmake>=3.26.4
- - cython>=0.29,<0.30
+ - cython>=3.0.0
- ninja
- scikit-build>=0.13.1
- tomli
- output_types: conda
packages:
+ - c-compiler
+ - cxx-compiler
- fmt>=9.1.0,<10
- spdlog>=1.11.0,<1.12
- python>=3.9,<3.11
@@ -76,6 +79,18 @@ dependencies:
- wheel
- setuptools>=61.0.0
specific:
+ - output_types: conda
+ matrices:
+ - matrix:
+ arch: x86_64
+ packages:
+ - gcc_linux-64=11.*
+ - sysroot_linux-64==2.17
+ - matrix:
+ arch: aarch64
+ packages:
+ - gcc_linux-aarch64=11.*
+ - sysroot_linux-aarch64==2.17
- output_types: conda
matrices:
- matrix:
@@ -110,6 +125,9 @@ dependencies:
# pre-commit requires identify minimum version 1.0, but clang-format requires textproto support and that was
# added in 2.5.20, so we need to call out the minimum version needed for our plugins
- identify>=2.5.20
+ - output_types: conda
+ packages:
+ - &doxygen doxygen=1.9.1
cudatoolkit:
specific:
- output_types: conda
@@ -148,13 +166,18 @@ dependencies:
- output_types: [conda, requirements]
packages:
- gcovr>=5.0
+ - output_types: conda
+ packages:
+ - clang==16.0.6
+ - clang-tools==16.0.6
docs:
common:
- - output_types: [conda]
+ - output_types: conda
packages:
- - doxygen=1.8.20
+ - *doxygen
- graphviz
- ipython
+ - make
- nbsphinx
- numpydoc
- sphinx
diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile
index 264d99df0..5d6153f81 100644
--- a/doxygen/Doxyfile
+++ b/doxygen/Doxyfile
@@ -1,4 +1,4 @@
-# Doxyfile 1.8.20
+# Doxyfile 1.9.1
# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project.
@@ -32,13 +32,13 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.
-PROJECT_NAME = "RMM"
+PROJECT_NAME = RMM
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
# control system is used.
-PROJECT_NUMBER = 23.08
+PROJECT_NUMBER = 23.10
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
@@ -259,7 +259,11 @@ TAB_SIZE = 4
# commands \{ and \} for these it is advised to use the version @{ and @} or use
# a double escape (\\{ and \\})
-ALIASES =
+ALIASES = "briefreturn{1}=@brief \1 @return \1" \
+ "default_copy_constructor=Default copy constructor" \
+ "default_move_constructor=Default move constructor" \
+ "default_copy_assignment{1}=@brief Default copy assignment operator @return \1& Reference to the assigned object" \
+ "default_move_assignment{1}=@brief Default move assignment operator @return \1& Reference to the assigned object"
# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
# only. Doxygen will then generate output that is more tailored for C. For
@@ -313,7 +317,10 @@ OPTIMIZE_OUTPUT_SLICE = NO
# Note: For files without extension you can use no_extension as a placeholder.
#
# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
+# the files are not read by doxygen. When specifying no_extension you should add
+# * to the FILE_PATTERNS.
+#
+# Note see also the list of default file extension mappings.
EXTENSION_MAPPING = cu=C++ \
cuh=C++
@@ -524,6 +531,13 @@ EXTRACT_LOCAL_METHODS = NO
EXTRACT_ANON_NSPACES = NO
+# If this flag is set to YES, the name of an unnamed parameter in a declaration
+# will be determined by the corresponding definition. By default unnamed
+# parameters remain unnamed in the output.
+# The default value is: YES.
+
+RESOLVE_UNNAMED_PARAMS = YES
+
# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
# undocumented members inside documented classes or files. If set to NO these
# members will be included in the various overviews, but no documentation
@@ -561,11 +575,18 @@ HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# (including Cygwin) and Mac users are advised to set this option to NO.
+# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
+# able to match the capabilities of the underlying filesystem. In case the
+# filesystem is case sensitive (i.e. it supports files in the same directory
+# whose names only differ in casing), the option must be set to YES to properly
+# deal with such files in case they appear in the input. For filesystems that
+# are not case sensitive the option should be set to NO to properly deal with
+# output files written for symbols that only differ in casing, such as for two
+# classes, one named CLASS and the other named Class, and to also support
+# references to files without having to specify the exact matching casing. On
+# Windows (including Cygwin) and MacOS, users should typically set this option
+# to NO, whereas on Linux or other Unix flavors it should typically be set to
+# YES.
# The default value is: system dependent.
CASE_SENSE_NAMES = YES
@@ -804,7 +825,10 @@ WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = YES
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
-# a warning is encountered.
+# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
+# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
+# at the end of the doxygen process doxygen will return with a non-zero status.
+# Possible values are: NO, YES and FAIL_ON_WARNINGS.
# The default value is: NO.
WARN_AS_ERROR = NO
@@ -840,8 +864,8 @@ INPUT = ../include
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
-# possible encodings.
+# documentation (see:
+# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
# The default value is: UTF-8.
INPUT_ENCODING = UTF-8
@@ -854,13 +878,15 @@ INPUT_ENCODING = UTF-8
# need to set EXTENSION_MAPPING for the extension otherwise the files are not
# read by doxygen.
#
+# Note the list of default checked file patterns might differ from the list of
+# default file extension mappings.
+#
# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
-# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
-# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd,
-# *.vhdl, *.ucf, *.qsf and *.ice.
+# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,
+# *.ucf, *.qsf and *.ice.
FILE_PATTERNS = *.c \
*.cpp \
@@ -898,7 +924,7 @@ EXCLUDE_SYMLINKS = NO
# Note that the wildcards are matched against the file with absolute path, so to
# exclude all test directories for example use the pattern */test/*
-EXCLUDE_PATTERNS =
+EXCLUDE_PATTERNS = */detail/*
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the
@@ -1090,13 +1116,6 @@ VERBATIM_HEADERS = YES
ALPHABETICAL_INDEX = YES
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX = 5
-
# In case all classes in a project start with a common prefix, all classes will
# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
# can be used to specify a prefix (or a list of prefixes) that should be ignored
@@ -1267,10 +1286,11 @@ HTML_INDEX_NUM_ENTRIES = 100
# If the GENERATE_DOCSET tag is set to YES, additional index files will be
# generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: https://developer.apple.com/xcode/), introduced with OSX
-# 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
+# environment (see:
+# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
+# create a documentation set, doxygen will generate a Makefile in the HTML
+# output directory. Running make will produce the docset in that directory and
+# running make install will install the docset in
# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
# genXcode/_index.html for more information.
@@ -1312,8 +1332,8 @@ DOCSET_PUBLISHER_NAME = Publisher
# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
+# (see:
+# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
#
# The HTML Help Workshop contains a compiler that can convert all HTML output
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
@@ -1388,7 +1408,8 @@ QCH_FILE =
# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
# Project output. For more information please see Qt Help Project / Namespace
-# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
+# (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_QHP is set to YES.
@@ -1396,8 +1417,8 @@ QHP_NAMESPACE = org.doxygen.Project
# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
# Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
-# folders).
+# Folders (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
# The default value is: doc.
# This tag requires that the tag GENERATE_QHP is set to YES.
@@ -1405,16 +1426,16 @@ QHP_VIRTUAL_FOLDER = doc
# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
# filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.
QHP_CUST_FILTER_NAME =
# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
# custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.
QHP_CUST_FILTER_ATTRS =
@@ -1426,9 +1447,9 @@ QHP_CUST_FILTER_ATTRS =
QHP_SECT_FILTER_ATTRS =
-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
+# The QHG_LOCATION tag can be used to specify the location (absolute path
+# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
+# run qhelpgenerator on the generated .qhp file.
# This tag requires that the tag GENERATE_QHP is set to YES.
QHG_LOCATION =
@@ -1555,7 +1576,7 @@ USE_MATHJAX = NO
# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.
# Possible values are: HTML-CSS (which is slower, but has the best
# compatibility), NativeMML (i.e. MathML) and SVG.
# The default value is: HTML-CSS.
@@ -1585,7 +1606,8 @@ MATHJAX_EXTENSIONS =
# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
# of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
# example see the documentation.
# This tag requires that the tag USE_MATHJAX is set to YES.
@@ -1632,7 +1654,8 @@ SERVER_BASED_SEARCH = NO
#
# Doxygen ships with an example indexer (doxyindexer) and search engine
# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/).
+# Xapian (see:
+# https://xapian.org/).
#
# See the section "External Indexing and Searching" for details.
# The default value is: NO.
@@ -1645,8 +1668,9 @@ EXTERNAL_SEARCH = NO
#
# Doxygen ships with an example indexer (doxyindexer) and search engine
# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/). See the section "External Indexing and
-# Searching" for details.
+# Xapian (see:
+# https://xapian.org/). See the section "External Indexing and Searching" for
+# details.
# This tag requires that the tag SEARCHENGINE is set to YES.
SEARCHENGINE_URL =
@@ -2325,10 +2349,32 @@ UML_LOOK = NO
# but if the number exceeds 15, the total amount of fields shown is limited to
# 10.
# Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
+# This tag requires that the tag UML_LOOK is set to YES.
UML_LIMIT_NUM_FIELDS = 10
+# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
+# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
+# tag is set to YES, doxygen will add type and arguments for attributes and
+# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
+# will not generate fields with class member information in the UML graphs. The
+# class diagrams will look similar to the default class diagrams but using UML
+# notation for the relationships.
+# Possible values are: NO, YES and NONE.
+# The default value is: NO.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+DOT_UML_DETAILS = NO
+
+# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
+# to display on a single line. If the actual line length exceeds this threshold
+# significantly it will be wrapped across multiple lines. Some heuristics are applied
+# to avoid ugly line breaks.
+# Minimum value: 0, maximum value: 1000, default value: 17.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_WRAP_THRESHOLD = 17
+
# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
# collaboration graphs will show the relations between templates and their
# instances.
@@ -2518,9 +2564,11 @@ DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
-# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
# files that are used to generate the various graphs.
+#
+# Note: This setting is not only used for dot files but also for msc and
+# plantuml temporary files.
# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
DOT_CLEANUP = YES
diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake
new file mode 100644
index 000000000..1ff8119ba
--- /dev/null
+++ b/fetch_rapids.cmake
@@ -0,0 +1,18 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
+ file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.10/RAPIDS.cmake
+ ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
+endif()
+include(${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake)
diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp
index ab225490e..8d355ee23 100644
--- a/include/rmm/cuda_device.hpp
+++ b/include/rmm/cuda_device.hpp
@@ -26,23 +26,22 @@ namespace rmm {
*
*/
struct cuda_device_id {
- using value_type = int;
+ using value_type = int; ///< Integer type used for device identifier
/**
* @brief Construct a `cuda_device_id` from the specified integer value
*
- * @param id The device's integer identifier
+ * @param dev_id The device's integer identifier
*/
explicit constexpr cuda_device_id(value_type dev_id) noexcept : id_{dev_id} {}
- /// Returns the wrapped integer value
+ /// @briefreturn{The wrapped integer value}
[[nodiscard]] constexpr value_type value() const noexcept { return id_; }
private:
value_type id_;
};
-namespace detail {
/**
* @brief Returns a `cuda_device_id` for the current device
*
@@ -50,11 +49,56 @@ namespace detail {
*
* @return `cuda_device_id` for the current device
*/
-inline cuda_device_id current_device()
+inline cuda_device_id get_current_cuda_device()
{
- int dev_id{};
- RMM_CUDA_TRY(cudaGetDevice(&dev_id));
+ cuda_device_id::value_type dev_id{-1};
+ RMM_ASSERT_CUDA_SUCCESS(cudaGetDevice(&dev_id));
return cuda_device_id{dev_id};
}
-} // namespace detail
+
+/**
+ * @brief Returns the number of CUDA devices in the system
+ *
+ * @return Number of CUDA devices in the system
+ */
+inline int get_num_cuda_devices()
+{
+ cuda_device_id::value_type num_dev{-1};
+ RMM_ASSERT_CUDA_SUCCESS(cudaGetDeviceCount(&num_dev));
+ return num_dev;
+}
+
+/**
+ * @brief RAII class that sets the current CUDA device to the specified device on construction
+ * and restores the previous device on destruction.
+ */
+struct cuda_set_device_raii {
+ /**
+ * @brief Construct a new cuda_set_device_raii object and set the current CUDA device to `dev_id`
+ *
+ * @param dev_id The device to set as the current CUDA device
+ */
+ explicit cuda_set_device_raii(cuda_device_id dev_id)
+ : old_device_{get_current_cuda_device()}, needs_reset_{old_device_.value() != dev_id.value()}
+ {
+ if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value()));
+ }
+ /**
+ * @brief Reactivates the previous CUDA device
+ */
+ ~cuda_set_device_raii() noexcept
+ {
+ if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value()));
+ }
+
+ cuda_set_device_raii(cuda_set_device_raii const&) = delete;
+ cuda_set_device_raii& operator=(cuda_set_device_raii const&) = delete;
+ cuda_set_device_raii(cuda_set_device_raii&&) = delete;
+ cuda_set_device_raii& operator=(cuda_set_device_raii&&) = delete;
+
+ private:
+ cuda_device_id old_device_;
+ bool needs_reset_;
+};
+
} // namespace rmm
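
For orientation, a minimal sketch of how the utilities added above compose (illustrative only; the multi-GPU loop assumes a CUDA-capable build with this header on the include path):

```cpp
#include <rmm/cuda_device.hpp>

void visit_all_devices()
{
  // Iterate over every device, making each current in turn
  for (int i = 0; i < rmm::get_num_cuda_devices(); ++i) {
    rmm::cuda_set_device_raii scope{rmm::cuda_device_id{i}};
    // ... per-device work here; device i is current ...
  }  // `scope` restores the previous device at the end of each iteration
}
```
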
diff --git a/include/rmm/cuda_stream.hpp b/include/rmm/cuda_stream.hpp
index 9d2dab4f0..b778cf7bd 100644
--- a/include/rmm/cuda_stream.hpp
+++ b/include/rmm/cuda_stream.hpp
@@ -46,6 +46,8 @@ class cuda_stream {
*
* A moved-from cuda_stream is invalid and it is Undefined Behavior to call methods that access
* the owned stream.
+ *
+ * @return A reference to this cuda_stream
*/
cuda_stream& operator=(cuda_stream&&) = default;
~cuda_stream() = default;
diff --git a/include/rmm/cuda_stream_view.hpp b/include/rmm/cuda_stream_view.hpp
index bc30f26d8..150fe3957 100644
--- a/include/rmm/cuda_stream_view.hpp
+++ b/include/rmm/cuda_stream_view.hpp
@@ -33,41 +33,47 @@ namespace rmm {
*/
class cuda_stream_view {
public:
- constexpr cuda_stream_view() = default;
- constexpr cuda_stream_view(cuda_stream_view const&) = default;
- constexpr cuda_stream_view(cuda_stream_view&&) = default;
- constexpr cuda_stream_view& operator=(cuda_stream_view const&) = default;
- constexpr cuda_stream_view& operator=(cuda_stream_view&&) = default;
- ~cuda_stream_view() = default;
+ constexpr cuda_stream_view() = default;
+ ~cuda_stream_view() = default;
+ constexpr cuda_stream_view(cuda_stream_view const&) = default; ///< @default_copy_constructor
+ constexpr cuda_stream_view(cuda_stream_view&&) = default; ///< @default_move_constructor
+ constexpr cuda_stream_view& operator=(cuda_stream_view const&) =
+ default; ///< @default_copy_assignment{cuda_stream_view}
+ constexpr cuda_stream_view& operator=(cuda_stream_view&&) =
+ default; ///< @default_move_assignment{cuda_stream_view}
// Disable construction from literal 0
constexpr cuda_stream_view(int) = delete; //< Prevent cast from 0
constexpr cuda_stream_view(std::nullptr_t) = delete; //< Prevent cast from nullptr
/**
- * @brief Implicit conversion from cudaStream_t.
+ * @brief Constructor from a cudaStream_t
+ *
+ * @param stream The underlying stream for this view
*/
constexpr cuda_stream_view(cudaStream_t stream) noexcept : stream_{stream} {}
/**
* @brief Get the wrapped stream.
*
- * @return cudaStream_t The wrapped stream.
+ * @return cudaStream_t The underlying stream referenced by this cuda_stream_view
*/
[[nodiscard]] constexpr cudaStream_t value() const noexcept { return stream_; }
/**
* @brief Implicit conversion to cudaStream_t.
+ *
+ * @return cudaStream_t The underlying stream referenced by this cuda_stream_view
*/
constexpr operator cudaStream_t() const noexcept { return value(); }
/**
- * @brief Return true if the wrapped stream is the CUDA per-thread default stream.
+ * @briefreturn{true if the wrapped stream is the CUDA per-thread default stream}
*/
[[nodiscard]] inline bool is_per_thread_default() const noexcept;
/**
- * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream.
+ * @briefreturn{true if the wrapped stream is explicitly the CUDA legacy default stream}
*/
[[nodiscard]] inline bool is_default() const noexcept;
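
With the literal-`0` and `nullptr` constructors deleted, a view must be built from an actual `cudaStream_t`. A small sketch (error handling omitted):

```cpp
#include <rmm/cuda_stream_view.hpp>

#include <cuda_runtime_api.h>

void stream_view_example()
{
  cudaStream_t raw{};
  cudaStreamCreate(&raw);

  rmm::cuda_stream_view view{raw};  // OK: constructed from a cudaStream_t
  // rmm::cuda_stream_view bad{0};        // ill-formed: deleted constructor
  // rmm::cuda_stream_view bad2{nullptr}; // ill-formed: deleted constructor

  cudaStreamDestroy(raw);
}
```
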
diff --git a/include/rmm/detail/dynamic_load_runtime.hpp b/include/rmm/detail/dynamic_load_runtime.hpp
index 28121e6a8..b45dbae25 100644
--- a/include/rmm/detail/dynamic_load_runtime.hpp
+++ b/include/rmm/detail/dynamic_load_runtime.hpp
@@ -115,7 +115,7 @@ struct async_alloc {
int cuda_pool_supported{};
auto result = cudaDeviceGetAttribute(&cuda_pool_supported,
cudaDevAttrMemoryPoolsSupported,
- rmm::detail::current_device().value());
+ rmm::get_current_cuda_device().value());
return result == cudaSuccess and cuda_pool_supported == 1;
}()};
return runtime_supports_pool and driver_supports_pool;
@@ -139,7 +139,7 @@ struct async_alloc {
if (cudaMemHandleTypeNone != handle_type) {
auto const result = cudaDeviceGetAttribute(&supported_handle_types_bitmask,
cudaDevAttrMemoryPoolSupportedHandleTypes,
- rmm::detail::current_device().value());
+ rmm::get_current_cuda_device().value());
// Don't throw on cudaErrorInvalidValue
auto const unsupported_runtime = (result == cudaErrorInvalidValue);
diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp
index bd8a8211a..84638ee89 100644
--- a/include/rmm/device_buffer.hpp
+++ b/include/rmm/device_buffer.hpp
@@ -29,7 +29,6 @@
namespace rmm {
/**
- * @file device_buffer.hpp
* @brief RAII construct for device memory allocation
*
* This class allocates untyped and *uninitialized* device memory using a
@@ -202,6 +201,8 @@ class device_buffer {
* replaced by the `other.stream()`.
*
* @param other The `device_buffer` whose contents will be moved.
+ *
+ * @return A reference to this `device_buffer`
*/
device_buffer& operator=(device_buffer&& other) noexcept
{
@@ -331,22 +332,22 @@ class device_buffer {
}
/**
- * @brief Returns raw pointer to underlying device memory allocation
+ * @briefreturn{Const pointer to the device memory allocation}
*/
[[nodiscard]] void const* data() const noexcept { return _data; }
/**
- * @brief Returns raw pointer to underlying device memory allocation
+ * @briefreturn{Pointer to the device memory allocation}
*/
void* data() noexcept { return _data; }
/**
- * @brief Returns the number of bytes.
+ * @briefreturn{The number of bytes}
*/
[[nodiscard]] std::size_t size() const noexcept { return _size; }
/**
- * @brief Returns the signed number of bytes.
+ * @briefreturn{The signed number of bytes}
*/
[[nodiscard]] std::int64_t ssize() const noexcept
{
@@ -356,11 +357,10 @@ class device_buffer {
}
/**
- * @brief returns the number of bytes that can be held in currently allocated storage.
+ * @briefreturn{Whether or not the buffer currently holds any data}
*
* If `is_empty() == true`, the `device_buffer` may still hold an allocation
* if `capacity() > 0`.
- *
*/
[[nodiscard]] bool is_empty() const noexcept { return 0 == size(); }
@@ -368,11 +368,13 @@ class device_buffer {
* @brief Returns actual size in bytes of device memory allocation.
*
* The invariant `size() <= capacity()` holds.
+ *
+ * @return The actual size in bytes of the device memory allocation
*/
[[nodiscard]] std::size_t capacity() const noexcept { return _capacity; }
/**
- * @brief Returns stream most recently specified for allocation/deallocation
+ * @briefreturn{The stream most recently specified for allocation/deallocation}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; }
@@ -384,20 +386,21 @@ class device_buffer {
* will be used for deallocation in the `rmm::device_buffer` destructor.
* However, if either of `resize()` or `shrink_to_fit()` is called after this,
* the later stream parameter will be stored and used in the destructor.
+ *
+ * @param stream The stream to use for deallocation
*/
void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }
/**
- * @brief Returns pointer to the memory resource used to allocate and
- * deallocate the device memory
+ * @briefreturn{Pointer to the memory resource used to allocate and deallocate}
*/
[[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _mr; }
private:
- void* _data{nullptr}; ///< Pointer to device memory allocation
- std::size_t _size{}; ///< Requested size of the device memory allocation
- std::size_t _capacity{}; ///< The actual size of the device memory allocation
- cuda_stream_view _stream{}; ///< Stream to use for device memory deallocation
+ void* _data{nullptr}; ///< Pointer to device memory allocation
+ std::size_t _size{}; ///< Requested size of the device memory allocation
+ std::size_t _capacity{}; ///< The actual size of the device memory allocation
+ cuda_stream_view _stream{}; ///< Stream to use for device memory deallocation
mr::device_memory_resource* _mr{
mr::get_current_device_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory
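
A sketch of the stream semantics documented above, assuming the default memory resource (constructor and method names are as in this header):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_buffer.hpp>

void buffer_example()
{
  rmm::cuda_stream stream;

  // 1 MiB of uninitialized device memory, allocated on `stream`
  rmm::device_buffer buf{1 << 20, stream.view()};

  // Unless set_stream() is called, `stream` is also used for deallocation
  buf.set_stream(rmm::cuda_stream_default);
}  // deallocated on the default stream here
```
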
diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp
index e476b2d87..83fcdda09 100644
--- a/include/rmm/device_scalar.hpp
+++ b/include/rmm/device_scalar.hpp
@@ -37,18 +37,25 @@ class device_scalar {
public:
static_assert(std::is_trivially_copyable<T>::value, "Scalar type must be trivially copyable");
- using value_type = typename device_uvector<T>::value_type;
- using reference = typename device_uvector<T>::reference;
- using const_reference = typename device_uvector<T>::const_reference;
- using pointer = typename device_uvector<T>::pointer;
- using const_pointer = typename device_uvector<T>::const_pointer;
+ using value_type = typename device_uvector<T>::value_type; ///< T, the type of the scalar element
+ using reference = typename device_uvector<T>::reference; ///< value_type&
+ using const_reference = typename device_uvector<T>::const_reference; ///< const value_type&
+ using pointer =
+ typename device_uvector<T>::pointer; ///< The type of the pointer returned by data()
+ using const_pointer = typename device_uvector<T>::const_pointer; ///< The type of the pointer
+ ///< returned by data() const
RMM_EXEC_CHECK_DISABLE
~device_scalar() = default;
RMM_EXEC_CHECK_DISABLE
- device_scalar(device_scalar&&) noexcept = default;
+ device_scalar(device_scalar&&) noexcept = default; ///< Default move constructor
+ /**
+ * @brief Default move assignment operator
+ *
+ * @return device_scalar& A reference to the assigned-to object
+ */
device_scalar& operator=(device_scalar&&) noexcept = default;
/**
@@ -224,6 +231,8 @@ class device_scalar {
* specified to the constructor, then appropriate dependencies must be inserted between the
* streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may
* be a race condition.
+ *
+ * @return Pointer to underlying device memory
*/
[[nodiscard]] pointer data() noexcept { return static_cast<pointer>(_storage.data()); }
@@ -234,6 +243,8 @@ class device_scalar {
* specified to the constructor, then appropriate dependencies must be inserted between the
* streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may
* be a race condition.
+ *
+ * @return Const pointer to underlying device memory
*/
[[nodiscard]] const_pointer data() const noexcept
{
@@ -241,12 +252,14 @@ class device_scalar {
}
/**
- * @brief Returns stream most recently specified for allocation/deallocation
+ * @briefreturn{Stream associated with the device memory allocation}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return _storage.stream(); }
/**
* @brief Sets the stream to be used for deallocation
+ *
+ * @param stream Stream to be used for deallocation
*/
void set_stream(cuda_stream_view stream) noexcept { _storage.set_stream(stream); }
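
To illustrate the stream-consistency requirement, a sketch that keeps all accesses on one stream (assuming `set_value_async`/`value` as in RMM's `device_scalar` API):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_scalar.hpp>

void scalar_example()
{
  rmm::cuda_stream stream;
  rmm::device_scalar<int> scalar{stream.view()};

  int const host_val{42};
  scalar.set_value_async(host_val, stream.view());  // async H->D copy on `stream`
  int const result = scalar.value(stream.view());   // D->H copy; synchronizes `stream`
}
```
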
diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp
index 40f5d8c5d..47ed1adff 100644
--- a/include/rmm/device_uvector.hpp
+++ b/include/rmm/device_uvector.hpp
@@ -71,22 +71,24 @@ class device_uvector {
"device_uvector only supports types that are trivially copyable.");
public:
- using value_type = T;
- using size_type = std::size_t;
- using reference = value_type&;
- using const_reference = value_type const&;
- using pointer = value_type*;
- using const_pointer = value_type const*;
- using iterator = pointer;
- using const_iterator = const_pointer;
+ using value_type = T; ///< T; stored value type
+ using size_type = std::size_t; ///< The type used for the size of the vector
+ using reference = value_type&; ///< value_type&; reference type returned by operator[](size_type)
+ using const_reference = value_type const&; ///< value_type const&; constant reference type
+ ///< returned by operator[](size_type) const
+ using pointer = value_type*; ///< The type of the pointer returned by data()
+ using const_pointer = value_type const*; ///< The type of the pointer returned by data() const
+ using iterator = pointer; ///< The type of the iterator returned by begin()
+ using const_iterator = const_pointer; ///< The type of the const iterator returned by cbegin()
RMM_EXEC_CHECK_DISABLE
~device_uvector() = default;
RMM_EXEC_CHECK_DISABLE
- device_uvector(device_uvector&&) noexcept = default;
+ device_uvector(device_uvector&&) noexcept = default; ///< @default_move_constructor
- device_uvector& operator=(device_uvector&&) noexcept = default;
+ device_uvector& operator=(device_uvector&&) noexcept =
+ default; ///< @default_move_assignment{device_uvector}
/**
* @brief Copy ctor is deleted as it doesn't allow a stream argument
@@ -497,12 +499,12 @@ class device_uvector {
[[nodiscard]] const_iterator end() const noexcept { return cend(); }
/**
- * @brief Returns the number of elements.
+ * @briefreturn{The number of elements in the vector}
*/
[[nodiscard]] std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); }
/**
- * @brief Returns the signed number of elements.
+ * @briefreturn{The signed number of elements in the vector}
*/
[[nodiscard]] std::int64_t ssize() const noexcept
{
@@ -512,17 +514,12 @@ class device_uvector {
}
/**
- * @brief Returns true if the vector contains no elements, i.e., `size() == 0`.
- *
- * @return true The vector is empty
- * @return false The vector is not empty
+ * @briefreturn{true if the vector contains no elements, i.e. `size() == 0`}
*/
[[nodiscard]] bool is_empty() const noexcept { return size() == 0; }
/**
- * @brief Returns pointer to the resource used to allocate and deallocate the device storage.
- *
- * @return Pointer to underlying resource
+ * @briefreturn{Pointer to underlying resource used to allocate and deallocate the device storage}
*/
[[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept
{
@@ -530,7 +527,7 @@ class device_uvector {
}
/**
- * @brief Returns stream most recently specified for allocation/deallocation
+ * @briefreturn{Stream most recently specified for allocation/deallocation}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return _storage.stream(); }
@@ -542,6 +539,8 @@ class device_uvector {
* will be used for deallocation in the `rmm::device_uvector` destructor.
* However, if either of `resize()` or `shrink_to_fit()` is called after this,
* the later stream parameter will be stored and used in the destructor.
+ *
+ * @param stream The stream to use for deallocation
*/
void set_stream(cuda_stream_view stream) noexcept { _storage.set_stream(stream); }
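
A short sketch exercising the accessors documented above (the element setter is the synchronous `set_element` overload from this class):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_uvector.hpp>

void uvector_example()
{
  rmm::cuda_stream stream;

  rmm::device_uvector<int> vec(100, stream.view());  // 100 uninitialized ints

  vec.set_element(0, 7, stream.view());  // copy 7 into device element 0
  auto const n     = vec.size();         // 100
  auto const empty = vec.is_empty();     // false
}
```
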
diff --git a/include/rmm/exec_policy.hpp b/include/rmm/exec_policy.hpp
index 015c3646d..78c747652 100644
--- a/include/rmm/exec_policy.hpp
+++ b/include/rmm/exec_policy.hpp
@@ -30,6 +30,9 @@
namespace rmm {
+/**
+ * @brief Synchronous execution policy for allocations using thrust
+ */
using thrust_exec_policy_t =
thrust::detail::execute_with_allocator<rmm::mr::thrust_allocator<char>,
thrust::cuda_cub::execute_on_stream_base>;
@@ -40,6 +43,12 @@ using thrust_exec_policy_t =
*/
class exec_policy : public thrust_exec_policy_t {
public:
+ /**
+ * @brief Construct a new execution policy object
+ *
+ * @param stream The stream on which to allocate temporary memory
+ * @param mr The resource to use for allocating temporary memory
+ */
explicit exec_policy(cuda_stream_view stream = cuda_stream_default,
rmm::mr::device_memory_resource* mr = mr::get_current_device_resource())
: thrust_exec_policy_t(
@@ -50,6 +59,9 @@ class exec_policy : public thrust_exec_policy_t {
#if THRUST_VERSION >= 101600
+/**
+ * @brief Asynchronous execution policy for allocations using thrust
+ */
using thrust_exec_policy_nosync_t =
+ thrust::detail::execute_with_allocator<rmm::mr::thrust_allocator<char>,
thrust::cuda_cub::execute_on_stream_nosync_base>;
@@ -72,9 +84,11 @@ class exec_policy_nosync : public thrust_exec_policy_nosync_t {
#else
-using thrust_exec_policy_nosync_t = thrust_exec_policy_t;
-using exec_policy_nosync = exec_policy;
-
+using thrust_exec_policy_nosync_t =
+ thrust_exec_policy_t; ///< When used with Thrust < 1.16.0, thrust_exec_policy_nosync_t is an
+ ///< alias for thrust_exec_policy_t
+using exec_policy_nosync =
+ exec_policy; ///< When used with Thrust < 1.16.0, exec_policy_nosync is an alias for exec_policy
#endif
} // namespace rmm
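
The policies above are how RMM routes Thrust's temporary allocations through its resources. A typical call site (a sketch; any Thrust algorithm that allocates scratch memory works the same way):

```cpp
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/reduce.h>

int sum_on_stream(rmm::device_uvector<int> const& vec, rmm::cuda_stream_view stream)
{
  // Scratch allocations made by thrust::reduce go through the current
  // device resource and are stream-ordered on `stream`
  return thrust::reduce(rmm::exec_policy(stream), vec.begin(), vec.end(), 0);
}
```
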
diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp
index d0d6f8fa4..4e17878bd 100644
--- a/include/rmm/logger.hpp
+++ b/include/rmm/logger.hpp
@@ -43,9 +43,11 @@ inline std::string default_log_filename()
return (filename == nullptr) ? std::string{"rmm_log.txt"} : std::string{filename};
}
-// Simple wrapper around a spdlog::logger that performs RMM-specific initialization
+/**
+ * @brief Simple wrapper around a spdlog::logger that performs RMM-specific initialization
+ */
struct logger_wrapper {
- spdlog::logger logger_;
+ spdlog::logger logger_; ///< The underlying logger
logger_wrapper()
: logger_{"RMM",
@@ -70,8 +72,14 @@ struct logger_wrapper {
* @brief Represent a size in number of bytes.
*/
struct bytes {
- std::size_t value;
-
+ std::size_t value; ///< The size in bytes
+
+ /**
+ * @brief Output stream operator for bytes
+ *
+ * @param os The output stream
+ * @param value The size in bytes
+ * @return The output stream
+ */
friend std::ostream& operator<<(std::ostream& os, bytes const& value)
{
static std::array units{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"};
@@ -113,5 +121,8 @@ inline spdlog::logger& logger()
} // namespace rmm
+// Doxygen doesn't like this because we're overloading something from fmt
+//! @cond Doxygen_Suppress
template <>
struct fmt::formatter<rmm::detail::bytes> : fmt::ostream_formatter {};
+//! @endcond
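
For reference, the `bytes` helper above is what gives RMM's log messages human-readable sizes; it can be streamed directly (a sketch, assuming `bytes` lives in `rmm::detail` as in this header):

```cpp
#include <rmm/logger.hpp>

#include <cstddef>
#include <iostream>

void bytes_example()
{
  // Prints the size scaled to binary units (B, KiB, MiB, ...), e.g. "1.5 GiB"
  std::cout << rmm::detail::bytes{std::size_t{3} << 29} << '\n';
}
```
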
diff --git a/include/rmm/mr/device/callback_memory_resource.hpp b/include/rmm/mr/device/callback_memory_resource.hpp
index 1937e7ef0..2d9695be2 100644
--- a/include/rmm/mr/device/callback_memory_resource.hpp
+++ b/include/rmm/mr/device/callback_memory_resource.hpp
@@ -90,12 +90,14 @@ class callback_memory_resource final : public device_memory_resource {
{
}
- callback_memory_resource() = delete;
- ~callback_memory_resource() override = default;
- callback_memory_resource(callback_memory_resource const&) = delete;
- callback_memory_resource& operator=(callback_memory_resource const&) = delete;
- callback_memory_resource(callback_memory_resource&&) noexcept = default;
- callback_memory_resource& operator=(callback_memory_resource&&) noexcept = default;
+ callback_memory_resource() = delete;
+ ~callback_memory_resource() override = default;
+ callback_memory_resource(callback_memory_resource const&) = delete;
+ callback_memory_resource& operator=(callback_memory_resource const&) = delete;
+ callback_memory_resource(callback_memory_resource&&) noexcept =
+ default; ///< @default_move_constructor
+ callback_memory_resource& operator=(callback_memory_resource&&) noexcept =
+ default; ///< @default_move_assignment{callback_memory_resource}
private:
void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
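
A sketch of constructing a `callback_memory_resource`, here simply forwarding to another resource through the `void*` user arguments (callback signatures as declared in this header):

```cpp
#include <rmm/mr/device/callback_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

void callback_mr_example()
{
  auto* upstream = rmm::mr::get_current_device_resource();

  auto allocate = [](std::size_t bytes, rmm::cuda_stream_view stream, void* arg) -> void* {
    return static_cast<rmm::mr::device_memory_resource*>(arg)->allocate(bytes, stream);
  };
  auto deallocate = [](void* ptr, std::size_t bytes, rmm::cuda_stream_view stream, void* arg) {
    static_cast<rmm::mr::device_memory_resource*>(arg)->deallocate(ptr, bytes, stream);
  };

  rmm::mr::callback_memory_resource mr{allocate, deallocate, upstream, upstream};
}
```
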
diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp
index d41eae63e..329d8f29a 100644
--- a/include/rmm/mr/device/cuda_async_memory_resource.hpp
+++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp
@@ -98,7 +98,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
RMM_EXPECTS(rmm::detail::async_alloc::is_export_handle_type_supported(pool_props.handleTypes),
"Requested IPC memory handle type not supported");
pool_props.location.type = cudaMemLocationTypeDevice;
- pool_props.location.id = rmm::detail::current_device().value();
+ pool_props.location.id = rmm::get_current_cuda_device().value();
cudaMemPool_t cuda_pool_handle{};
RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
pool_ = cuda_async_view_memory_resource{cuda_pool_handle};
diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp
index 191e4741d..c685cd75f 100644
--- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp
+++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp
@@ -60,7 +60,7 @@ class cuda_async_view_memory_resource final : public device_memory_resource {
}()}
{
// Check if cudaMallocAsync Memory pool supported
- auto const device = rmm::detail::current_device();
+ auto const device = rmm::get_current_cuda_device();
int cuda_pool_supported{};
auto result =
cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value());
@@ -77,11 +77,15 @@ class cuda_async_view_memory_resource final : public device_memory_resource {
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
#endif
- cuda_async_view_memory_resource() = default;
- cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) = default;
- cuda_async_view_memory_resource(cuda_async_view_memory_resource&&) = default;
- cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource const&) = default;
- cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) = default;
+ cuda_async_view_memory_resource() = default;
+ cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) =
+ default; ///< @default_copy_constructor
+ cuda_async_view_memory_resource(cuda_async_view_memory_resource&&) =
+ default; ///< @default_move_constructor
+ cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource const&) =
+ default; ///< @default_copy_assignment{cuda_async_view_memory_resource}
+ cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) =
+ default; ///< @default_move_assignment{cuda_async_view_memory_resource}
/**
* @brief Query whether the resource supports use of non-null CUDA streams for
diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp
index 7a5385f4e..b0bf9ae09 100644
--- a/include/rmm/mr/device/cuda_memory_resource.hpp
+++ b/include/rmm/mr/device/cuda_memory_resource.hpp
@@ -29,12 +29,14 @@ namespace rmm::mr {
*/
class cuda_memory_resource final : public device_memory_resource {
public:
- cuda_memory_resource() = default;
- ~cuda_memory_resource() override = default;
- cuda_memory_resource(cuda_memory_resource const&) = default;
- cuda_memory_resource(cuda_memory_resource&&) = default;
- cuda_memory_resource& operator=(cuda_memory_resource const&) = default;
- cuda_memory_resource& operator=(cuda_memory_resource&&) = default;
+ cuda_memory_resource() = default;
+ ~cuda_memory_resource() override = default;
+ cuda_memory_resource(cuda_memory_resource const&) = default; ///< @default_copy_constructor
+ cuda_memory_resource(cuda_memory_resource&&) = default; ///< @default_move_constructor
+ cuda_memory_resource& operator=(cuda_memory_resource const&) =
+ default; ///< @default_copy_assignment{cuda_memory_resource}
+ cuda_memory_resource& operator=(cuda_memory_resource&&) =
+ default; ///< @default_move_assignment{cuda_memory_resource}
/**
* @brief Query whether the resource supports use of non-null CUDA streams for
diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
index 53575e5ce..f071717c0 100644
--- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
+++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
@@ -15,15 +15,16 @@
*/
#pragma once
+#include <rmm/cuda_device.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/aligned.hpp>
 #include <rmm/detail/cuda_util.hpp>
 #include <rmm/detail/error.hpp>
-#include <cuda_runtime_api.h>
-
 #include <rmm/mr/device/device_memory_resource.hpp>
+#include <cuda_runtime_api.h>
+
 #include <cstddef>
 #include <map>
 #include <mutex>
@@ -288,17 +289,25 @@ class stream_ordered_memory_resource : public crtp, public device_
stream_event_pair get_event(cuda_stream_view stream)
{
if (stream.is_per_thread_default()) {
- // Create a thread-local shared event wrapper. Shared pointers in the thread and in each MR
- // instance ensures it is destroyed cleaned up only after all are finished with it.
- thread_local auto event_tls = std::make_shared<event_wrapper>();
- default_stream_events.insert(event_tls);
- return stream_event_pair{stream.value(), event_tls->event};
+ // Create a thread-local shared event wrapper for each device. Shared pointers in the thread
+ // and in each MR instance ensure the wrappers are destroyed only after all are finished
+ // with them.
+ thread_local std::vector<std::shared_ptr<event_wrapper>> events_tls(
+ rmm::get_num_cuda_devices());
+ auto event = [&, device_id = this->device_id_]() {
+ if (events_tls[device_id.value()]) { return events_tls[device_id.value()]->event; }
+
+ auto event = std::make_shared<event_wrapper>();
+ this->default_stream_events.insert(event);
+ return (events_tls[device_id.value()] = std::move(event))->event;
+ }();
+ return stream_event_pair{stream.value(), event};
}
// We use cudaStreamLegacy as the event map key for the default stream for consistency between
// PTDS and non-PTDS mode. In PTDS mode, the cudaStreamLegacy map key will only exist if the
// user explicitly passes it, so it is used as the default location for the free list
- // at construction. For consistency, the same key is used for null stream free lists in non-PTDS
- // mode.
+ // at construction. For consistency, the same key is used for null stream free lists in
+ // non-PTDS mode.
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
auto* const stream_to_store = stream.is_default() ? cudaStreamLegacy : stream.value();
@@ -496,11 +505,13 @@ class stream_ordered_memory_resource : public crtp, public device_
// bidirectional mapping between non-default streams and events
std::unordered_map<cudaStream_t, stream_event_pair> stream_events_;
- // shared pointers to events keeps the events alive as long as either the thread that created them
- // or the MR that is using them exists.
+ // shared pointers to events keeps the events alive as long as either the thread that created
+ // them or the MR that is using them exists.
std::set<std::shared_ptr<event_wrapper>> default_stream_events;
std::mutex mtx_; // mutex for thread-safe access
-}; // namespace detail
+
+ rmm::cuda_device_id device_id_{rmm::get_current_cuda_device()};
+}; // namespace detail
} // namespace rmm::mr::detail
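
Because the resource now records `device_id_` at construction, each instance must be created while its device is current; the per-device thread-local events above are keyed the same way. A sketch of setting up one pool per device (types and helpers as introduced elsewhere in this diff):

```cpp
#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <memory>
#include <vector>

void per_device_pools()
{
  using pool_mr = rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>;
  static rmm::mr::cuda_memory_resource upstream;
  static std::vector<std::unique_ptr<pool_mr>> pools;

  for (int i = 0; i < rmm::get_num_cuda_devices(); ++i) {
    rmm::cuda_device_id const id{i};
    rmm::cuda_set_device_raii scope{id};  // pool captures this device on construction
    pools.push_back(std::make_unique<pool_mr>(&upstream));
    rmm::mr::set_per_device_resource(id, pools.back().get());
  }
}
```
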
diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp
index 4778d9bda..5f511e393 100644
--- a/include/rmm/mr/device/device_memory_resource.hpp
+++ b/include/rmm/mr/device/device_memory_resource.hpp
@@ -81,12 +81,15 @@ namespace rmm::mr {
*/
class device_memory_resource {
public:
- device_memory_resource() = default;
- virtual ~device_memory_resource() = default;
- device_memory_resource(device_memory_resource const&) = default;
- device_memory_resource& operator=(device_memory_resource const&) = default;
- device_memory_resource(device_memory_resource&&) noexcept = default;
- device_memory_resource& operator=(device_memory_resource&&) noexcept = default;
+ device_memory_resource() = default;
+ virtual ~device_memory_resource() = default;
+ device_memory_resource(device_memory_resource const&) = default; ///< @default_copy_constructor
+ device_memory_resource(device_memory_resource&&) noexcept =
+ default; ///< @default_move_constructor
+ device_memory_resource& operator=(device_memory_resource const&) =
+ default; ///< @default_copy_assignment{device_memory_resource}
+ device_memory_resource& operator=(device_memory_resource&&) noexcept =
+ default; ///< @default_move_assignment{device_memory_resource}
/**
* @brief Allocates memory of size at least \p bytes.
@@ -121,7 +124,7 @@ class device_memory_resource {
*
* @throws Nothing.
*
- * @param p Pointer to be deallocated
+ * @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the `allocate` call that returned `ptr`.
* @param stream Stream on which to perform deallocation
diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
index 4a46cb660..48f0513d8 100644
--- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
+++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
@@ -109,14 +109,13 @@ class failure_callback_resource_adaptor final : public device_memory_resource {
~failure_callback_resource_adaptor() override = default;
failure_callback_resource_adaptor(failure_callback_resource_adaptor const&) = delete;
failure_callback_resource_adaptor& operator=(failure_callback_resource_adaptor const&) = delete;
- failure_callback_resource_adaptor(failure_callback_resource_adaptor&&) noexcept = default;
+ failure_callback_resource_adaptor(failure_callback_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
failure_callback_resource_adaptor& operator=(failure_callback_resource_adaptor&&) noexcept =
- default;
+ default; ///< @default_move_assignment{failure_callback_resource_adaptor}
/**
- * @brief Return pointer to the upstream resource.
- *
- * @return Upstream* Pointer to the upstream resource.
+ * @briefreturn{Pointer to the upstream resource}
*/
Upstream* get_upstream() const noexcept { return upstream_; }
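
A sketch of the retry pattern this adaptor enables: the callback returns `true` to ask for one more attempt and `false` to let the exception propagate (callback signature as declared in this header):

```cpp
#include <rmm/mr/device/failure_callback_resource_adaptor.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

bool retry_once(std::size_t /*bytes*/, void* arg)
{
  bool& retried = *static_cast<bool*>(arg);
  if (!retried) {
    retried = true;
    return true;  // e.g. free cached memory here, then retry the allocation
  }
  return false;  // give up; the original exception propagates
}

void failure_callback_example()
{
  bool retried{false};
  rmm::mr::failure_callback_resource_adaptor<rmm::mr::device_memory_resource> mr{
    rmm::mr::get_current_device_resource(), retry_once, &retried};
}
```
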
diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp
index 4954415c3..a24ac6ddd 100644
--- a/include/rmm/mr/device/fixed_size_memory_resource.hpp
+++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp
@@ -50,10 +50,10 @@ class fixed_size_memory_resource
friend class detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
detail::fixed_size_free_list>;
- // A block is the fixed size this resource alloates
- static constexpr std::size_t default_block_size = 1 << 20; // 1 MiB
- // This is the number of blocks that the pool starts out with, and also the number of
- // blocks by which the pool grows when all of its current blocks are allocated
+ static constexpr std::size_t default_block_size = 1 << 20; ///< Default allocation block size
+
+ /// The number of blocks that the pool starts out with, and also the number of
+ /// blocks by which the pool grows when all of its current blocks are allocated
static constexpr std::size_t default_blocks_to_preallocate = 128;
/**
@@ -121,11 +121,11 @@ class fixed_size_memory_resource
[[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; }
protected:
- using free_list = detail::fixed_size_free_list;
- using block_type = free_list::block_type;
+ using free_list = detail::fixed_size_free_list; ///< The free list type
+ using block_type = free_list::block_type; ///< The type of block managed by the free list
using typename detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
detail::fixed_size_free_list>::split_block;
- using lock_guard = std::lock_guard<std::mutex>;
+ using lock_guard = std::lock_guard<std::mutex>; ///< Type of lock used to synchronize access
/**
* @brief Get the (fixed) size of allocations supported by this memory resource
@@ -142,6 +142,7 @@ class fixed_size_memory_resource
* strategy (see `size_to_grow()`).
*
* @param size The minimum size to allocate
+ * @param blocks The set of blocks from which to allocate
* @param stream The stream on which the memory is to be used.
* @return block_type The allocated block
*/
@@ -154,7 +155,6 @@ class fixed_size_memory_resource
/**
* @brief Allocate blocks from upstream to expand the suballocation pool.
*
- * @param size The minimum size to allocate
* @param stream The stream on which the memory is to be used.
* @return block_type The allocated block
*/
@@ -182,7 +182,6 @@ class fixed_size_memory_resource
*
* @param block The block to allocate from.
* @param size The size in bytes of the requested allocation.
- * @param stream_event The stream and associated event on which the allocation will be used.
* @return A pair comprising the allocated pointer and any unallocated remainder of the input
* block.
*/
@@ -196,7 +195,6 @@ class fixed_size_memory_resource
*
* @param ptr The pointer to the memory to free.
* @param size The size of the memory to free. Must be equal to the original allocation size.
- * @param stream The stream-event pair for the stream on which the memory was last used.
* @return The (now freed) block associated with `ptr`. The caller is expected to return the block
* to the pool.
*/
@@ -217,7 +215,8 @@ class fixed_size_memory_resource
* @param stream the stream being executed on
* @return std::pair with available and free memory for resource
*/
- [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view) const override
+ [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
+ [[maybe_unused]] cuda_stream_view stream) const override
{
return std::make_pair(0, 0);
}
@@ -272,7 +271,7 @@ class fixed_size_memory_resource
}
private:
- Upstream* upstream_mr_; // The resource from which to allocate new blocks
+ Upstream* upstream_mr_; // The resource from which to allocate new blocks
std::size_t const block_size_; // size of blocks this MR allocates
std::size_t const upstream_chunk_size_; // size of chunks allocated from heap MR
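
A sketch showing the fixed-block behavior documented above (default block size and preallocation count as defined in this header):

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/fixed_size_memory_resource.hpp>

void fixed_size_example()
{
  rmm::mr::cuda_memory_resource upstream;

  // Every allocation is served from identical 1 MiB blocks; 128 are preallocated
  rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream};

  void* ptr = mr.allocate(4096);  // still consumes one full block
  mr.deallocate(ptr, 4096);
}
```
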
diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp
index 6d1843e82..895c404b0 100644
--- a/include/rmm/mr/device/limiting_resource_adaptor.hpp
+++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp
@@ -44,7 +44,8 @@ class limiting_resource_adaptor final : public device_memory_resource {
* @throws `rmm::logic_error` if `upstream == nullptr`
*
* @param upstream The resource used for allocating/deallocating device memory
- * @param allocation_limit Maximum memory allowed for this allocator.
+ * @param allocation_limit Maximum memory allowed for this allocator
+ * @param alignment Alignment in bytes for the start of each allocated buffer
*/
limiting_resource_adaptor(Upstream* upstream,
std::size_t allocation_limit,
@@ -57,17 +58,17 @@ class limiting_resource_adaptor final : public device_memory_resource {
RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
}
- limiting_resource_adaptor() = delete;
- ~limiting_resource_adaptor() override = default;
- limiting_resource_adaptor(limiting_resource_adaptor const&) = delete;
- limiting_resource_adaptor(limiting_resource_adaptor&&) noexcept = default;
- limiting_resource_adaptor& operator=(limiting_resource_adaptor const&) = delete;
- limiting_resource_adaptor& operator=(limiting_resource_adaptor&&) noexcept = default;
+ limiting_resource_adaptor() = delete;
+ ~limiting_resource_adaptor() override = default;
+ limiting_resource_adaptor(limiting_resource_adaptor const&) = delete;
+ limiting_resource_adaptor(limiting_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
+ limiting_resource_adaptor& operator=(limiting_resource_adaptor const&) = delete;
+ limiting_resource_adaptor& operator=(limiting_resource_adaptor&&) noexcept =
+ default; ///< @default_move_assignment{limiting_resource_adaptor}
/**
- * @brief Return pointer to the upstream resource.
- *
- * @return Upstream* Pointer to the upstream resource.
+ * @briefreturn{Pointer to the upstream resource}
*/
[[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; }
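
Usage is a thin wrapper around the upstream resource; a sketch with a 1 GiB cap (allocations past the limit fail with an exception):

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/limiting_resource_adaptor.hpp>

void limiting_example()
{
  rmm::mr::cuda_memory_resource upstream;

  // At most 1 GiB of outstanding allocations through this resource
  rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource> mr{&upstream,
                                                                       std::size_t{1} << 30};

  void* ptr = mr.allocate(std::size_t{1} << 20);
  mr.deallocate(ptr, std::size_t{1} << 20);
}
```
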
diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp
index 71487705a..0ff9e950b 100644
--- a/include/rmm/mr/device/logging_resource_adaptor.hpp
+++ b/include/rmm/mr/device/logging_resource_adaptor.hpp
@@ -98,6 +98,20 @@ class logging_resource_adaptor final : public device_memory_resource {
init_logger(auto_flush);
}
+ /**
+ * @brief Construct a new logging resource adaptor using `upstream` to satisfy
+ * allocation requests and logging information about each allocation/free to
+ * the given logging sinks.
+ *
+ * The log output will be written using CSV formatting.
+ *
+ * @throws `rmm::logic_error` if `upstream == nullptr`
+ *
+ * @param upstream The resource used for allocating/deallocating device memory
+ * @param sinks A list of logging sinks to which log output will be written.
+ * @param auto_flush If true, flushes the log for every (de)allocation. Warning, this will degrade
+ * performance.
+ */
logging_resource_adaptor(Upstream* upstream,
spdlog::sinks_init_list sinks,
bool auto_flush = false)
@@ -108,12 +122,14 @@ class logging_resource_adaptor final : public device_memory_resource {
init_logger(auto_flush);
}
- logging_resource_adaptor() = delete;
- ~logging_resource_adaptor() override = default;
- logging_resource_adaptor(logging_resource_adaptor const&) = delete;
- logging_resource_adaptor& operator=(logging_resource_adaptor const&) = delete;
- logging_resource_adaptor(logging_resource_adaptor&&) noexcept = default;
- logging_resource_adaptor& operator=(logging_resource_adaptor&&) noexcept = default;
+ logging_resource_adaptor() = delete;
+ ~logging_resource_adaptor() override = default;
+ logging_resource_adaptor(logging_resource_adaptor const&) = delete;
+ logging_resource_adaptor& operator=(logging_resource_adaptor const&) = delete;
+ logging_resource_adaptor(logging_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
+ logging_resource_adaptor& operator=(logging_resource_adaptor&&) noexcept =
+ default; ///< @default_move_assignment{logging_resource_adaptor}
/**
* @brief Return pointer to the upstream resource.
@@ -301,8 +317,8 @@ class logging_resource_adaptor final : public device_memory_resource {
std::shared_ptr<spdlog::logger> logger_; ///< spdlog logger object
- Upstream* upstream_; ///< The upstream resource used for satisfying
- ///< allocation requests
+ Upstream* upstream_; ///< The upstream resource used for satisfying
+ ///< allocation requests
};
/**
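
A sketch of the sink-based constructor documented above, writing CSV records to a file through a standard spdlog sink (assumes spdlog's `basic_file_sink_mt`, which RMM already depends on):

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/logging_resource_adaptor.hpp>

#include <spdlog/sinks/basic_file_sink.h>

#include <memory>

void logging_example()
{
  rmm::mr::cuda_memory_resource upstream;
  auto sink = std::make_shared<spdlog::sinks::basic_file_sink_mt>("rmm_log.csv");

  rmm::mr::logging_resource_adaptor<rmm::mr::cuda_memory_resource> mr{
    &upstream, {sink}, /*auto_flush=*/true};

  void* ptr = mr.allocate(256);
  mr.deallocate(ptr, 256);  // both events are logged as CSV rows
}
```
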
diff --git a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp
index 50317720a..4a0f7701a 100644
--- a/include/rmm/mr/device/managed_memory_resource.hpp
+++ b/include/rmm/mr/device/managed_memory_resource.hpp
@@ -29,12 +29,14 @@ namespace rmm::mr {
*/
class managed_memory_resource final : public device_memory_resource {
public:
- managed_memory_resource() = default;
- ~managed_memory_resource() override = default;
- managed_memory_resource(managed_memory_resource const&) = default;
- managed_memory_resource(managed_memory_resource&&) = default;
- managed_memory_resource& operator=(managed_memory_resource const&) = default;
- managed_memory_resource& operator=(managed_memory_resource&&) = default;
+ managed_memory_resource() = default;
+ ~managed_memory_resource() override = default;
+ managed_memory_resource(managed_memory_resource const&) = default; ///< @default_copy_constructor
+ managed_memory_resource(managed_memory_resource&&) = default; ///< @default_move_constructor
+ managed_memory_resource& operator=(managed_memory_resource const&) =
+ default; ///< @default_copy_assignment{managed_memory_resource}
+ managed_memory_resource& operator=(managed_memory_resource&&) =
+ default; ///< @default_move_assignment{managed_memory_resource}
/**
* @brief Query whether the resource supports use of non-null streams for
diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp
index d7c7b8c46..eca0b3207 100644
--- a/include/rmm/mr/device/owning_wrapper.hpp
+++ b/include/rmm/mr/device/owning_wrapper.hpp
@@ -73,7 +73,8 @@ auto make_resource(std::tuple...> const& upstreams, A
template <typename Resource, typename... Upstreams>
class owning_wrapper : public device_memory_resource {
public:
- using upstream_tuple = std::tuple<std::shared_ptr<Upstreams>...>;
+ using upstream_tuple =
+ std::tuple<std::shared_ptr<Upstreams>...>; ///< Tuple of upstream memory resources
/**
* @brief Constructs the wrapped resource using the provided upstreams and any additional
@@ -116,14 +117,12 @@ class owning_wrapper : public device_memory_resource {
}
/**
- * @brief Returns a constant reference to the wrapped resource.
- *
+ * @briefreturn{A constant reference to the wrapped resource}
*/
[[nodiscard]] Resource const& wrapped() const noexcept { return *wrapped_; }
/**
- * @brief Returns reference to the wrapped resource.
- *
+ * @briefreturn{A reference to the wrapped resource}
*/
[[nodiscard]] Resource& wrapped() noexcept { return *wrapped_; }
@@ -136,9 +135,7 @@ class owning_wrapper : public device_memory_resource {
}
/**
- * @brief Query whether the resource supports the get_mem_info API.
- *
- * @return true if the wrapped resource supports get_mem_info, false otherwise.
+ * @briefreturn{true if the wrapped resource supports get_mem_info, false otherwise}
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override
{
diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp
index 4ddbd874a..aa7217758 100644
--- a/include/rmm/mr/device/per_device_resource.hpp
+++ b/include/rmm/mr/device/per_device_resource.hpp
@@ -88,13 +88,19 @@ inline device_memory_resource* initial_resource()
return &mr;
}
+/**
+ * @briefreturn{Reference to the mutex guarding the device id -> resource map}
+ */
inline std::mutex& map_lock()
{
static std::mutex map_lock;
return map_lock;
}
-// Must have default visibility, see: https://github.com/rapidsai/rmm/issues/826
+// This symbol must have default visibility, see: https://github.com/rapidsai/rmm/issues/826
+/**
+ * @briefreturn{Reference to the map from device id -> resource}
+ */
RMM_EXPORT inline auto& get_map()
{
static std::map<cuda_device_id::value_type, device_memory_resource*> device_id_to_resource;
@@ -121,7 +127,7 @@ RMM_EXPORT inline auto& get_map()
* undefined if used while the active CUDA device is a different device from the one that was active
* when the device_memory_resource was created.
*
- * @param id The id of the target device
+ * @param device_id The id of the target device
* @return Pointer to the current `device_memory_resource` for device `device_id`
*/
inline device_memory_resource* get_per_device_resource(cuda_device_id device_id)
@@ -156,7 +162,7 @@ inline device_memory_resource* get_per_device_resource(cuda_device_id device_id)
* undefined if used while the active CUDA device is a different device from the one that was active
* when the device_memory_resource was created.
*
- * @param id The id of the target device
+ * @param device_id The id of the target device
* @param new_mr If not `nullptr`, pointer to new `device_memory_resource` to use as new resource
* for `device_id`
* @return Pointer to the previous memory resource for `device_id`
@@ -196,7 +202,7 @@ inline device_memory_resource* set_per_device_resource(cuda_device_id device_id,
*/
inline device_memory_resource* get_current_device_resource()
{
- return get_per_device_resource(rmm::detail::current_device());
+ return get_per_device_resource(rmm::get_current_cuda_device());
}
/**
@@ -225,6 +231,6 @@ inline device_memory_resource* get_current_device_resource()
*/
inline device_memory_resource* set_current_device_resource(device_memory_resource* new_mr)
{
- return set_per_device_resource(rmm::detail::current_device(), new_mr);
+ return set_per_device_resource(rmm::get_current_cuda_device(), new_mr);
}
} // namespace rmm::mr
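
Putting the accessors above together, the usual pattern for installing a custom default resource (a sketch; the statics keep the resources alive for the lifetime of the program, as required):

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

void install_pool_as_default()
{
  static rmm::mr::cuda_memory_resource upstream;
  static rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream};

  // All default-resource allocations on the active device now use the pool
  auto* previous = rmm::mr::set_current_device_resource(&pool);
  // ... work ...
  rmm::mr::set_current_device_resource(previous);  // restore on teardown
}
```
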
diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp
index 5c87ef7f7..a52ec14d1 100644
--- a/include/rmm/mr/device/polymorphic_allocator.hpp
+++ b/include/rmm/mr/device/polymorphic_allocator.hpp
@@ -43,7 +43,7 @@ namespace rmm::mr {
template <typename T>
class polymorphic_allocator {
public:
- using value_type = T;
+ using value_type = T; ///< T, the value type of objects allocated by this allocator
/**
* @brief Construct a `polymorphic_allocator` using the return value of
* `rmm::mr::get_current_device_resource()` as the underlying memory resource.
@@ -148,7 +148,9 @@ bool operator!=(polymorphic_allocator const& lhs, polymorphic_allocator co
template <typename Allocator>
class stream_allocator_adaptor {
public:
- using value_type = typename std::allocator_traits<Allocator>::value_type;
+ using value_type =
+ typename std::allocator_traits<Allocator>::value_type; ///< The value type of objects allocated
+ ///< by this allocator
stream_allocator_adaptor() = delete;
@@ -187,8 +189,8 @@ class stream_allocator_adaptor {
*/
template <typename T>
struct rebind {
- using other =
- stream_allocator_adaptor<typename std::allocator_traits<Allocator>::template rebind_alloc<T>>;
+ using other = stream_allocator_adaptor<typename std::allocator_traits<Allocator>::template rebind_alloc<T>>; ///< The type to bind to
};
/**
@@ -212,14 +214,12 @@ class stream_allocator_adaptor {
void deallocate(value_type* ptr, std::size_t num) { alloc_.deallocate(ptr, num, stream()); }
/**
- * @brief Returns the underlying stream on which calls to the underlying allocator are made.
- *
+ * @briefreturn{The stream on which calls to the underlying allocator are made}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return stream_; }
/**
- * @brief Returns the underlying stream-ordered allocator
- *
+ * @briefreturn{The underlying allocator}
*/
[[nodiscard]] Allocator underlying_allocator() const noexcept { return alloc_; }
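
A sketch binding a stream to a stream-ordered allocator so it satisfies the standard `Allocator` requirements (using the `make_stream_allocator_adaptor` helper from this header):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

void adaptor_example()
{
  rmm::cuda_stream stream;
  rmm::mr::polymorphic_allocator<int> alloc;  // uses the current device resource

  auto adapted = rmm::mr::make_stream_allocator_adaptor(alloc, stream.view());

  int* ptr = adapted.allocate(100);  // allocation happens on `stream`
  adapted.deallocate(ptr, 100);
}
```
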
diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp
index 8955fd958..703ce8ea7 100644
--- a/include/rmm/mr/device/pool_memory_resource.hpp
+++ b/include/rmm/mr/device/pool_memory_resource.hpp
@@ -143,11 +143,11 @@ class pool_memory_resource final
[[nodiscard]] std::size_t pool_size() const noexcept { return current_pool_size_; }
protected:
- using free_list = detail::coalescing_free_list;
- using block_type = free_list::block_type;
+ using free_list = detail::coalescing_free_list; ///< The free list implementation
+ using block_type = free_list::block_type; ///< The type of block returned by the free list
using typename detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
detail::coalescing_free_list>::split_block;
- using lock_guard = std::lock_guard<std::mutex>;
+ using lock_guard = std::lock_guard<std::mutex>; ///< Type of lock used to synchronize access
/**
* @brief Get the maximum size of allocations supported by this memory resource
@@ -306,7 +306,6 @@ class pool_memory_resource final
*
* @param block The block to allocate from.
* @param size The size in bytes of the requested allocation.
- * @param stream_event The stream and associated event on which the allocation will be used.
* @return A pair comprising the allocated pointer and any unallocated remainder of the input
* block.
*/
@@ -329,7 +328,6 @@ class pool_memory_resource final
*
* @param ptr The pointer to the memory to free.
* @param size The size of the memory to free. Must be equal to the original allocation size.
- * @param stream The stream-event pair for the stream on which the memory was last used.
* @return The (now freed) block associated with `ptr`. The caller is expected to return the block
* to the pool.
*/
diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp
index c3fe3010f..7af75593d 100644
--- a/include/rmm/mr/device/statistics_resource_adaptor.hpp
+++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp
@@ -40,17 +40,24 @@ template
class statistics_resource_adaptor final : public device_memory_resource {
public:
// can be a std::shared_mutex once C++17 is adopted
- using read_lock_t = std::shared_lock<std::shared_timed_mutex>;
- using write_lock_t = std::unique_lock<std::shared_timed_mutex>;
-
+ using read_lock_t =
+ std::shared_lock<std::shared_timed_mutex>; ///< Type of lock used to synchronize read access
+ using write_lock_t =
+ std::unique_lock<std::shared_timed_mutex>; ///< Type of lock used to synchronize write access
/**
* @brief Utility struct for counting the current, peak, and total value of a number
*/
struct counter {
- int64_t value{0}; // Current value
- int64_t peak{0}; // Max value of `value`
- int64_t total{0}; // Sum of all added values
+ int64_t value{0}; ///< Current value
+ int64_t peak{0}; ///< Max value of `value`
+ int64_t total{0}; ///< Sum of all added values
+ /**
+ * @brief Add `val` to the current value and update the peak value if necessary
+ *
+ * @param val Value to add
+ * @return Reference to this object
+ */
counter& operator+=(int64_t val)
{
value += val;
@@ -59,6 +66,12 @@ class statistics_resource_adaptor final : public device_memory_resource {
return *this;
}
+ /**
+ * @brief Subtract `val` from the current value and update the peak value if necessary
+ *
+ * @param val Value to subtract
+ * @return Reference to this object
+ */
counter& operator-=(int64_t val)
{
value -= val;
@@ -79,17 +92,17 @@ class statistics_resource_adaptor final : public device_memory_resource {
RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
}
- statistics_resource_adaptor() = delete;
- ~statistics_resource_adaptor() override = default;
- statistics_resource_adaptor(statistics_resource_adaptor const&) = delete;
- statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete;
- statistics_resource_adaptor(statistics_resource_adaptor&&) noexcept = default;
- statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) noexcept = default;
+ statistics_resource_adaptor() = delete;
+ ~statistics_resource_adaptor() override = default;
+ statistics_resource_adaptor(statistics_resource_adaptor const&) = delete;
+ statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete;
+ statistics_resource_adaptor(statistics_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
+ statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) noexcept =
+ default; ///< @default_move_assignment{statistics_resource_adaptor}
/**
- * @brief Return pointer to the upstream resource.
- *
- * @return Upstream* Pointer to the upstream resource.
+ * @briefreturn{Pointer to the upstream resource}
*/
Upstream* get_upstream() const noexcept { return upstream_; }
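
The counters above are queried through the adaptor's accessors; a sketch (assuming `get_bytes_counter()` as in RMM's statistics adaptor API):

```cpp
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/statistics_resource_adaptor.hpp>

void statistics_example()
{
  auto* upstream = rmm::mr::get_current_device_resource();
  rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource> mr{upstream};

  void* ptr = mr.allocate(1 << 20);
  auto const bytes = mr.get_bytes_counter();  // value == peak == total == 1 MiB here
  mr.deallocate(ptr, 1 << 20);                // value returns to 0; peak and total remain
}
```
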
diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
index 37053cd07..0b8570985 100644
--- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
+++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
@@ -35,7 +35,7 @@ namespace rmm::mr {
template <typename Upstream>
class thread_safe_resource_adaptor final : public device_memory_resource {
public:
- using lock_t = std::lock_guard<std::mutex>;
+ using lock_t = std::lock_guard<std::mutex>; ///< Type of lock used to synchronize access
/**
* @brief Construct a new thread safe resource adaptor using `upstream` to satisfy
diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
index 2b2aee0dd..de2f25043 100644
--- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp
+++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
@@ -38,9 +38,9 @@ namespace rmm::mr {
template <typename T>
class thrust_allocator : public thrust::device_malloc_allocator<T> {
public:
- using Base = thrust::device_malloc_allocator<T>;
- using pointer = typename Base::pointer;
- using size_type = typename Base::size_type;
+ using Base = thrust::device_malloc_allocator<T>; ///< The base type of this allocator
+ using pointer = typename Base::pointer; ///< The pointer type
+ using size_type = typename Base::size_type; ///< The size type
/**
* @brief Provides the type of a `thrust_allocator` instantiated with another
@@ -50,7 +50,7 @@ class thrust_allocator : public thrust::device_malloc_allocator {
*/
template <typename U>
struct rebind {
- using other = thrust_allocator<U>;
+ using other = thrust_allocator<U>; ///< The type to bind to
};
/**
@@ -113,12 +113,12 @@ class thrust_allocator : public thrust::device_malloc_allocator {
}
/**
- * @brief Returns the device memory resource used by this allocator.
+ * @briefreturn{The device memory resource used by this}
*/
[[nodiscard]] device_memory_resource* resource() const noexcept { return _mr; }
/**
- * @brief Returns the stream used by this allocator.
+ * @briefreturn{The stream used by this allocator}
*/
[[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; }
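
A sketch plugging the allocator into a Thrust container so its storage comes from RMM (Thrust's allocator-taking `device_vector` constructor is assumed):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/mr/device/thrust_allocator_adaptor.hpp>

#include <thrust/device_vector.h>

void thrust_allocator_example()
{
  rmm::cuda_stream stream;

  // 100 zero-initialized ints allocated via the current RMM device resource
  thrust::device_vector<int, rmm::mr::thrust_allocator<int>> vec(
    100, 0, rmm::mr::thrust_allocator<int>(stream.view()));
}
```
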
diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp
index 0bbdc7468..b87218d12 100644
--- a/include/rmm/mr/device/tracking_resource_adaptor.hpp
+++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp
@@ -48,9 +48,10 @@ template <typename Upstream>
class tracking_resource_adaptor final : public device_memory_resource {
public:
// can be a std::shared_mutex once C++17 is adopted
- using read_lock_t = std::shared_lock<std::shared_timed_mutex>;
- using write_lock_t = std::unique_lock<std::shared_timed_mutex>;
-
+ using read_lock_t =
+ std::shared_lock<std::shared_timed_mutex>; ///< Type of lock used to synchronize read access
+ using write_lock_t =
+ std::unique_lock<std::shared_timed_mutex>; ///< Type of lock used to synchronize write access
/**
* @brief Information stored about an allocation. Includes the size
* and a stack trace if the `tracking_resource_adaptor` was initialized
@@ -58,10 +59,16 @@ class tracking_resource_adaptor final : public device_memory_resource {
*
*/
struct allocation_info {
- std::unique_ptr<rmm::detail::stack_trace> strace;
- std::size_t allocation_size;
+ std::unique_ptr<rmm::detail::stack_trace> strace; ///< Stack trace of the allocation
+ std::size_t allocation_size; ///< Size of the allocation
allocation_info() = delete;
+ /**
+ * @brief Construct a new allocation info object
+ *
+ * @param size Size of the allocation
+ * @param capture_stack If true, capture the stack trace for the allocation
+ */
allocation_info(std::size_t size, bool capture_stack)
: strace{[&]() {
return capture_stack ? std::make_unique<rmm::detail::stack_trace>() : nullptr;
@@ -84,17 +91,17 @@ class tracking_resource_adaptor final : public device_memory_resource {
RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
}
- tracking_resource_adaptor() = delete;
- ~tracking_resource_adaptor() override = default;
- tracking_resource_adaptor(tracking_resource_adaptor const&) = delete;
- tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete;
- tracking_resource_adaptor(tracking_resource_adaptor&&) noexcept = default;
- tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) noexcept = default;
+ tracking_resource_adaptor() = delete;
+ ~tracking_resource_adaptor() override = default;
+ tracking_resource_adaptor(tracking_resource_adaptor const&) = delete;
+ tracking_resource_adaptor(tracking_resource_adaptor&&) noexcept =
+ default; ///< @default_move_constructor
+ tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete;
+ tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) noexcept =
+ default; ///< @default_move_assignment{tracking_resource_adaptor}
/**
- * @brief Return pointer to the upstream resource.
- *
- * @return Upstream* Pointer to the upstream resource.
+ * @briefreturn{Pointer to the upstream resource}
*/
Upstream* get_upstream() const noexcept { return upstream_; }
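The tracking adaptor above has a Python counterpart, rmm.mr.TrackingResourceAdaptor. A rough sketch of its use, assuming the capture_stacks flag and the two query methods provided by the bindings:

    import rmm
    import rmm.mr

    # Record every live allocation, with stack traces for leak hunting
    track_mr = rmm.mr.TrackingResourceAdaptor(
        rmm.mr.CudaMemoryResource(), capture_stacks=True
    )
    rmm.mr.set_current_device_resource(track_mr)

    buf = rmm.DeviceBuffer(size=2048)
    print(track_mr.get_allocated_bytes())              # outstanding bytes
    print(track_mr.get_outstanding_allocations_str())  # one entry per live allocation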
diff --git a/include/rmm/mr/host/host_memory_resource.hpp b/include/rmm/mr/host/host_memory_resource.hpp
index c0fe85594..3279bfb3f 100644
--- a/include/rmm/mr/host/host_memory_resource.hpp
+++ b/include/rmm/mr/host/host_memory_resource.hpp
@@ -46,12 +46,14 @@ namespace rmm::mr {
*/
class host_memory_resource {
public:
- host_memory_resource() = default;
- virtual ~host_memory_resource() = default;
- host_memory_resource(host_memory_resource const&) = default;
- host_memory_resource& operator=(host_memory_resource const&) = default;
- host_memory_resource(host_memory_resource&&) noexcept = default;
- host_memory_resource& operator=(host_memory_resource&&) noexcept = default;
+ host_memory_resource() = default;
+ virtual ~host_memory_resource() = default;
+ host_memory_resource(host_memory_resource const&) = default; ///< @default_copy_constructor
+ host_memory_resource(host_memory_resource&&) noexcept = default; ///< @default_move_constructor
+ host_memory_resource& operator=(host_memory_resource const&) =
+ default; ///< @default_copy_assignment{host_memory_resource}
+ host_memory_resource& operator=(host_memory_resource&&) noexcept =
+ default; ///< @default_move_assignment{host_memory_resource}
/**
* @brief Allocates memory on the host of size at least `bytes` bytes.
@@ -84,7 +86,6 @@ class host_memory_resource {
* that was passed to the `allocate` call that returned `ptr`.
* @param alignment Alignment of the allocation. This must be equal to the value of `alignment`
* that was passed to the `allocate` call that returned `ptr`.
- * @param stream Stream on which to perform deallocation
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp
index ffd3345bb..68cccc320 100644
--- a/include/rmm/mr/host/new_delete_resource.hpp
+++ b/include/rmm/mr/host/new_delete_resource.hpp
@@ -30,12 +30,14 @@ namespace rmm::mr {
*/
class new_delete_resource final : public host_memory_resource {
public:
- new_delete_resource() = default;
- ~new_delete_resource() override = default;
- new_delete_resource(new_delete_resource const&) = default;
- new_delete_resource(new_delete_resource&&) = default;
- new_delete_resource& operator=(new_delete_resource const&) = default;
- new_delete_resource& operator=(new_delete_resource&&) = default;
+ new_delete_resource() = default;
+ ~new_delete_resource() override = default;
+ new_delete_resource(new_delete_resource const&) = default; ///< @default_copy_constructor
+ new_delete_resource(new_delete_resource&&) = default; ///< @default_move_constructor
+ new_delete_resource& operator=(new_delete_resource const&) =
+ default; ///< @default_copy_assignment{new_delete_resource}
+ new_delete_resource& operator=(new_delete_resource&&) =
+ default; ///< @default_move_assignment{new_delete_resource}
private:
/**
diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp
index 3e0c5e3e6..fb28ebfb7 100644
--- a/include/rmm/mr/host/pinned_memory_resource.hpp
+++ b/include/rmm/mr/host/pinned_memory_resource.hpp
@@ -24,7 +24,7 @@
namespace rmm::mr {
-/*
+/**
* @brief A `host_memory_resource` that uses `cudaMallocHost` to allocate
* pinned/page-locked host memory.
*
@@ -32,12 +32,14 @@ namespace rmm::mr {
*/
class pinned_memory_resource final : public host_memory_resource {
public:
- pinned_memory_resource() = default;
- ~pinned_memory_resource() override = default;
- pinned_memory_resource(pinned_memory_resource const&) = default;
- pinned_memory_resource(pinned_memory_resource&&) = default;
- pinned_memory_resource& operator=(pinned_memory_resource const&) = default;
- pinned_memory_resource& operator=(pinned_memory_resource&&) = default;
+ pinned_memory_resource() = default;
+ ~pinned_memory_resource() override = default;
+ pinned_memory_resource(pinned_memory_resource const&) = default; ///< @default_copy_constructor
+ pinned_memory_resource(pinned_memory_resource&&) = default; ///< @default_move_constructor
+ pinned_memory_resource& operator=(pinned_memory_resource const&) =
+ default; ///< @default_copy_assignment{pinned_memory_resource}
+ pinned_memory_resource& operator=(pinned_memory_resource&&) =
+ default; ///< @default_move_assignment{pinned_memory_resource}
private:
/**
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index b26a6dd43..3506bf9a9 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -14,11 +14,9 @@
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
-set(rmm_version 23.08.00)
+set(rmm_version 23.10.00)
-file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/RAPIDS.cmake
- ${CMAKE_BINARY_DIR}/RAPIDS.cmake)
-include(${CMAKE_BINARY_DIR}/RAPIDS.cmake)
+include(../fetch_rapids.cmake)
project(
rmm-python
@@ -58,5 +56,7 @@ endif()
include(rapids-cython)
rapids_cython_init()
+add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}")
+
add_subdirectory(rmm/_cuda)
add_subdirectory(rmm/_lib)
diff --git a/python/docs/Makefile b/python/docs/Makefile
index d4bb2cbb9..72d1dcd85 100644
--- a/python/docs/Makefile
+++ b/python/docs/Makefile
@@ -3,7 +3,7 @@
# You can set these variables from the command line, and also
# from the environment for the first two.
-SPHINXOPTS ?=
+SPHINXOPTS = -n -v -W --keep-going
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
diff --git a/python/docs/api.rst b/python/docs/api.rst
index 73cd5dd81..b229d8214 100644
--- a/python/docs/api.rst
+++ b/python/docs/api.rst
@@ -15,6 +15,7 @@ Memory Resources
.. automodule:: rmm.mr
:members:
+ :inherited-members:
:undoc-members:
:show-inheritance:
@@ -28,6 +29,7 @@ Memory Allocators
.. automodule:: rmm.allocators.numba
:members:
+ :inherited-members:
:undoc-members:
:show-inheritance:
diff --git a/python/docs/basics.md b/python/docs/basics.md
index 0c47073c1..997745f00 100644
--- a/python/docs/basics.md
+++ b/python/docs/basics.md
@@ -91,7 +91,7 @@ example, enabling the `ManagedMemoryResource` tells RMM to use
> :warning: The default resource must be set for any device **before**
> allocating any device memory on that device. Setting or changing the
> resource after device allocations have been made can lead to unexpected
-> behaviour or crashes. See [Multiple Devices](#multiple-devices)
+> behaviour or crashes.
As another example, `PoolMemoryResource` allows you to allocate a
large "pool" of device memory up-front. Subsequent allocations will
diff --git a/python/docs/conf.py b/python/docs/conf.py
index f6f3adf43..17c012194 100644
--- a/python/docs/conf.py
+++ b/python/docs/conf.py
@@ -23,9 +23,9 @@
# built documents.
#
# The short X.Y version.
-version = "23.08"
+version = "23.10"
# The full version, including alpha/beta/rc tags.
-release = "23.08.00"
+release = "23.10.00"
# -- General configuration ---------------------------------------------------
@@ -35,6 +35,7 @@
# ones.
extensions = [
+ "sphinxcontrib.jquery",
"sphinx.ext.intersphinx",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
@@ -113,7 +114,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
+html_static_path = []
# -- Options for HTMLHelp output ------------------------------------------
@@ -179,7 +180,10 @@
# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {"https://docs.python.org/": None}
+intersphinx_mapping = {
+ "python": ("https://docs.python.org/3", None),
+ "numba": ("https://numba.readthedocs.io/en/stable", None),
+}
# Config numpydoc
numpydoc_show_inherited_class_members = True
@@ -187,6 +191,11 @@
autoclass_content = "init"
+nitpick_ignore = [
+ ("py:class", "size_t"),
+ ("py:class", "void"),
+]
+
def setup(app):
app.add_js_file("copybutton_pydocs.js")
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 58ac1d5ad..f83e4fec4 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -17,7 +17,7 @@ build-backend = "setuptools.build_meta"
requires = [
"cmake>=3.26.4",
"cuda-python>=11.7.1,<12.0a0",
- "cython>=0.29,<0.30",
+ "cython>=3.0.0",
"ninja",
"scikit-build>=0.13.1",
"setuptools>=61.0.0",
@@ -27,7 +27,7 @@ requires = [
[project]
name = "rmm"
-version = "23.08.00"
+version = "23.10.00"
description = "rmm - RAPIDS Memory Manager"
readme = { file = "README.md", content-type = "text/markdown" }
authors = [
diff --git a/python/rmm/__init__.py b/python/rmm/__init__.py
index a5467251e..52d232dc2 100644
--- a/python/rmm/__init__.py
+++ b/python/rmm/__init__.py
@@ -14,6 +14,15 @@
from rmm import mr
from rmm._lib.device_buffer import DeviceBuffer
+from rmm._lib.logger import (
+ flush_logger,
+ get_flush_level,
+ get_logging_level,
+ logging_level,
+ set_flush_level,
+ set_logging_level,
+ should_log,
+)
from rmm.mr import disable_logging, enable_logging, get_log_filenames
from rmm.rmm import (
RMMError,
@@ -25,15 +34,22 @@
__all__ = [
"DeviceBuffer",
- "RMMError",
"disable_logging",
+ "RMMError",
"enable_logging",
+ "flush_logger",
+ "get_flush_level",
"get_log_filenames",
+ "get_logging_level",
"is_initialized",
+ "logging_level",
"mr",
"register_reinitialize_hook",
"reinitialize",
+ "set_flush_level",
+ "set_logging_level",
+ "should_log",
"unregister_reinitialize_hook",
]
-__version__ = "23.08.00"
+__version__ = "23.10.00"
diff --git a/python/rmm/_cuda/stream.pxd b/python/rmm/_cuda/stream.pxd
index 6aa4e0b24..3c3d3aa6f 100644
--- a/python/rmm/_cuda/stream.pxd
+++ b/python/rmm/_cuda/stream.pxd
@@ -26,8 +26,8 @@ cdef class Stream:
@staticmethod
cdef Stream _from_cudaStream_t(cudaStream_t s, object owner=*)
- cdef cuda_stream_view view(self) nogil except *
- cdef void c_synchronize(self) nogil except *
- cdef bool c_is_default(self) nogil except *
+ cdef cuda_stream_view view(self) except * nogil
+ cdef void c_synchronize(self) except * nogil
+ cdef bool c_is_default(self) except * nogil
cdef void _init_with_new_cuda_stream(self) except *
cdef void _init_from_stream(self, Stream stream) except *
diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx
index d60dde4e1..4795cbb9f 100644
--- a/python/rmm/_cuda/stream.pyx
+++ b/python/rmm/_cuda/stream.pyx
@@ -48,7 +48,7 @@ cdef class Stream:
self._init_from_cupy_stream(obj)
@staticmethod
- cdef Stream _from_cudaStream_t(cudaStream_t s, object owner=None):
+ cdef Stream _from_cudaStream_t(cudaStream_t s, object owner=None) except *:
"""
Construct a Stream from a cudaStream_t.
"""
@@ -57,13 +57,13 @@ cdef class Stream:
obj._owner = owner
return obj
- cdef cuda_stream_view view(self) nogil except *:
+ cdef cuda_stream_view view(self) except * nogil:
"""
Generate a rmm::cuda_stream_view from this Stream instance
"""
return cuda_stream_view(<cudaStream_t>(self._cuda_stream))
- cdef void c_synchronize(self) nogil except *:
+ cdef void c_synchronize(self) except * nogil:
"""
Synchronize the CUDA stream.
This function *must* be called in a `with nogil` block
@@ -77,7 +77,7 @@ cdef class Stream:
with nogil:
self.c_synchronize()
- cdef bool c_is_default(self) nogil except *:
+ cdef bool c_is_default(self) except * nogil:
"""
Check if we are the default CUDA stream
"""
diff --git a/python/rmm/_lib/CMakeLists.txt b/python/rmm/_lib/CMakeLists.txt
index 9e90d7e99..852dd87c4 100644
--- a/python/rmm/_lib/CMakeLists.txt
+++ b/python/rmm/_lib/CMakeLists.txt
@@ -12,10 +12,12 @@
# the License.
# =============================================================================
-set(cython_sources device_buffer.pyx lib.pyx memory_resource.pyx cuda_stream.pyx
+set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx
torch_allocator.pyx)
set(linked_libraries rmm::rmm)
# Build all of the Cython targets
rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}"
CXX)
+# The cdef public functions in this file need to have a C ABI
+target_compile_definitions(torch_allocator PRIVATE CYTHON_EXTERN_C=extern\ "C")
diff --git a/python/rmm/_lib/cuda_stream.pxd b/python/rmm/_lib/cuda_stream.pxd
index 1eed1cefb..e224cf9af 100644
--- a/python/rmm/_lib/cuda_stream.pxd
+++ b/python/rmm/_lib/cuda_stream.pxd
@@ -33,5 +33,5 @@ cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil:
@cython.final
cdef class CudaStream:
cdef unique_ptr[cuda_stream] c_obj
- cdef cudaStream_t value(self) nogil except *
- cdef bool is_valid(self) nogil except *
+ cdef cudaStream_t value(self) except * nogil
+ cdef bool is_valid(self) except * nogil
diff --git a/python/rmm/_lib/cuda_stream.pyx b/python/rmm/_lib/cuda_stream.pyx
index fb35ec11f..0861f0663 100644
--- a/python/rmm/_lib/cuda_stream.pyx
+++ b/python/rmm/_lib/cuda_stream.pyx
@@ -27,8 +27,8 @@ cdef class CudaStream:
def __cinit__(self):
self.c_obj.reset(new cuda_stream())
- cdef cudaStream_t value(self) nogil except *:
+ cdef cudaStream_t value(self) except * nogil:
return self.c_obj.get()[0].value()
- cdef bool is_valid(self) nogil except *:
+ cdef bool is_valid(self) except * nogil:
return self.c_obj.get()[0].is_valid()
diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd
index 364dbb2c0..3d5f29f9a 100644
--- a/python/rmm/_lib/device_buffer.pxd
+++ b/python/rmm/_lib/device_buffer.pxd
@@ -56,7 +56,7 @@ cdef class DeviceBuffer:
@staticmethod
cdef DeviceBuffer c_to_device(const unsigned char[::1] b,
- Stream stream=*)
+ Stream stream=*) except *
cpdef copy_to_host(self, ary=*, Stream stream=*)
cpdef copy_from_host(self, ary, Stream stream=*)
cpdef copy_from_device(self, cuda_ary, Stream stream=*)
diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx
index c2bfd1459..d248d01ab 100644
--- a/python/rmm/_lib/device_buffer.pyx
+++ b/python/rmm/_lib/device_buffer.pyx
@@ -172,7 +172,7 @@ cdef class DeviceBuffer:
@staticmethod
cdef DeviceBuffer c_to_device(const unsigned char[::1] b,
- Stream stream=DEFAULT_STREAM):
+ Stream stream=DEFAULT_STREAM) except *:
"""Calls ``to_device`` function on arguments provided"""
return to_device(b, stream)
@@ -382,7 +382,7 @@ cdef void _copy_async(const void* src,
void* dst,
size_t count,
ccudart.cudaMemcpyKind kind,
- cuda_stream_view stream) nogil except *:
+ cuda_stream_view stream) except * nogil:
"""
Asynchronously copy data between host and/or device pointers.
diff --git a/python/rmm/_lib/logger.pyx b/python/rmm/_lib/logger.pyx
new file mode 100644
index 000000000..029bbdd79
--- /dev/null
+++ b/python/rmm/_lib/logger.pyx
@@ -0,0 +1,260 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+
+from libcpp cimport bool
+
+
+cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil:
+ cpdef enum logging_level "spdlog::level::level_enum":
+ """
+ The debug logging level for RMM.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Valid levels, in decreasing order of verbosity, are TRACE, DEBUG,
+ INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.logging_level.DEBUG
+ <logging_level.DEBUG: 1>
+ >>> rmm.logging_level.DEBUG.value
+ 1
+ >>> rmm.logging_level.DEBUG.name
+ 'DEBUG'
+
+ See Also
+ --------
+ set_logging_level : Set the debug logging level
+ get_logging_level : Get the current debug logging level
+ """
+ TRACE "spdlog::level::trace"
+ DEBUG "spdlog::level::debug"
+ INFO "spdlog::level::info"
+ WARN "spdlog::level::warn"
+ ERR "spdlog::level::err"
+ CRITICAL "spdlog::level::critical"
+ OFF "spdlog::level::off"
+
+
+cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil:
+ cdef cppclass spdlog_logger "spdlog::logger":
+ spdlog_logger() except +
+ void set_level(logging_level level)
+ logging_level level()
+ void flush() except +
+ void flush_on(logging_level level)
+ logging_level flush_level()
+ bool should_log(logging_level msg_level)
+
+
+cdef extern from "rmm/logger.hpp" namespace "rmm" nogil:
+ cdef spdlog_logger& logger() except +
+
+
+def _validate_level_type(level):
+ if not isinstance(level, logging_level):
+ raise TypeError("level must be an instance of the logging_level enum")
+
+
+def should_log(level):
+ """
+ Check if a message at the given level would be logged.
+
+ A message at the given level would be logged if the current debug logging
+ level is set to a level that is at least as verbose as the given level,
+ *and* the RMM module is compiled for a logging level at least as verbose.
+ If these conditions are not both met, this function returns False.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Parameters
+ ----------
+ level : logging_level
+ The debug logging level. Valid values are instances of the
+ ``logging_level`` enum.
+
+ Returns
+ -------
+ should_log : bool
+ True if a message at the given level would be logged, False otherwise.
+
+ Raises
+ ------
+ TypeError
+ If the logging level is not an instance of the ``logging_level`` enum.
+ """
+ _validate_level_type(level)
+ return logger().should_log(level)
+
+
+def set_logging_level(level):
+ """
+ Set the debug logging level.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Parameters
+ ----------
+ level : logging_level
+ The debug logging level. Valid values are instances of the
+ ``logging_level`` enum.
+
+ Raises
+ ------
+ TypeError
+ If the logging level is not an instance of the ``logging_level`` enum.
+
+ See Also
+ --------
+ get_logging_level : Get the current debug logging level.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.set_logging_level(rmm.logging_level.WARN) # set logging level to warn
+ """
+ _validate_level_type(level)
+ logger().set_level(level)
+
+ if not should_log(level):
+ warnings.warn(f"RMM will not log logging_level.{level.name}. This "
+ "may be because the C++ library is compiled for a "
+ "less-verbose logging level.")
+
+
+def get_logging_level():
+ """
+ Get the current debug logging level.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Returns
+ -------
+ level : logging_level
+ The current debug logging level, an instance of the ``logging_level``
+ enum.
+
+ See Also
+ --------
+ set_logging_level : Set the debug logging level.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.get_logging_level() # get current logging level
+ <logging_level.INFO: 2>
+ """
+ return logging_level(logger().level())
+
+
+def flush_logger():
+ """
+ Flush the debug logger. This will cause any buffered log messages to
+ be written to the log file.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ See Also
+ --------
+ set_flush_level : Set the flush level for the debug logger.
+ get_flush_level : Get the current debug logging flush level.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.flush_logger() # flush the logger
+ """
+ logger().flush()
+
+
+def set_flush_level(level):
+ """
+ Set the flush level for the debug logger. Messages of this level or higher
+ will automatically flush to the file.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Parameters
+ ----------
+ level : logging_level
+ The debug logging level. Valid values are instances of the
+ ``logging_level`` enum.
+
+ Raises
+ ------
+ TypeError
+ If the logging level is not an instance of the ``logging_level`` enum.
+
+ See Also
+ --------
+ get_flush_level : Get the current debug logging flush level.
+ flush_logger : Flush the logger.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.set_flush_level(rmm.logging_level.WARN) # set flush level to warn
+ """
+ _validate_level_type(level)
+ logger().flush_on(level)
+
+ if not should_log(level):
+ warnings.warn(f"RMM will not log logging_level.{level.name}. This "
+ "may be because the C++ library is compiled for a "
+ "less-verbose logging level.")
+
+
+def get_flush_level():
+ """
+ Get the current debug logging flush level for the RMM logger. Messages of
+ this level or higher will automatically flush to the file.
+
+ Debug logging prints messages to a log file. See
+ `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+ for more information.
+
+ Returns
+ -------
+ logging_level
+ The current flush level, an instance of the ``logging_level``
+ enum.
+
+ See Also
+ --------
+ set_flush_level : Set the flush level for the logger.
+ flush_logger : Flush the logger.
+
+ Examples
+ --------
+ >>> import rmm
+ >>> rmm.get_flush_level() # get current flush level
+ <logging_level.INFO: 2>
+ """
+ return logging_level(logger().flush_level())
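Taken together, the new logger module gives Python callers runtime control over the RMM spdlog logger. A short end-to-end sketch using only the functions defined above:

    import rmm

    rmm.set_logging_level(rmm.logging_level.DEBUG)
    assert rmm.get_logging_level() == rmm.logging_level.DEBUG

    # Whether DEBUG messages are actually emitted also depends on the
    # compile-time logging level the C++ library was built with, which is
    # why set_logging_level can warn (see should_log).
    if rmm.should_log(rmm.logging_level.DEBUG):
        print("DEBUG messages will reach the log file")

    rmm.set_flush_level(rmm.logging_level.WARN)  # auto-flush WARN and above
    rmm.flush_logger()                           # write out anything buffered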
diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd
index 5bb3746bc..0770fb8ed 100644
--- a/python/rmm/_lib/memory_resource.pxd
+++ b/python/rmm/_lib/memory_resource.pxd
@@ -69,6 +69,9 @@ cdef class CallbackMemoryResource(DeviceMemoryResource):
cdef object _allocate_func
cdef object _deallocate_func
+cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor):
+ pass
+
cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor):
cdef object _log_file_name
cpdef get_file_name(self)
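LimitingResourceAdaptor caps how many bytes the wrapped resource may hand out. A sketch, assuming the Python class mirrors the C++ get_allocated_bytes/get_allocation_limit declared in the next hunk:

    import rmm
    import rmm.mr

    # Fail allocations once 1 MiB is outstanding (the limit is illustrative)
    limited = rmm.mr.LimitingResourceAdaptor(
        rmm.mr.CudaMemoryResource(), allocation_limit=2**20
    )
    rmm.mr.set_current_device_resource(limited)

    buf = rmm.DeviceBuffer(size=512 * 1024)
    print(limited.get_allocated_bytes(), "of", limited.get_allocation_limit())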
diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx
index 774db374a..ce7f45e19 100644
--- a/python/rmm/_lib/memory_resource.pyx
+++ b/python/rmm/_lib/memory_resource.pyx
@@ -14,10 +14,14 @@
import os
import warnings
+# This import is needed for Cython typing in translate_python_except_to_cpp
+# See https://github.com/cython/cython/issues/5589
+from builtins import BaseException
from collections import defaultdict
cimport cython
from cython.operator cimport dereference as deref
+from libc.stddef cimport size_t
from libc.stdint cimport int8_t, int64_t, uintptr_t
from libcpp cimport bool
from libcpp.memory cimport make_unique, unique_ptr
@@ -37,7 +41,7 @@ from rmm._lib.per_device_resource cimport (
# Transparent handle of a C++ exception
ctypedef pair[int, string] CppExcept
-cdef CppExcept translate_python_except_to_cpp(err: BaseException):
+cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept:
"""Translate a Python exception into a C++ exception handle
The returned exception handle can then be thrown by `throw_cpp_except()`,
@@ -160,6 +164,16 @@ cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \
size_t allocation_size,
device_memory_resource* bin_resource) except +
+cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \
+ namespace "rmm::mr" nogil:
+ cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource):
+ limiting_resource_adaptor(
+ Upstream* upstream_mr,
+ size_t allocation_limit) except +
+
+ size_t get_allocated_bytes() except +
+ size_t get_allocation_limit() except +
+
cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \
namespace "rmm::mr" nogil:
cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource):
@@ -213,18 +227,41 @@ cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \
cdef class DeviceMemoryResource:
cdef device_memory_resource* get_mr(self):
+ """Get the underlying C++ memory resource object."""
return self.c_obj.get()
def allocate(self, size_t nbytes):
+ """Allocate ``nbytes`` bytes of memory.
+
+ Parameters
+ ----------
+ nbytes : size_t
+ The size of the allocation in bytes
+ """
return self.c_obj.get().allocate(nbytes)
def deallocate(self, uintptr_t ptr, size_t nbytes):
+ """Deallocate memory pointed to by ``ptr`` of size ``nbytes``.
+
+ Parameters
+ ----------
+ ptr : uintptr_t
+ Pointer to be deallocated
+ nbytes : size_t
+ Size of the allocation in bytes
+ """
self.c_obj.get().deallocate(<void*>(ptr), nbytes)
# See the note about `no_gc_clear` in `device_buffer.pyx`.
@cython.no_gc_clear
cdef class UpstreamResourceAdaptor(DeviceMemoryResource):
+ """Parent class for all memory resources that track an upstream.
+
+ Upstream resource tracking requires maintaining a reference to the upstream
+ mr so that it is kept alive and may be accessed by any downstream resource
+ adaptors.
+ """
def __cinit__(self, DeviceMemoryResource upstream_mr, *args, **kwargs):
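The newly documented allocate/deallocate pair can also be exercised directly from Python; a minimal sketch (the size and resource choice are illustrative):

    import rmm.mr

    mr = rmm.mr.CudaMemoryResource()
    ptr = mr.allocate(256)    # address returned as an integer
    mr.deallocate(ptr, 256)   # size must match the original request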
@@ -526,7 +563,10 @@ cdef void* _allocate_callback_wrapper(
size_t nbytes,
cuda_stream_view stream,
void* ctx
-) nogil:
+ # Note that this function is specifically designed to rethrow Python
+ # exceptions as C++ exceptions when called as a callback from C++, so it is
+ # noexcept from Cython's perspective.
+) noexcept nogil:
cdef CppExcept err
with gil:
try:
@@ -540,7 +580,7 @@ cdef void _deallocate_callback_wrapper(
size_t nbytes,
cuda_stream_view stream,
void* ctx
-) with gil:
+) except * with gil:
(