From 35101f3ed5aeac83b6d18b0f22c2e9ac227dd94a Mon Sep 17 00:00:00 2001
From: Patrick Walton <pcwalton@mimiga.net>
Date: Tue, 14 Jan 2025 13:19:20 -0800
Subject: [PATCH] Use `multi_draw_indirect_count` where available, in
 preparation for two-phase occlusion culling. (#17211)

This commit allows Bevy to use `multi_draw_indirect_count` for drawing
meshes. The `multi_draw_indirect_count` feature works just like
`multi_draw_indirect`, but it takes the number of indirect parameters
from a GPU buffer rather than specifying it on the CPU.

Currently, the CPU constructs the list of indirect draw parameters with
the instance count for each batch set to zero, uploads the resulting
buffer to the GPU, and dispatches a compute shader that bumps the
instance count for each mesh that survives culling. Unfortunately, this
is inefficient when we support `multi_draw_indirect_count`. Draw
commands corresponding to meshes for which all instances were culled
will remain present in the list when calling
`multi_draw_indirect_count`, causing overhead. Proper use of
`multi_draw_indirect_count` requires eliminating these empty draw
commands.

To address this inefficiency, this PR makes Bevy fully construct the
indirect draw commands on the GPU instead of on the CPU. Instead of
writing instance counts to the draw command buffer, the mesh
preprocessing shader now writes them to a separate *indirect metadata
buffer*. A second compute dispatch known as the *build indirect
parameters* shader runs after mesh preprocessing and converts the
indirect draw metadata into actual indirect draw commands for the GPU.
The build indirect parameters shader operates on a batch at a time,
rather than an instance at a time, and as such each thread writes only 0
or 1 indirect draw parameters, simplifying the current logic in
`mesh_preprocessing`, which currently has to have special cases for the
first mesh in each batch. The build indirect parameters shader emits
draw commands in a tightly packed manner, enabling maximally efficient
use of `multi_draw_indirect_count`.

Along the way, this patch switches mesh preprocessing to dispatch one
compute invocation per render phase per view, instead of dispatching one
compute invocation per view. This is preparation for two-phase occlusion
culling, in which we will have two mesh preprocessing stages. In that
scenario, the first mesh preprocessing stage must only process opaque
and alpha tested objects, so the work items must be separated into those
that are opaque or alpha tested and those that aren't. Thus this PR
splits out the work items into a separate buffer for each phase. As this
patch rewrites so much of the mesh preprocessing infrastructure, it was
simpler to just fold the change into this patch instead of deferring it
to the forthcoming occlusion culling PR.

Finally, this patch changes mesh preprocessing so that it runs
separately for indexed and non-indexed meshes. This is because draw
commands for indexed and non-indexed meshes have different sizes and
layouts. *The existing code is actually broken for non-indexed meshes*,
as it attempts to overlay the indirect parameters for non-indexed meshes
on top of those for indexed meshes. Consequently, right now the
parameters will be read incorrectly when multiple non-indexed meshes are
multi-drawn together. *This is a bug fix* and, as with the change to
dispatch phases separately noted above, was easiest to include in this
patch as opposed to separately.

## Migration Guide

* Systems that add custom phase items now need to populate the indirect
drawing-related buffers. See the `specialized_mesh_pipeline` example for
an example of how this is done.
---
 crates/bevy_core_pipeline/src/core_2d/mod.rs  |  34 +-
 crates/bevy_core_pipeline/src/core_3d/mod.rs  |  27 +-
 crates/bevy_core_pipeline/src/prepass/mod.rs  |   7 +
 crates/bevy_gizmos/src/pipeline_2d.rs         |   3 +
 crates/bevy_gizmos/src/pipeline_3d.rs         |   3 +
 crates/bevy_pbr/src/lib.rs                    |   2 +
 crates/bevy_pbr/src/material.rs               |  12 +-
 crates/bevy_pbr/src/prepass/mod.rs            |   4 +-
 .../src/render/build_indirect_params.wgsl     | 106 +++
 crates/bevy_pbr/src/render/gpu_preprocess.rs  | 872 ++++++++++++++---
 crates/bevy_pbr/src/render/light.rs           |  35 +-
 crates/bevy_pbr/src/render/mesh.rs            | 296 +++---
 .../bevy_pbr/src/render/mesh_preprocess.wgsl  |  51 +-
 .../src/render/mesh_preprocess_types.wgsl     | 105 ++-
 .../src/batching/gpu_preprocessing.rs         | 886 +++++++++++++-----
 crates/bevy_render/src/batching/mod.rs        |  39 +-
 crates/bevy_render/src/camera/camera.rs       |   2 +-
 crates/bevy_render/src/mesh/components.rs     |  37 +-
 crates/bevy_render/src/mesh/mod.rs            |  15 +-
 crates/bevy_render/src/render_phase/mod.rs    | 107 ++-
 crates/bevy_render/src/view/mod.rs            |  30 +-
 crates/bevy_sprite/src/mesh2d/material.rs     |  13 +-
 crates/bevy_sprite/src/mesh2d/mesh.rs         |  63 +-
 crates/bevy_sprite/src/render/mod.rs          |   1 +
 crates/bevy_ui/src/render/box_shadow.rs       |   1 +
 crates/bevy_ui/src/render/mod.rs              |   3 +-
 crates/bevy_ui/src/render/render_pass.rs      |   6 +
 .../src/render/ui_material_pipeline.rs        |   1 +
 .../src/render/ui_texture_slice_pipeline.rs   |   1 +
 examples/2d/mesh2d_manual.rs                  |   9 +-
 examples/shader/custom_shader_instancing.rs   |   1 +
 examples/shader/specialized_mesh_pipeline.rs  | 115 ++-
 32 files changed, 2257 insertions(+), 630 deletions(-)
 create mode 100644 crates/bevy_pbr/src/render/build_indirect_params.wgsl

diff --git a/crates/bevy_core_pipeline/src/core_2d/mod.rs b/crates/bevy_core_pipeline/src/core_2d/mod.rs
index 2d018edaa0ec3..ec0fa58d73f60 100644
--- a/crates/bevy_core_pipeline/src/core_2d/mod.rs
+++ b/crates/bevy_core_pipeline/src/core_2d/mod.rs
@@ -35,6 +35,7 @@ use core::ops::Range;
 use bevy_asset::UntypedAssetId;
 use bevy_render::{
     batching::gpu_preprocessing::GpuPreprocessingMode,
+    render_phase::PhaseItemBatchSetKey,
     view::{ExtractedView, RetainedViewEntity},
 };
 use bevy_utils::{HashMap, HashSet};
@@ -132,7 +133,7 @@ pub struct Opaque2d {
     ///
     /// Objects in a single batch set can potentially be multi-drawn together,
     /// if it's enabled and the current platform supports it.
-    pub batch_set_key: (),
+    pub batch_set_key: BatchSetKey2d,
     /// The key, which determines which can be batched.
     pub bin_key: Opaque2dBinKey,
     /// An entity from which data will be fetched, including the mesh if
@@ -198,7 +199,7 @@ impl PhaseItem for Opaque2d {
 impl BinnedPhaseItem for Opaque2d {
     // Since 2D meshes presently can't be multidrawn, the batch set key is
     // irrelevant.
-    type BatchSetKey = ();
+    type BatchSetKey = BatchSetKey2d;
 
     type BinKey = Opaque2dBinKey;
 
@@ -219,6 +220,20 @@ impl BinnedPhaseItem for Opaque2d {
     }
 }
 
+/// 2D meshes aren't currently multi-drawn together, so this batch set key only
+/// stores whether the mesh is indexed.
+#[derive(Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
+pub struct BatchSetKey2d {
+    /// True if the mesh is indexed.
+    pub indexed: bool,
+}
+
+impl PhaseItemBatchSetKey for BatchSetKey2d {
+    fn indexed(&self) -> bool {
+        self.indexed
+    }
+}
+
 impl CachedRenderPipelinePhaseItem for Opaque2d {
     #[inline]
     fn cached_pipeline(&self) -> CachedRenderPipelineId {
@@ -232,7 +247,7 @@ pub struct AlphaMask2d {
     ///
     /// Objects in a single batch set can potentially be multi-drawn together,
     /// if it's enabled and the current platform supports it.
-    pub batch_set_key: (),
+    pub batch_set_key: BatchSetKey2d,
     /// The key, which determines which can be batched.
     pub bin_key: AlphaMask2dBinKey,
     /// An entity from which data will be fetched, including the mesh if
@@ -297,9 +312,7 @@ impl PhaseItem for AlphaMask2d {
 }
 
 impl BinnedPhaseItem for AlphaMask2d {
-    // Since 2D meshes presently can't be multidrawn, the batch set key is
-    // irrelevant.
-    type BatchSetKey = ();
+    type BatchSetKey = BatchSetKey2d;
 
     type BinKey = AlphaMask2dBinKey;
 
@@ -335,6 +348,9 @@ pub struct Transparent2d {
     pub draw_function: DrawFunctionId,
     pub batch_range: Range<u32>,
     pub extra_index: PhaseItemExtraIndex,
+    /// Whether the mesh in question is indexed (uses an index buffer in
+    /// addition to its vertex buffer).
+    pub indexed: bool,
 }
 
 impl PhaseItem for Transparent2d {
@@ -387,6 +403,10 @@ impl SortedPhaseItem for Transparent2d {
         // radsort is a stable radix sort that performed better than `slice::sort_by_key` or `slice::sort_unstable_by_key`.
         radsort::sort_by_key(items, |item| item.sort_key().0);
     }
+
+    fn indexed(&self) -> bool {
+        self.indexed
+    }
 }
 
 impl CachedRenderPipelinePhaseItem for Transparent2d {
@@ -411,7 +431,7 @@ pub fn extract_core_2d_camera_phases(
         }
 
         // This is the main 2D camera, so we use the first subview index (0).
-        let retained_view_entity = RetainedViewEntity::new(main_entity.into(), 0);
+        let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0);
 
         transparent_2d_phases.insert_or_clear(retained_view_entity);
         opaque_2d_phases.insert_or_clear(retained_view_entity, GpuPreprocessingMode::None);
diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs
index 02ac01a9922be..393508047a017 100644
--- a/crates/bevy_core_pipeline/src/core_3d/mod.rs
+++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs
@@ -68,6 +68,7 @@ use core::ops::Range;
 use bevy_render::{
     batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport},
     mesh::allocator::SlabId,
+    render_phase::PhaseItemBatchSetKey,
     view::{NoIndirectDrawing, RetainedViewEntity},
 };
 pub use camera_3d::*;
@@ -269,6 +270,12 @@ pub struct Opaque3dBatchSetKey {
     pub lightmap_slab: Option<NonMaxU32>,
 }
 
+impl PhaseItemBatchSetKey for Opaque3dBatchSetKey {
+    fn indexed(&self) -> bool {
+        self.index_slab.is_some()
+    }
+}
+
 /// Data that must be identical in order to *batch* phase items together.
 ///
 /// Note that a *batch set* (if multi-draw is in use) contains multiple batches.
@@ -430,6 +437,9 @@ pub struct Transmissive3d {
     pub draw_function: DrawFunctionId,
     pub batch_range: Range<u32>,
     pub extra_index: PhaseItemExtraIndex,
+    /// Whether the mesh in question is indexed (uses an index buffer in
+    /// addition to its vertex buffer).
+    pub indexed: bool,
 }
 
 impl PhaseItem for Transmissive3d {
@@ -493,6 +503,11 @@ impl SortedPhaseItem for Transmissive3d {
     fn sort(items: &mut [Self]) {
         radsort::sort_by_key(items, |item| item.distance);
     }
+
+    #[inline]
+    fn indexed(&self) -> bool {
+        self.indexed
+    }
 }
 
 impl CachedRenderPipelinePhaseItem for Transmissive3d {
@@ -509,6 +524,9 @@ pub struct Transparent3d {
     pub draw_function: DrawFunctionId,
     pub batch_range: Range<u32>,
     pub extra_index: PhaseItemExtraIndex,
+    /// Whether the mesh in question is indexed (uses an index buffer in
+    /// addition to its vertex buffer).
+    pub indexed: bool,
 }
 
 impl PhaseItem for Transparent3d {
@@ -560,6 +578,11 @@ impl SortedPhaseItem for Transparent3d {
     fn sort(items: &mut [Self]) {
         radsort::sort_by_key(items, |item| item.distance);
     }
+
+    #[inline]
+    fn indexed(&self) -> bool {
+        self.indexed
+    }
 }
 
 impl CachedRenderPipelinePhaseItem for Transparent3d {
@@ -594,7 +617,7 @@ pub fn extract_core_3d_camera_phases(
         });
 
         // This is the main 3D camera, so use the first subview index (0).
-        let retained_view_entity = RetainedViewEntity::new(main_entity.into(), 0);
+        let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0);
 
         opaque_3d_phases.insert_or_clear(retained_view_entity, gpu_preprocessing_mode);
         alpha_mask_3d_phases.insert_or_clear(retained_view_entity, gpu_preprocessing_mode);
@@ -662,7 +685,7 @@ pub fn extract_camera_prepass_phase(
         });
 
         // This is the main 3D camera, so we use the first subview index (0).
-        let retained_view_entity = RetainedViewEntity::new(main_entity.into(), 0);
+        let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0);
 
         if depth_prepass || normal_prepass || motion_vector_prepass {
             opaque_3d_prepass_phases.insert_or_clear(retained_view_entity, gpu_preprocessing_mode);
diff --git a/crates/bevy_core_pipeline/src/prepass/mod.rs b/crates/bevy_core_pipeline/src/prepass/mod.rs
index b90dea03a6260..7fb2dfcea961b 100644
--- a/crates/bevy_core_pipeline/src/prepass/mod.rs
+++ b/crates/bevy_core_pipeline/src/prepass/mod.rs
@@ -35,6 +35,7 @@ use bevy_ecs::prelude::*;
 use bevy_math::Mat4;
 use bevy_reflect::{std_traits::ReflectDefault, Reflect};
 use bevy_render::mesh::allocator::SlabId;
+use bevy_render::render_phase::PhaseItemBatchSetKey;
 use bevy_render::sync_world::MainEntity;
 use bevy_render::{
     render_phase::{
@@ -184,6 +185,12 @@ pub struct OpaqueNoLightmap3dBatchSetKey {
     pub index_slab: Option<SlabId>,
 }
 
+impl PhaseItemBatchSetKey for OpaqueNoLightmap3dBatchSetKey {
+    fn indexed(&self) -> bool {
+        self.index_slab.is_some()
+    }
+}
+
 // TODO: Try interning these.
 /// The data used to bin each opaque 3D object in the prepass and deferred pass.
 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
diff --git a/crates/bevy_gizmos/src/pipeline_2d.rs b/crates/bevy_gizmos/src/pipeline_2d.rs
index 96df48e164ca6..13c9b89dd98ff 100644
--- a/crates/bevy_gizmos/src/pipeline_2d.rs
+++ b/crates/bevy_gizmos/src/pipeline_2d.rs
@@ -340,6 +340,7 @@ fn queue_line_gizmos_2d(
                     sort_key: FloatOrd(f32::INFINITY),
                     batch_range: 0..1,
                     extra_index: PhaseItemExtraIndex::None,
+                    indexed: false,
                 });
             }
 
@@ -360,6 +361,7 @@ fn queue_line_gizmos_2d(
                     sort_key: FloatOrd(f32::INFINITY),
                     batch_range: 0..1,
                     extra_index: PhaseItemExtraIndex::None,
+                    indexed: false,
                 });
             }
         }
@@ -418,6 +420,7 @@ fn queue_line_joint_gizmos_2d(
                 sort_key: FloatOrd(f32::INFINITY),
                 batch_range: 0..1,
                 extra_index: PhaseItemExtraIndex::None,
+                indexed: false,
             });
         }
     }
diff --git a/crates/bevy_gizmos/src/pipeline_3d.rs b/crates/bevy_gizmos/src/pipeline_3d.rs
index 458cb28e70ef7..aac6358d638bf 100644
--- a/crates/bevy_gizmos/src/pipeline_3d.rs
+++ b/crates/bevy_gizmos/src/pipeline_3d.rs
@@ -369,6 +369,7 @@ fn queue_line_gizmos_3d(
                     distance: 0.,
                     batch_range: 0..1,
                     extra_index: PhaseItemExtraIndex::None,
+                    indexed: true,
                 });
             }
 
@@ -390,6 +391,7 @@ fn queue_line_gizmos_3d(
                     distance: 0.,
                     batch_range: 0..1,
                     extra_index: PhaseItemExtraIndex::None,
+                    indexed: true,
                 });
             }
         }
@@ -484,6 +486,7 @@ fn queue_line_joint_gizmos_3d(
                 distance: 0.,
                 batch_range: 0..1,
                 extra_index: PhaseItemExtraIndex::None,
+                indexed: true,
             });
         }
     }
diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs
index 4bd5c76f3331a..44107dfe7dd5d 100644
--- a/crates/bevy_pbr/src/lib.rs
+++ b/crates/bevy_pbr/src/lib.rs
@@ -102,6 +102,8 @@ pub mod graph {
         GpuPreprocess,
         /// Label for the screen space reflections pass.
         ScreenSpaceReflections,
+        /// Label for the indirect parameters building pass.
+        BuildIndirectParameters,
     }
 }
 
diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs
index c22952226dc96..0ffe305bd7cb1 100644
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@@ -851,6 +851,9 @@ pub fn queue_material_meshes<M: Material>(
                 }
             };
 
+            // Fetch the slabs that this mesh resides in.
+            let (vertex_slab, index_slab) = mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
+
             match mesh_key
                 .intersection(MeshPipelineKey::BLEND_RESERVED_BITS | MeshPipelineKey::MAY_DISCARD)
             {
@@ -865,13 +868,12 @@ pub fn queue_material_meshes<M: Material>(
                             distance,
                             batch_range: 0..1,
                             extra_index: PhaseItemExtraIndex::None,
+                            indexed: index_slab.is_some(),
                         });
                     } else if material.properties.render_method == OpaqueRendererMethod::Forward {
-                        let (vertex_slab, index_slab) =
-                            mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
                         let batch_set_key = Opaque3dBatchSetKey {
-                            draw_function: draw_opaque_pbr,
                             pipeline: pipeline_id,
+                            draw_function: draw_opaque_pbr,
                             material_bind_group_index: Some(material.binding.group.0),
                             vertex_slab: vertex_slab.unwrap_or_default(),
                             index_slab,
@@ -903,10 +905,9 @@ pub fn queue_material_meshes<M: Material>(
                             distance,
                             batch_range: 0..1,
                             extra_index: PhaseItemExtraIndex::None,
+                            indexed: index_slab.is_some(),
                         });
                     } else if material.properties.render_method == OpaqueRendererMethod::Forward {
-                        let (vertex_slab, index_slab) =
-                            mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
                         let batch_set_key = OpaqueNoLightmap3dBatchSetKey {
                             draw_function: draw_alpha_mask_pbr,
                             pipeline: pipeline_id,
@@ -938,6 +939,7 @@ pub fn queue_material_meshes<M: Material>(
                         distance,
                         batch_range: 0..1,
                         extra_index: PhaseItemExtraIndex::None,
+                        indexed: index_slab.is_some(),
                     });
                 }
             }
diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs
index 9b34c04dc1bb6..373119f25ffde 100644
--- a/crates/bevy_pbr/src/prepass/mod.rs
+++ b/crates/bevy_pbr/src/prepass/mod.rs
@@ -966,13 +966,13 @@ pub fn queue_prepass_material_meshes<M: Material>(
                 }
             };
 
+            let (vertex_slab, index_slab) = mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
+
             match mesh_key
                 .intersection(MeshPipelineKey::BLEND_RESERVED_BITS | MeshPipelineKey::MAY_DISCARD)
             {
                 MeshPipelineKey::BLEND_OPAQUE | MeshPipelineKey::BLEND_ALPHA_TO_COVERAGE => {
                     if deferred {
-                        let (vertex_slab, index_slab) =
-                            mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
                         opaque_deferred_phase.as_mut().unwrap().add(
                             OpaqueNoLightmap3dBatchSetKey {
                                 draw_function: opaque_draw_deferred,
diff --git a/crates/bevy_pbr/src/render/build_indirect_params.wgsl b/crates/bevy_pbr/src/render/build_indirect_params.wgsl
new file mode 100644
index 0000000000000..90741e9064971
--- /dev/null
+++ b/crates/bevy_pbr/src/render/build_indirect_params.wgsl
@@ -0,0 +1,106 @@
+// Builds GPU indirect draw parameters from metadata.
+//
+// This only runs when indirect drawing is enabled. It takes the output of
+// `mesh_preprocess.wgsl` and creates indirect parameters for the GPU.
+//
+// This shader runs separately for indexed and non-indexed meshes. Unlike
+// `mesh_preprocess.wgsl`, which runs one instance per mesh *instance*, one
+// instance of this shader corresponds to a single *batch* which could contain
+// arbitrarily many instances of a single mesh.
+
+#import bevy_pbr::mesh_preprocess_types::{
+    IndirectBatchSet,
+    IndirectParametersIndexed,
+    IndirectParametersNonIndexed,
+    IndirectParametersMetadata,
+    MeshInput
+}
+
+// The data for each mesh that the CPU supplied to the GPU.
+@group(0) @binding(0) var<storage> current_input: array<MeshInput>;
+
+// Data that we use to generate the indirect parameters.
+//
+// The `mesh_preprocess.wgsl` shader emits these.
+@group(0) @binding(1) var<storage> indirect_parameters_metadata: array<IndirectParametersMetadata>;
+
+// Information about each batch set.
+//
+// A *batch set* is a set of meshes that might be multi-drawn together.
+@group(0) @binding(2) var<storage, read_write> indirect_batch_sets: array<IndirectBatchSet>;
+
+#ifdef INDEXED
+// The buffer of indirect draw parameters that we generate, and that the GPU
+// reads to issue the draws.
+//
+// This buffer is for indexed meshes.
+@group(0) @binding(3) var<storage, read_write> indirect_parameters:
+    array<IndirectParametersIndexed>;
+#else   // INDEXED
+// The buffer of indirect draw parameters that we generate, and that the GPU
+// reads to issue the draws.
+//
+// This buffer is for non-indexed meshes.
+@group(0) @binding(3) var<storage, read_write> indirect_parameters:
+    array<IndirectParametersNonIndexed>;
+#endif  // INDEXED
+
+@compute
+@workgroup_size(64)
+fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
+    // Figure out our instance index (i.e. batch index). If this thread doesn't
+    // correspond to any index, bail.
+    let instance_index = global_invocation_id.x;
+    if (instance_index >= arrayLength(&indirect_parameters_metadata)) {
+        return;
+    }
+
+    // Unpack the metadata for this batch.
+    let mesh_index = indirect_parameters_metadata[instance_index].mesh_index;
+    let base_output_index = indirect_parameters_metadata[instance_index].base_output_index;
+    let batch_set_index = indirect_parameters_metadata[instance_index].batch_set_index;
+    let instance_count = atomicLoad(&indirect_parameters_metadata[instance_index].instance_count);
+
+    // If we aren't using `multi_draw_indirect_count`, we have a 1:1 fixed
+    // assignment of batches to slots in the indirect parameters buffer, so we
+    // can just use the instance index as the index of our indirect parameters.
+    var indirect_parameters_index = instance_index;
+
+    // If the current hardware and driver support `multi_draw_indirect_count`,
+    // dynamically reserve an index for the indirect parameters we're to
+    // generate.
+#ifdef MULTI_DRAW_INDIRECT_COUNT_SUPPORTED
+    if (instance_count == 0u) {
+        return;
+    }
+
+    // If this batch belongs to a batch set, then allocate space for the
+    // indirect commands in that batch set.
+    if (batch_set_index != 0xffffffffu) {
+        let indirect_parameters_base =
+            indirect_batch_sets[batch_set_index].indirect_parameters_base;
+        let indirect_parameters_offset =
+            atomicAdd(&indirect_batch_sets[batch_set_index].indirect_parameters_count, 1u);
+
+        indirect_parameters_index = indirect_parameters_base + indirect_parameters_offset;
+    }
+#endif  // MULTI_DRAW_INDIRECT_COUNT_SUPPORTED
+
+    // Build up the indirect parameters. The structures for indexed and
+    // non-indexed meshes are slightly different.
+
+    indirect_parameters[indirect_parameters_index].instance_count = instance_count;
+    indirect_parameters[indirect_parameters_index].first_instance = base_output_index;
+    indirect_parameters[indirect_parameters_index].base_vertex =
+        current_input[mesh_index].first_vertex_index;
+
+#ifdef INDEXED
+    indirect_parameters[indirect_parameters_index].index_count =
+        current_input[mesh_index].index_count;
+    indirect_parameters[indirect_parameters_index].first_index =
+        current_input[mesh_index].first_index_index;
+#else   // INDEXED
+    indirect_parameters[indirect_parameters_index].vertex_count =
+        current_input[mesh_index].index_count;
+#endif  // INDEXED
+}
\ No newline at end of file
diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs
index 3c7954a8c4321..8ac5a7c96e29c 100644
--- a/crates/bevy_pbr/src/render/gpu_preprocess.rs
+++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs
@@ -6,10 +6,12 @@
 //! [`MeshInputUniform`]s instead and use the GPU to calculate the remaining
 //! derived fields in [`MeshUniform`].
 
-use core::num::NonZero;
+use core::num::{NonZero, NonZeroU64};
 
 use bevy_app::{App, Plugin};
 use bevy_asset::{load_internal_asset, Handle};
+use bevy_core_pipeline::core_3d::graph::{Core3d, Node3d};
+use bevy_derive::{Deref, DerefMut};
 use bevy_ecs::{
     component::Component,
     entity::Entity,
@@ -20,22 +22,24 @@ use bevy_ecs::{
 };
 use bevy_render::{
     batching::gpu_preprocessing::{
-        BatchedInstanceBuffers, GpuPreprocessingSupport, IndirectParameters,
-        IndirectParametersBuffer, PreprocessWorkItem,
+        BatchedInstanceBuffers, GpuPreprocessingSupport, IndirectBatchSet,
+        IndirectParametersBuffers, IndirectParametersIndexed, IndirectParametersMetadata,
+        IndirectParametersNonIndexed, PreprocessWorkItem, PreprocessWorkItemBuffers,
     },
-    graph::CameraDriverLabel,
-    render_graph::{Node, NodeRunError, RenderGraph, RenderGraphContext},
+    render_graph::{Node, NodeRunError, RenderGraphApp, RenderGraphContext},
     render_resource::{
         binding_types::{storage_buffer, storage_buffer_read_only, uniform_buffer},
-        BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, BufferBinding,
+        BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, Buffer, BufferBinding,
         CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor,
         DynamicBindGroupLayoutEntries, PipelineCache, Shader, ShaderStages, ShaderType,
         SpecializedComputePipeline, SpecializedComputePipelines,
     },
     renderer::{RenderContext, RenderDevice, RenderQueue},
+    settings::WgpuFeatures,
     view::{NoIndirectDrawing, ViewUniform, ViewUniformOffset, ViewUniforms},
     Render, RenderApp, RenderSet,
 };
+use bevy_utils::TypeIdMap;
 use bitflags::bitflags;
 use smallvec::{smallvec, SmallVec};
 use tracing::warn;
@@ -44,12 +48,17 @@ use crate::{
     graph::NodePbr, MeshCullingData, MeshCullingDataBuffer, MeshInputUniform, MeshUniform,
 };
 
+use super::ViewLightEntities;
+
 /// The handle to the `mesh_preprocess.wgsl` compute shader.
 pub const MESH_PREPROCESS_SHADER_HANDLE: Handle<Shader> =
     Handle::weak_from_u128(16991728318640779533);
 /// The handle to the `mesh_preprocess_types.wgsl` compute shader.
 pub const MESH_PREPROCESS_TYPES_SHADER_HANDLE: Handle<Shader> =
     Handle::weak_from_u128(2720440370122465935);
+/// The handle to the `build_indirect_params.wgsl` compute shader.
+pub const BUILD_INDIRECT_PARAMS_SHADER_HANDLE: Handle<Shader> =
+    Handle::weak_from_u128(3711077208359699672);
 
 /// The GPU workgroup size.
 const WORKGROUP_SIZE: usize = 64;
@@ -66,28 +75,58 @@ pub struct GpuMeshPreprocessPlugin {
     pub use_gpu_instance_buffer_builder: bool,
 }
 
-/// The render node for the mesh uniform building pass.
+/// The render node for the mesh preprocessing pass.
+///
+/// This pass runs a compute shader to cull invisible meshes (if that wasn't
+/// done by the CPU), transforms them, and, if indirect drawing is on, populates
+/// indirect draw parameter metadata for the subsequent
+/// [`BuildIndirectParametersNode`].
 pub struct GpuPreprocessNode {
     view_query: QueryState<
         (
             Entity,
-            Read<PreprocessBindGroup>,
+            Read<PreprocessBindGroups>,
             Read<ViewUniformOffset>,
             Has<NoIndirectDrawing>,
         ),
         Without<SkipGpuPreprocess>,
     >,
+    main_view_query: QueryState<Read<ViewLightEntities>>,
+}
+
+/// The render node for the indirect parameter building pass.
+///
+/// This node runs a compute shader on the output of the [`GpuPreprocessNode`]
+/// in order to transform the [`IndirectParametersMetadata`] into
+/// properly-formatted [`IndirectParametersIndexed`] and
+/// [`IndirectParametersNonIndexed`].
+pub struct BuildIndirectParametersNode {
+    view_query: QueryState<
+        Read<PreprocessBindGroups>,
+        (Without<SkipGpuPreprocess>, Without<NoIndirectDrawing>),
+    >,
 }
 
-/// The compute shader pipelines for the mesh uniform building pass.
+/// The compute shader pipelines for the GPU mesh preprocessing and indirect
+/// parameter building passes.
 #[derive(Resource)]
 pub struct PreprocessPipelines {
     /// The pipeline used for CPU culling. This pipeline doesn't populate
-    /// indirect parameters.
-    pub direct: PreprocessPipeline,
+    /// indirect parameter metadata.
+    pub direct_preprocess: PreprocessPipeline,
     /// The pipeline used for GPU culling. This pipeline populates indirect
+    /// parameter metadata.
+    pub gpu_culling_preprocess: PreprocessPipeline,
+    /// The pipeline used for indexed indirect parameter building.
+    ///
+    /// This pipeline converts indirect parameter metadata into indexed indirect
     /// parameters.
-    pub gpu_culling: PreprocessPipeline,
+    pub build_indexed_indirect_params: BuildIndirectParametersPipeline,
+    /// The pipeline used for non-indexed indirect parameter building.
+    ///
+    /// This pipeline converts indirect parameter metadata into non-indexed
+    /// indirect parameters.
+    pub build_non_indexed_indirect_params: BuildIndirectParametersPipeline,
 }
 
 /// The pipeline for the GPU mesh preprocessing shader.
@@ -100,6 +139,16 @@ pub struct PreprocessPipeline {
     pub pipeline_id: Option<CachedComputePipelineId>,
 }
 
+/// The pipeline for the indirect parameter building shader.
+pub struct BuildIndirectParametersPipeline {
+    /// The bind group layout for the compute shader.
+    pub bind_group_layout: BindGroupLayout,
+    /// The pipeline ID for the compute shader.
+    ///
+    /// This gets filled in `prepare_preprocess_pipelines`.
+    pub pipeline_id: Option<CachedComputePipelineId>,
+}
+
 bitflags! {
     /// Specifies variants of the mesh preprocessing shader.
     #[derive(Clone, Copy, PartialEq, Eq, Hash)]
@@ -109,13 +158,73 @@ bitflags! {
         /// This `#define`'s `GPU_CULLING` in the shader.
         const GPU_CULLING = 1;
     }
+
+    /// Specifies variants of the indirect parameter building shader.
+    #[derive(Clone, Copy, PartialEq, Eq, Hash)]
+    pub struct BuildIndirectParametersPipelineKey: u8 {
+        /// Whether the indirect parameter building shader is processing indexed
+        /// meshes (those that have index buffers).
+        ///
+        /// This defines `INDEXED` in the shader.
+        const INDEXED = 1;
+        /// Whether the GPU and driver supports `multi_draw_indirect_count`.
+        ///
+        /// This defines `MULTI_DRAW_INDIRECT_COUNT_SUPPORTED` in the shader.
+        const MULTI_DRAW_INDIRECT_COUNT_SUPPORTED = 2;
+    }
+}
+
+/// The compute shader bind group for the mesh preprocessing pass for each
+/// render phase.
+///
+/// This goes on the view. It maps the [`core::any::TypeId`] of a render phase
+/// (e.g.  [`bevy_core_pipeline::core_3d::Opaque3d`]) to the
+/// [`PhasePreprocessBindGroups`] for that phase.
+#[derive(Component, Clone, Deref, DerefMut)]
+pub struct PreprocessBindGroups(pub TypeIdMap<PhasePreprocessBindGroups>);
+
+/// The compute shader bind group for the mesh preprocessing step for a single
+/// render phase on a single view.
+#[derive(Clone)]
+pub enum PhasePreprocessBindGroups {
+    /// The bind group used for the single invocation of the compute shader when
+    /// indirect drawing is *not* being used.
+    ///
+    /// Because direct drawing doesn't require splitting the meshes into indexed
+    /// and non-indexed meshes, there's only one bind group in this case.
+    Direct(BindGroup),
+
+    /// The bind groups used for the compute shader when indirect drawing is
+    /// being used.
+    ///
+    /// Because indirect drawing requires splitting the meshes into indexed and
+    /// non-indexed meshes, there are two bind groups here.
+    Indirect {
+        /// The bind group used for indexed meshes.
+        ///
+        /// This will be `None` if there are no indexed meshes.
+        indexed: Option<BindGroup>,
+        /// The bind group used for non-indexed meshes.
+        ///
+        /// This will be `None` if there are no non-indexed meshes.
+        non_indexed: Option<BindGroup>,
+    },
 }
 
-/// The compute shader bind group for the mesh uniform building pass.
+/// The bind groups for the indirect parameters building compute shader.
 ///
-/// This goes on the view.
-#[derive(Component, Clone)]
-pub struct PreprocessBindGroup(BindGroup);
+/// This is shared among all views and phases.
+#[derive(Resource)]
+pub struct BuildIndirectParametersBindGroups {
+    /// The bind group used for indexed meshes.
+    ///
+    /// This will be `None` if there are no indexed meshes.
+    indexed: Option<BindGroup>,
+    /// The bind group used for non-indexed meshes.
+    ///
+    /// This will be `None` if there are no non-indexed meshes.
+    non_indexed: Option<BindGroup>,
+}
 
 /// Stops the `GpuPreprocessNode` attempting to generate the buffer for this view
 /// useful to avoid duplicating effort if the bind group is shared between views
@@ -136,6 +245,12 @@ impl Plugin for GpuMeshPreprocessPlugin {
             "mesh_preprocess_types.wgsl",
             Shader::from_wgsl
         );
+        load_internal_asset!(
+            app,
+            BUILD_INDIRECT_PARAMS_SHADER_HANDLE,
+            "build_indirect_params.wgsl",
+            Shader::from_wgsl
+        );
     }
 
     fn finish(&self, app: &mut App) {
@@ -150,15 +265,10 @@ impl Plugin for GpuMeshPreprocessPlugin {
             return;
         }
 
-        // Stitch the node in.
-        let gpu_preprocess_node = GpuPreprocessNode::from_world(render_app.world_mut());
-        let mut render_graph = render_app.world_mut().resource_mut::<RenderGraph>();
-        render_graph.add_node(NodePbr::GpuPreprocess, gpu_preprocess_node);
-        render_graph.add_node_edge(NodePbr::GpuPreprocess, CameraDriverLabel);
-
         render_app
             .init_resource::<PreprocessPipelines>()
             .init_resource::<SpecializedComputePipelines<PreprocessPipeline>>()
+            .init_resource::<SpecializedComputePipelines<BuildIndirectParametersPipeline>>()
             .add_systems(
                 Render,
                 (
@@ -170,6 +280,19 @@ impl Plugin for GpuMeshPreprocessPlugin {
                         .in_set(RenderSet::PrepareBindGroups),
                     write_mesh_culling_data_buffer.in_set(RenderSet::PrepareResourcesFlush),
                 )
+            )
+            .add_render_graph_node::<GpuPreprocessNode>(Core3d, NodePbr::GpuPreprocess)
+            .add_render_graph_node::<BuildIndirectParametersNode>(
+                Core3d,
+                NodePbr::BuildIndirectParameters
+            )
+            .add_render_graph_edges(
+                Core3d,
+                (NodePbr::GpuPreprocess, NodePbr::BuildIndirectParameters, Node3d::Prepass)
+            )
+            .add_render_graph_edges(
+                Core3d,
+                (NodePbr::GpuPreprocess, NodePbr::BuildIndirectParameters, NodePbr::ShadowPass)
             );
     }
 }
@@ -178,6 +301,7 @@ impl FromWorld for GpuPreprocessNode {
     fn from_world(world: &mut World) -> Self {
         Self {
             view_query: QueryState::new(world),
+            main_view_query: QueryState::new(world),
         }
     }
 }
@@ -185,11 +309,12 @@ impl FromWorld for GpuPreprocessNode {
 impl Node for GpuPreprocessNode {
     fn update(&mut self, world: &mut World) {
         self.view_query.update_archetypes(world);
+        self.main_view_query.update_archetypes(world);
     }
 
     fn run<'w>(
         &self,
-        _: &mut RenderGraphContext,
+        graph: &mut RenderGraphContext,
         render_context: &mut RenderContext<'w>,
         world: &'w World,
     ) -> Result<(), NodeRunError> {
@@ -210,12 +335,25 @@ impl Node for GpuPreprocessNode {
                     timestamp_writes: None,
                 });
 
-        // Run the compute passes.
-        for (view, bind_group, view_uniform_offset, no_indirect_drawing) in
-            self.view_query.iter_manual(world)
+        let mut all_views: SmallVec<[_; 8]> = SmallVec::new();
+        all_views.push(graph.view_entity());
+        if let Ok(shadow_cascade_views) =
+            self.main_view_query.get_manual(world, graph.view_entity())
         {
-            // Grab the index buffer for this view.
-            let Some(index_buffer) = index_buffers.get(&view) else {
+            all_views.extend(shadow_cascade_views.lights.iter().copied());
+        }
+
+        // Run the compute passes.
+
+        for view_entity in all_views {
+            let Ok((view, bind_groups, view_uniform_offset, no_indirect_drawing)) =
+                self.view_query.get_manual(world, view_entity)
+            else {
+                continue;
+            };
+
+            // Grab the work item buffers for this view.
+            let Some(view_work_item_buffers) = index_buffers.get(&view) else {
                 warn!("The preprocessing index buffer wasn't present");
                 continue;
             };
@@ -223,34 +361,204 @@ impl Node for GpuPreprocessNode {
             // Select the right pipeline, depending on whether GPU culling is in
             // use.
             let maybe_pipeline_id = if !no_indirect_drawing {
-                preprocess_pipelines.gpu_culling.pipeline_id
+                preprocess_pipelines.gpu_culling_preprocess.pipeline_id
             } else {
-                preprocess_pipelines.direct.pipeline_id
+                preprocess_pipelines.direct_preprocess.pipeline_id
             };
 
             // Fetch the pipeline.
             let Some(preprocess_pipeline_id) = maybe_pipeline_id else {
                 warn!("The build mesh uniforms pipeline wasn't ready");
-                return Ok(());
+                continue;
             };
 
             let Some(preprocess_pipeline) =
                 pipeline_cache.get_compute_pipeline(preprocess_pipeline_id)
             else {
                 // This will happen while the pipeline is being compiled and is fine.
-                return Ok(());
+                continue;
             };
 
             compute_pass.set_pipeline(preprocess_pipeline);
 
-            let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![];
-            if !no_indirect_drawing {
-                dynamic_offsets.push(view_uniform_offset.offset);
+            // Loop over each render phase.
+            for (phase_type_id, phase_work_item_buffers) in view_work_item_buffers {
+                // Fetch the bind group for the render phase.
+                let Some(phase_bind_groups) = bind_groups.get(phase_type_id) else {
+                    continue;
+                };
+
+                // If we're drawing indirectly, make sure the mesh preprocessing
+                // shader has access to the view info it needs to do culling.
+                let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![];
+                if !no_indirect_drawing {
+                    dynamic_offsets.push(view_uniform_offset.offset);
+                }
+
+                // Are we drawing directly or indirectly?
+                match *phase_bind_groups {
+                    PhasePreprocessBindGroups::Direct(ref bind_group) => {
+                        // Invoke the mesh preprocessing shader to transform
+                        // meshes only, but not cull.
+                        let PreprocessWorkItemBuffers::Direct(phase_work_item_buffer) =
+                            phase_work_item_buffers
+                        else {
+                            continue;
+                        };
+                        compute_pass.set_bind_group(0, bind_group, &dynamic_offsets);
+                        let workgroup_count = phase_work_item_buffer.len().div_ceil(WORKGROUP_SIZE);
+                        if workgroup_count > 0 {
+                            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
+                        }
+                    }
+
+                    PhasePreprocessBindGroups::Indirect {
+                        indexed: ref maybe_indexed_bind_group,
+                        non_indexed: ref maybe_non_indexed_bind_group,
+                    } => {
+                        // Invoke the mesh preprocessing shader to transform and
+                        // cull the meshes.
+                        let PreprocessWorkItemBuffers::Indirect {
+                            indexed: indexed_buffer,
+                            non_indexed: non_indexed_buffer,
+                            ..
+                        } = phase_work_item_buffers
+                        else {
+                            continue;
+                        };
+
+                        // Transform and cull indexed meshes if there are any.
+                        if let Some(indexed_bind_group) = maybe_indexed_bind_group {
+                            compute_pass.set_bind_group(0, indexed_bind_group, &dynamic_offsets);
+                            let workgroup_count = indexed_buffer.len().div_ceil(WORKGROUP_SIZE);
+                            if workgroup_count > 0 {
+                                compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
+                            }
+                        }
+
+                        // Transform and cull non-indexed meshes if there are any.
+                        if let Some(non_indexed_bind_group) = maybe_non_indexed_bind_group {
+                            compute_pass.set_bind_group(
+                                0,
+                                non_indexed_bind_group,
+                                &dynamic_offsets,
+                            );
+                            let workgroup_count = non_indexed_buffer.len().div_ceil(WORKGROUP_SIZE);
+                            if workgroup_count > 0 {
+                                compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
+                            }
+                        }
+                    }
+                }
             }
-            compute_pass.set_bind_group(0, &bind_group.0, &dynamic_offsets);
+        }
+
+        Ok(())
+    }
+}
 
-            let workgroup_count = index_buffer.buffer.len().div_ceil(WORKGROUP_SIZE);
-            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
+impl FromWorld for BuildIndirectParametersNode {
+    fn from_world(world: &mut World) -> Self {
+        Self {
+            view_query: QueryState::new(world),
+        }
+    }
+}
+
+impl Node for BuildIndirectParametersNode {
+    fn update(&mut self, world: &mut World) {
+        self.view_query.update_archetypes(world);
+    }
+
+    fn run<'w>(
+        &self,
+        _: &mut RenderGraphContext,
+        render_context: &mut RenderContext<'w>,
+        world: &'w World,
+    ) -> Result<(), NodeRunError> {
+        // Fetch the bind group.
+        let Some(build_indirect_params_bind_groups) =
+            world.get_resource::<BuildIndirectParametersBindGroups>()
+        else {
+            return Ok(());
+        };
+
+        // Fetch the pipelines and the buffers we need.
+        let pipeline_cache = world.resource::<PipelineCache>();
+        let preprocess_pipelines = world.resource::<PreprocessPipelines>();
+        let indirect_parameters_buffers = world.resource::<IndirectParametersBuffers>();
+
+        // Create the compute pass.
+        let mut compute_pass =
+            render_context
+                .command_encoder()
+                .begin_compute_pass(&ComputePassDescriptor {
+                    label: Some("build indirect parameters"),
+                    timestamp_writes: None,
+                });
+
+        // Fetch the pipelines.
+
+        let (maybe_indexed_pipeline_id, maybe_non_indexed_pipeline_id) = (
+            preprocess_pipelines
+                .build_indexed_indirect_params
+                .pipeline_id,
+            preprocess_pipelines
+                .build_non_indexed_indirect_params
+                .pipeline_id,
+        );
+
+        let (
+            Some(build_indexed_indirect_params_pipeline_id),
+            Some(build_non_indexed_indirect_params_pipeline_id),
+        ) = (maybe_indexed_pipeline_id, maybe_non_indexed_pipeline_id)
+        else {
+            warn!("The build indirect parameters pipelines weren't ready");
+            return Ok(());
+        };
+
+        let (
+            Some(build_indexed_indirect_params_pipeline),
+            Some(build_non_indexed_indirect_params_pipeline),
+        ) = (
+            pipeline_cache.get_compute_pipeline(build_indexed_indirect_params_pipeline_id),
+            pipeline_cache.get_compute_pipeline(build_non_indexed_indirect_params_pipeline_id),
+        )
+        else {
+            // This will happen while the pipeline is being compiled and is fine.
+            return Ok(());
+        };
+
+        // Transform the [`IndirectParametersMetadata`] that the GPU mesh
+        // preprocessing phase wrote to [`IndirectParametersIndexed`] for
+        // indexed meshes, if we have any.
+        if let Some(ref build_indirect_indexed_params_bind_group) =
+            build_indirect_params_bind_groups.indexed
+        {
+            compute_pass.set_pipeline(build_indexed_indirect_params_pipeline);
+            compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]);
+            let workgroup_count = indirect_parameters_buffers
+                .indexed_batch_count()
+                .div_ceil(WORKGROUP_SIZE);
+            if workgroup_count > 0 {
+                compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
+            }
+        }
+
+        // Transform the [`IndirectParametersMetadata`] that the GPU mesh
+        // preprocessing phase wrote to [`IndirectParametersNonIndexed`] for
+        // non-indexed meshes, if we have any.
+        if let Some(ref build_indirect_non_indexed_params_bind_group) =
+            build_indirect_params_bind_groups.non_indexed
+        {
+            compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline);
+            compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]);
+            let workgroup_count = indirect_parameters_buffers
+                .non_indexed_batch_count()
+                .div_ceil(WORKGROUP_SIZE);
+            if workgroup_count > 0 {
+                compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
+            }
         }
 
         Ok(())
@@ -258,8 +566,15 @@ impl Node for GpuPreprocessNode {
 }
 
 impl PreprocessPipelines {
+    /// Returns true if the preprocessing and indirect parameters pipelines have
+    /// been loaded or false otherwise.
     pub(crate) fn pipelines_are_loaded(&self, pipeline_cache: &PipelineCache) -> bool {
-        self.direct.is_loaded(pipeline_cache) && self.gpu_culling.is_loaded(pipeline_cache)
+        self.direct_preprocess.is_loaded(pipeline_cache)
+            && self.gpu_culling_preprocess.is_loaded(pipeline_cache)
+            && self.build_indexed_indirect_params.is_loaded(pipeline_cache)
+            && self
+                .build_non_indexed_indirect_params
+                .is_loaded(pipeline_cache)
     }
 }
 
@@ -270,6 +585,15 @@ impl PreprocessPipeline {
     }
 }
 
+impl BuildIndirectParametersPipeline {
+    /// Returns true if this pipeline has been loaded into the pipeline cache or
+    /// false otherwise.
+    fn is_loaded(&self, pipeline_cache: &PipelineCache) -> bool {
+        self.pipeline_id
+            .is_some_and(|pipeline_id| pipeline_cache.get_compute_pipeline(pipeline_id).is_some())
+    }
+}
+
 impl SpecializedComputePipeline for PreprocessPipeline {
     type Key = PreprocessPipelineKey;
 
@@ -311,14 +635,24 @@ impl FromWorld for PreprocessPipelines {
         let direct_bind_group_layout_entries = preprocess_direct_bind_group_layout_entries();
         let gpu_culling_bind_group_layout_entries = preprocess_direct_bind_group_layout_entries()
             .extend_sequential((
-                // `indirect_parameters`
-                storage_buffer::<IndirectParameters>(/* has_dynamic_offset= */ false),
+                // `indirect_parameters_metadata`
+                storage_buffer::<IndirectParametersMetadata>(/* has_dynamic_offset= */ false),
                 // `mesh_culling_data`
                 storage_buffer_read_only::<MeshCullingData>(/* has_dynamic_offset= */ false),
                 // `view`
                 uniform_buffer::<ViewUniform>(/* has_dynamic_offset= */ true),
             ));
 
+        // Indexed and non-indexed bind group parameters share all the bind
+        // group layout entries except the final one.
+        let build_indexed_indirect_params_bind_group_layout_entries =
+            build_indirect_params_bind_group_layout_entries()
+                .extend_sequential((storage_buffer::<IndirectParametersIndexed>(false),));
+        let build_non_indexed_indirect_params_bind_group_layout_entries =
+            build_indirect_params_bind_group_layout_entries()
+                .extend_sequential((storage_buffer::<IndirectParametersNonIndexed>(false),));
+
+        // Create the bind group layouts.
         let direct_bind_group_layout = render_device.create_bind_group_layout(
             "build mesh uniforms direct bind group layout",
             &direct_bind_group_layout_entries,
@@ -327,16 +661,34 @@ impl FromWorld for PreprocessPipelines {
             "build mesh uniforms GPU culling bind group layout",
             &gpu_culling_bind_group_layout_entries,
         );
+        let build_indexed_indirect_params_bind_group_layout = render_device
+            .create_bind_group_layout(
+                "build indexed indirect parameters bind group layout",
+                &build_indexed_indirect_params_bind_group_layout_entries,
+            );
+        let build_non_indexed_indirect_params_bind_group_layout = render_device
+            .create_bind_group_layout(
+                "build non-indexed indirect parameters bind group layout",
+                &build_non_indexed_indirect_params_bind_group_layout_entries,
+            );
 
         PreprocessPipelines {
-            direct: PreprocessPipeline {
+            direct_preprocess: PreprocessPipeline {
                 bind_group_layout: direct_bind_group_layout,
                 pipeline_id: None,
             },
-            gpu_culling: PreprocessPipeline {
+            gpu_culling_preprocess: PreprocessPipeline {
                 bind_group_layout: gpu_culling_bind_group_layout,
                 pipeline_id: None,
             },
+            build_indexed_indirect_params: BuildIndirectParametersPipeline {
+                bind_group_layout: build_indexed_indirect_params_bind_group_layout,
+                pipeline_id: None,
+            },
+            build_non_indexed_indirect_params: BuildIndirectParametersPipeline {
+                bind_group_layout: build_non_indexed_indirect_params_bind_group_layout,
+                pipeline_id: None,
+            },
         }
     }
 }
@@ -357,22 +709,66 @@ fn preprocess_direct_bind_group_layout_entries() -> DynamicBindGroupLayoutEntrie
     )
 }
 
-/// A system that specializes the `mesh_preprocess.wgsl` pipelines if necessary.
+// Returns the first 3 bind group layout entries shared between all invocations
+// of the indirect parameters building shader.
+fn build_indirect_params_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries {
+    DynamicBindGroupLayoutEntries::sequential(
+        ShaderStages::COMPUTE,
+        (
+            storage_buffer_read_only::<MeshInputUniform>(false),
+            storage_buffer_read_only::<IndirectParametersMetadata>(false),
+            storage_buffer::<IndirectBatchSet>(false),
+        ),
+    )
+}
+
+/// A system that specializes the `mesh_preprocess.wgsl` and
+/// `build_indirect_params.wgsl` pipelines if necessary.
 pub fn prepare_preprocess_pipelines(
     pipeline_cache: Res<PipelineCache>,
-    mut pipelines: ResMut<SpecializedComputePipelines<PreprocessPipeline>>,
+    render_device: Res<RenderDevice>,
+    mut specialized_preprocess_pipelines: ResMut<SpecializedComputePipelines<PreprocessPipeline>>,
+    mut specialized_build_indirect_parameters_pipelines: ResMut<
+        SpecializedComputePipelines<BuildIndirectParametersPipeline>,
+    >,
     mut preprocess_pipelines: ResMut<PreprocessPipelines>,
 ) {
-    preprocess_pipelines.direct.prepare(
+    preprocess_pipelines.direct_preprocess.prepare(
         &pipeline_cache,
-        &mut pipelines,
+        &mut specialized_preprocess_pipelines,
         PreprocessPipelineKey::empty(),
     );
-    preprocess_pipelines.gpu_culling.prepare(
+    preprocess_pipelines.gpu_culling_preprocess.prepare(
         &pipeline_cache,
-        &mut pipelines,
+        &mut specialized_preprocess_pipelines,
         PreprocessPipelineKey::GPU_CULLING,
     );
+
+    let mut build_indirect_parameters_pipeline_key = BuildIndirectParametersPipelineKey::empty();
+
+    // If the GPU and driver support `multi_draw_indirect_count`, tell the
+    // shader that.
+    if render_device
+        .wgpu_device()
+        .features()
+        .contains(WgpuFeatures::MULTI_DRAW_INDIRECT_COUNT)
+    {
+        build_indirect_parameters_pipeline_key
+            .insert(BuildIndirectParametersPipelineKey::MULTI_DRAW_INDIRECT_COUNT_SUPPORTED);
+    }
+
+    preprocess_pipelines.build_indexed_indirect_params.prepare(
+        &pipeline_cache,
+        &mut specialized_build_indirect_parameters_pipelines,
+        build_indirect_parameters_pipeline_key | BuildIndirectParametersPipelineKey::INDEXED,
+    );
+    preprocess_pipelines
+        .build_non_indexed_indirect_params
+        .prepare(
+            &pipeline_cache,
+            &mut specialized_build_indirect_parameters_pipelines,
+            build_indirect_parameters_pipeline_key,
+        );
 }
 
 impl PreprocessPipeline {
@@ -391,96 +787,344 @@ impl PreprocessPipeline {
     }
 }
 
+impl SpecializedComputePipeline for BuildIndirectParametersPipeline {
+    type Key = BuildIndirectParametersPipelineKey;
+
+    fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor {
+        let mut shader_defs = vec![];
+        if key.contains(BuildIndirectParametersPipelineKey::INDEXED) {
+            shader_defs.push("INDEXED".into());
+        }
+        if key.contains(BuildIndirectParametersPipelineKey::MULTI_DRAW_INDIRECT_COUNT_SUPPORTED) {
+            shader_defs.push("MULTI_DRAW_INDIRECT_COUNT_SUPPORTED".into());
+        }
+
+        ComputePipelineDescriptor {
+            label: if key.contains(BuildIndirectParametersPipelineKey::INDEXED) {
+                Some("build indexed indirect parameters".into())
+            } else {
+                Some("build non-indexed indirect parameters".into())
+            },
+            layout: vec![self.bind_group_layout.clone()],
+            push_constant_ranges: vec![],
+            shader: BUILD_INDIRECT_PARAMS_SHADER_HANDLE,
+            shader_defs,
+            entry_point: "main".into(),
+            zero_initialize_workgroup_memory: false,
+        }
+    }
+}
+
+impl BuildIndirectParametersPipeline {
+    fn prepare(
+        &mut self,
+        pipeline_cache: &PipelineCache,
+        pipelines: &mut SpecializedComputePipelines<BuildIndirectParametersPipeline>,
+        key: BuildIndirectParametersPipelineKey,
+    ) {
+        if self.pipeline_id.is_some() {
+            return;
+        }
+
+        let build_indirect_parameters_pipeline_id = pipelines.specialize(pipeline_cache, self, key);
+        self.pipeline_id = Some(build_indirect_parameters_pipeline_id);
+    }
+}
+
 /// A system that attaches the mesh uniform buffers to the bind groups for the
 /// variants of the mesh preprocessing compute shader.
 pub fn prepare_preprocess_bind_groups(
     mut commands: Commands,
     render_device: Res<RenderDevice>,
     batched_instance_buffers: Res<BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>,
-    indirect_parameters_buffer: Res<IndirectParametersBuffer>,
+    indirect_parameters_buffers: Res<IndirectParametersBuffers>,
     mesh_culling_data_buffer: Res<MeshCullingDataBuffer>,
     view_uniforms: Res<ViewUniforms>,
     pipelines: Res<PreprocessPipelines>,
 ) {
     // Grab the `BatchedInstanceBuffers`.
+    let batched_instance_buffers = batched_instance_buffers.into_inner();
+
+    let Some(current_input_buffer) = batched_instance_buffers
+        .current_input_buffer
+        .buffer()
+        .buffer()
+    else {
+        return;
+    };
+
+    // Keep track of whether any of the phases will be drawn indirectly. If
+    // they are, then we'll need bind groups for the indirect parameters
+    // building shader too.
+    let mut any_indirect = false;
+
+    for (view, phase_work_item_buffers) in &batched_instance_buffers.work_item_buffers {
+        let mut bind_groups = TypeIdMap::default();
+
+        for (&phase_id, work_item_buffers) in phase_work_item_buffers {
+            if let Some(bind_group) = prepare_preprocess_bind_group_for_phase(
+                &render_device,
+                &pipelines,
+                &view_uniforms,
+                &indirect_parameters_buffers,
+                &mesh_culling_data_buffer,
+                batched_instance_buffers,
+                work_item_buffers,
+                &mut any_indirect,
+            ) {
+                bind_groups.insert(phase_id, bind_group);
+            }
+        }
+
+        commands
+            .entity(*view)
+            .insert(PreprocessBindGroups(bind_groups));
+    }
+
+    // If any of the phases will be drawn indirectly, create the bind groups for
+    // the indirect parameters building shader.
+    if any_indirect {
+        create_build_indirect_parameters_bind_groups(
+            &mut commands,
+            &render_device,
+            &pipelines,
+            current_input_buffer,
+            &indirect_parameters_buffers,
+        );
+    }
+}
+
+// Creates the bind group for the GPU preprocessing shader for a single phase
+// for a single view.
+#[expect(
+    clippy::too_many_arguments,
+    reason = "it's a system that needs a bunch of parameters"
+)]
+fn prepare_preprocess_bind_group_for_phase(
+    render_device: &RenderDevice,
+    pipelines: &PreprocessPipelines,
+    view_uniforms: &ViewUniforms,
+    indirect_parameters_buffers: &IndirectParametersBuffers,
+    mesh_culling_data_buffer: &MeshCullingDataBuffer,
+    batched_instance_buffers: &BatchedInstanceBuffers<MeshUniform, MeshInputUniform>,
+    work_item_buffers: &PreprocessWorkItemBuffers,
+    any_indirect: &mut bool,
+) -> Option<PhasePreprocessBindGroups> {
+    // Get the current input buffers.
+
     let BatchedInstanceBuffers {
         data_buffer: ref data_buffer_vec,
-        work_item_buffers: ref index_buffers,
         current_input_buffer: ref current_input_buffer_vec,
         previous_input_buffer: ref previous_input_buffer_vec,
-    } = batched_instance_buffers.into_inner();
+        ..
+    } = batched_instance_buffers;
 
-    let (Some(current_input_buffer), Some(previous_input_buffer), Some(data_buffer)) = (
-        current_input_buffer_vec.buffer().buffer(),
-        previous_input_buffer_vec.buffer().buffer(),
-        data_buffer_vec.buffer(),
-    ) else {
-        return;
-    };
+    let current_input_buffer = current_input_buffer_vec.buffer().buffer()?;
+    let previous_input_buffer = previous_input_buffer_vec.buffer().buffer()?;
+    let data_buffer = data_buffer_vec.buffer()?;
 
-    for (view, index_buffer_vec) in index_buffers {
-        let Some(index_buffer) = index_buffer_vec.buffer.buffer() else {
-            continue;
-        };
+    // Build the appropriate bind group, depending on whether we're drawing
+    // directly or indirectly.
 
-        // Don't use `as_entire_binding()` here; the shader reads the array
-        // length and the underlying buffer may be longer than the actual size
-        // of the vector.
-        let index_buffer_size = NonZero::<u64>::try_from(
-            index_buffer_vec.buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()),
-        )
-        .ok();
-
-        let bind_group = if !index_buffer_vec.no_indirect_drawing {
-            let (
-                Some(indirect_parameters_buffer),
-                Some(mesh_culling_data_buffer),
-                Some(view_uniforms_binding),
-            ) = (
-                indirect_parameters_buffer.buffer(),
-                mesh_culling_data_buffer.buffer(),
-                view_uniforms.uniforms.binding(),
+    match *work_item_buffers {
+        PreprocessWorkItemBuffers::Direct(ref work_item_buffer_vec) => {
+            let work_item_buffer = work_item_buffer_vec.buffer()?;
+
+            // Don't use `as_entire_binding()` here; the shader reads the array
+            // length and the underlying buffer may be longer than the actual size
+            // of the vector.
+            let work_item_buffer_size = NonZero::<u64>::try_from(
+                work_item_buffer_vec.len() as u64 * u64::from(PreprocessWorkItem::min_size()),
             )
-            else {
-                continue;
-            };
+            .ok();
 
-            PreprocessBindGroup(render_device.create_bind_group(
-                "preprocess_gpu_culling_bind_group",
-                &pipelines.gpu_culling.bind_group_layout,
-                &BindGroupEntries::sequential((
-                    current_input_buffer.as_entire_binding(),
-                    previous_input_buffer.as_entire_binding(),
-                    BindingResource::Buffer(BufferBinding {
-                        buffer: index_buffer,
-                        offset: 0,
-                        size: index_buffer_size,
-                    }),
-                    data_buffer.as_entire_binding(),
-                    indirect_parameters_buffer.as_entire_binding(),
-                    mesh_culling_data_buffer.as_entire_binding(),
-                    view_uniforms_binding,
-                )),
+            Some(PhasePreprocessBindGroups::Direct(
+                render_device.create_bind_group(
+                    "preprocess_direct_bind_group",
+                    &pipelines.direct_preprocess.bind_group_layout,
+                    &BindGroupEntries::sequential((
+                        current_input_buffer.as_entire_binding(),
+                        previous_input_buffer.as_entire_binding(),
+                        BindingResource::Buffer(BufferBinding {
+                            buffer: work_item_buffer,
+                            offset: 0,
+                            size: work_item_buffer_size,
+                        }),
+                        data_buffer.as_entire_binding(),
+                    )),
+                ),
             ))
-        } else {
-            PreprocessBindGroup(render_device.create_bind_group(
-                "preprocess_direct_bind_group",
-                &pipelines.direct.bind_group_layout,
+        }
+
+        PreprocessWorkItemBuffers::Indirect {
+            indexed: ref indexed_buffer,
+            non_indexed: ref non_indexed_buffer,
+        } => {
+            // For indirect drawing, we need two separate bind groups, one for indexed meshes and one for non-indexed meshes.
+
+            let mesh_culling_data_buffer = mesh_culling_data_buffer.buffer()?;
+            let view_uniforms_binding = view_uniforms.uniforms.binding()?;
+
+            let indexed_bind_group = match (
+                indexed_buffer.buffer(),
+                indirect_parameters_buffers.indexed_metadata_buffer(),
+            ) {
+                (
+                    Some(indexed_work_item_buffer),
+                    Some(indexed_indirect_parameters_metadata_buffer),
+                ) => {
+                    // Don't use `as_entire_binding()` here; the shader reads the array
+                    // length and the underlying buffer may be longer than the actual size
+                    // of the vector.
+                    let indexed_work_item_buffer_size = NonZero::<u64>::try_from(
+                        indexed_buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()),
+                    )
+                    .ok();
+                    Some(render_device.create_bind_group(
+                        "preprocess_indexed_indirect_gpu_culling_bind_group",
+                        &pipelines.gpu_culling_preprocess.bind_group_layout,
+                        &BindGroupEntries::sequential((
+                            current_input_buffer.as_entire_binding(),
+                            previous_input_buffer.as_entire_binding(),
+                            BindingResource::Buffer(BufferBinding {
+                                buffer: indexed_work_item_buffer,
+                                offset: 0,
+                                size: indexed_work_item_buffer_size,
+                            }),
+                            data_buffer.as_entire_binding(),
+                            indexed_indirect_parameters_metadata_buffer.as_entire_binding(),
+                            mesh_culling_data_buffer.as_entire_binding(),
+                            view_uniforms_binding.clone(),
+                        )),
+                    ))
+                }
+                _ => None,
+            };
+
+            let non_indexed_bind_group = match (
+                non_indexed_buffer.buffer(),
+                indirect_parameters_buffers.non_indexed_metadata_buffer(),
+            ) {
+                (
+                    Some(non_indexed_work_item_buffer),
+                    Some(non_indexed_indirect_parameters_metadata_buffer),
+                ) => {
+                    // Don't use `as_entire_binding()` here; the shader reads the array
+                    // length and the underlying buffer may be longer than the actual size
+                    // of the vector.
+                    let non_indexed_work_item_buffer_size = NonZero::<u64>::try_from(
+                        non_indexed_buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()),
+                    )
+                    .ok();
+                    Some(render_device.create_bind_group(
+                        "preprocess_non_indexed_indirect_gpu_culling_bind_group",
+                        &pipelines.gpu_culling_preprocess.bind_group_layout,
+                        &BindGroupEntries::sequential((
+                            current_input_buffer.as_entire_binding(),
+                            previous_input_buffer.as_entire_binding(),
+                            BindingResource::Buffer(BufferBinding {
+                                buffer: non_indexed_work_item_buffer,
+                                offset: 0,
+                                size: non_indexed_work_item_buffer_size,
+                            }),
+                            data_buffer.as_entire_binding(),
+                            non_indexed_indirect_parameters_metadata_buffer.as_entire_binding(),
+                            mesh_culling_data_buffer.as_entire_binding(),
+                            view_uniforms_binding,
+                        )),
+                    ))
+                }
+                _ => None,
+            };
+
+            // Note that we found phases that will be drawn indirectly so that
+            // we remember to build the bind groups for the indirect parameter
+            // building shader.
+            *any_indirect = true;
+
+            Some(PhasePreprocessBindGroups::Indirect {
+                indexed: indexed_bind_group,
+                non_indexed: non_indexed_bind_group,
+            })
+        }
+    }
+}
+
+/// A system that creates bind groups from the indirect parameters metadata and
+/// data buffers for the indirect parameter building shader.
+fn create_build_indirect_parameters_bind_groups(
+    commands: &mut Commands,
+    render_device: &RenderDevice,
+    pipelines: &PreprocessPipelines,
+    current_input_buffer: &Buffer,
+    indirect_parameters_buffer: &IndirectParametersBuffers,
+) {
+    commands.insert_resource(BuildIndirectParametersBindGroups {
+        indexed: match (
+            indirect_parameters_buffer.indexed_metadata_buffer(),
+            indirect_parameters_buffer.indexed_data_buffer(),
+            indirect_parameters_buffer.indexed_batch_sets_buffer(),
+        ) {
+            (
+                Some(indexed_indirect_parameters_metadata_buffer),
+                Some(indexed_indirect_parameters_data_buffer),
+                Some(indexed_batch_sets_buffer),
+            ) => Some(render_device.create_bind_group(
+                "build_indexed_indirect_parameters_bind_group",
+                &pipelines.build_indexed_indirect_params.bind_group_layout,
                 &BindGroupEntries::sequential((
                     current_input_buffer.as_entire_binding(),
-                    previous_input_buffer.as_entire_binding(),
-                    BindingResource::Buffer(BufferBinding {
-                        buffer: index_buffer,
+                    // Don't use `as_entire_binding` here; the shader reads
+                    // the length and `RawBufferVec` overallocates.
+                    BufferBinding {
+                        buffer: indexed_indirect_parameters_metadata_buffer,
                         offset: 0,
-                        size: index_buffer_size,
-                    }),
-                    data_buffer.as_entire_binding(),
+                        size: NonZeroU64::new(
+                            indirect_parameters_buffer.indexed_batch_count() as u64
+                                * size_of::<IndirectParametersMetadata>() as u64,
+                        ),
+                    },
+                    indexed_batch_sets_buffer.as_entire_binding(),
+                    indexed_indirect_parameters_data_buffer.as_entire_binding(),
                 )),
-            ))
-        };
-
-        commands.entity(*view).insert(bind_group);
-    }
+            )),
+            _ => None,
+        },
+        non_indexed: match (
+            indirect_parameters_buffer.non_indexed_metadata_buffer(),
+            indirect_parameters_buffer.non_indexed_data_buffer(),
+            indirect_parameters_buffer.non_indexed_batch_sets_buffer(),
+        ) {
+            (
+                Some(non_indexed_indirect_parameters_metadata_buffer),
+                Some(non_indexed_indirect_parameters_data_buffer),
+                Some(non_indexed_batch_sets_buffer),
+            ) => Some(
+                render_device.create_bind_group(
+                    "build_non_indexed_indirect_parameters_bind_group",
+                    &pipelines
+                        .build_non_indexed_indirect_params
+                        .bind_group_layout,
+                    &BindGroupEntries::sequential((
+                        current_input_buffer.as_entire_binding(),
+                        // Don't use `as_entire_binding` here; the shader reads
+                        // the length and `RawBufferVec` overallocates.
+                        BufferBinding {
+                            buffer: non_indexed_indirect_parameters_metadata_buffer,
+                            offset: 0,
+                            size: NonZeroU64::new(
+                                indirect_parameters_buffer.non_indexed_batch_count() as u64
+                                    * size_of::<IndirectParametersMetadata>() as u64,
+                            ),
+                        },
+                        non_indexed_batch_sets_buffer.as_entire_binding(),
+                        non_indexed_indirect_parameters_data_buffer.as_entire_binding(),
+                    )),
+                ),
+            ),
+            _ => None,
+        },
+    });
 }
 
 /// Writes the information needed to do GPU mesh culling to the GPU.
diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs
index 90614772e3f1c..4a1187400e5fe 100644
--- a/crates/bevy_pbr/src/render/light.rs
+++ b/crates/bevy_pbr/src/render/light.rs
@@ -614,8 +614,18 @@ pub struct ViewShadowBindings {
     pub directional_light_depth_texture_view: TextureView,
 }
 
+/// A component that holds the shadow cascade views for all shadow cascades
+/// associated with a camera.
+///
+/// Note: Despite the name, this component actually holds the shadow cascade
+/// views, not the lights themselves.
 #[derive(Component)]
 pub struct ViewLightEntities {
+    /// The shadow cascade views for all shadow cascades associated with a
+    /// camera.
+    ///
+    /// Note: Despite the name, this component actually holds the shadow cascade
+    /// views, not the lights themselves.
     pub lights: Vec<Entity>,
 }
 
@@ -701,6 +711,7 @@ pub fn prepare_lights(
     views: Query<
         (
             Entity,
+            MainEntity,
             &ExtractedView,
             &ExtractedClusterConfig,
             Option<&RenderLayers>,
@@ -1118,6 +1129,7 @@ pub fn prepare_lights(
     // set up light data for each view
     for (
         entity,
+        camera_main_entity,
         extracted_view,
         clusters,
         maybe_layers,
@@ -1238,8 +1250,11 @@ pub fn prepare_lights(
                     })
                     .clone();
 
-                let retained_view_entity =
-                    RetainedViewEntity::new(*light_main_entity, face_index as u32);
+                let retained_view_entity = RetainedViewEntity::new(
+                    *light_main_entity,
+                    Some(camera_main_entity.into()),
+                    face_index as u32,
+                );
 
                 commands.entity(view_light_entity).insert((
                     ShadowView {
@@ -1343,7 +1358,8 @@ pub fn prepare_lights(
 
             let view_light_entity = light_view_entities[0];
 
-            let retained_view_entity = RetainedViewEntity::new(*light_main_entity, 0);
+            let retained_view_entity =
+                RetainedViewEntity::new(*light_main_entity, Some(camera_main_entity.into()), 0);
 
             commands.entity(view_light_entity).insert((
                 ShadowView {
@@ -1476,8 +1492,11 @@ pub fn prepare_lights(
                 frustum.half_spaces[4] =
                     HalfSpace::new(frustum.half_spaces[4].normal().extend(f32::INFINITY));
 
-                let retained_view_entity =
-                    RetainedViewEntity::new(*light_main_entity, cascade_index as u32);
+                let retained_view_entity = RetainedViewEntity::new(
+                    *light_main_entity,
+                    Some(camera_main_entity.into()),
+                    cascade_index as u32,
+                );
 
                 commands.entity(view_light_entity).insert((
                     ShadowView {
@@ -1764,6 +1783,12 @@ pub struct ShadowBatchSetKey {
     pub index_slab: Option<SlabId>,
 }
 
+impl PhaseItemBatchSetKey for ShadowBatchSetKey {
+    fn indexed(&self) -> bool {
+        self.index_slab.is_some()
+    }
+}
+
 /// Data used to bin each object in the shadow map phase.
 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct ShadowBinKey {
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 6df24bbaa8858..dd92ef7828eec 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -20,7 +20,8 @@ use bevy_math::{Affine3, Rect, UVec2, Vec3, Vec4};
 use bevy_render::{
     batching::{
         gpu_preprocessing::{
-            self, GpuPreprocessingSupport, IndirectParameters, IndirectParametersBuffer,
+            self, GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers,
+            IndirectParametersIndexed, IndirectParametersMetadata, IndirectParametersNonIndexed,
             InstanceInputUniformBuffer,
         },
         no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching,
@@ -352,6 +353,17 @@ pub struct MeshInputUniform {
     /// [`MeshAllocator`]). This value stores the offset of the first vertex in
     /// this mesh in that buffer.
     pub first_vertex_index: u32,
+    /// The index of this mesh's first index in the index buffer, if any.
+    ///
+    /// Multiple meshes can be packed into a single index buffer (see
+    /// [`MeshAllocator`]). This value stores the offset of the first index in
+    /// this mesh in that buffer.
+    ///
+    /// If this mesh isn't indexed, this value is ignored.
+    pub first_index_index: u32,
+    /// For an indexed mesh, the number of indices that make it up; for a
+    /// non-indexed mesh, the number of vertices in it.
+    pub index_count: u32,
     /// The current skin index, or `u32::MAX` if there's no skin.
     pub current_skin_index: u32,
     /// The previous skin index, or `u32::MAX` if there's no previous skin.
@@ -361,6 +373,10 @@ pub struct MeshInputUniform {
     /// Low 16 bits: index of the material inside the bind group data.
     /// High 16 bits: index of the lightmap in the binding array.
     pub material_and_lightmap_bind_group_slot: u32,
+    /// Padding.
+    pub pad_a: u32,
+    /// Padding.
+    pub pad_b: u32,
 }
 
 /// Information about each mesh instance needed to cull it on GPU.
@@ -907,11 +923,23 @@ impl RenderMeshInstanceGpuBuilder {
         render_lightmaps: &RenderLightmaps,
         skin_indices: &SkinIndices,
     ) -> u32 {
-        let first_vertex_index = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id)
-        {
-            Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
-            None => 0,
-        };
+        let (first_vertex_index, vertex_count) =
+            match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) {
+                Some(mesh_vertex_slice) => (
+                    mesh_vertex_slice.range.start,
+                    mesh_vertex_slice.range.end - mesh_vertex_slice.range.start,
+                ),
+                None => (0, 0),
+            };
+        let (mesh_is_indexed, first_index_index, index_count) =
+            match mesh_allocator.mesh_index_slice(&self.shared.mesh_asset_id) {
+                Some(mesh_index_slice) => (
+                    true,
+                    mesh_index_slice.range.start,
+                    mesh_index_slice.range.end - mesh_index_slice.range.start,
+                ),
+                None => (false, 0, 0),
+            };
 
         let current_skin_index = match skin_indices.current.get(&entity) {
             Some(skin_indices) => skin_indices.index(),
@@ -938,11 +966,19 @@ impl RenderMeshInstanceGpuBuilder {
             flags: self.mesh_flags.bits(),
             previous_input_index: u32::MAX,
             first_vertex_index,
+            first_index_index,
+            index_count: if mesh_is_indexed {
+                index_count
+            } else {
+                vertex_count
+            },
             current_skin_index,
             previous_skin_index,
             material_and_lightmap_bind_group_slot: u32::from(
                 self.shared.material_bindings_index.slot,
             ) | ((lightmap_slot as u32) << 16),
+            pad_a: 0,
+            pad_b: 0,
         };
 
         // Did the last frame contain this entity as well?
@@ -1698,86 +1734,31 @@ impl GetFullBatchData for MeshPipeline {
             .map(|entity| entity.current_uniform_index)
     }
 
-    fn write_batch_indirect_parameters(
-        (mesh_instances, _, meshes, mesh_allocator, _): &SystemParamItem<Self::Param>,
-        indirect_parameters_buffer: &mut IndirectParametersBuffer,
+    fn write_batch_indirect_parameters_metadata(
+        mesh_index: u32,
+        indexed: bool,
+        base_output_index: u32,
+        batch_set_index: Option<NonMaxU32>,
+        indirect_parameters_buffer: &mut IndirectParametersBuffers,
         indirect_parameters_offset: u32,
-        main_entity: MainEntity,
     ) {
-        write_batch_indirect_parameters(
-            mesh_instances,
-            meshes,
-            mesh_allocator,
-            indirect_parameters_buffer,
-            indirect_parameters_offset,
-            main_entity,
-        );
-    }
-}
-
-/// Pushes a set of [`IndirectParameters`] onto the [`IndirectParametersBuffer`]
-/// for the given mesh instance, and returns the index of those indirect
-/// parameters.
-fn write_batch_indirect_parameters(
-    mesh_instances: &RenderMeshInstances,
-    meshes: &RenderAssets<RenderMesh>,
-    mesh_allocator: &MeshAllocator,
-    indirect_parameters_buffer: &mut IndirectParametersBuffer,
-    indirect_parameters_offset: u32,
-    main_entity: MainEntity,
-) {
-    // This should only be called during GPU building.
-    let RenderMeshInstances::GpuBuilding(ref mesh_instances) = *mesh_instances else {
-        error!(
-            "`write_batch_indirect_parameters_index` should never be called in CPU mesh uniform \
-                building mode"
-        );
-        return;
-    };
-
-    let Some(mesh_instance) = mesh_instances.get(&main_entity) else {
-        return;
-    };
-    let Some(mesh) = meshes.get(mesh_instance.mesh_asset_id) else {
-        return;
-    };
-    let Some(vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id)
-    else {
-        return;
-    };
-
-    // Note that `IndirectParameters` covers both of these structures, even
-    // though they actually have distinct layouts. See the comment above that
-    // type for more information.
-    let indirect_parameters = match mesh.buffer_info {
-        RenderMeshBufferInfo::Indexed {
-            count: index_count, ..
-        } => {
-            let Some(index_buffer_slice) =
-                mesh_allocator.mesh_index_slice(&mesh_instance.mesh_asset_id)
-            else {
-                return;
-            };
-            IndirectParameters {
-                vertex_or_index_count: index_count,
-                instance_count: 0,
-                first_vertex_or_first_index: index_buffer_slice.range.start,
-                base_vertex_or_first_instance: vertex_buffer_slice.range.start,
-                first_instance: 0,
-            }
-        }
-        RenderMeshBufferInfo::NonIndexed => IndirectParameters {
-            vertex_or_index_count: mesh.vertex_count,
+        let indirect_parameters = IndirectParametersMetadata {
+            mesh_index,
+            base_output_index,
+            batch_set_index: match batch_set_index {
+                Some(batch_set_index) => u32::from(batch_set_index),
+                None => !0,
+            },
             instance_count: 0,
-            first_vertex_or_first_index: vertex_buffer_slice.range.start,
-            base_vertex_or_first_instance: 0,
-            // Use `0xffffffff` as a placeholder to tell the mesh preprocessing
-            // shader that this is a non-indexed mesh.
-            first_instance: !0,
-        },
-    };
+        };
 
-    indirect_parameters_buffer.set(indirect_parameters_offset, indirect_parameters);
+        if indexed {
+            indirect_parameters_buffer.set_indexed(indirect_parameters_offset, indirect_parameters);
+        } else {
+            indirect_parameters_buffer
+                .set_non_indexed(indirect_parameters_offset, indirect_parameters);
+        }
+    }
 }
 
 bitflags::bitflags! {
@@ -2687,12 +2668,12 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
     type Param = (
         SRes<RenderAssets<RenderMesh>>,
         SRes<RenderMeshInstances>,
-        SRes<IndirectParametersBuffer>,
+        SRes<IndirectParametersBuffers>,
         SRes<PipelineCache>,
         SRes<MeshAllocator>,
         Option<SRes<PreprocessPipelines>>,
     );
-    type ViewQuery = Has<PreprocessBindGroup>;
+    type ViewQuery = Has<PreprocessBindGroups>;
     type ItemQuery = ();
     #[inline]
     fn render<'w>(
@@ -2735,26 +2716,6 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
             return RenderCommandResult::Skip;
         };
 
-        // Calculate the indirect offset, and look up the buffer.
-        let indirect_parameters = match item.extra_index() {
-            PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => None,
-            PhaseItemExtraIndex::IndirectParametersIndex(indices) => {
-                match indirect_parameters_buffer.buffer() {
-                    None => {
-                        warn!(
-                            "Not rendering mesh because indirect parameters buffer wasn't present"
-                        );
-                        return RenderCommandResult::Skip;
-                    }
-                    Some(buffer) => Some((
-                        indices.start as u64 * size_of::<IndirectParameters>() as u64,
-                        indices.end - indices.start,
-                        buffer,
-                    )),
-                }
-            }
-        };
-
         pass.set_vertex_buffer(0, vertex_buffer_slice.buffer.slice(..));
 
         let batch_range = item.batch_range();
@@ -2774,8 +2735,8 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
 
                 pass.set_index_buffer(index_buffer_slice.buffer.slice(..), 0, *index_format);
 
-                match indirect_parameters {
-                    None => {
+                match item.extra_index() {
+                    PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => {
                         pass.draw_indexed(
                             index_buffer_slice.range.start
                                 ..(index_buffer_slice.range.start + *count),
@@ -2783,33 +2744,112 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
                             batch_range.clone(),
                         );
                     }
-                    Some((
-                        indirect_parameters_offset,
-                        indirect_parameters_count,
-                        indirect_parameters_buffer,
-                    )) => {
-                        pass.multi_draw_indexed_indirect(
-                            indirect_parameters_buffer,
-                            indirect_parameters_offset,
-                            indirect_parameters_count,
-                        );
+                    PhaseItemExtraIndex::IndirectParametersIndex {
+                        range: indirect_parameters_range,
+                        batch_set_index,
+                    } => {
+                        // Look up the indirect parameters buffer, as well as
+                        // the buffer we're going to use for
+                        // `multi_draw_indexed_indirect_count` (if available).
+                        let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = (
+                            indirect_parameters_buffer.indexed_data_buffer(),
+                            indirect_parameters_buffer.indexed_batch_sets_buffer(),
+                        ) else {
+                            warn!(
+                                "Not rendering mesh because indexed indirect parameters buffer \
+                                 wasn't present",
+                            );
+                            return RenderCommandResult::Skip;
+                        };
+
+                        // Calculate the location of the indirect parameters
+                        // within the buffer.
+                        let indirect_parameters_offset = indirect_parameters_range.start as u64
+                            * size_of::<IndirectParametersIndexed>() as u64;
+                        let indirect_parameters_count =
+                            indirect_parameters_range.end - indirect_parameters_range.start;
+
+                        // If we're using `multi_draw_indirect_count`, take the
+                        // number of batches from the appropriate position in
+                        // the batch sets buffer. Otherwise, supply the size of
+                        // the batch set.
+                        match batch_set_index {
+                            Some(batch_set_index) => {
+                                let count_offset = u32::from(batch_set_index)
+                                    * (size_of::<IndirectBatchSet>() as u32);
+                                pass.multi_draw_indexed_indirect_count(
+                                    indirect_parameters_buffer,
+                                    indirect_parameters_offset,
+                                    batch_sets_buffer,
+                                    count_offset as u64,
+                                    indirect_parameters_count,
+                                );
+                            }
+                            None => {
+                                pass.multi_draw_indexed_indirect(
+                                    indirect_parameters_buffer,
+                                    indirect_parameters_offset,
+                                    indirect_parameters_count,
+                                );
+                            }
+                        }
                     }
                 }
             }
-            RenderMeshBufferInfo::NonIndexed => match indirect_parameters {
-                None => {
+
+            RenderMeshBufferInfo::NonIndexed => match item.extra_index() {
+                PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => {
                     pass.draw(vertex_buffer_slice.range, batch_range.clone());
                 }
-                Some((
-                    indirect_parameters_offset,
-                    indirect_parameters_count,
-                    indirect_parameters_buffer,
-                )) => {
-                    pass.multi_draw_indirect(
-                        indirect_parameters_buffer,
-                        indirect_parameters_offset,
-                        indirect_parameters_count,
-                    );
+                PhaseItemExtraIndex::IndirectParametersIndex {
+                    range: indirect_parameters_range,
+                    batch_set_index,
+                } => {
+                    // Look up the indirect parameters buffer, as well as the
+                    // buffer we're going to use for
+                    // `multi_draw_indirect_count` (if available).
+                    let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = (
+                        indirect_parameters_buffer.non_indexed_data_buffer(),
+                        indirect_parameters_buffer.non_indexed_batch_sets_buffer(),
+                    ) else {
+                        warn!(
+                            "Not rendering mesh because non-indexed indirect parameters buffer \
+                             wasn't present"
+                        );
+                        return RenderCommandResult::Skip;
+                    };
+
+                    // Calculate the location of the indirect parameters within
+                    // the buffer.
+                    let indirect_parameters_offset = indirect_parameters_range.start as u64
+                        * size_of::<IndirectParametersNonIndexed>() as u64;
+                    let indirect_parameters_count =
+                        indirect_parameters_range.end - indirect_parameters_range.start;
+
+                    // If we're using `multi_draw_indirect_count`, take the
+                    // number of batches from the appropriate position in the
+                    // batch sets buffer. Otherwise, supply the size of the
+                    // batch set.
+                    match batch_set_index {
+                        Some(batch_set_index) => {
+                            let count_offset =
+                                u32::from(batch_set_index) * (size_of::<IndirectBatchSet>() as u32);
+                            pass.multi_draw_indirect_count(
+                                indirect_parameters_buffer,
+                                indirect_parameters_offset,
+                                batch_sets_buffer,
+                                count_offset as u64,
+                                indirect_parameters_count,
+                            );
+                        }
+                        None => {
+                            pass.multi_draw_indirect(
+                                indirect_parameters_buffer,
+                                indirect_parameters_offset,
+                                indirect_parameters_count,
+                            );
+                        }
+                    }
                 }
             },
         }
diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
index 74f527a374949..df73454a3e880 100644
--- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
+++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
@@ -8,29 +8,10 @@
 // so that TAA works.
 
 #import bevy_pbr::mesh_types::{Mesh, MESH_FLAGS_NO_FRUSTUM_CULLING_BIT}
-#import bevy_pbr::mesh_preprocess_types::IndirectParameters
+#import bevy_pbr::mesh_preprocess_types::{MeshInput, IndirectParametersMetadata}
 #import bevy_render::maths
 #import bevy_render::view::View
 
-// Per-frame data that the CPU supplies to the GPU.
-struct MeshInput {
-    // The model transform.
-    world_from_local: mat3x4<f32>,
-    // The lightmap UV rect, packed into 64 bits.
-    lightmap_uv_rect: vec2<u32>,
-    // Various flags.
-    flags: u32,
-    // The index of this mesh's `MeshInput` in the `previous_input` array, if
-    // applicable. If not present, this is `u32::MAX`.
-    previous_input_index: u32,
-    first_vertex_index: u32,
-    current_skin_index: u32,
-    previous_skin_index: u32,
-    // Low 16 bits: index of the material inside the bind group data.
-    // High 16 bits: index of the lightmap in the binding array.
-    material_and_lightmap_bind_group_slot: u32,
-}
-
 // Information about each mesh instance needed to cull it on GPU.
 //
 // At the moment, this just consists of its axis-aligned bounding box (AABB).
@@ -68,7 +49,8 @@ struct PreprocessWorkItem {
 
 #ifdef INDIRECT
 // The array of indirect parameters for drawcalls.
-@group(0) @binding(4) var<storage, read_write> indirect_parameters: array<IndirectParameters>;
+@group(0) @binding(4) var<storage, read_write> indirect_parameters_metadata:
+    array<IndirectParametersMetadata>;
 #endif
 
 #ifdef FRUSTUM_CULLING
@@ -167,28 +149,15 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
     }
 
     // Figure out the output index. In indirect mode, this involves bumping the
-    // instance index in the indirect parameters structure. Otherwise, this
-    // index was directly supplied to us.
+    // instance index in the indirect parameters metadata, which
+    // `build_indirect_params.wgsl` will use to generate the actual indirect
+    // parameters. Otherwise, this index was directly supplied to us.
 #ifdef INDIRECT
     let batch_output_index =
-        atomicAdd(&indirect_parameters[indirect_parameters_index].instance_count, 1u);
-    let mesh_output_index = output_index + batch_output_index;
-
-    // If this is the first mesh in the batch, write the first instance index
-    // into the indirect parameters.
-    //
-    // We could have done this on CPU, but when we start retaining indirect
-    // parameters that will no longer be desirable, as the index of the first
-    // instance will change from frame to frame and we won't want the CPU to
-    // have to keep updating it.
-    if (batch_output_index == 0u) {
-        if (indirect_parameters[indirect_parameters_index].first_instance == 0xffffffffu) {
-            indirect_parameters[indirect_parameters_index].base_vertex_or_first_instance =
-                mesh_output_index;
-        } else {
-            indirect_parameters[indirect_parameters_index].first_instance = mesh_output_index;
-        }
-    }
+        atomicAdd(&indirect_parameters_metadata[indirect_parameters_index].instance_count, 1u);
+    let mesh_output_index =
+        indirect_parameters_metadata[indirect_parameters_index].base_output_index +
+        batch_output_index;
 #else   // INDIRECT
     let mesh_output_index = output_index;
 #endif  // INDIRECT
diff --git a/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl
index 5314e75ce34de..974a9d303aa6d 100644
--- a/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl
+++ b/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl
@@ -2,18 +2,97 @@
 
 #define_import_path bevy_pbr::mesh_preprocess_types
 
-// The `wgpu` indirect parameters structure. This is a union of two structures.
-// For more information, see the corresponding comment in
-// `gpu_preprocessing.rs`.
-struct IndirectParameters {
-    // `vertex_count` or `index_count`.
-    vertex_count_or_index_count: u32,
-    // `instance_count` in both structures.
-    instance_count: atomic<u32>,
-    // `first_vertex` or `first_index`.
-    first_vertex_or_first_index: u32,
-    // `base_vertex` or `first_instance`.
-    base_vertex_or_first_instance: u32,
-    // A read-only copy of `instance_index`.
+// Per-frame data that the CPU supplies to the GPU.
+struct MeshInput {
+    // The model transform.
+    world_from_local: mat3x4<f32>,
+    // The lightmap UV rect, packed into 64 bits.
+    lightmap_uv_rect: vec2<u32>,
+    // A set of bitflags corresponding to `MeshFlags` on the Rust side. See the
+    // `MESH_FLAGS_` flags in `mesh_types.wgsl` for a list of these.
+    flags: u32,
+    // The index of this mesh's `MeshInput` in the `previous_input` array, if
+    // applicable. If not present, this is `u32::MAX`.
+    previous_input_index: u32,
+    // The index of the first vertex in the vertex slab.
+    first_vertex_index: u32,
+    // The index of the first vertex index in the index slab.
+    //
+    // If this mesh isn't indexed, this value is ignored.
+    first_index_index: u32,
+    // For indexed meshes, the number of indices that this mesh has; for
+    // non-indexed meshes, the number of vertices that this mesh consists of.
+    index_count: u32,
+    current_skin_index: u32,
+    previous_skin_index: u32,
+    // Low 16 bits: index of the material inside the bind group data.
+    // High 16 bits: index of the lightmap in the binding array.
+    material_and_lightmap_bind_group_slot: u32,
+}
+
+// The `wgpu` indirect parameters structure for indexed meshes.
+//
+// The `build_indirect_params.wgsl` shader generates these.
+struct IndirectParametersIndexed {
+    // The number of indices that this mesh has.
+    index_count: u32,
+    // The number of instances we are to draw.
+    instance_count: u32,
+    // The offset of the first index for this mesh in the index buffer slab.
+    first_index: u32,
+    // The offset of the first vertex for this mesh in the vertex buffer slab.
+    base_vertex: u32,
+    // The index of the first mesh instance in the `Mesh` buffer.
+    first_instance: u32,
+}
+
+// The `wgpu` indirect parameters structure for non-indexed meshes.
+//
+// The `build_indirect_params.wgsl` shader generates these.
+struct IndirectParametersNonIndexed {
+    // The number of vertices that this mesh has.
+    vertex_count: u32,
+    // The number of instances we are to draw.
+    instance_count: u32,
+    // The offset of the first vertex for this mesh in the vertex buffer slab.
+    base_vertex: u32,
+    // The index of the first mesh instance in the `Mesh` buffer.
     first_instance: u32,
 }
+
+// Information needed to generate the `IndirectParametersIndexed` and
+// `IndirectParametersNonIndexed` draw commands.
+struct IndirectParametersMetadata {
+    // The index of the mesh in the `MeshInput` buffer.
+    mesh_index: u32,
+    // The index of the first instance corresponding to this batch in the `Mesh`
+    // buffer.
+    base_output_index: u32,
+    // The index of the batch set in the `IndirectBatchSet` buffer.
+    batch_set_index: u32,
+    // The number of instances that are to be drawn.
+    //
+    // The `mesh_preprocess.wgsl` shader determines this, and the
+    // `build_indirect_params.wgsl` shader copies this value into the indirect
+    // draw command.
+    instance_count: atomic<u32>,
+}
+
+// Information about each batch set.
+//
+// A *batch set* is a set of meshes that might be multi-drawn together.
+//
+// The CPU creates this structure, and the `build_indirect_params.wgsl` shader
+// modifies it. If `multi_draw_indirect_count` is in use, the GPU reads this
+// value when multi-drawing a batch set in order to determine how many commands
+// make up the batch set.
+struct IndirectBatchSet {
+    // The number of commands that make up this batch set.
+    //
+    // The CPU initializes this value to zero. The `build_indirect_params.wgsl`
+    // shader increments this value as it processes batches.
+    indirect_parameters_count: atomic<u32>,
+    // The offset of the first batch corresponding to this batch set within the
+    // `IndirectParametersIndexed` or `IndirectParametersNonIndexed` arrays.
+    indirect_parameters_base: u32,
+}
diff --git a/crates/bevy_render/src/batching/gpu_preprocessing.rs b/crates/bevy_render/src/batching/gpu_preprocessing.rs
index a98afc02de48a..2e893616f9294 100644
--- a/crates/bevy_render/src/batching/gpu_preprocessing.rs
+++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs
@@ -1,5 +1,7 @@
 //! Batching functionality when GPU preprocessing is in use.
 
+use core::any::TypeId;
+
 use bevy_app::{App, Plugin};
 use bevy_ecs::{
     entity::{Entity, EntityHashMap},
@@ -9,7 +11,7 @@ use bevy_ecs::{
     world::{FromWorld, World},
 };
 use bevy_encase_derive::ShaderType;
-use bevy_utils::default;
+use bevy_utils::{default, TypeIdMap};
 use bytemuck::{Pod, Zeroable};
 use nonmax::NonMaxU32;
 use tracing::error;
@@ -18,9 +20,9 @@ use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
 use crate::{
     render_phase::{
         BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
-        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItemExtraIndex,
-        SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases,
-        ViewSortedRenderPhases,
+        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItemBatchSetKey as _,
+        PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices,
+        ViewBinnedRenderPhases, ViewSortedRenderPhases,
     },
     render_resource::{Buffer, BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
     renderer::{RenderAdapter, RenderDevice, RenderQueue},
@@ -39,10 +41,14 @@ impl Plugin for BatchingPlugin {
         };
 
         render_app
-            .insert_resource(IndirectParametersBuffer::new())
+            .insert_resource(IndirectParametersBuffers::new())
+            .add_systems(
+                Render,
+                write_indirect_parameters_buffers.in_set(RenderSet::PrepareResourcesFlush),
+            )
             .add_systems(
                 Render,
-                write_indirect_parameters_buffer.in_set(RenderSet::PrepareResourcesFlush),
+                clear_indirect_parameters_buffers.in_set(RenderSet::ManageViews),
             );
     }
 
@@ -137,7 +143,7 @@ where
     /// corresponds to each instance.
     ///
     /// This is keyed off each view. Each view has a separate buffer.
-    pub work_item_buffers: EntityHashMap<PreprocessWorkItemBuffer>,
+    pub work_item_buffers: EntityHashMap<TypeIdMap<PreprocessWorkItemBuffers>>,
 
     /// The uniform data inputs for the current frame.
     ///
@@ -265,16 +271,68 @@ where
 }
 
 /// The buffer of GPU preprocessing work items for a single view.
-pub struct PreprocessWorkItemBuffer {
-    /// The buffer of work items.
-    pub buffer: BufferVec<PreprocessWorkItem>,
-    /// True if we're drawing directly instead of indirectly.
-    pub no_indirect_drawing: bool,
+pub enum PreprocessWorkItemBuffers {
+    /// The work items we use if we aren't using indirect drawing.
+    ///
+    /// Because we don't have to separate indexed from non-indexed meshes in
+    /// direct mode, we only have a single buffer here.
+    Direct(BufferVec<PreprocessWorkItem>),
+
+    /// The buffer of work items we use if we are using indirect drawing.
+    ///
+    /// We need to separate out indexed meshes from non-indexed meshes in this
+    /// case because the indirect parameters for these two types of meshes have
+    /// different sizes.
+    Indirect {
+        /// The buffer of work items corresponding to indexed meshes.
+        indexed: BufferVec<PreprocessWorkItem>,
+        /// The buffer of work items corresponding to non-indexed meshes.
+        non_indexed: BufferVec<PreprocessWorkItem>,
+    },
+}
+
+impl PreprocessWorkItemBuffers {
+    /// Creates a new set of buffers.
+    ///
+    /// `no_indirect_drawing` specifies whether we're drawing directly or
+    /// indirectly.
+    pub fn new(no_indirect_drawing: bool) -> Self {
+        if no_indirect_drawing {
+            PreprocessWorkItemBuffers::Direct(BufferVec::new(BufferUsages::STORAGE))
+        } else {
+            PreprocessWorkItemBuffers::Indirect {
+                indexed: BufferVec::new(BufferUsages::STORAGE),
+                non_indexed: BufferVec::new(BufferUsages::STORAGE),
+            }
+        }
+    }
+
+    /// Adds a new work item to the appropriate buffer.
+    ///
+    /// `indexed` specifies whether the work item corresponds to an indexed
+    /// mesh.
+    pub fn push(&mut self, indexed: bool, preprocess_work_item: PreprocessWorkItem) {
+        match *self {
+            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
+                buffer.push(preprocess_work_item);
+            }
+            PreprocessWorkItemBuffers::Indirect {
+                indexed: ref mut indexed_buffer,
+                non_indexed: ref mut non_indexed_buffer,
+            } => {
+                if indexed {
+                    indexed_buffer.push(preprocess_work_item);
+                } else {
+                    non_indexed_buffer.push(preprocess_work_item);
+                }
+            }
+        }
+    }
 }
 
 /// One invocation of the preprocessing shader: i.e. one mesh instance in a
 /// view.
-#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
+#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
 #[repr(C)]
 pub struct PreprocessWorkItem {
     /// The index of the batch input data in the input buffer that the shader
@@ -284,112 +342,378 @@ pub struct PreprocessWorkItem {
     /// In direct mode, this is the index of the uniform. In indirect mode, this
     /// is the first index uniform in the batch set.
     pub output_index: u32,
-    /// The index of the [`IndirectParameters`] in the
-    /// [`IndirectParametersBuffer`].
+    /// The index of the [`IndirectParametersMetadata`] in the
+    /// `IndirectParametersBuffers::indexed_metadata` or
+    /// `IndirectParametersBuffers::non_indexed_metadata`.
     pub indirect_parameters_index: u32,
 }
 
-/// The `wgpu` indirect parameters structure.
+/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
 ///
-/// This is actually a union of the two following structures:
+/// This is the variant for indexed meshes. We generate the instances of this
+/// structure in the `build_indirect_params.wgsl` compute shader.
+#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
+#[repr(C)]
+pub struct IndirectParametersIndexed {
+    /// The number of indices that this mesh has.
+    pub index_count: u32,
+    /// The number of instances we are to draw.
+    pub instance_count: u32,
+    /// The offset of the first index for this mesh in the index buffer slab.
+    pub first_index: u32,
+    /// The offset of the first vertex for this mesh in the vertex buffer slab.
+    pub base_vertex: u32,
+    /// The index of the first mesh instance in the `MeshUniform` buffer.
+    pub first_instance: u32,
+}
+
+/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
 ///
-/// ```
-/// #[repr(C)]
-/// struct ArrayIndirectParameters {
-///     vertex_count: u32,
-///     instance_count: u32,
-///     first_vertex: u32,
-///     first_instance: u32,
-/// }
+/// This is the variant for non-indexed meshes. We generate the instances of
+/// this structure in the `build_indirect_params.wgsl` compute shader.
+#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
+#[repr(C)]
+pub struct IndirectParametersNonIndexed {
+    /// The number of vertices that this mesh has.
+    pub vertex_count: u32,
+    /// The number of instances we are to draw.
+    pub instance_count: u32,
+    /// The offset of the first vertex for this mesh in the vertex buffer slab.
+    pub base_vertex: u32,
+    /// The index of the first mesh instance in the `Mesh` buffer.
+    pub first_instance: u32,
+}
+
+/// A structure, shared between CPU and GPU, that records how many instances of
+/// each mesh are actually to be drawn.
 ///
-/// #[repr(C)]
-/// struct ElementIndirectParameters {
-///     index_count: u32,
-///     instance_count: u32,
-///     first_vertex: u32,
-///     base_vertex: u32,
-///     first_instance: u32,
-/// }
-/// ```
+/// The CPU writes to this structure in order to initialize the fields other
+/// than [`Self::instance_count`]. The GPU mesh preprocessing shader increments
+/// the [`Self::instance_count`] as it determines that meshes are visible. The
+/// indirect parameter building shader reads this metadata in order to construct
+/// the indirect draw parameters.
 ///
-/// We actually generally treat these two variants identically in code. To do
-/// that, we make the following two observations:
+/// Each batch will have one instance of this structure.
+#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
+#[repr(C)]
+pub struct IndirectParametersMetadata {
+    /// The index of the mesh in the array of `MeshInputUniform`s.
+    pub mesh_index: u32,
+
+    /// The index of the first instance of this mesh in the array of
+    /// `MeshUniform`s.
+    ///
+    /// Note that this is the *first* output index in this batch. Since each
+    /// instance of this structure refers to arbitrarily many instances, the
+    /// `MeshUniform`s corresponding to this batch span the indices
+    /// `base_output_index..(base_output_index + instance_count)`.
+    pub base_output_index: u32,
+
+    /// The index of the batch set that this batch belongs to in the
+    /// [`IndirectBatchSet`] buffer.
+    ///
+    /// A *batch set* is a set of meshes that may be multi-drawn together.
+    /// Multiple batches (and therefore multiple instances of
+    /// [`IndirectParametersMetadata`] structures) can be part of the same batch
+    /// set.
+    pub batch_set_index: u32,
+
+    /// The number of instances that have been judged potentially visible.
+    ///
+    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
+    /// increments it as it culls mesh instances.
+    pub instance_count: u32,
+}
+
+/// A structure, shared between CPU and GPU, that holds the number of on-GPU
+/// indirect draw commands for each *batch set*.
 ///
-/// 1. `instance_count` is in the same place in both structures. So we can
-///     access it regardless of the structure we're looking at.
+/// A *batch set* is a set of meshes that may be multi-drawn together.
 ///
-/// 2. The second structure is one word larger than the first. Thus we need to
-///     pad out the first structure by one word in order to place both structures in
-///     an array. If we pad out `ArrayIndirectParameters` by copying the
-///     `first_instance` field into the padding, then the resulting union structure
-///     will always have a read-only copy of `first_instance` in the final word. We
-///     take advantage of this in the shader to reduce branching.
-#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
+/// If the current hardware and driver support `multi_draw_indirect_count`, the
+/// indirect parameters building shader increments
+/// [`Self::indirect_parameters_count`] as it generates indirect parameters. The
+/// `multi_draw_indirect_count` command reads
+/// [`Self::indirect_parameters_count`] in order to determine how many commands
+/// belong to each batch set.
+#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
 #[repr(C)]
-pub struct IndirectParameters {
-    /// For `ArrayIndirectParameters`, `vertex_count`; for
-    /// `ElementIndirectParameters`, `index_count`.
-    pub vertex_or_index_count: u32,
+pub struct IndirectBatchSet {
+    /// The number of indirect parameter commands (i.e. batches) in this batch
+    /// set.
+    ///
+    /// The CPU sets this value to 0 before uploading this structure to GPU. The
+    /// indirect parameters building shader increments this value as it creates
+    /// indirect parameters. Then the `multi_draw_indirect_count` command reads
+    /// this value in order to determine how many indirect draw commands to
+    /// process.
+    pub indirect_parameters_count: u32,
+
+    /// The offset within the `IndirectParametersBuffers::indexed_data` or
+    /// `IndirectParametersBuffers::non_indexed_data` of the first indirect draw
+    /// command for this batch set.
+    ///
+    /// The CPU fills out this value.
+    pub indirect_parameters_base: u32,
+}
 
-    /// The number of instances we're going to draw.
+/// The buffers containing all the information that indirect draw commands
+/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene.
+///
+/// In addition to the indirect draw buffers themselves, this structure contains
+/// the buffers that store [`IndirectParametersMetadata`], which are the
+/// structures that culling writes to so that the indirect parameter building
+/// pass can determine how many meshes are actually to be drawn.
+///
+/// These buffers will remain empty if indirect drawing isn't in use.
+#[derive(Resource)]
+pub struct IndirectParametersBuffers {
+    /// The GPU buffer that stores the indirect draw parameters for non-indexed
+    /// meshes.
     ///
-    /// This field is in the same place in both structures.
-    pub instance_count: u32,
+    /// The indirect parameters building shader writes to this buffer, while the
+    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
+    /// it to perform the draws.
+    non_indexed_data: UninitBufferVec<IndirectParametersNonIndexed>,
 
-    /// For `ArrayIndirectParameters`, `first_vertex`; for
-    /// `ElementIndirectParameters`, `first_index`.
-    pub first_vertex_or_first_index: u32,
+    /// The GPU buffer that holds the data used to construct indirect draw
+    /// parameters for non-indexed meshes.
+    ///
+    /// The GPU mesh preprocessing shader writes to this buffer, and the
+    /// indirect parameters building shader reads this buffer to construct the
+    /// indirect draw parameters.
+    non_indexed_metadata: RawBufferVec<IndirectParametersMetadata>,
 
-    /// For `ArrayIndirectParameters`, `first_instance`; for
-    /// `ElementIndirectParameters`, `base_vertex`.
-    pub base_vertex_or_first_instance: u32,
+    /// The GPU buffer that holds the number of indirect draw commands for each
+    /// phase of each view, for non-indexed meshes.
+    ///
+    /// The indirect parameters building shader writes to this buffer, and the
+    /// `multi_draw_indirect_count` command reads from it in order to know how
+    /// many indirect draw commands to process.
+    non_indexed_batch_sets: RawBufferVec<IndirectBatchSet>,
 
-    /// For `ArrayIndirectParameters`, this is padding; for
-    /// `ElementIndirectParameters`, this is `first_instance`.
+    /// The GPU buffer that stores the indirect draw parameters for indexed
+    /// meshes.
     ///
-    /// Conventionally, we copy `first_instance` into this field when padding
-    /// out `ArrayIndirectParameters`. That way, shader code can read this value
-    /// at the same place, regardless of the specific structure this represents.
-    pub first_instance: u32,
-}
+    /// The indirect parameters building shader writes to this buffer, while the
+    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
+    /// it to perform the draws.
+    indexed_data: UninitBufferVec<IndirectParametersIndexed>,
 
-/// The buffer containing the list of [`IndirectParameters`], for draw commands.
-#[derive(Resource)]
-pub struct IndirectParametersBuffer {
-    /// The actual buffer.
-    buffer: RawBufferVec<IndirectParameters>,
+    /// The GPU buffer that holds the data used to construct indirect draw
+    /// parameters for indexed meshes.
+    ///
+    /// The GPU mesh preprocessing shader writes to this buffer, and the
+    /// indirect parameters building shader reads this buffer to construct the
+    /// indirect draw parameters.
+    indexed_metadata: RawBufferVec<IndirectParametersMetadata>,
+
+    /// The GPU buffer that holds the number of indirect draw commands for each
+    /// phase of each view, for indexed meshes.
+    ///
+    /// The indirect parameters building shader writes to this buffer, and the
+    /// `multi_draw_indirect_count` command reads from it in order to know how
+    /// many indirect draw commands to process.
+    indexed_batch_sets: RawBufferVec<IndirectBatchSet>,
 }
 
-impl IndirectParametersBuffer {
-    /// Creates the indirect parameters buffer.
-    pub fn new() -> IndirectParametersBuffer {
-        IndirectParametersBuffer {
-            buffer: RawBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT),
+impl IndirectParametersBuffers {
+    /// Creates the indirect parameters buffers.
+    pub fn new() -> IndirectParametersBuffers {
+        IndirectParametersBuffers {
+            non_indexed_data: UninitBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT),
+            non_indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE),
+            non_indexed_batch_sets: RawBufferVec::new(
+                BufferUsages::STORAGE | BufferUsages::INDIRECT,
+            ),
+            indexed_data: UninitBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT),
+            indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE),
+            indexed_batch_sets: RawBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT),
         }
     }
 
-    /// Returns the underlying GPU buffer.
+    /// Returns the GPU buffer that stores the indirect draw parameters for
+    /// indexed meshes.
+    ///
+    /// The indirect parameters building shader writes to this buffer, while the
+    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
+    /// it to perform the draws.
+    #[inline]
+    pub fn indexed_data_buffer(&self) -> Option<&Buffer> {
+        self.indexed_data.buffer()
+    }
+
+    /// Returns the GPU buffer that holds the data used to construct indirect
+    /// draw parameters for indexed meshes.
+    ///
+    /// The GPU mesh preprocessing shader writes to this buffer, and the
+    /// indirect parameters building shader reads this buffer to construct the
+    /// indirect draw parameters.
+    #[inline]
+    pub fn indexed_metadata_buffer(&self) -> Option<&Buffer> {
+        self.indexed_metadata.buffer()
+    }
+
+    /// Returns the GPU buffer that holds the number of indirect draw commands
+    /// for each phase of each view, for indexed meshes.
+    ///
+    /// The indirect parameters building shader writes to this buffer, and the
+    /// `multi_draw_indirect_count` command reads from it in order to know how
+    /// many indirect draw commands to process.
+    #[inline]
+    pub fn indexed_batch_sets_buffer(&self) -> Option<&Buffer> {
+        self.indexed_batch_sets.buffer()
+    }
+
+    /// Returns the GPU buffer that stores the indirect draw parameters for
+    /// non-indexed meshes.
+    ///
+    /// The indirect parameters building shader writes to this buffer, while the
+    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
+    /// it to perform the draws.
+    #[inline]
+    pub fn non_indexed_data_buffer(&self) -> Option<&Buffer> {
+        self.non_indexed_data.buffer()
+    }
+
+    /// Returns the GPU buffer that holds the data used to construct indirect
+    /// draw parameters for non-indexed meshes.
+    ///
+    /// The GPU mesh preprocessing shader writes to this buffer, and the
+    /// indirect parameters building shader reads this buffer to construct the
+    /// indirect draw parameters.
+    #[inline]
+    pub fn non_indexed_metadata_buffer(&self) -> Option<&Buffer> {
+        self.non_indexed_metadata.buffer()
+    }
+
+    /// Returns the GPU buffer that holds the number of indirect draw commands
+    /// for each phase of each view, for non-indexed meshes.
+    ///
+    /// The indirect parameters building shader writes to this buffer, and the
+    /// `multi_draw_indirect_count` command reads from it in order to know how
+    /// many indirect draw commands to process.
     #[inline]
-    pub fn buffer(&self) -> Option<&Buffer> {
-        self.buffer.buffer()
+    pub fn non_indexed_batch_sets_buffer(&self) -> Option<&Buffer> {
+        self.non_indexed_batch_sets.buffer()
     }
 
-    /// Adds a new set of indirect parameters to the buffer.
-    pub fn allocate(&mut self, count: u32) -> u32 {
-        let length = self.buffer.len();
-        self.buffer.reserve_internal(count as usize);
+    /// Reserves space for `count` new batches corresponding to indexed meshes.
+    ///
+    /// This allocates in both the [`Self::indexed_metadata`] and
+    /// [`Self::indexed_data`] buffers.
+    fn allocate_indexed(&mut self, count: u32) -> u32 {
+        let length = self.indexed_data.len();
+        self.indexed_metadata.reserve_internal(count as usize);
         for _ in 0..count {
-            self.buffer.push(Zeroable::zeroed());
+            self.indexed_data.add();
+            self.indexed_metadata
+                .push(IndirectParametersMetadata::default());
         }
         length as u32
     }
 
-    pub fn set(&mut self, index: u32, value: IndirectParameters) {
-        self.buffer.set(index, value);
+    /// Reserves space for `count` new batches corresponding to non-indexed
+    /// meshes.
+    ///
+    /// This allocates in both the [`Self::non_indexed_metadata`] and
+    /// [`Self::non_indexed_data`] buffers.
+    fn allocate_non_indexed(&mut self, count: u32) -> u32 {
+        let length = self.non_indexed_data.len();
+        self.non_indexed_metadata.reserve_internal(count as usize);
+        for _ in 0..count {
+            self.non_indexed_data.add();
+            self.non_indexed_metadata
+                .push(IndirectParametersMetadata::default());
+        }
+        length as u32
+    }
+
+    /// Reserves space for `count` new batches.
+    ///
+    /// The `indexed` parameter specifies whether the meshes that these batches
+    /// correspond to are indexed or not.
+    pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 {
+        if indexed {
+            self.allocate_indexed(count)
+        } else {
+            self.allocate_non_indexed(count)
+        }
+    }
+
+    /// Initializes the batch corresponding to an indexed mesh at the given
+    /// index with the given [`IndirectParametersMetadata`].
+    pub fn set_indexed(&mut self, index: u32, value: IndirectParametersMetadata) {
+        self.indexed_metadata.set(index, value);
+    }
+
+    /// Initializes the batch corresponding to a non-indexed mesh at the given
+    /// index with the given [`IndirectParametersMetadata`].
+    pub fn set_non_indexed(&mut self, index: u32, value: IndirectParametersMetadata) {
+        self.non_indexed_metadata.set(index, value);
+    }
+
+    /// Returns the number of batches currently allocated.
+    ///
+    /// The `indexed` parameter specifies whether the meshes that these batches
+    /// correspond to are indexed or not.
+    fn batch_count(&self, indexed: bool) -> usize {
+        if indexed {
+            self.indexed_batch_count()
+        } else {
+            self.non_indexed_batch_count()
+        }
+    }
+
+    /// Returns the number of batches corresponding to indexed meshes that are
+    /// currently allocated.
+    #[inline]
+    pub fn indexed_batch_count(&self) -> usize {
+        self.indexed_data.len()
+    }
+
+    /// Returns the number of batches corresponding to non-indexed meshes that
+    /// are currently allocated.
+    #[inline]
+    pub fn non_indexed_batch_count(&self) -> usize {
+        self.non_indexed_data.len()
+    }
+
+    /// Returns the number of batch sets currently allocated.
+    ///
+    /// The `indexed` parameter specifies whether the meshes that these batch
+    /// sets correspond to are indexed or not.
+    pub fn batch_set_count(&self, indexed: bool) -> usize {
+        if indexed {
+            self.indexed_batch_sets.len()
+        } else {
+            self.non_indexed_batch_sets.len()
+        }
+    }
+
+    /// Adds a new batch set to `Self::indexed_batch_sets` or
+    /// `Self::non_indexed_batch_sets` as appropriate.
+    ///
+    /// `indexed` specifies whether the meshes that these batch sets correspond
+    /// to are indexed or not. `indirect_parameters_base` specifies the offset
+    /// within `Self::indexed_data` or `Self::non_indexed_data` of the first
+    /// batch in this batch set.
+    pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) {
+        if indexed {
+            self.indexed_batch_sets.push(IndirectBatchSet {
+                indirect_parameters_base,
+                indirect_parameters_count: 0,
+            });
+        } else {
+            self.non_indexed_batch_sets.push(IndirectBatchSet {
+                indirect_parameters_base,
+                indirect_parameters_count: 0,
+            });
+        }
     }
 }
 
-impl Default for IndirectParametersBuffer {
+impl Default for IndirectParametersBuffers {
     fn default() -> Self {
         Self::new()
     }
@@ -454,8 +778,20 @@ where
     /// Clears out the buffers in preparation for a new frame.
     pub fn clear(&mut self) {
         self.data_buffer.clear();
-        for work_item_buffer in self.work_item_buffers.values_mut() {
-            work_item_buffer.buffer.clear();
+
+        for view_work_item_buffers in self.work_item_buffers.values_mut() {
+            for phase_work_item_buffers in view_work_item_buffers.values_mut() {
+                match *phase_work_item_buffers {
+                    PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => buffer_vec.clear(),
+                    PreprocessWorkItemBuffers::Indirect {
+                        ref mut indexed,
+                        ref mut non_indexed,
+                    } => {
+                        indexed.clear();
+                        non_indexed.clear();
+                    }
+                }
+            }
         }
     }
 }
@@ -483,8 +819,11 @@ where
     /// The index of the first instance in this batch in the instance buffer.
     instance_start_index: u32,
 
+    /// True if the mesh in question has an index buffer; false otherwise.
+    indexed: bool,
+
     /// The index of the indirect parameters for this batch in the
-    /// [`IndirectParametersBuffer`].
+    /// [`IndirectParametersBuffers`].
     ///
     /// If CPU culling is being used, then this will be `None`.
     indirect_parameters_index: Option<NonMaxU32>,
@@ -505,8 +844,12 @@ where
     ///
     /// `instance_end_index` is the index of the last instance in this batch
     /// plus one.
-    fn flush<I>(self, instance_end_index: u32, phase: &mut SortedRenderPhase<I>)
-    where
+    fn flush<I>(
+        self,
+        instance_end_index: u32,
+        phase: &mut SortedRenderPhase<I>,
+        indirect_parameters_buffers: &mut IndirectParametersBuffers,
+    ) where
         I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
     {
         let (batch_range, batch_extra_index) =
@@ -514,6 +857,11 @@ where
         *batch_range = self.instance_start_index..instance_end_index;
         *batch_extra_index =
             PhaseItemExtraIndex::maybe_indirect_parameters_index(self.indirect_parameters_index);
+
+        if let Some(indirect_parameters_index) = self.indirect_parameters_index {
+            indirect_parameters_buffers
+                .add_batch_set(self.indexed, indirect_parameters_index.into());
+        }
     }
 }
 
@@ -559,7 +907,7 @@ pub fn delete_old_work_item_buffers<GFBD>(
 /// trying to combine the draws into a batch.
 pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
     gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
-    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
+    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
     mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
     mut views: Query<(Entity, &ExtractedView, Has<NoIndirectDrawing>)>,
     system_param_item: StaticSystemParam<GFBD::Param>,
@@ -580,24 +928,15 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
         };
 
         // Create the work item buffer if necessary.
-        let work_item_buffer =
-            work_item_buffers
-                .entry(view)
-                .or_insert_with(|| PreprocessWorkItemBuffer {
-                    buffer: BufferVec::new(BufferUsages::STORAGE),
-                    no_indirect_drawing,
-                });
+        let work_item_buffer = work_item_buffers
+            .entry(view)
+            .or_insert_with(TypeIdMap::default)
+            .entry(TypeId::of::<I>())
+            .or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing));
 
         // Walk through the list of phase items, building up batches as we go.
         let mut batch: Option<SortedRenderBatch<GFBD>> = None;
 
-        // Allocate the indirect parameters if necessary.
-        let mut indirect_parameters_offset = if no_indirect_drawing {
-            None
-        } else {
-            Some(indirect_parameters_buffer.allocate(phase.items.len() as u32))
-        };
-
         let mut first_output_index = data_buffer.len() as u32;
 
         for current_index in 0..phase.items.len() {
@@ -605,6 +944,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
             // this entity.
             let item = &phase.items[current_index];
             let entity = item.main_entity();
+            let item_is_indexed = item.indexed();
             let current_batch_input_index =
                 GFBD::get_index_and_compare_data(&system_param_item, entity);
 
@@ -615,7 +955,11 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
             let Some((current_input_index, current_meta)) = current_batch_input_index else {
                 // Break a batch if we need to.
                 if let Some(batch) = batch.take() {
-                    batch.flush(data_buffer.len() as u32, phase);
+                    batch.flush(
+                        data_buffer.len() as u32,
+                        phase,
+                        &mut indirect_parameters_buffers,
+                    );
                 }
 
                 continue;
@@ -634,62 +978,74 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
             });
 
             // Make space in the data buffer for this instance.
-            let item = &phase.items[current_index];
-            let entity = item.main_entity();
             let output_index = data_buffer.add() as u32;
 
             // If we can't batch, break the existing batch and make a new one.
             if !can_batch {
                 // Break a batch if we need to.
                 if let Some(batch) = batch.take() {
-                    batch.flush(output_index, phase);
+                    batch.flush(output_index, phase, &mut indirect_parameters_buffers);
                 }
 
+                let indirect_parameters_index = if no_indirect_drawing {
+                    None
+                } else if item_is_indexed {
+                    Some(indirect_parameters_buffers.allocate_indexed(1))
+                } else {
+                    Some(indirect_parameters_buffers.allocate_non_indexed(1))
+                };
+
                 // Start a new batch.
-                if let Some(indirect_parameters_offset) = indirect_parameters_offset {
-                    GFBD::write_batch_indirect_parameters(
-                        &system_param_item,
-                        &mut indirect_parameters_buffer,
-                        indirect_parameters_offset,
-                        entity,
+                if let Some(indirect_parameters_index) = indirect_parameters_index {
+                    GFBD::write_batch_indirect_parameters_metadata(
+                        current_input_index.into(),
+                        item_is_indexed,
+                        output_index,
+                        None,
+                        &mut indirect_parameters_buffers,
+                        indirect_parameters_index,
                     );
                 };
 
                 batch = Some(SortedRenderBatch {
                     phase_item_start_index: current_index as u32,
                     instance_start_index: output_index,
-                    indirect_parameters_index: indirect_parameters_offset.and_then(NonMaxU32::new),
+                    indexed: item_is_indexed,
+                    indirect_parameters_index: indirect_parameters_index.and_then(NonMaxU32::new),
                     meta: current_meta,
                 });
 
-                if let Some(ref mut indirect_parameters_offset) = indirect_parameters_offset {
-                    *indirect_parameters_offset += 1;
-                }
-
                 first_output_index = output_index;
             }
 
             // Add a new preprocessing work item so that the preprocessing
             // shader will copy the per-instance data over.
             if let Some(batch) = batch.as_ref() {
-                work_item_buffer.buffer.push(PreprocessWorkItem {
-                    input_index: current_input_index.into(),
-                    output_index: if no_indirect_drawing {
-                        output_index
-                    } else {
-                        first_output_index
-                    },
-                    indirect_parameters_index: match batch.indirect_parameters_index {
-                        Some(indirect_parameters_index) => indirect_parameters_index.into(),
-                        None => 0,
+                work_item_buffer.push(
+                    item_is_indexed,
+                    PreprocessWorkItem {
+                        input_index: current_input_index.into(),
+                        output_index: if no_indirect_drawing {
+                            output_index
+                        } else {
+                            first_output_index
+                        },
+                        indirect_parameters_index: match batch.indirect_parameters_index {
+                            Some(indirect_parameters_index) => indirect_parameters_index.into(),
+                            None => 0,
+                        },
                     },
-                });
+                );
             }
         }
 
         // Flush the final batch if necessary.
         if let Some(batch) = batch.take() {
-            batch.flush(data_buffer.len() as u32, phase);
+            batch.flush(
+                data_buffer.len() as u32,
+                phase,
+                &mut indirect_parameters_buffers,
+            );
         }
     }
 }
@@ -697,7 +1053,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
 /// Creates batches for a render phase that uses bins.
 pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
     gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
-    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
+    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
     mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
     mut views: Query<(Entity, &ExtractedView, Has<NoIndirectDrawing>)>,
     param: StaticSystemParam<GFBD::Param>,
@@ -720,18 +1076,18 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
 
         // Create the work item buffer if necessary; otherwise, just mark it as
         // used this frame.
-        let work_item_buffer =
-            work_item_buffers
-                .entry(view)
-                .or_insert_with(|| PreprocessWorkItemBuffer {
-                    buffer: BufferVec::new(BufferUsages::STORAGE),
-                    no_indirect_drawing,
-                });
+        let work_item_buffer = work_item_buffers
+            .entry(view)
+            .or_insert_with(TypeIdMap::default)
+            .entry(TypeId::of::<BPI>())
+            .or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing));
 
         // Prepare multidrawables.
 
         for batch_set_key in &phase.multidrawable_mesh_keys {
             let mut batch_set = None;
+            let indirect_parameters_base =
+                indirect_parameters_buffers.batch_count(batch_set_key.indexed()) as u32;
             for (bin_key, bin) in &phase.multidrawable_mesh_values[batch_set_key] {
                 let first_output_index = data_buffer.len() as u32;
                 let mut batch: Option<BinnedRenderPhaseBatch> = None;
@@ -747,33 +1103,47 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                         Some(ref mut batch) => {
                             // Append to the current batch.
                             batch.instance_range.end = output_index + 1;
-                            work_item_buffer.buffer.push(PreprocessWorkItem {
-                                input_index: input_index.into(),
-                                output_index: first_output_index,
-                                indirect_parameters_index: match batch.extra_index {
-                                    PhaseItemExtraIndex::IndirectParametersIndex(ref range) => {
-                                        range.start
-                                    }
-                                    PhaseItemExtraIndex::DynamicOffset(_)
-                                    | PhaseItemExtraIndex::None => 0,
+                            work_item_buffer.push(
+                                batch_set_key.indexed(),
+                                PreprocessWorkItem {
+                                    input_index: input_index.into(),
+                                    output_index: first_output_index,
+                                    indirect_parameters_index: match batch.extra_index {
+                                        PhaseItemExtraIndex::IndirectParametersIndex {
+                                            ref range,
+                                            ..
+                                        } => range.start,
+                                        PhaseItemExtraIndex::DynamicOffset(_)
+                                        | PhaseItemExtraIndex::None => 0,
+                                    },
                                 },
-                            });
+                            );
                         }
 
                         None => {
                             // Start a new batch, in indirect mode.
-                            let indirect_parameters_index = indirect_parameters_buffer.allocate(1);
-                            GFBD::write_batch_indirect_parameters(
-                                &system_param_item,
-                                &mut indirect_parameters_buffer,
-                                indirect_parameters_index,
-                                main_entity,
+                            let indirect_parameters_index =
+                                indirect_parameters_buffers.allocate(batch_set_key.indexed(), 1);
+                            let batch_set_index = NonMaxU32::new(
+                                indirect_parameters_buffers.batch_set_count(batch_set_key.indexed())
+                                    as u32,
                             );
-                            work_item_buffer.buffer.push(PreprocessWorkItem {
-                                input_index: input_index.into(),
-                                output_index: first_output_index,
+                            GFBD::write_batch_indirect_parameters_metadata(
+                                input_index.into(),
+                                batch_set_key.indexed(),
+                                output_index,
+                                batch_set_index,
+                                &mut indirect_parameters_buffers,
                                 indirect_parameters_index,
-                            });
+                            );
+                            work_item_buffer.push(
+                                batch_set_key.indexed(),
+                                PreprocessWorkItem {
+                                    input_index: input_index.into(),
+                                    output_index: first_output_index,
+                                    indirect_parameters_index,
+                                },
+                            );
                             batch = Some(BinnedRenderPhaseBatch {
                                 representative_entity: (entity, main_entity),
                                 instance_range: output_index..output_index + 1,
@@ -791,6 +1161,9 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                             batch_set = Some(BinnedRenderPhaseBatchSet {
                                 batches: vec![batch],
                                 bin_key: bin_key.clone(),
+                                index: indirect_parameters_buffers
+                                    .batch_set_count(batch_set_key.indexed())
+                                    as u32,
                             });
                         }
                         Some(ref mut batch_set) => {
@@ -805,6 +1178,8 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
             {
                 if let Some(batch_set) = batch_set {
                     batch_sets.push(batch_set);
+                    indirect_parameters_buffers
+                        .add_batch_set(batch_set_key.indexed(), indirect_parameters_base);
                 }
             }
         }
@@ -833,37 +1208,50 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                         // tightly-packed buffer if GPU culling discards some of
                         // the instances. Otherwise, we can just write the
                         // output index directly.
-                        work_item_buffer.buffer.push(PreprocessWorkItem {
-                            input_index: input_index.into(),
-                            output_index: if no_indirect_drawing {
-                                output_index
-                            } else {
-                                first_output_index
-                            },
-                            indirect_parameters_index: match batch.extra_index {
-                                PhaseItemExtraIndex::IndirectParametersIndex(ref range) => {
-                                    range.start
-                                }
-                                PhaseItemExtraIndex::DynamicOffset(_)
-                                | PhaseItemExtraIndex::None => 0,
+                        work_item_buffer.push(
+                            key.0.indexed(),
+                            PreprocessWorkItem {
+                                input_index: input_index.into(),
+                                output_index: if no_indirect_drawing {
+                                    output_index
+                                } else {
+                                    first_output_index
+                                },
+                                indirect_parameters_index: match batch.extra_index {
+                                    PhaseItemExtraIndex::IndirectParametersIndex {
+                                        range: ref indirect_parameters_range,
+                                        ..
+                                    } => indirect_parameters_range.start,
+                                    PhaseItemExtraIndex::DynamicOffset(_)
+                                    | PhaseItemExtraIndex::None => 0,
+                                },
                             },
-                        });
+                        );
                     }
 
                     None if !no_indirect_drawing => {
                         // Start a new batch, in indirect mode.
-                        let indirect_parameters_index = indirect_parameters_buffer.allocate(1);
-                        GFBD::write_batch_indirect_parameters(
-                            &system_param_item,
-                            &mut indirect_parameters_buffer,
-                            indirect_parameters_index,
-                            main_entity,
+                        let indirect_parameters_index =
+                            indirect_parameters_buffers.allocate(key.0.indexed(), 1);
+                        let batch_set_index = NonMaxU32::new(
+                            indirect_parameters_buffers.batch_set_count(key.0.indexed()) as u32,
                         );
-                        work_item_buffer.buffer.push(PreprocessWorkItem {
-                            input_index: input_index.into(),
-                            output_index: first_output_index,
+                        GFBD::write_batch_indirect_parameters_metadata(
+                            input_index.into(),
+                            key.0.indexed(),
+                            output_index,
+                            batch_set_index,
+                            &mut indirect_parameters_buffers,
                             indirect_parameters_index,
-                        });
+                        );
+                        work_item_buffer.push(
+                            key.0.indexed(),
+                            PreprocessWorkItem {
+                                input_index: input_index.into(),
+                                output_index: first_output_index,
+                                indirect_parameters_index,
+                            },
+                        );
                         batch = Some(BinnedRenderPhaseBatch {
                             representative_entity: (entity, main_entity),
                             instance_range: output_index..output_index + 1,
@@ -875,11 +1263,14 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
 
                     None => {
                         // Start a new batch, in direct mode.
-                        work_item_buffer.buffer.push(PreprocessWorkItem {
-                            input_index: input_index.into(),
-                            output_index,
-                            indirect_parameters_index: 0,
-                        });
+                        work_item_buffer.push(
+                            key.0.indexed(),
+                            PreprocessWorkItem {
+                                input_index: input_index.into(),
+                                output_index,
+                                indirect_parameters_index: 0,
+                            },
+                        );
                         batch = Some(BinnedRenderPhaseBatch {
                             representative_entity: (entity, main_entity),
                             instance_range: output_index..output_index + 1,
@@ -905,6 +1296,8 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                         vec.push(BinnedRenderPhaseBatchSet {
                             batches: vec![batch],
                             bin_key: key.1.clone(),
+                            index: indirect_parameters_buffers.batch_set_count(key.0.indexed())
+                                as u32,
                         });
                     }
                 }
@@ -918,8 +1311,16 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
             // Allocate the indirect parameters if necessary.
             let mut indirect_parameters_offset = if no_indirect_drawing {
                 None
+            } else if key.0.indexed() {
+                Some(
+                    indirect_parameters_buffers
+                        .allocate_indexed(unbatchables.entities.len() as u32),
+                )
             } else {
-                Some(indirect_parameters_buffer.allocate(unbatchables.entities.len() as u32))
+                Some(
+                    indirect_parameters_buffers
+                        .allocate_non_indexed(unbatchables.entities.len() as u32),
+                )
             };
 
             for &(_, main_entity) in &unbatchables.entities {
@@ -932,32 +1333,43 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                 if let Some(ref mut indirect_parameters_index) = indirect_parameters_offset {
                     // We're in indirect mode, so add an indirect parameters
                     // index.
-                    GFBD::write_batch_indirect_parameters(
-                        &system_param_item,
-                        &mut indirect_parameters_buffer,
+                    GFBD::write_batch_indirect_parameters_metadata(
+                        input_index.into(),
+                        key.0.indexed(),
+                        output_index,
+                        None,
+                        &mut indirect_parameters_buffers,
                         *indirect_parameters_index,
-                        main_entity,
                     );
-                    work_item_buffer.buffer.push(PreprocessWorkItem {
-                        input_index: input_index.into(),
-                        output_index,
-                        indirect_parameters_index: *indirect_parameters_index,
-                    });
+                    work_item_buffer.push(
+                        key.0.indexed(),
+                        PreprocessWorkItem {
+                            input_index: input_index.into(),
+                            output_index,
+                            indirect_parameters_index: *indirect_parameters_index,
+                        },
+                    );
                     unbatchables
                         .buffer_indices
                         .add(UnbatchableBinnedEntityIndices {
                             instance_index: *indirect_parameters_index,
-                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex(
-                                *indirect_parameters_index..(*indirect_parameters_index + 1),
-                            ),
+                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
+                                range: *indirect_parameters_index..(*indirect_parameters_index + 1),
+                                batch_set_index: None,
+                            },
                         });
+                    indirect_parameters_buffers
+                        .add_batch_set(key.0.indexed(), *indirect_parameters_index);
                     *indirect_parameters_index += 1;
                 } else {
-                    work_item_buffer.buffer.push(PreprocessWorkItem {
-                        input_index: input_index.into(),
-                        output_index,
-                        indirect_parameters_index: 0,
-                    });
+                    work_item_buffer.push(
+                        key.0.indexed(),
+                        PreprocessWorkItem {
+                            input_index: input_index.into(),
+                            output_index,
+                            indirect_parameters_index: 0,
+                        },
+                    );
                     unbatchables
                         .buffer_indices
                         .add(UnbatchableBinnedEntityIndices {
@@ -980,7 +1392,7 @@ pub fn write_batched_instance_buffers<GFBD>(
 {
     let BatchedInstanceBuffers {
         ref mut data_buffer,
-        work_item_buffers: ref mut index_buffers,
+        ref mut work_item_buffers,
         ref mut current_input_buffer,
         ref mut previous_input_buffer,
     } = gpu_array_buffer.into_inner();
@@ -993,22 +1405,60 @@ pub fn write_batched_instance_buffers<GFBD>(
         .buffer
         .write_buffer(&render_device, &render_queue);
 
-    for index_buffer in index_buffers.values_mut() {
-        index_buffer
-            .buffer
-            .write_buffer(&render_device, &render_queue);
+    for view_work_item_buffers in work_item_buffers.values_mut() {
+        for phase_work_item_buffers in view_work_item_buffers.values_mut() {
+            match *phase_work_item_buffers {
+                PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => {
+                    buffer_vec.write_buffer(&render_device, &render_queue);
+                }
+                PreprocessWorkItemBuffers::Indirect {
+                    ref mut indexed,
+                    ref mut non_indexed,
+                } => {
+                    indexed.write_buffer(&render_device, &render_queue);
+                    non_indexed.write_buffer(&render_device, &render_queue);
+                }
+            }
+        }
     }
 }
 
-pub fn write_indirect_parameters_buffer(
+pub fn clear_indirect_parameters_buffers(
+    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
+) {
+    indirect_parameters_buffers.indexed_data.clear();
+    indirect_parameters_buffers.indexed_metadata.clear();
+    indirect_parameters_buffers.indexed_batch_sets.clear();
+    indirect_parameters_buffers.non_indexed_data.clear();
+    indirect_parameters_buffers.non_indexed_metadata.clear();
+    indirect_parameters_buffers.non_indexed_batch_sets.clear();
+}
+
+pub fn write_indirect_parameters_buffers(
     render_device: Res<RenderDevice>,
     render_queue: Res<RenderQueue>,
-    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
+    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
 ) {
-    indirect_parameters_buffer
-        .buffer
+    indirect_parameters_buffers
+        .indexed_data
+        .write_buffer(&render_device);
+    indirect_parameters_buffers
+        .non_indexed_data
+        .write_buffer(&render_device);
+
+    indirect_parameters_buffers
+        .indexed_metadata
+        .write_buffer(&render_device, &render_queue);
+    indirect_parameters_buffers
+        .non_indexed_metadata
+        .write_buffer(&render_device, &render_queue);
+
+    indirect_parameters_buffers
+        .indexed_batch_sets
+        .write_buffer(&render_device, &render_queue);
+    indirect_parameters_buffers
+        .non_indexed_batch_sets
         .write_buffer(&render_device, &render_queue);
-    indirect_parameters_buffer.buffer.clear();
 }
 
 #[cfg(test)]
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 590ed94293290..214fdda13644e 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -6,7 +6,7 @@ use bevy_ecs::{
 use bytemuck::Pod;
 use nonmax::NonMaxU32;
 
-use self::gpu_preprocessing::IndirectParametersBuffer;
+use self::gpu_preprocessing::IndirectParametersBuffers;
 use crate::{render_phase::PhaseItemExtraIndex, sync_world::MainEntity};
 use crate::{
     render_phase::{
@@ -58,7 +58,9 @@ impl<T: PartialEq> BatchMeta<T> {
                 PhaseItemExtraIndex::DynamicOffset(dynamic_offset) => {
                     NonMaxU32::new(dynamic_offset)
                 }
-                PhaseItemExtraIndex::None | PhaseItemExtraIndex::IndirectParametersIndex(_) => None,
+                PhaseItemExtraIndex::None | PhaseItemExtraIndex::IndirectParametersIndex { .. } => {
+                    None
+                }
             },
             user_data,
         }
@@ -141,17 +143,36 @@ pub trait GetFullBatchData: GetBatchData {
         query_item: MainEntity,
     ) -> Option<NonMaxU32>;
 
-    /// Writes the [`gpu_preprocessing::IndirectParameters`] necessary to draw
-    /// this batch into the given [`IndirectParametersBuffer`] at the given
-    /// index.
+    /// Writes the [`gpu_preprocessing::IndirectParametersMetadata`] necessary
+    /// to draw this batch into the given metadata buffer at the given index.
     ///
     /// This is only used if GPU culling is enabled (which requires GPU
     /// preprocessing).
-    fn write_batch_indirect_parameters(
-        param: &SystemParamItem<Self::Param>,
-        indirect_parameters_buffer: &mut IndirectParametersBuffer,
+    ///
+    /// * `mesh_index` describes the index of the first mesh instance in this
+    ///   batch in the `MeshInputUniform` buffer.
+    ///
+    /// * `indexed` is true if the mesh is indexed or false if it's non-indexed.
+    ///
+    /// * `base_output_index` is the index of the first mesh instance in this
+    ///   batch in the `MeshUniform` output buffer.
+    ///
+    /// * `batch_set_index` is the index of the batch set in the
+    ///   [`gpu_preprocessing::IndirectBatchSet`] buffer, if this batch belongs to
+    ///   a batch set.
+    ///
+    /// * `indirect_parameters_buffers` is the buffer in which to write the
+    ///   metadata.
+    ///
+    /// * `indirect_parameters_offset` is the index in that buffer at which to
+    ///   write the metadata.
+    fn write_batch_indirect_parameters_metadata(
+        mesh_index: u32,
+        indexed: bool,
+        base_output_index: u32,
+        batch_set_index: Option<NonMaxU32>,
+        indirect_parameters_buffers: &mut IndirectParametersBuffers,
         indirect_parameters_offset: u32,
-        entity: MainEntity,
     );
 }
 
diff --git a/crates/bevy_render/src/camera/camera.rs b/crates/bevy_render/src/camera/camera.rs
index b3b6b6f7c70aa..1bc4b3737ad81 100644
--- a/crates/bevy_render/src/camera/camera.rs
+++ b/crates/bevy_render/src/camera/camera.rs
@@ -1153,7 +1153,7 @@ pub fn extract_cameras(
                     hdr: camera.hdr,
                 },
                 ExtractedView {
-                    retained_view_entity: RetainedViewEntity::new(main_entity.into(), 0),
+                    retained_view_entity: RetainedViewEntity::new(main_entity.into(), None, 0),
                     clip_from_view: camera.clip_from_view(),
                     world_from_view: *transform,
                     clip_from_world: None,
diff --git a/crates/bevy_render/src/mesh/components.rs b/crates/bevy_render/src/mesh/components.rs
index 10229be41210d..2b887c65d32c5 100644
--- a/crates/bevy_render/src/mesh/components.rs
+++ b/crates/bevy_render/src/mesh/components.rs
@@ -2,11 +2,15 @@ use crate::{
     mesh::Mesh,
     view::{self, Visibility, VisibilityClass},
 };
-use bevy_asset::{AssetId, Handle};
+use bevy_asset::{AssetEvent, AssetId, Handle};
 use bevy_derive::{Deref, DerefMut};
-use bevy_ecs::{component::Component, prelude::require, reflect::ReflectComponent};
+use bevy_ecs::{
+    change_detection::DetectChangesMut, component::Component, event::EventReader, prelude::require,
+    reflect::ReflectComponent, system::Query,
+};
 use bevy_reflect::{std_traits::ReflectDefault, Reflect};
 use bevy_transform::components::Transform;
+use bevy_utils::{FixedHasher, HashSet};
 use derive_more::derive::From;
 
 /// A component for 2D meshes. Requires a [`MeshMaterial2d`] to be rendered, commonly using a [`ColorMaterial`].
@@ -101,3 +105,32 @@ impl From<&Mesh3d> for AssetId<Mesh> {
         mesh.id()
     }
 }
+
+/// A system that marks a [`Mesh3d`] as changed if the associated [`Mesh`] asset
+/// has changed.
+///
+/// This is needed because the systems that extract meshes, such as
+/// `extract_meshes_for_gpu_building`, write some metadata about the mesh (like
+/// the location within each slab) into the GPU structures that they build that
+/// needs to be kept up to date if the contents of the mesh change.
+pub fn mark_3d_meshes_as_changed_if_their_assets_changed(
+    mut meshes_3d: Query<&mut Mesh3d>,
+    mut mesh_asset_events: EventReader<AssetEvent<Mesh>>,
+) {
+    let mut changed_meshes: HashSet<AssetId<Mesh>, FixedHasher> = HashSet::default();
+    for mesh_asset_event in mesh_asset_events.read() {
+        if let AssetEvent::Modified { id } = mesh_asset_event {
+            changed_meshes.insert(*id);
+        }
+    }
+
+    if changed_meshes.is_empty() {
+        return;
+    }
+
+    for mut mesh_3d in &mut meshes_3d {
+        if changed_meshes.contains(&mesh_3d.0.id()) {
+            mesh_3d.set_changed();
+        }
+    }
+}
diff --git a/crates/bevy_render/src/mesh/mod.rs b/crates/bevy_render/src/mesh/mod.rs
index 7a7829e0f4ef1..703333675da74 100644
--- a/crates/bevy_render/src/mesh/mod.rs
+++ b/crates/bevy_render/src/mesh/mod.rs
@@ -9,6 +9,7 @@ use crate::{
     render_asset::{PrepareAssetError, RenderAsset, RenderAssetPlugin, RenderAssets},
     render_resource::TextureView,
     texture::GpuImage,
+    view::VisibilitySystems,
     RenderApp,
 };
 use allocator::MeshAllocatorPlugin;
@@ -17,6 +18,7 @@ use bevy_asset::{AssetApp, AssetId, RenderAssetUsages};
 use bevy_ecs::{
     entity::Entity,
     query::{Changed, With},
+    schedule::IntoSystemConfigs,
     system::Query,
 };
 use bevy_ecs::{
@@ -42,7 +44,12 @@ impl Plugin for MeshPlugin {
             .register_type::<Vec<Entity>>()
             // 'Mesh' must be prepared after 'Image' as meshes rely on the morph target image being ready
             .add_plugins(RenderAssetPlugin::<RenderMesh, GpuImage>::default())
-            .add_plugins(MeshAllocatorPlugin);
+            .add_plugins(MeshAllocatorPlugin)
+            .add_systems(
+                PostUpdate,
+                components::mark_3d_meshes_as_changed_if_their_assets_changed
+                    .ambiguous_with(VisibilitySystems::CalculateBounds),
+            );
 
         let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
             return;
@@ -130,6 +137,12 @@ impl RenderMesh {
     pub fn primitive_topology(&self) -> PrimitiveTopology {
         self.key_bits.primitive_topology()
     }
+
+    /// Returns true if this mesh uses an index buffer or false otherwise.
+    #[inline]
+    pub fn indexed(&self) -> bool {
+        matches!(self.buffer_info, RenderMeshBufferInfo::Indexed { .. })
+    }
 }
 
 /// The index/vertex buffer info of a [`RenderMesh`].
diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs
index d65b46da7cdf6..2b8d0c9e8a362 100644
--- a/crates/bevy_render/src/render_phase/mod.rs
+++ b/crates/bevy_render/src/render_phase/mod.rs
@@ -36,8 +36,10 @@ pub use draw_state::*;
 use encase::{internal::WriteInto, ShaderSize};
 use nonmax::NonMaxU32;
 pub use rangefinder::*;
+use wgpu::Features;
 
 use crate::batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport};
+use crate::renderer::RenderDevice;
 use crate::sync_world::MainEntity;
 use crate::view::RetainedViewEntity;
 use crate::{
@@ -189,6 +191,7 @@ pub enum BinnedRenderPhaseBatchSets<BK> {
 pub struct BinnedRenderPhaseBatchSet<BK> {
     pub(crate) batches: Vec<BinnedRenderPhaseBatch>,
     pub(crate) bin_key: BK,
+    pub(crate) index: u32,
 }
 
 impl<BK> BinnedRenderPhaseBatchSets<BK> {
@@ -456,6 +459,11 @@ where
         let draw_functions = world.resource::<DrawFunctions<BPI>>();
         let mut draw_functions = draw_functions.write();
 
+        let render_device = world.resource::<RenderDevice>();
+        let multi_draw_indirect_count_supported = render_device
+            .features()
+            .contains(Features::MULTI_DRAW_INDIRECT_COUNT);
+
         match self.batch_sets {
             BinnedRenderPhaseBatchSets::DynamicUniforms(ref batch_sets) => {
                 debug_assert_eq!(self.batchable_mesh_keys.len(), batch_sets.len());
@@ -522,6 +530,12 @@ where
                         continue;
                     };
 
+                    let batch_set_index = if multi_draw_indirect_count_supported {
+                        NonMaxU32::new(batch_set.index)
+                    } else {
+                        None
+                    };
+
                     let binned_phase_item = BPI::new(
                         batch_set_key.clone(),
                         batch_set.bin_key.clone(),
@@ -532,10 +546,12 @@ where
                             PhaseItemExtraIndex::DynamicOffset(ref dynamic_offset) => {
                                 PhaseItemExtraIndex::DynamicOffset(*dynamic_offset)
                             }
-                            PhaseItemExtraIndex::IndirectParametersIndex(ref range) => {
-                                PhaseItemExtraIndex::IndirectParametersIndex(
-                                    range.start..(range.start + batch_set.batches.len() as u32),
-                                )
+                            PhaseItemExtraIndex::IndirectParametersIndex { ref range, .. } => {
+                                PhaseItemExtraIndex::IndirectParametersIndex {
+                                    range: range.start
+                                        ..(range.start + batch_set.batches.len() as u32),
+                                    batch_set_index,
+                                }
                             }
                         },
                     );
@@ -585,10 +601,11 @@ where
                                 let first_indirect_parameters_index_for_entity =
                                     u32::from(*first_indirect_parameters_index)
                                         + entity_index as u32;
-                                PhaseItemExtraIndex::IndirectParametersIndex(
-                                    first_indirect_parameters_index_for_entity
+                                PhaseItemExtraIndex::IndirectParametersIndex {
+                                    range: first_indirect_parameters_index_for_entity
                                         ..(first_indirect_parameters_index_for_entity + 1),
-                                )
+                                    batch_set_index: None,
+                                }
                             }
                         },
                     },
@@ -725,10 +742,11 @@ impl UnbatchableBinnedEntityIndexSet {
                     u32::from(*first_indirect_parameters_index) + entity_index;
                 Some(UnbatchableBinnedEntityIndices {
                     instance_index: instance_range.start + entity_index,
-                    extra_index: PhaseItemExtraIndex::IndirectParametersIndex(
-                        first_indirect_parameters_index_for_this_batch
+                    extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
+                        range: first_indirect_parameters_index_for_this_batch
                             ..(first_indirect_parameters_index_for_this_batch + 1),
-                    ),
+                        batch_set_index: None,
+                    },
                 })
             }
             UnbatchableBinnedEntityIndexSet::Dense(ref indices) => {
@@ -890,12 +908,17 @@ impl UnbatchableBinnedEntityIndexSet {
                             first_indirect_parameters_index: None,
                         }
                     }
-                    PhaseItemExtraIndex::IndirectParametersIndex(ref range) => {
+                    PhaseItemExtraIndex::IndirectParametersIndex {
+                        range: ref indirect_parameters_index,
+                        ..
+                    } => {
                         // This is the first entity we've seen, and we have compute
                         // shaders. Initialize the fast path.
                         *self = UnbatchableBinnedEntityIndexSet::Sparse {
                             instance_range: indices.instance_index..indices.instance_index + 1,
-                            first_indirect_parameters_index: NonMaxU32::new(range.start),
+                            first_indirect_parameters_index: NonMaxU32::new(
+                                indirect_parameters_index.start,
+                            ),
                         }
                     }
                 }
@@ -909,7 +932,10 @@ impl UnbatchableBinnedEntityIndexSet {
                     && indices.extra_index == PhaseItemExtraIndex::None)
                     || first_indirect_parameters_index.is_some_and(
                         |first_indirect_parameters_index| match indices.extra_index {
-                            PhaseItemExtraIndex::IndirectParametersIndex(ref this_range) => {
+                            PhaseItemExtraIndex::IndirectParametersIndex {
+                                range: ref this_range,
+                                ..
+                            } => {
                                 u32::from(first_indirect_parameters_index) + instance_range.end
                                     - instance_range.start
                                     == this_range.start
@@ -1129,7 +1155,22 @@ pub enum PhaseItemExtraIndex {
     /// An index into the buffer that specifies the indirect parameters for this
     /// [`PhaseItem`]'s drawcall. This is used when indirect mode is on (as used
     /// for GPU culling).
-    IndirectParametersIndex(Range<u32>),
+    IndirectParametersIndex {
+        /// The range of indirect parameters within the indirect parameters array.
+        ///
+        /// If we're using `multi_draw_indirect_count`, this specifies the
+        /// maximum range of indirect parameters within that array. If batches
+        /// are ultimately culled out on the GPU, the actual number of draw
+        /// commands might be lower than the length of this range.
+        range: Range<u32>,
+        /// If `multi_draw_indirect_count` is in use, and this phase item is
+        /// part of a batch set, specifies the index of the batch set that this
+        /// phase item is a part of.
+        ///
+        /// If `multi_draw_indirect_count` isn't in use, or this phase item
+        /// isn't part of a batch set, this is `None`.
+        batch_set_index: Option<NonMaxU32>,
+    },
 }
 
 impl PhaseItemExtraIndex {
@@ -1139,9 +1180,11 @@ impl PhaseItemExtraIndex {
         indirect_parameters_index: Option<NonMaxU32>,
     ) -> PhaseItemExtraIndex {
         match indirect_parameters_index {
-            Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex(
-                u32::from(indirect_parameters_index)..(u32::from(indirect_parameters_index) + 1),
-            ),
+            Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex {
+                range: u32::from(indirect_parameters_index)
+                    ..(u32::from(indirect_parameters_index) + 1),
+                batch_set_index: None,
+            },
             None => PhaseItemExtraIndex::None,
         }
     }
@@ -1172,7 +1215,11 @@ pub trait BinnedPhaseItem: PhaseItem {
     /// reduces the need for rebinding between bins and improves performance.
     type BinKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash;
 
-    type BatchSetKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash;
+    /// The key used to combine batches into batch sets.
+    ///
+    /// A *batch set* is a set of meshes that can potentially be multi-drawn
+    /// together.
+    type BatchSetKey: PhaseItemBatchSetKey;
 
     /// Creates a new binned phase item from the key and per-entity data.
     ///
@@ -1188,6 +1235,19 @@ pub trait BinnedPhaseItem: PhaseItem {
     ) -> Self;
 }
 
+/// A key used to combine batches into batch sets.
+///
+/// A *batch set* is a set of meshes that can potentially be multi-drawn
+/// together.
+pub trait PhaseItemBatchSetKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash {
+    /// Returns true if this batch set key describes indexed meshes or false if
+    /// it describes non-indexed meshes.
+    ///
+    /// Bevy uses this in order to determine which kind of indirect draw
+    /// parameters to use, if indirect drawing is enabled.
+    fn indexed(&self) -> bool;
+}
+
 /// Represents phase items that must be sorted. The `SortKey` specifies the
 /// order that these items are drawn in. These are placed into a single array,
 /// and the array as a whole is then sorted.
@@ -1219,6 +1279,17 @@ pub trait SortedPhaseItem: PhaseItem {
     fn sort(items: &mut [Self]) {
         items.sort_unstable_by_key(Self::sort_key);
     }
+
+    /// Whether this phase item targets indexed meshes (those with both vertex
+    /// and index buffers as opposed to just vertex buffers).
+    ///
+    /// Bevy needs this information in order to properly group phase items
+    /// together for multi-draw indirect, because the GPU layout of indirect
+    /// commands differs between indexed and non-indexed meshes.
+    ///
+    /// If you're implementing a custom phase item that doesn't describe a mesh,
+    /// you can safely return false here.
+    fn indexed(&self) -> bool;
 }
 
 /// A [`PhaseItem`] item, that automatically sets the appropriate render pipeline,
diff --git a/crates/bevy_render/src/view/mod.rs b/crates/bevy_render/src/view/mod.rs
index 3eb0df75e2f2c..b5ad7a541e297 100644
--- a/crates/bevy_render/src/view/mod.rs
+++ b/crates/bevy_render/src/view/mod.rs
@@ -191,14 +191,25 @@ impl Msaa {
 /// stable, and we can't use just [`MainEntity`] because some main world views
 /// extract to multiple render world views. For example, a directional light
 /// extracts to one render world view per cascade, and a point light extracts to
-/// one render world view per cubemap face. So we pair the main entity with a
-/// *subview index*, which *together* uniquely identify a view in the render
-/// world in a way that's stable from frame to frame.
+/// one render world view per cubemap face. So we pair the main entity with an
+/// *auxiliary entity* and a *subview index*, which *together* uniquely identify
+/// a view in the render world in a way that's stable from frame to frame.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub struct RetainedViewEntity {
     /// The main entity that this view corresponds to.
     pub main_entity: MainEntity,
 
+    /// Another entity associated with the view entity.
+    ///
+    /// This is currently used for shadow cascades. If there are multiple
+    /// cameras, each camera needs to have its own set of shadow cascades. Thus
+    /// the light and subview index aren't themselves enough to uniquely
+    /// identify a shadow cascade: we need the camera that the cascade is
+    /// associated with as well. This entity stores that camera.
+    ///
+    /// If not present, this will be `MainEntity(Entity::PLACEHOLDER)`.
+    pub auxiliary_entity: MainEntity,
+
     /// The index of the view corresponding to the entity.
     ///
     /// For example, for point lights that cast shadows, this is the index of
@@ -208,14 +219,19 @@ pub struct RetainedViewEntity {
 }
 
 impl RetainedViewEntity {
-    /// Creates a new [`RetainedViewEntity`] from the given main world entity
-    /// and subview index.
+    /// Creates a new [`RetainedViewEntity`] from the given main world entity,
+    /// auxiliary main world entity, and subview index.
     ///
     /// See [`RetainedViewEntity::subview_index`] for an explanation of what
-    /// `subview_index` is.
-    pub fn new(main_entity: MainEntity, subview_index: u32) -> Self {
+    /// `auxiliary_entity` and `subview_index` are.
+    pub fn new(
+        main_entity: MainEntity,
+        auxiliary_entity: Option<MainEntity>,
+        subview_index: u32,
+    ) -> Self {
         Self {
             main_entity,
+            auxiliary_entity: auxiliary_entity.unwrap_or(Entity::PLACEHOLDER.into()),
             subview_index,
         }
     }
diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs
index 39b6b15f1a6ea..a4dfc376682c4 100644
--- a/crates/bevy_sprite/src/mesh2d/material.rs
+++ b/crates/bevy_sprite/src/mesh2d/material.rs
@@ -5,7 +5,9 @@ use crate::{
 use bevy_app::{App, Plugin};
 use bevy_asset::{Asset, AssetApp, AssetId, AssetServer, Handle};
 use bevy_core_pipeline::{
-    core_2d::{AlphaMask2d, AlphaMask2dBinKey, Opaque2d, Opaque2dBinKey, Transparent2d},
+    core_2d::{
+        AlphaMask2d, AlphaMask2dBinKey, BatchSetKey2d, Opaque2d, Opaque2dBinKey, Transparent2d,
+    },
     tonemapping::{DebandDither, Tonemapping},
 };
 use bevy_derive::{Deref, DerefMut};
@@ -584,7 +586,9 @@ pub fn queue_material2d_meshes<M: Material2d>(
                         material_bind_group_id: material_2d.get_bind_group_id().0,
                     };
                     opaque_phase.add(
-                        (),
+                        BatchSetKey2d {
+                            indexed: mesh.indexed(),
+                        },
                         bin_key,
                         (*render_entity, *visible_entity),
                         binned_render_phase_type,
@@ -598,7 +602,9 @@ pub fn queue_material2d_meshes<M: Material2d>(
                         material_bind_group_id: material_2d.get_bind_group_id().0,
                     };
                     alpha_mask_phase.add(
-                        (),
+                        BatchSetKey2d {
+                            indexed: mesh.indexed(),
+                        },
                         bin_key,
                         (*render_entity, *visible_entity),
                         binned_render_phase_type,
@@ -617,6 +623,7 @@ pub fn queue_material2d_meshes<M: Material2d>(
                         // Batching is done in batch_and_prepare_render_phase
                         batch_range: 0..1,
                         extra_index: PhaseItemExtraIndex::None,
+                        indexed: mesh.indexed(),
                     });
                 }
             }
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index aa7e41dbcbd4f..52309fd492aab 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -18,7 +18,7 @@ use bevy_image::{BevyDefault, Image, ImageSampler, TextureFormatPixelInfo};
 use bevy_math::{Affine3, Vec4};
 use bevy_render::{
     batching::{
-        gpu_preprocessing::IndirectParameters,
+        gpu_preprocessing::IndirectParametersMetadata,
         no_gpu_preprocessing::{
             self, batch_and_prepare_binned_render_phase, batch_and_prepare_sorted_render_phase,
             write_batched_instance_buffer, BatchedInstanceBuffer,
@@ -403,56 +403,33 @@ impl GetFullBatchData for Mesh2dPipeline {
         None
     }
 
-    fn write_batch_indirect_parameters(
-        (mesh_instances, meshes, mesh_allocator): &SystemParamItem<Self::Param>,
-        indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffer,
+    fn write_batch_indirect_parameters_metadata(
+        input_index: u32,
+        indexed: bool,
+        base_output_index: u32,
+        batch_set_index: Option<NonMaxU32>,
+        indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffers,
         indirect_parameters_offset: u32,
-        main_entity: MainEntity,
     ) {
-        let Some(mesh_instance) = mesh_instances.get(&main_entity) else {
-            return;
-        };
-        let Some(mesh) = meshes.get(mesh_instance.mesh_asset_id) else {
-            return;
-        };
-        let Some(vertex_buffer_slice) =
-            mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id)
-        else {
-            return;
-        };
-
         // Note that `IndirectParameters` covers both of these structures, even
         // though they actually have distinct layouts. See the comment above that
         // type for more information.
-        let indirect_parameters = match mesh.buffer_info {
-            RenderMeshBufferInfo::Indexed {
-                count: index_count, ..
-            } => {
-                let Some(index_buffer_slice) =
-                    mesh_allocator.mesh_index_slice(&mesh_instance.mesh_asset_id)
-                else {
-                    return;
-                };
-                IndirectParameters {
-                    vertex_or_index_count: index_count,
-                    instance_count: 0,
-                    first_vertex_or_first_index: index_buffer_slice.range.start,
-                    base_vertex_or_first_instance: vertex_buffer_slice.range.start,
-                    first_instance: 0,
-                }
-            }
-            RenderMeshBufferInfo::NonIndexed => IndirectParameters {
-                vertex_or_index_count: mesh.vertex_count,
-                instance_count: 0,
-                first_vertex_or_first_index: vertex_buffer_slice.range.start,
-                base_vertex_or_first_instance: 0,
-                // Use `0xffffffff` as a placeholder to tell the mesh
-                // preprocessing shader that this is a non-indexed mesh.
-                first_instance: !0,
+        let indirect_parameters = IndirectParametersMetadata {
+            mesh_index: input_index,
+            base_output_index,
+            batch_set_index: match batch_set_index {
+                None => !0,
+                Some(batch_set_index) => u32::from(batch_set_index),
             },
+            instance_count: 0,
         };
 
-        indirect_parameters_buffer.set(indirect_parameters_offset, indirect_parameters);
+        if indexed {
+            indirect_parameters_buffer.set_indexed(indirect_parameters_offset, indirect_parameters);
+        } else {
+            indirect_parameters_buffer
+                .set_non_indexed(indirect_parameters_offset, indirect_parameters);
+        }
     }
 }
 
diff --git a/crates/bevy_sprite/src/render/mod.rs b/crates/bevy_sprite/src/render/mod.rs
index 3dae793d323dd..585229052e04f 100644
--- a/crates/bevy_sprite/src/render/mod.rs
+++ b/crates/bevy_sprite/src/render/mod.rs
@@ -574,6 +574,7 @@ pub fn queue_sprites(
                 // batch_range and dynamic_offset will be calculated in prepare_sprites
                 batch_range: 0..0,
                 extra_index: PhaseItemExtraIndex::None,
+                indexed: true,
             });
         }
     }
diff --git a/crates/bevy_ui/src/render/box_shadow.rs b/crates/bevy_ui/src/render/box_shadow.rs
index 7f18e935ab8eb..f33a4ed4ded2d 100644
--- a/crates/bevy_ui/src/render/box_shadow.rs
+++ b/crates/bevy_ui/src/render/box_shadow.rs
@@ -387,6 +387,7 @@ pub fn queue_shadows(
             ),
             batch_range: 0..0,
             extra_index: PhaseItemExtraIndex::None,
+            indexed: true,
         });
     }
 }
diff --git a/crates/bevy_ui/src/render/mod.rs b/crates/bevy_ui/src/render/mod.rs
index bbadf8aef7601..27f94776deae2 100644
--- a/crates/bevy_ui/src/render/mod.rs
+++ b/crates/bevy_ui/src/render/mod.rs
@@ -626,7 +626,7 @@ pub fn extract_ui_camera_view(
             // We use `UI_CAMERA_SUBVIEW` here so as not to conflict with the
             // main 3D or 2D camera, which will have subview index 0.
             let retained_view_entity =
-                RetainedViewEntity::new(main_entity.into(), UI_CAMERA_SUBVIEW);
+                RetainedViewEntity::new(main_entity.into(), None, UI_CAMERA_SUBVIEW);
             // Creates the UI view.
             let ui_camera_view = commands
                 .spawn((
@@ -894,6 +894,7 @@ pub fn queue_uinodes(
             // batch_range will be calculated in prepare_uinodes
             batch_range: 0..0,
             extra_index: PhaseItemExtraIndex::None,
+            indexed: true,
         });
     }
 }
diff --git a/crates/bevy_ui/src/render/render_pass.rs b/crates/bevy_ui/src/render/render_pass.rs
index d26844fcb6161..c9f0d3d69c547 100644
--- a/crates/bevy_ui/src/render/render_pass.rs
+++ b/crates/bevy_ui/src/render/render_pass.rs
@@ -112,6 +112,7 @@ pub struct TransparentUi {
     pub draw_function: DrawFunctionId,
     pub batch_range: Range<u32>,
     pub extra_index: PhaseItemExtraIndex,
+    pub indexed: bool,
 }
 
 impl PhaseItem for TransparentUi {
@@ -162,6 +163,11 @@ impl SortedPhaseItem for TransparentUi {
     fn sort(items: &mut [Self]) {
         items.sort_by_key(SortedPhaseItem::sort_key);
     }
+
+    #[inline]
+    fn indexed(&self) -> bool {
+        self.indexed
+    }
 }
 
 impl CachedRenderPipelinePhaseItem for TransparentUi {
diff --git a/crates/bevy_ui/src/render/ui_material_pipeline.rs b/crates/bevy_ui/src/render/ui_material_pipeline.rs
index c47729105e79a..904a0bd225db4 100644
--- a/crates/bevy_ui/src/render/ui_material_pipeline.rs
+++ b/crates/bevy_ui/src/render/ui_material_pipeline.rs
@@ -662,6 +662,7 @@ pub fn queue_ui_material_nodes<M: UiMaterial>(
             ),
             batch_range: 0..0,
             extra_index: PhaseItemExtraIndex::None,
+            indexed: false,
         });
     }
 }
diff --git a/crates/bevy_ui/src/render/ui_texture_slice_pipeline.rs b/crates/bevy_ui/src/render/ui_texture_slice_pipeline.rs
index 989207b3a2728..869e5f0226c8a 100644
--- a/crates/bevy_ui/src/render/ui_texture_slice_pipeline.rs
+++ b/crates/bevy_ui/src/render/ui_texture_slice_pipeline.rs
@@ -385,6 +385,7 @@ pub fn queue_ui_slices(
             ),
             batch_range: 0..0,
             extra_index: PhaseItemExtraIndex::None,
+            indexed: true,
         });
     }
 }
diff --git a/examples/2d/mesh2d_manual.rs b/examples/2d/mesh2d_manual.rs
index 09acb6dbb99bc..15d611ce667cf 100644
--- a/examples/2d/mesh2d_manual.rs
+++ b/examples/2d/mesh2d_manual.rs
@@ -392,10 +392,10 @@ pub fn queue_colored_mesh2d(
                 let mesh2d_transforms = &mesh_instance.transforms;
                 // Get our specialized pipeline
                 let mut mesh2d_key = mesh_key;
-                if let Some(mesh) = render_meshes.get(mesh2d_handle) {
-                    mesh2d_key |=
-                        Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology());
-                }
+                let Some(mesh) = render_meshes.get(mesh2d_handle) else {
+                    continue;
+                };
+                mesh2d_key |= Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology());
 
                 let pipeline_id =
                     pipelines.specialize(&pipeline_cache, &colored_mesh2d_pipeline, mesh2d_key);
@@ -411,6 +411,7 @@ pub fn queue_colored_mesh2d(
                     // This material is not batched
                     batch_range: 0..1,
                     extra_index: PhaseItemExtraIndex::None,
+                    indexed: mesh.indexed(),
                 });
             }
         }
diff --git a/examples/shader/custom_shader_instancing.rs b/examples/shader/custom_shader_instancing.rs
index 3cdfad774dbae..cd5909c36fff1 100644
--- a/examples/shader/custom_shader_instancing.rs
+++ b/examples/shader/custom_shader_instancing.rs
@@ -166,6 +166,7 @@ fn queue_custom(
                 distance: rangefinder.distance_translation(&mesh_instance.translation),
                 batch_range: 0..1,
                 extra_index: PhaseItemExtraIndex::None,
+                indexed: true,
             });
         }
     }
diff --git a/examples/shader/specialized_mesh_pipeline.rs b/examples/shader/specialized_mesh_pipeline.rs
index ee5c68bc11a16..bc1ecf113c78b 100644
--- a/examples/shader/specialized_mesh_pipeline.rs
+++ b/examples/shader/specialized_mesh_pipeline.rs
@@ -6,8 +6,11 @@
 //!
 //! [`SpecializedMeshPipeline`] let's you customize the entire pipeline used when rendering a mesh.
 
+use std::any::TypeId;
+
 use bevy::{
     core_pipeline::core_3d::{Opaque3d, Opaque3dBatchSetKey, Opaque3dBinKey, CORE_3D_DEPTH_FORMAT},
+    ecs::system::StaticSystemParam,
     math::{vec3, vec4},
     pbr::{
         DrawMesh, MeshPipeline, MeshPipelineKey, MeshPipelineViewLayoutKey, RenderMeshInstances,
@@ -15,6 +18,14 @@ use bevy::{
     },
     prelude::*,
     render::{
+        batching::GetFullBatchData,
+        batching::{
+            gpu_preprocessing::{
+                BatchedInstanceBuffers, IndirectParametersBuffers, PreprocessWorkItem,
+                PreprocessWorkItemBuffers,
+            },
+            GetBatchData,
+        },
         extract_component::{ExtractComponent, ExtractComponentPlugin},
         mesh::{Indices, MeshVertexBufferLayoutRef, PrimitiveTopology, RenderMesh},
         render_asset::{RenderAssetUsages, RenderAssets},
@@ -28,9 +39,11 @@ use bevy::{
             RenderPipelineDescriptor, SpecializedMeshPipeline, SpecializedMeshPipelineError,
             SpecializedMeshPipelines, TextureFormat, VertexState,
         },
+        view::NoIndirectDrawing,
         view::{self, ExtractedView, RenderVisibleEntities, ViewTarget, VisibilityClass},
         Render, RenderApp, RenderSet,
     },
+    utils::TypeIdMap,
 };
 
 const SHADER_ASSET_PATH: &str = "shaders/specialized_mesh_pipeline.wgsl";
@@ -264,13 +277,39 @@ impl SpecializedMeshPipeline for CustomMeshPipeline {
 fn queue_custom_mesh_pipeline(
     pipeline_cache: Res<PipelineCache>,
     custom_mesh_pipeline: Res<CustomMeshPipeline>,
-    mut opaque_render_phases: ResMut<ViewBinnedRenderPhases<Opaque3d>>,
-    opaque_draw_functions: Res<DrawFunctions<Opaque3d>>,
+    (mut opaque_render_phases, opaque_draw_functions): (
+        ResMut<ViewBinnedRenderPhases<Opaque3d>>,
+        Res<DrawFunctions<Opaque3d>>,
+    ),
     mut specialized_mesh_pipelines: ResMut<SpecializedMeshPipelines<CustomMeshPipeline>>,
-    views: Query<(&RenderVisibleEntities, &ExtractedView, &Msaa), With<ExtractedView>>,
-    render_meshes: Res<RenderAssets<RenderMesh>>,
-    render_mesh_instances: Res<RenderMeshInstances>,
+    views: Query<(
+        Entity,
+        &RenderVisibleEntities,
+        &ExtractedView,
+        &Msaa,
+        Has<NoIndirectDrawing>,
+    )>,
+    (render_meshes, render_mesh_instances): (
+        Res<RenderAssets<RenderMesh>>,
+        Res<RenderMeshInstances>,
+    ),
+    param: StaticSystemParam<<MeshPipeline as GetBatchData>::Param>,
+    gpu_array_buffer: ResMut<
+        BatchedInstanceBuffers<
+            <MeshPipeline as GetBatchData>::BufferData,
+            <MeshPipeline as GetFullBatchData>::BufferInputData,
+        >,
+    >,
+    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
 ) {
+    let system_param_item = param.into_inner();
+
+    let BatchedInstanceBuffers {
+        ref mut data_buffer,
+        ref mut work_item_buffers,
+        ..
+    } = gpu_array_buffer.into_inner();
+
     // Get the id for our custom draw function
     let draw_function_id = opaque_draw_functions
         .read()
@@ -279,15 +318,29 @@ fn queue_custom_mesh_pipeline(
     // Render phases are per-view, so we need to iterate over all views so that
     // the entity appears in them. (In this example, we have only one view, but
     // it's good practice to loop over all views anyway.)
-    for (view_visible_entities, view, msaa) in views.iter() {
+    for (view_entity, view_visible_entities, view, msaa, no_indirect_drawing) in views.iter() {
         let Some(opaque_phase) = opaque_render_phases.get_mut(&view.retained_view_entity) else {
             continue;
         };
 
+        // Create a *work item buffer* if necessary. Work item buffers store the
+        // indices of meshes that are to be rendered when indirect drawing is
+        // enabled.
+        let work_item_buffer = work_item_buffers
+            .entry(view_entity)
+            .or_insert_with(TypeIdMap::default)
+            .entry(TypeId::of::<Opaque3d>())
+            .or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing));
+
         // Create the key based on the view. In this case we only care about MSAA and HDR
         let view_key = MeshPipelineKey::from_msaa_samples(msaa.samples())
             | MeshPipelineKey::from_hdr(view.hdr);
 
+        // Set up a slot to hold information about the batch set we're going to
+        // create. If there are any of our custom meshes in the scene, we'll
+        // need this information in order for Bevy to kick off the rendering.
+        let mut mesh_batch_set_info = None;
+
         // Find all the custom rendered entities that are visible from this
         // view.
         for &(render_entity, visible_entity) in
@@ -310,6 +363,27 @@ fn queue_custom_mesh_pipeline(
             let mut mesh_key = view_key;
             mesh_key |= MeshPipelineKey::from_primitive_topology(mesh.primitive_topology());
 
+            // Initialize the batch set information if this was the first custom
+            // mesh we saw. We'll need that information later to create the
+            // batch set.
+            if mesh_batch_set_info.is_none() {
+                mesh_batch_set_info = Some(MeshBatchSetInfo {
+                    indirect_parameters_index: indirect_parameters_buffers
+                        .allocate(mesh.indexed(), 1),
+                    is_indexed: mesh.indexed(),
+                });
+            }
+            let mesh_info = mesh_batch_set_info.unwrap();
+
+            // Allocate some input and output indices. We'll need these to
+            // create the *work item* below.
+            let Some(input_index) =
+                MeshPipeline::get_binned_index(&system_param_item, visible_entity)
+            else {
+                continue;
+            };
+            let output_index = data_buffer.add() as u32;
+
             // Finally, we can specialize the pipeline based on the key
             let pipeline_id = specialized_mesh_pipelines
                 .specialize(
@@ -343,6 +417,35 @@ fn queue_custom_mesh_pipeline(
                 // support it you can use `BinnedRenderPhaseType::UnbatchableMesh`
                 BinnedRenderPhaseType::BatchableMesh,
             );
+
+            // Create a *work item*. A work item tells the Bevy renderer to
+            // transform the mesh on GPU.
+            work_item_buffer.push(
+                mesh.indexed(),
+                PreprocessWorkItem {
+                    input_index: input_index.into(),
+                    output_index,
+                    indirect_parameters_index: mesh_info.indirect_parameters_index,
+                },
+            );
+        }
+
+        // Now if there were any meshes, we need to add a command to the
+        // indirect parameters buffer, so that the renderer will end up
+        // enqueuing a command to draw the mesh.
+        if let Some(mesh_info) = mesh_batch_set_info {
+            indirect_parameters_buffers
+                .add_batch_set(mesh_info.is_indexed, mesh_info.indirect_parameters_index);
         }
     }
 }
+
+// If we end up having any custom meshes to draw, this contains information
+// needed to create the batch set.
+#[derive(Clone, Copy)]
+struct MeshBatchSetInfo {
+    /// The first index of the mesh batch in the indirect parameters buffer.
+    indirect_parameters_index: u32,
+    /// Whether the mesh is indexed (has an index buffer).
+    is_indexed: bool,
+}