diff --git a/crates/bevy_core_pipeline/src/core_2d/mod.rs b/crates/bevy_core_pipeline/src/core_2d/mod.rs index 2d018edaa0ec3..ec0fa58d73f60 100644 --- a/crates/bevy_core_pipeline/src/core_2d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_2d/mod.rs @@ -35,6 +35,7 @@ use core::ops::Range; use bevy_asset::UntypedAssetId; use bevy_render::{ batching::gpu_preprocessing::GpuPreprocessingMode, + render_phase::PhaseItemBatchSetKey, view::{ExtractedView, RetainedViewEntity}, }; use bevy_utils::{HashMap, HashSet}; @@ -132,7 +133,7 @@ pub struct Opaque2d { /// /// Objects in a single batch set can potentially be multi-drawn together, /// if it's enabled and the current platform supports it. - pub batch_set_key: (), + pub batch_set_key: BatchSetKey2d, /// The key, which determines which can be batched. pub bin_key: Opaque2dBinKey, /// An entity from which data will be fetched, including the mesh if @@ -198,7 +199,7 @@ impl PhaseItem for Opaque2d { impl BinnedPhaseItem for Opaque2d { // Since 2D meshes presently can't be multidrawn, the batch set key is // irrelevant. - type BatchSetKey = (); + type BatchSetKey = BatchSetKey2d; type BinKey = Opaque2dBinKey; @@ -219,6 +220,20 @@ impl BinnedPhaseItem for Opaque2d { } } +/// 2D meshes aren't currently multi-drawn together, so this batch set key only +/// stores whether the mesh is indexed. +#[derive(Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +pub struct BatchSetKey2d { + /// True if the mesh is indexed. + pub indexed: bool, +} + +impl PhaseItemBatchSetKey for BatchSetKey2d { + fn indexed(&self) -> bool { + self.indexed + } +} + impl CachedRenderPipelinePhaseItem for Opaque2d { #[inline] fn cached_pipeline(&self) -> CachedRenderPipelineId { @@ -232,7 +247,7 @@ pub struct AlphaMask2d { /// /// Objects in a single batch set can potentially be multi-drawn together, /// if it's enabled and the current platform supports it. - pub batch_set_key: (), + pub batch_set_key: BatchSetKey2d, /// The key, which determines which can be batched. pub bin_key: AlphaMask2dBinKey, /// An entity from which data will be fetched, including the mesh if @@ -297,9 +312,7 @@ impl PhaseItem for AlphaMask2d { } impl BinnedPhaseItem for AlphaMask2d { - // Since 2D meshes presently can't be multidrawn, the batch set key is - // irrelevant. - type BatchSetKey = (); + type BatchSetKey = BatchSetKey2d; type BinKey = AlphaMask2dBinKey; @@ -335,6 +348,9 @@ pub struct Transparent2d { pub draw_function: DrawFunctionId, pub batch_range: Range, pub extra_index: PhaseItemExtraIndex, + /// Whether the mesh in question is indexed (uses an index buffer in + /// addition to its vertex buffer). + pub indexed: bool, } impl PhaseItem for Transparent2d { @@ -387,6 +403,10 @@ impl SortedPhaseItem for Transparent2d { // radsort is a stable radix sort that performed better than `slice::sort_by_key` or `slice::sort_unstable_by_key`. radsort::sort_by_key(items, |item| item.sort_key().0); } + + fn indexed(&self) -> bool { + self.indexed + } } impl CachedRenderPipelinePhaseItem for Transparent2d { @@ -411,7 +431,7 @@ pub fn extract_core_2d_camera_phases( } // This is the main 2D camera, so we use the first subview index (0). - let retained_view_entity = RetainedViewEntity::new(main_entity.into(), 0); + let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0); transparent_2d_phases.insert_or_clear(retained_view_entity); opaque_2d_phases.insert_or_clear(retained_view_entity, GpuPreprocessingMode::None); diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs index 02ac01a9922be..393508047a017 100644 --- a/crates/bevy_core_pipeline/src/core_3d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs @@ -68,6 +68,7 @@ use core::ops::Range; use bevy_render::{ batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport}, mesh::allocator::SlabId, + render_phase::PhaseItemBatchSetKey, view::{NoIndirectDrawing, RetainedViewEntity}, }; pub use camera_3d::*; @@ -269,6 +270,12 @@ pub struct Opaque3dBatchSetKey { pub lightmap_slab: Option, } +impl PhaseItemBatchSetKey for Opaque3dBatchSetKey { + fn indexed(&self) -> bool { + self.index_slab.is_some() + } +} + /// Data that must be identical in order to *batch* phase items together. /// /// Note that a *batch set* (if multi-draw is in use) contains multiple batches. @@ -430,6 +437,9 @@ pub struct Transmissive3d { pub draw_function: DrawFunctionId, pub batch_range: Range, pub extra_index: PhaseItemExtraIndex, + /// Whether the mesh in question is indexed (uses an index buffer in + /// addition to its vertex buffer). + pub indexed: bool, } impl PhaseItem for Transmissive3d { @@ -493,6 +503,11 @@ impl SortedPhaseItem for Transmissive3d { fn sort(items: &mut [Self]) { radsort::sort_by_key(items, |item| item.distance); } + + #[inline] + fn indexed(&self) -> bool { + self.indexed + } } impl CachedRenderPipelinePhaseItem for Transmissive3d { @@ -509,6 +524,9 @@ pub struct Transparent3d { pub draw_function: DrawFunctionId, pub batch_range: Range, pub extra_index: PhaseItemExtraIndex, + /// Whether the mesh in question is indexed (uses an index buffer in + /// addition to its vertex buffer). + pub indexed: bool, } impl PhaseItem for Transparent3d { @@ -560,6 +578,11 @@ impl SortedPhaseItem for Transparent3d { fn sort(items: &mut [Self]) { radsort::sort_by_key(items, |item| item.distance); } + + #[inline] + fn indexed(&self) -> bool { + self.indexed + } } impl CachedRenderPipelinePhaseItem for Transparent3d { @@ -594,7 +617,7 @@ pub fn extract_core_3d_camera_phases( }); // This is the main 3D camera, so use the first subview index (0). - let retained_view_entity = RetainedViewEntity::new(main_entity.into(), 0); + let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0); opaque_3d_phases.insert_or_clear(retained_view_entity, gpu_preprocessing_mode); alpha_mask_3d_phases.insert_or_clear(retained_view_entity, gpu_preprocessing_mode); @@ -662,7 +685,7 @@ pub fn extract_camera_prepass_phase( }); // This is the main 3D camera, so we use the first subview index (0). - let retained_view_entity = RetainedViewEntity::new(main_entity.into(), 0); + let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0); if depth_prepass || normal_prepass || motion_vector_prepass { opaque_3d_prepass_phases.insert_or_clear(retained_view_entity, gpu_preprocessing_mode); diff --git a/crates/bevy_core_pipeline/src/prepass/mod.rs b/crates/bevy_core_pipeline/src/prepass/mod.rs index b90dea03a6260..7fb2dfcea961b 100644 --- a/crates/bevy_core_pipeline/src/prepass/mod.rs +++ b/crates/bevy_core_pipeline/src/prepass/mod.rs @@ -35,6 +35,7 @@ use bevy_ecs::prelude::*; use bevy_math::Mat4; use bevy_reflect::{std_traits::ReflectDefault, Reflect}; use bevy_render::mesh::allocator::SlabId; +use bevy_render::render_phase::PhaseItemBatchSetKey; use bevy_render::sync_world::MainEntity; use bevy_render::{ render_phase::{ @@ -184,6 +185,12 @@ pub struct OpaqueNoLightmap3dBatchSetKey { pub index_slab: Option, } +impl PhaseItemBatchSetKey for OpaqueNoLightmap3dBatchSetKey { + fn indexed(&self) -> bool { + self.index_slab.is_some() + } +} + // TODO: Try interning these. /// The data used to bin each opaque 3D object in the prepass and deferred pass. #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] diff --git a/crates/bevy_gizmos/src/pipeline_2d.rs b/crates/bevy_gizmos/src/pipeline_2d.rs index 96df48e164ca6..13c9b89dd98ff 100644 --- a/crates/bevy_gizmos/src/pipeline_2d.rs +++ b/crates/bevy_gizmos/src/pipeline_2d.rs @@ -340,6 +340,7 @@ fn queue_line_gizmos_2d( sort_key: FloatOrd(f32::INFINITY), batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: false, }); } @@ -360,6 +361,7 @@ fn queue_line_gizmos_2d( sort_key: FloatOrd(f32::INFINITY), batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: false, }); } } @@ -418,6 +420,7 @@ fn queue_line_joint_gizmos_2d( sort_key: FloatOrd(f32::INFINITY), batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: false, }); } } diff --git a/crates/bevy_gizmos/src/pipeline_3d.rs b/crates/bevy_gizmos/src/pipeline_3d.rs index 458cb28e70ef7..aac6358d638bf 100644 --- a/crates/bevy_gizmos/src/pipeline_3d.rs +++ b/crates/bevy_gizmos/src/pipeline_3d.rs @@ -369,6 +369,7 @@ fn queue_line_gizmos_3d( distance: 0., batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: true, }); } @@ -390,6 +391,7 @@ fn queue_line_gizmos_3d( distance: 0., batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: true, }); } } @@ -484,6 +486,7 @@ fn queue_line_joint_gizmos_3d( distance: 0., batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: true, }); } } diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs index 4bd5c76f3331a..44107dfe7dd5d 100644 --- a/crates/bevy_pbr/src/lib.rs +++ b/crates/bevy_pbr/src/lib.rs @@ -102,6 +102,8 @@ pub mod graph { GpuPreprocess, /// Label for the screen space reflections pass. ScreenSpaceReflections, + /// Label for the indirect parameters building pass. + BuildIndirectParameters, } } diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index c22952226dc96..0ffe305bd7cb1 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -851,6 +851,9 @@ pub fn queue_material_meshes( } }; + // Fetch the slabs that this mesh resides in. + let (vertex_slab, index_slab) = mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); + match mesh_key .intersection(MeshPipelineKey::BLEND_RESERVED_BITS | MeshPipelineKey::MAY_DISCARD) { @@ -865,13 +868,12 @@ pub fn queue_material_meshes( distance, batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: index_slab.is_some(), }); } else if material.properties.render_method == OpaqueRendererMethod::Forward { - let (vertex_slab, index_slab) = - mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); let batch_set_key = Opaque3dBatchSetKey { - draw_function: draw_opaque_pbr, pipeline: pipeline_id, + draw_function: draw_opaque_pbr, material_bind_group_index: Some(material.binding.group.0), vertex_slab: vertex_slab.unwrap_or_default(), index_slab, @@ -903,10 +905,9 @@ pub fn queue_material_meshes( distance, batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: index_slab.is_some(), }); } else if material.properties.render_method == OpaqueRendererMethod::Forward { - let (vertex_slab, index_slab) = - mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); let batch_set_key = OpaqueNoLightmap3dBatchSetKey { draw_function: draw_alpha_mask_pbr, pipeline: pipeline_id, @@ -938,6 +939,7 @@ pub fn queue_material_meshes( distance, batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: index_slab.is_some(), }); } } diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 9b34c04dc1bb6..373119f25ffde 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -966,13 +966,13 @@ pub fn queue_prepass_material_meshes( } }; + let (vertex_slab, index_slab) = mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); + match mesh_key .intersection(MeshPipelineKey::BLEND_RESERVED_BITS | MeshPipelineKey::MAY_DISCARD) { MeshPipelineKey::BLEND_OPAQUE | MeshPipelineKey::BLEND_ALPHA_TO_COVERAGE => { if deferred { - let (vertex_slab, index_slab) = - mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); opaque_deferred_phase.as_mut().unwrap().add( OpaqueNoLightmap3dBatchSetKey { draw_function: opaque_draw_deferred, diff --git a/crates/bevy_pbr/src/render/build_indirect_params.wgsl b/crates/bevy_pbr/src/render/build_indirect_params.wgsl new file mode 100644 index 0000000000000..90741e9064971 --- /dev/null +++ b/crates/bevy_pbr/src/render/build_indirect_params.wgsl @@ -0,0 +1,106 @@ +// Builds GPU indirect draw parameters from metadata. +// +// This only runs when indirect drawing is enabled. It takes the output of +// `mesh_preprocess.wgsl` and creates indirect parameters for the GPU. +// +// This shader runs separately for indexed and non-indexed meshes. Unlike +// `mesh_preprocess.wgsl`, which runs one instance per mesh *instance*, one +// instance of this shader corresponds to a single *batch* which could contain +// arbitrarily many instances of a single mesh. + +#import bevy_pbr::mesh_preprocess_types::{ + IndirectBatchSet, + IndirectParametersIndexed, + IndirectParametersNonIndexed, + IndirectParametersMetadata, + MeshInput +} + +// The data for each mesh that the CPU supplied to the GPU. +@group(0) @binding(0) var current_input: array; + +// Data that we use to generate the indirect parameters. +// +// The `mesh_preprocess.wgsl` shader emits these. +@group(0) @binding(1) var indirect_parameters_metadata: array; + +// Information about each batch set. +// +// A *batch set* is a set of meshes that might be multi-drawn together. +@group(0) @binding(2) var indirect_batch_sets: array; + +#ifdef INDEXED +// The buffer of indirect draw parameters that we generate, and that the GPU +// reads to issue the draws. +// +// This buffer is for indexed meshes. +@group(0) @binding(3) var indirect_parameters: + array; +#else // INDEXED +// The buffer of indirect draw parameters that we generate, and that the GPU +// reads to issue the draws. +// +// This buffer is for non-indexed meshes. +@group(0) @binding(3) var indirect_parameters: + array; +#endif // INDEXED + +@compute +@workgroup_size(64) +fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { + // Figure out our instance index (i.e. batch index). If this thread doesn't + // correspond to any index, bail. + let instance_index = global_invocation_id.x; + if (instance_index >= arrayLength(&indirect_parameters_metadata)) { + return; + } + + // Unpack the metadata for this batch. + let mesh_index = indirect_parameters_metadata[instance_index].mesh_index; + let base_output_index = indirect_parameters_metadata[instance_index].base_output_index; + let batch_set_index = indirect_parameters_metadata[instance_index].batch_set_index; + let instance_count = atomicLoad(&indirect_parameters_metadata[instance_index].instance_count); + + // If we aren't using `multi_draw_indirect_count`, we have a 1:1 fixed + // assignment of batches to slots in the indirect parameters buffer, so we + // can just use the instance index as the index of our indirect parameters. + var indirect_parameters_index = instance_index; + + // If the current hardware and driver support `multi_draw_indirect_count`, + // dynamically reserve an index for the indirect parameters we're to + // generate. +#ifdef MULTI_DRAW_INDIRECT_COUNT_SUPPORTED + if (instance_count == 0u) { + return; + } + + // If this batch belongs to a batch set, then allocate space for the + // indirect commands in that batch set. + if (batch_set_index != 0xffffffffu) { + let indirect_parameters_base = + indirect_batch_sets[batch_set_index].indirect_parameters_base; + let indirect_parameters_offset = + atomicAdd(&indirect_batch_sets[batch_set_index].indirect_parameters_count, 1u); + + indirect_parameters_index = indirect_parameters_base + indirect_parameters_offset; + } +#endif // MULTI_DRAW_INDIRECT_COUNT_SUPPORTED + + // Build up the indirect parameters. The structures for indexed and + // non-indexed meshes are slightly different. + + indirect_parameters[indirect_parameters_index].instance_count = instance_count; + indirect_parameters[indirect_parameters_index].first_instance = base_output_index; + indirect_parameters[indirect_parameters_index].base_vertex = + current_input[mesh_index].first_vertex_index; + +#ifdef INDEXED + indirect_parameters[indirect_parameters_index].index_count = + current_input[mesh_index].index_count; + indirect_parameters[indirect_parameters_index].first_index = + current_input[mesh_index].first_index_index; +#else // INDEXED + indirect_parameters[indirect_parameters_index].vertex_count = + current_input[mesh_index].index_count; +#endif // INDEXED +} \ No newline at end of file diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs index 3c7954a8c4321..8ac5a7c96e29c 100644 --- a/crates/bevy_pbr/src/render/gpu_preprocess.rs +++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs @@ -6,10 +6,12 @@ //! [`MeshInputUniform`]s instead and use the GPU to calculate the remaining //! derived fields in [`MeshUniform`]. -use core::num::NonZero; +use core::num::{NonZero, NonZeroU64}; use bevy_app::{App, Plugin}; use bevy_asset::{load_internal_asset, Handle}; +use bevy_core_pipeline::core_3d::graph::{Core3d, Node3d}; +use bevy_derive::{Deref, DerefMut}; use bevy_ecs::{ component::Component, entity::Entity, @@ -20,22 +22,24 @@ use bevy_ecs::{ }; use bevy_render::{ batching::gpu_preprocessing::{ - BatchedInstanceBuffers, GpuPreprocessingSupport, IndirectParameters, - IndirectParametersBuffer, PreprocessWorkItem, + BatchedInstanceBuffers, GpuPreprocessingSupport, IndirectBatchSet, + IndirectParametersBuffers, IndirectParametersIndexed, IndirectParametersMetadata, + IndirectParametersNonIndexed, PreprocessWorkItem, PreprocessWorkItemBuffers, }, - graph::CameraDriverLabel, - render_graph::{Node, NodeRunError, RenderGraph, RenderGraphContext}, + render_graph::{Node, NodeRunError, RenderGraphApp, RenderGraphContext}, render_resource::{ binding_types::{storage_buffer, storage_buffer_read_only, uniform_buffer}, - BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, BufferBinding, + BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, Buffer, BufferBinding, CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor, DynamicBindGroupLayoutEntries, PipelineCache, Shader, ShaderStages, ShaderType, SpecializedComputePipeline, SpecializedComputePipelines, }, renderer::{RenderContext, RenderDevice, RenderQueue}, + settings::WgpuFeatures, view::{NoIndirectDrawing, ViewUniform, ViewUniformOffset, ViewUniforms}, Render, RenderApp, RenderSet, }; +use bevy_utils::TypeIdMap; use bitflags::bitflags; use smallvec::{smallvec, SmallVec}; use tracing::warn; @@ -44,12 +48,17 @@ use crate::{ graph::NodePbr, MeshCullingData, MeshCullingDataBuffer, MeshInputUniform, MeshUniform, }; +use super::ViewLightEntities; + /// The handle to the `mesh_preprocess.wgsl` compute shader. pub const MESH_PREPROCESS_SHADER_HANDLE: Handle = Handle::weak_from_u128(16991728318640779533); /// The handle to the `mesh_preprocess_types.wgsl` compute shader. pub const MESH_PREPROCESS_TYPES_SHADER_HANDLE: Handle = Handle::weak_from_u128(2720440370122465935); +/// The handle to the `build_indirect_params.wgsl` compute shader. +pub const BUILD_INDIRECT_PARAMS_SHADER_HANDLE: Handle = + Handle::weak_from_u128(3711077208359699672); /// The GPU workgroup size. const WORKGROUP_SIZE: usize = 64; @@ -66,28 +75,58 @@ pub struct GpuMeshPreprocessPlugin { pub use_gpu_instance_buffer_builder: bool, } -/// The render node for the mesh uniform building pass. +/// The render node for the mesh preprocessing pass. +/// +/// This pass runs a compute shader to cull invisible meshes (if that wasn't +/// done by the CPU), transforms them, and, if indirect drawing is on, populates +/// indirect draw parameter metadata for the subsequent +/// [`BuildIndirectParametersNode`]. pub struct GpuPreprocessNode { view_query: QueryState< ( Entity, - Read, + Read, Read, Has, ), Without, >, + main_view_query: QueryState>, +} + +/// The render node for the indirect parameter building pass. +/// +/// This node runs a compute shader on the output of the [`GpuPreprocessNode`] +/// in order to transform the [`IndirectParametersMetadata`] into +/// properly-formatted [`IndirectParametersIndexed`] and +/// [`IndirectParametersNonIndexed`]. +pub struct BuildIndirectParametersNode { + view_query: QueryState< + Read, + (Without, Without), + >, } -/// The compute shader pipelines for the mesh uniform building pass. +/// The compute shader pipelines for the GPU mesh preprocessing and indirect +/// parameter building passes. #[derive(Resource)] pub struct PreprocessPipelines { /// The pipeline used for CPU culling. This pipeline doesn't populate - /// indirect parameters. - pub direct: PreprocessPipeline, + /// indirect parameter metadata. + pub direct_preprocess: PreprocessPipeline, /// The pipeline used for GPU culling. This pipeline populates indirect + /// parameter metadata. + pub gpu_culling_preprocess: PreprocessPipeline, + /// The pipeline used for indexed indirect parameter building. + /// + /// This pipeline converts indirect parameter metadata into indexed indirect /// parameters. - pub gpu_culling: PreprocessPipeline, + pub build_indexed_indirect_params: BuildIndirectParametersPipeline, + /// The pipeline used for non-indexed indirect parameter building. + /// + /// This pipeline converts indirect parameter metadata into non-indexed + /// indirect parameters. + pub build_non_indexed_indirect_params: BuildIndirectParametersPipeline, } /// The pipeline for the GPU mesh preprocessing shader. @@ -100,6 +139,16 @@ pub struct PreprocessPipeline { pub pipeline_id: Option, } +/// The pipeline for the indirect parameter building shader. +pub struct BuildIndirectParametersPipeline { + /// The bind group layout for the compute shader. + pub bind_group_layout: BindGroupLayout, + /// The pipeline ID for the compute shader. + /// + /// This gets filled in `prepare_preprocess_pipelines`. + pub pipeline_id: Option, +} + bitflags! { /// Specifies variants of the mesh preprocessing shader. #[derive(Clone, Copy, PartialEq, Eq, Hash)] @@ -109,13 +158,73 @@ bitflags! { /// This `#define`'s `GPU_CULLING` in the shader. const GPU_CULLING = 1; } + + /// Specifies variants of the indirect parameter building shader. + #[derive(Clone, Copy, PartialEq, Eq, Hash)] + pub struct BuildIndirectParametersPipelineKey: u8 { + /// Whether the indirect parameter building shader is processing indexed + /// meshes (those that have index buffers). + /// + /// This defines `INDEXED` in the shader. + const INDEXED = 1; + /// Whether the GPU and driver supports `multi_draw_indirect_count`. + /// + /// This defines `MULTI_DRAW_INDIRECT_COUNT_SUPPORTED` in the shader. + const MULTI_DRAW_INDIRECT_COUNT_SUPPORTED = 2; + } +} + +/// The compute shader bind group for the mesh preprocessing pass for each +/// render phase. +/// +/// This goes on the view. It maps the [`core::any::TypeId`] of a render phase +/// (e.g. [`bevy_core_pipeline::core_3d::Opaque3d`]) to the +/// [`PhasePreprocessBindGroups`] for that phase. +#[derive(Component, Clone, Deref, DerefMut)] +pub struct PreprocessBindGroups(pub TypeIdMap); + +/// The compute shader bind group for the mesh preprocessing step for a single +/// render phase on a single view. +#[derive(Clone)] +pub enum PhasePreprocessBindGroups { + /// The bind group used for the single invocation of the compute shader when + /// indirect drawing is *not* being used. + /// + /// Because direct drawing doesn't require splitting the meshes into indexed + /// and non-indexed meshes, there's only one bind group in this case. + Direct(BindGroup), + + /// The bind groups used for the compute shader when indirect drawing is + /// being used. + /// + /// Because indirect drawing requires splitting the meshes into indexed and + /// non-indexed meshes, there are two bind groups here. + Indirect { + /// The bind group used for indexed meshes. + /// + /// This will be `None` if there are no indexed meshes. + indexed: Option, + /// The bind group used for non-indexed meshes. + /// + /// This will be `None` if there are no non-indexed meshes. + non_indexed: Option, + }, } -/// The compute shader bind group for the mesh uniform building pass. +/// The bind groups for the indirect parameters building compute shader. /// -/// This goes on the view. -#[derive(Component, Clone)] -pub struct PreprocessBindGroup(BindGroup); +/// This is shared among all views and phases. +#[derive(Resource)] +pub struct BuildIndirectParametersBindGroups { + /// The bind group used for indexed meshes. + /// + /// This will be `None` if there are no indexed meshes. + indexed: Option, + /// The bind group used for non-indexed meshes. + /// + /// This will be `None` if there are no non-indexed meshes. + non_indexed: Option, +} /// Stops the `GpuPreprocessNode` attempting to generate the buffer for this view /// useful to avoid duplicating effort if the bind group is shared between views @@ -136,6 +245,12 @@ impl Plugin for GpuMeshPreprocessPlugin { "mesh_preprocess_types.wgsl", Shader::from_wgsl ); + load_internal_asset!( + app, + BUILD_INDIRECT_PARAMS_SHADER_HANDLE, + "build_indirect_params.wgsl", + Shader::from_wgsl + ); } fn finish(&self, app: &mut App) { @@ -150,15 +265,10 @@ impl Plugin for GpuMeshPreprocessPlugin { return; } - // Stitch the node in. - let gpu_preprocess_node = GpuPreprocessNode::from_world(render_app.world_mut()); - let mut render_graph = render_app.world_mut().resource_mut::(); - render_graph.add_node(NodePbr::GpuPreprocess, gpu_preprocess_node); - render_graph.add_node_edge(NodePbr::GpuPreprocess, CameraDriverLabel); - render_app .init_resource::() .init_resource::>() + .init_resource::>() .add_systems( Render, ( @@ -170,6 +280,19 @@ impl Plugin for GpuMeshPreprocessPlugin { .in_set(RenderSet::PrepareBindGroups), write_mesh_culling_data_buffer.in_set(RenderSet::PrepareResourcesFlush), ) + ) + .add_render_graph_node::(Core3d, NodePbr::GpuPreprocess) + .add_render_graph_node::( + Core3d, + NodePbr::BuildIndirectParameters + ) + .add_render_graph_edges( + Core3d, + (NodePbr::GpuPreprocess, NodePbr::BuildIndirectParameters, Node3d::Prepass) + ) + .add_render_graph_edges( + Core3d, + (NodePbr::GpuPreprocess, NodePbr::BuildIndirectParameters, NodePbr::ShadowPass) ); } } @@ -178,6 +301,7 @@ impl FromWorld for GpuPreprocessNode { fn from_world(world: &mut World) -> Self { Self { view_query: QueryState::new(world), + main_view_query: QueryState::new(world), } } } @@ -185,11 +309,12 @@ impl FromWorld for GpuPreprocessNode { impl Node for GpuPreprocessNode { fn update(&mut self, world: &mut World) { self.view_query.update_archetypes(world); + self.main_view_query.update_archetypes(world); } fn run<'w>( &self, - _: &mut RenderGraphContext, + graph: &mut RenderGraphContext, render_context: &mut RenderContext<'w>, world: &'w World, ) -> Result<(), NodeRunError> { @@ -210,12 +335,25 @@ impl Node for GpuPreprocessNode { timestamp_writes: None, }); - // Run the compute passes. - for (view, bind_group, view_uniform_offset, no_indirect_drawing) in - self.view_query.iter_manual(world) + let mut all_views: SmallVec<[_; 8]> = SmallVec::new(); + all_views.push(graph.view_entity()); + if let Ok(shadow_cascade_views) = + self.main_view_query.get_manual(world, graph.view_entity()) { - // Grab the index buffer for this view. - let Some(index_buffer) = index_buffers.get(&view) else { + all_views.extend(shadow_cascade_views.lights.iter().copied()); + } + + // Run the compute passes. + + for view_entity in all_views { + let Ok((view, bind_groups, view_uniform_offset, no_indirect_drawing)) = + self.view_query.get_manual(world, view_entity) + else { + continue; + }; + + // Grab the work item buffers for this view. + let Some(view_work_item_buffers) = index_buffers.get(&view) else { warn!("The preprocessing index buffer wasn't present"); continue; }; @@ -223,34 +361,204 @@ impl Node for GpuPreprocessNode { // Select the right pipeline, depending on whether GPU culling is in // use. let maybe_pipeline_id = if !no_indirect_drawing { - preprocess_pipelines.gpu_culling.pipeline_id + preprocess_pipelines.gpu_culling_preprocess.pipeline_id } else { - preprocess_pipelines.direct.pipeline_id + preprocess_pipelines.direct_preprocess.pipeline_id }; // Fetch the pipeline. let Some(preprocess_pipeline_id) = maybe_pipeline_id else { warn!("The build mesh uniforms pipeline wasn't ready"); - return Ok(()); + continue; }; let Some(preprocess_pipeline) = pipeline_cache.get_compute_pipeline(preprocess_pipeline_id) else { // This will happen while the pipeline is being compiled and is fine. - return Ok(()); + continue; }; compute_pass.set_pipeline(preprocess_pipeline); - let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![]; - if !no_indirect_drawing { - dynamic_offsets.push(view_uniform_offset.offset); + // Loop over each render phase. + for (phase_type_id, phase_work_item_buffers) in view_work_item_buffers { + // Fetch the bind group for the render phase. + let Some(phase_bind_groups) = bind_groups.get(phase_type_id) else { + continue; + }; + + // If we're drawing indirectly, make sure the mesh preprocessing + // shader has access to the view info it needs to do culling. + let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![]; + if !no_indirect_drawing { + dynamic_offsets.push(view_uniform_offset.offset); + } + + // Are we drawing directly or indirectly? + match *phase_bind_groups { + PhasePreprocessBindGroups::Direct(ref bind_group) => { + // Invoke the mesh preprocessing shader to transform + // meshes only, but not cull. + let PreprocessWorkItemBuffers::Direct(phase_work_item_buffer) = + phase_work_item_buffers + else { + continue; + }; + compute_pass.set_bind_group(0, bind_group, &dynamic_offsets); + let workgroup_count = phase_work_item_buffer.len().div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + + PhasePreprocessBindGroups::Indirect { + indexed: ref maybe_indexed_bind_group, + non_indexed: ref maybe_non_indexed_bind_group, + } => { + // Invoke the mesh preprocessing shader to transform and + // cull the meshes. + let PreprocessWorkItemBuffers::Indirect { + indexed: indexed_buffer, + non_indexed: non_indexed_buffer, + .. + } = phase_work_item_buffers + else { + continue; + }; + + // Transform and cull indexed meshes if there are any. + if let Some(indexed_bind_group) = maybe_indexed_bind_group { + compute_pass.set_bind_group(0, indexed_bind_group, &dynamic_offsets); + let workgroup_count = indexed_buffer.len().div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + + // Transform and cull non-indexed meshes if there are any. + if let Some(non_indexed_bind_group) = maybe_non_indexed_bind_group { + compute_pass.set_bind_group( + 0, + non_indexed_bind_group, + &dynamic_offsets, + ); + let workgroup_count = non_indexed_buffer.len().div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + } + } } - compute_pass.set_bind_group(0, &bind_group.0, &dynamic_offsets); + } + + Ok(()) + } +} - let workgroup_count = index_buffer.buffer.len().div_ceil(WORKGROUP_SIZE); - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); +impl FromWorld for BuildIndirectParametersNode { + fn from_world(world: &mut World) -> Self { + Self { + view_query: QueryState::new(world), + } + } +} + +impl Node for BuildIndirectParametersNode { + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + // Fetch the bind group. + let Some(build_indirect_params_bind_groups) = + world.get_resource::() + else { + return Ok(()); + }; + + // Fetch the pipelines and the buffers we need. + let pipeline_cache = world.resource::(); + let preprocess_pipelines = world.resource::(); + let indirect_parameters_buffers = world.resource::(); + + // Create the compute pass. + let mut compute_pass = + render_context + .command_encoder() + .begin_compute_pass(&ComputePassDescriptor { + label: Some("build indirect parameters"), + timestamp_writes: None, + }); + + // Fetch the pipelines. + + let (maybe_indexed_pipeline_id, maybe_non_indexed_pipeline_id) = ( + preprocess_pipelines + .build_indexed_indirect_params + .pipeline_id, + preprocess_pipelines + .build_non_indexed_indirect_params + .pipeline_id, + ); + + let ( + Some(build_indexed_indirect_params_pipeline_id), + Some(build_non_indexed_indirect_params_pipeline_id), + ) = (maybe_indexed_pipeline_id, maybe_non_indexed_pipeline_id) + else { + warn!("The build indirect parameters pipelines weren't ready"); + return Ok(()); + }; + + let ( + Some(build_indexed_indirect_params_pipeline), + Some(build_non_indexed_indirect_params_pipeline), + ) = ( + pipeline_cache.get_compute_pipeline(build_indexed_indirect_params_pipeline_id), + pipeline_cache.get_compute_pipeline(build_non_indexed_indirect_params_pipeline_id), + ) + else { + // This will happen while the pipeline is being compiled and is fine. + return Ok(()); + }; + + // Transform the [`IndirectParametersMetadata`] that the GPU mesh + // preprocessing phase wrote to [`IndirectParametersIndexed`] for + // indexed meshes, if we have any. + if let Some(ref build_indirect_indexed_params_bind_group) = + build_indirect_params_bind_groups.indexed + { + compute_pass.set_pipeline(build_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]); + let workgroup_count = indirect_parameters_buffers + .indexed_batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + + // Transform the [`IndirectParametersMetadata`] that the GPU mesh + // preprocessing phase wrote to [`IndirectParametersNonIndexed`] for + // non-indexed meshes, if we have any. + if let Some(ref build_indirect_non_indexed_params_bind_group) = + build_indirect_params_bind_groups.non_indexed + { + compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]); + let workgroup_count = indirect_parameters_buffers + .non_indexed_batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } } Ok(()) @@ -258,8 +566,15 @@ impl Node for GpuPreprocessNode { } impl PreprocessPipelines { + /// Returns true if the preprocessing and indirect parameters pipelines have + /// been loaded or false otherwise. pub(crate) fn pipelines_are_loaded(&self, pipeline_cache: &PipelineCache) -> bool { - self.direct.is_loaded(pipeline_cache) && self.gpu_culling.is_loaded(pipeline_cache) + self.direct_preprocess.is_loaded(pipeline_cache) + && self.gpu_culling_preprocess.is_loaded(pipeline_cache) + && self.build_indexed_indirect_params.is_loaded(pipeline_cache) + && self + .build_non_indexed_indirect_params + .is_loaded(pipeline_cache) } } @@ -270,6 +585,15 @@ impl PreprocessPipeline { } } +impl BuildIndirectParametersPipeline { + /// Returns true if this pipeline has been loaded into the pipeline cache or + /// false otherwise. + fn is_loaded(&self, pipeline_cache: &PipelineCache) -> bool { + self.pipeline_id + .is_some_and(|pipeline_id| pipeline_cache.get_compute_pipeline(pipeline_id).is_some()) + } +} + impl SpecializedComputePipeline for PreprocessPipeline { type Key = PreprocessPipelineKey; @@ -311,14 +635,24 @@ impl FromWorld for PreprocessPipelines { let direct_bind_group_layout_entries = preprocess_direct_bind_group_layout_entries(); let gpu_culling_bind_group_layout_entries = preprocess_direct_bind_group_layout_entries() .extend_sequential(( - // `indirect_parameters` - storage_buffer::(/* has_dynamic_offset= */ false), + // `indirect_parameters_metadata` + storage_buffer::(/* has_dynamic_offset= */ false), // `mesh_culling_data` storage_buffer_read_only::(/* has_dynamic_offset= */ false), // `view` uniform_buffer::(/* has_dynamic_offset= */ true), )); + // Indexed and non-indexed bind group parameters share all the bind + // group layout entries except the final one. + let build_indexed_indirect_params_bind_group_layout_entries = + build_indirect_params_bind_group_layout_entries() + .extend_sequential((storage_buffer::(false),)); + let build_non_indexed_indirect_params_bind_group_layout_entries = + build_indirect_params_bind_group_layout_entries() + .extend_sequential((storage_buffer::(false),)); + + // Create the bind group layouts. let direct_bind_group_layout = render_device.create_bind_group_layout( "build mesh uniforms direct bind group layout", &direct_bind_group_layout_entries, @@ -327,16 +661,34 @@ impl FromWorld for PreprocessPipelines { "build mesh uniforms GPU culling bind group layout", &gpu_culling_bind_group_layout_entries, ); + let build_indexed_indirect_params_bind_group_layout = render_device + .create_bind_group_layout( + "build indexed indirect parameters bind group layout", + &build_indexed_indirect_params_bind_group_layout_entries, + ); + let build_non_indexed_indirect_params_bind_group_layout = render_device + .create_bind_group_layout( + "build non-indexed indirect parameters bind group layout", + &build_non_indexed_indirect_params_bind_group_layout_entries, + ); PreprocessPipelines { - direct: PreprocessPipeline { + direct_preprocess: PreprocessPipeline { bind_group_layout: direct_bind_group_layout, pipeline_id: None, }, - gpu_culling: PreprocessPipeline { + gpu_culling_preprocess: PreprocessPipeline { bind_group_layout: gpu_culling_bind_group_layout, pipeline_id: None, }, + build_indexed_indirect_params: BuildIndirectParametersPipeline { + bind_group_layout: build_indexed_indirect_params_bind_group_layout, + pipeline_id: None, + }, + build_non_indexed_indirect_params: BuildIndirectParametersPipeline { + bind_group_layout: build_non_indexed_indirect_params_bind_group_layout, + pipeline_id: None, + }, } } } @@ -357,22 +709,66 @@ fn preprocess_direct_bind_group_layout_entries() -> DynamicBindGroupLayoutEntrie ) } -/// A system that specializes the `mesh_preprocess.wgsl` pipelines if necessary. +// Returns the first 3 bind group layout entries shared between all invocations +// of the indirect parameters building shader. +fn build_indirect_params_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { + DynamicBindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_read_only::(false), + storage_buffer_read_only::(false), + storage_buffer::(false), + ), + ) +} + +/// A system that specializes the `mesh_preprocess.wgsl` and +/// `build_indirect_params.wgsl` pipelines if necessary. pub fn prepare_preprocess_pipelines( pipeline_cache: Res, - mut pipelines: ResMut>, + render_device: Res, + mut specialized_preprocess_pipelines: ResMut>, + mut specialized_build_indirect_parameters_pipelines: ResMut< + SpecializedComputePipelines, + >, mut preprocess_pipelines: ResMut, ) { - preprocess_pipelines.direct.prepare( + preprocess_pipelines.direct_preprocess.prepare( &pipeline_cache, - &mut pipelines, + &mut specialized_preprocess_pipelines, PreprocessPipelineKey::empty(), ); - preprocess_pipelines.gpu_culling.prepare( + preprocess_pipelines.gpu_culling_preprocess.prepare( &pipeline_cache, - &mut pipelines, + &mut specialized_preprocess_pipelines, PreprocessPipelineKey::GPU_CULLING, ); + + let mut build_indirect_parameters_pipeline_key = BuildIndirectParametersPipelineKey::empty(); + + // If the GPU and driver support `multi_draw_indirect_count`, tell the + // shader that. + if render_device + .wgpu_device() + .features() + .contains(WgpuFeatures::MULTI_DRAW_INDIRECT_COUNT) + { + build_indirect_parameters_pipeline_key + .insert(BuildIndirectParametersPipelineKey::MULTI_DRAW_INDIRECT_COUNT_SUPPORTED); + } + + preprocess_pipelines.build_indexed_indirect_params.prepare( + &pipeline_cache, + &mut specialized_build_indirect_parameters_pipelines, + build_indirect_parameters_pipeline_key | BuildIndirectParametersPipelineKey::INDEXED, + ); + preprocess_pipelines + .build_non_indexed_indirect_params + .prepare( + &pipeline_cache, + &mut specialized_build_indirect_parameters_pipelines, + build_indirect_parameters_pipeline_key, + ); } impl PreprocessPipeline { @@ -391,96 +787,344 @@ impl PreprocessPipeline { } } +impl SpecializedComputePipeline for BuildIndirectParametersPipeline { + type Key = BuildIndirectParametersPipelineKey; + + fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor { + let mut shader_defs = vec![]; + if key.contains(BuildIndirectParametersPipelineKey::INDEXED) { + shader_defs.push("INDEXED".into()); + } + if key.contains(BuildIndirectParametersPipelineKey::MULTI_DRAW_INDIRECT_COUNT_SUPPORTED) { + shader_defs.push("MULTI_DRAW_INDIRECT_COUNT_SUPPORTED".into()); + } + + ComputePipelineDescriptor { + label: if key.contains(BuildIndirectParametersPipelineKey::INDEXED) { + Some("build indexed indirect parameters".into()) + } else { + Some("build non-indexed indirect parameters".into()) + }, + layout: vec![self.bind_group_layout.clone()], + push_constant_ranges: vec![], + shader: BUILD_INDIRECT_PARAMS_SHADER_HANDLE, + shader_defs, + entry_point: "main".into(), + zero_initialize_workgroup_memory: false, + } + } +} + +impl BuildIndirectParametersPipeline { + fn prepare( + &mut self, + pipeline_cache: &PipelineCache, + pipelines: &mut SpecializedComputePipelines, + key: BuildIndirectParametersPipelineKey, + ) { + if self.pipeline_id.is_some() { + return; + } + + let build_indirect_parameters_pipeline_id = pipelines.specialize(pipeline_cache, self, key); + self.pipeline_id = Some(build_indirect_parameters_pipeline_id); + } +} + /// A system that attaches the mesh uniform buffers to the bind groups for the /// variants of the mesh preprocessing compute shader. pub fn prepare_preprocess_bind_groups( mut commands: Commands, render_device: Res, batched_instance_buffers: Res>, - indirect_parameters_buffer: Res, + indirect_parameters_buffers: Res, mesh_culling_data_buffer: Res, view_uniforms: Res, pipelines: Res, ) { // Grab the `BatchedInstanceBuffers`. + let batched_instance_buffers = batched_instance_buffers.into_inner(); + + let Some(current_input_buffer) = batched_instance_buffers + .current_input_buffer + .buffer() + .buffer() + else { + return; + }; + + // Keep track of whether any of the phases will be drawn indirectly. If + // they are, then we'll need bind groups for the indirect parameters + // building shader too. + let mut any_indirect = false; + + for (view, phase_work_item_buffers) in &batched_instance_buffers.work_item_buffers { + let mut bind_groups = TypeIdMap::default(); + + for (&phase_id, work_item_buffers) in phase_work_item_buffers { + if let Some(bind_group) = prepare_preprocess_bind_group_for_phase( + &render_device, + &pipelines, + &view_uniforms, + &indirect_parameters_buffers, + &mesh_culling_data_buffer, + batched_instance_buffers, + work_item_buffers, + &mut any_indirect, + ) { + bind_groups.insert(phase_id, bind_group); + } + } + + commands + .entity(*view) + .insert(PreprocessBindGroups(bind_groups)); + } + + // If any of the phases will be drawn indirectly, create the bind groups for + // the indirect parameters building shader. + if any_indirect { + create_build_indirect_parameters_bind_groups( + &mut commands, + &render_device, + &pipelines, + current_input_buffer, + &indirect_parameters_buffers, + ); + } +} + +// Creates the bind group for the GPU preprocessing shader for a single phase +// for a single view. +#[expect( + clippy::too_many_arguments, + reason = "it's a system that needs a bunch of parameters" +)] +fn prepare_preprocess_bind_group_for_phase( + render_device: &RenderDevice, + pipelines: &PreprocessPipelines, + view_uniforms: &ViewUniforms, + indirect_parameters_buffers: &IndirectParametersBuffers, + mesh_culling_data_buffer: &MeshCullingDataBuffer, + batched_instance_buffers: &BatchedInstanceBuffers, + work_item_buffers: &PreprocessWorkItemBuffers, + any_indirect: &mut bool, +) -> Option { + // Get the current input buffers. + let BatchedInstanceBuffers { data_buffer: ref data_buffer_vec, - work_item_buffers: ref index_buffers, current_input_buffer: ref current_input_buffer_vec, previous_input_buffer: ref previous_input_buffer_vec, - } = batched_instance_buffers.into_inner(); + .. + } = batched_instance_buffers; - let (Some(current_input_buffer), Some(previous_input_buffer), Some(data_buffer)) = ( - current_input_buffer_vec.buffer().buffer(), - previous_input_buffer_vec.buffer().buffer(), - data_buffer_vec.buffer(), - ) else { - return; - }; + let current_input_buffer = current_input_buffer_vec.buffer().buffer()?; + let previous_input_buffer = previous_input_buffer_vec.buffer().buffer()?; + let data_buffer = data_buffer_vec.buffer()?; - for (view, index_buffer_vec) in index_buffers { - let Some(index_buffer) = index_buffer_vec.buffer.buffer() else { - continue; - }; + // Build the appropriate bind group, depending on whether we're drawing + // directly or indirectly. - // Don't use `as_entire_binding()` here; the shader reads the array - // length and the underlying buffer may be longer than the actual size - // of the vector. - let index_buffer_size = NonZero::::try_from( - index_buffer_vec.buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()), - ) - .ok(); - - let bind_group = if !index_buffer_vec.no_indirect_drawing { - let ( - Some(indirect_parameters_buffer), - Some(mesh_culling_data_buffer), - Some(view_uniforms_binding), - ) = ( - indirect_parameters_buffer.buffer(), - mesh_culling_data_buffer.buffer(), - view_uniforms.uniforms.binding(), + match *work_item_buffers { + PreprocessWorkItemBuffers::Direct(ref work_item_buffer_vec) => { + let work_item_buffer = work_item_buffer_vec.buffer()?; + + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. + let work_item_buffer_size = NonZero::::try_from( + work_item_buffer_vec.len() as u64 * u64::from(PreprocessWorkItem::min_size()), ) - else { - continue; - }; + .ok(); - PreprocessBindGroup(render_device.create_bind_group( - "preprocess_gpu_culling_bind_group", - &pipelines.gpu_culling.bind_group_layout, - &BindGroupEntries::sequential(( - current_input_buffer.as_entire_binding(), - previous_input_buffer.as_entire_binding(), - BindingResource::Buffer(BufferBinding { - buffer: index_buffer, - offset: 0, - size: index_buffer_size, - }), - data_buffer.as_entire_binding(), - indirect_parameters_buffer.as_entire_binding(), - mesh_culling_data_buffer.as_entire_binding(), - view_uniforms_binding, - )), + Some(PhasePreprocessBindGroups::Direct( + render_device.create_bind_group( + "preprocess_direct_bind_group", + &pipelines.direct_preprocess.bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + previous_input_buffer.as_entire_binding(), + BindingResource::Buffer(BufferBinding { + buffer: work_item_buffer, + offset: 0, + size: work_item_buffer_size, + }), + data_buffer.as_entire_binding(), + )), + ), )) - } else { - PreprocessBindGroup(render_device.create_bind_group( - "preprocess_direct_bind_group", - &pipelines.direct.bind_group_layout, + } + + PreprocessWorkItemBuffers::Indirect { + indexed: ref indexed_buffer, + non_indexed: ref non_indexed_buffer, + } => { + // For indirect drawing, we need two separate bind groups, one for indexed meshes and one for non-indexed meshes. + + let mesh_culling_data_buffer = mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = view_uniforms.uniforms.binding()?; + + let indexed_bind_group = match ( + indexed_buffer.buffer(), + indirect_parameters_buffers.indexed_metadata_buffer(), + ) { + ( + Some(indexed_work_item_buffer), + Some(indexed_indirect_parameters_metadata_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. + let indexed_work_item_buffer_size = NonZero::::try_from( + indexed_buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + Some(render_device.create_bind_group( + "preprocess_indexed_indirect_gpu_culling_bind_group", + &pipelines.gpu_culling_preprocess.bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + previous_input_buffer.as_entire_binding(), + BindingResource::Buffer(BufferBinding { + buffer: indexed_work_item_buffer, + offset: 0, + size: indexed_work_item_buffer_size, + }), + data_buffer.as_entire_binding(), + indexed_indirect_parameters_metadata_buffer.as_entire_binding(), + mesh_culling_data_buffer.as_entire_binding(), + view_uniforms_binding.clone(), + )), + )) + } + _ => None, + }; + + let non_indexed_bind_group = match ( + non_indexed_buffer.buffer(), + indirect_parameters_buffers.non_indexed_metadata_buffer(), + ) { + ( + Some(non_indexed_work_item_buffer), + Some(non_indexed_indirect_parameters_metadata_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. + let non_indexed_work_item_buffer_size = NonZero::::try_from( + non_indexed_buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + Some(render_device.create_bind_group( + "preprocess_non_indexed_indirect_gpu_culling_bind_group", + &pipelines.gpu_culling_preprocess.bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + previous_input_buffer.as_entire_binding(), + BindingResource::Buffer(BufferBinding { + buffer: non_indexed_work_item_buffer, + offset: 0, + size: non_indexed_work_item_buffer_size, + }), + data_buffer.as_entire_binding(), + non_indexed_indirect_parameters_metadata_buffer.as_entire_binding(), + mesh_culling_data_buffer.as_entire_binding(), + view_uniforms_binding, + )), + )) + } + _ => None, + }; + + // Note that we found phases that will be drawn indirectly so that + // we remember to build the bind groups for the indirect parameter + // building shader. + *any_indirect = true; + + Some(PhasePreprocessBindGroups::Indirect { + indexed: indexed_bind_group, + non_indexed: non_indexed_bind_group, + }) + } + } +} + +/// A system that creates bind groups from the indirect parameters metadata and +/// data buffers for the indirect parameter building shader. +fn create_build_indirect_parameters_bind_groups( + commands: &mut Commands, + render_device: &RenderDevice, + pipelines: &PreprocessPipelines, + current_input_buffer: &Buffer, + indirect_parameters_buffer: &IndirectParametersBuffers, +) { + commands.insert_resource(BuildIndirectParametersBindGroups { + indexed: match ( + indirect_parameters_buffer.indexed_metadata_buffer(), + indirect_parameters_buffer.indexed_data_buffer(), + indirect_parameters_buffer.indexed_batch_sets_buffer(), + ) { + ( + Some(indexed_indirect_parameters_metadata_buffer), + Some(indexed_indirect_parameters_data_buffer), + Some(indexed_batch_sets_buffer), + ) => Some(render_device.create_bind_group( + "build_indexed_indirect_parameters_bind_group", + &pipelines.build_indexed_indirect_params.bind_group_layout, &BindGroupEntries::sequential(( current_input_buffer.as_entire_binding(), - previous_input_buffer.as_entire_binding(), - BindingResource::Buffer(BufferBinding { - buffer: index_buffer, + // Don't use `as_entire_binding` here; the shader reads + // the length and `RawBufferVec` overallocates. + BufferBinding { + buffer: indexed_indirect_parameters_metadata_buffer, offset: 0, - size: index_buffer_size, - }), - data_buffer.as_entire_binding(), + size: NonZeroU64::new( + indirect_parameters_buffer.indexed_batch_count() as u64 + * size_of::() as u64, + ), + }, + indexed_batch_sets_buffer.as_entire_binding(), + indexed_indirect_parameters_data_buffer.as_entire_binding(), )), - )) - }; - - commands.entity(*view).insert(bind_group); - } + )), + _ => None, + }, + non_indexed: match ( + indirect_parameters_buffer.non_indexed_metadata_buffer(), + indirect_parameters_buffer.non_indexed_data_buffer(), + indirect_parameters_buffer.non_indexed_batch_sets_buffer(), + ) { + ( + Some(non_indexed_indirect_parameters_metadata_buffer), + Some(non_indexed_indirect_parameters_data_buffer), + Some(non_indexed_batch_sets_buffer), + ) => Some( + render_device.create_bind_group( + "build_non_indexed_indirect_parameters_bind_group", + &pipelines + .build_non_indexed_indirect_params + .bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + // Don't use `as_entire_binding` here; the shader reads + // the length and `RawBufferVec` overallocates. + BufferBinding { + buffer: non_indexed_indirect_parameters_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + indirect_parameters_buffer.non_indexed_batch_count() as u64 + * size_of::() as u64, + ), + }, + non_indexed_batch_sets_buffer.as_entire_binding(), + non_indexed_indirect_parameters_data_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, + }); } /// Writes the information needed to do GPU mesh culling to the GPU. diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index 90614772e3f1c..4a1187400e5fe 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -614,8 +614,18 @@ pub struct ViewShadowBindings { pub directional_light_depth_texture_view: TextureView, } +/// A component that holds the shadow cascade views for all shadow cascades +/// associated with a camera. +/// +/// Note: Despite the name, this component actually holds the shadow cascade +/// views, not the lights themselves. #[derive(Component)] pub struct ViewLightEntities { + /// The shadow cascade views for all shadow cascades associated with a + /// camera. + /// + /// Note: Despite the name, this component actually holds the shadow cascade + /// views, not the lights themselves. pub lights: Vec, } @@ -701,6 +711,7 @@ pub fn prepare_lights( views: Query< ( Entity, + MainEntity, &ExtractedView, &ExtractedClusterConfig, Option<&RenderLayers>, @@ -1118,6 +1129,7 @@ pub fn prepare_lights( // set up light data for each view for ( entity, + camera_main_entity, extracted_view, clusters, maybe_layers, @@ -1238,8 +1250,11 @@ pub fn prepare_lights( }) .clone(); - let retained_view_entity = - RetainedViewEntity::new(*light_main_entity, face_index as u32); + let retained_view_entity = RetainedViewEntity::new( + *light_main_entity, + Some(camera_main_entity.into()), + face_index as u32, + ); commands.entity(view_light_entity).insert(( ShadowView { @@ -1343,7 +1358,8 @@ pub fn prepare_lights( let view_light_entity = light_view_entities[0]; - let retained_view_entity = RetainedViewEntity::new(*light_main_entity, 0); + let retained_view_entity = + RetainedViewEntity::new(*light_main_entity, Some(camera_main_entity.into()), 0); commands.entity(view_light_entity).insert(( ShadowView { @@ -1476,8 +1492,11 @@ pub fn prepare_lights( frustum.half_spaces[4] = HalfSpace::new(frustum.half_spaces[4].normal().extend(f32::INFINITY)); - let retained_view_entity = - RetainedViewEntity::new(*light_main_entity, cascade_index as u32); + let retained_view_entity = RetainedViewEntity::new( + *light_main_entity, + Some(camera_main_entity.into()), + cascade_index as u32, + ); commands.entity(view_light_entity).insert(( ShadowView { @@ -1764,6 +1783,12 @@ pub struct ShadowBatchSetKey { pub index_slab: Option, } +impl PhaseItemBatchSetKey for ShadowBatchSetKey { + fn indexed(&self) -> bool { + self.index_slab.is_some() + } +} + /// Data used to bin each object in the shadow map phase. #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ShadowBinKey { diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 6df24bbaa8858..dd92ef7828eec 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -20,7 +20,8 @@ use bevy_math::{Affine3, Rect, UVec2, Vec3, Vec4}; use bevy_render::{ batching::{ gpu_preprocessing::{ - self, GpuPreprocessingSupport, IndirectParameters, IndirectParametersBuffer, + self, GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers, + IndirectParametersIndexed, IndirectParametersMetadata, IndirectParametersNonIndexed, InstanceInputUniformBuffer, }, no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching, @@ -352,6 +353,17 @@ pub struct MeshInputUniform { /// [`MeshAllocator`]). This value stores the offset of the first vertex in /// this mesh in that buffer. pub first_vertex_index: u32, + /// The index of this mesh's first index in the index buffer, if any. + /// + /// Multiple meshes can be packed into a single index buffer (see + /// [`MeshAllocator`]). This value stores the offset of the first index in + /// this mesh in that buffer. + /// + /// If this mesh isn't indexed, this value is ignored. + pub first_index_index: u32, + /// For an indexed mesh, the number of indices that make it up; for a + /// non-indexed mesh, the number of vertices in it. + pub index_count: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, /// The previous skin index, or `u32::MAX` if there's no previous skin. @@ -361,6 +373,10 @@ pub struct MeshInputUniform { /// Low 16 bits: index of the material inside the bind group data. /// High 16 bits: index of the lightmap in the binding array. pub material_and_lightmap_bind_group_slot: u32, + /// Padding. + pub pad_a: u32, + /// Padding. + pub pad_b: u32, } /// Information about each mesh instance needed to cull it on GPU. @@ -907,11 +923,23 @@ impl RenderMeshInstanceGpuBuilder { render_lightmaps: &RenderLightmaps, skin_indices: &SkinIndices, ) -> u32 { - let first_vertex_index = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) - { - Some(mesh_vertex_slice) => mesh_vertex_slice.range.start, - None => 0, - }; + let (first_vertex_index, vertex_count) = + match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) { + Some(mesh_vertex_slice) => ( + mesh_vertex_slice.range.start, + mesh_vertex_slice.range.end - mesh_vertex_slice.range.start, + ), + None => (0, 0), + }; + let (mesh_is_indexed, first_index_index, index_count) = + match mesh_allocator.mesh_index_slice(&self.shared.mesh_asset_id) { + Some(mesh_index_slice) => ( + true, + mesh_index_slice.range.start, + mesh_index_slice.range.end - mesh_index_slice.range.start, + ), + None => (false, 0, 0), + }; let current_skin_index = match skin_indices.current.get(&entity) { Some(skin_indices) => skin_indices.index(), @@ -938,11 +966,19 @@ impl RenderMeshInstanceGpuBuilder { flags: self.mesh_flags.bits(), previous_input_index: u32::MAX, first_vertex_index, + first_index_index, + index_count: if mesh_is_indexed { + index_count + } else { + vertex_count + }, current_skin_index, previous_skin_index, material_and_lightmap_bind_group_slot: u32::from( self.shared.material_bindings_index.slot, ) | ((lightmap_slot as u32) << 16), + pad_a: 0, + pad_b: 0, }; // Did the last frame contain this entity as well? @@ -1698,86 +1734,31 @@ impl GetFullBatchData for MeshPipeline { .map(|entity| entity.current_uniform_index) } - fn write_batch_indirect_parameters( - (mesh_instances, _, meshes, mesh_allocator, _): &SystemParamItem, - indirect_parameters_buffer: &mut IndirectParametersBuffer, + fn write_batch_indirect_parameters_metadata( + mesh_index: u32, + indexed: bool, + base_output_index: u32, + batch_set_index: Option, + indirect_parameters_buffer: &mut IndirectParametersBuffers, indirect_parameters_offset: u32, - main_entity: MainEntity, ) { - write_batch_indirect_parameters( - mesh_instances, - meshes, - mesh_allocator, - indirect_parameters_buffer, - indirect_parameters_offset, - main_entity, - ); - } -} - -/// Pushes a set of [`IndirectParameters`] onto the [`IndirectParametersBuffer`] -/// for the given mesh instance, and returns the index of those indirect -/// parameters. -fn write_batch_indirect_parameters( - mesh_instances: &RenderMeshInstances, - meshes: &RenderAssets, - mesh_allocator: &MeshAllocator, - indirect_parameters_buffer: &mut IndirectParametersBuffer, - indirect_parameters_offset: u32, - main_entity: MainEntity, -) { - // This should only be called during GPU building. - let RenderMeshInstances::GpuBuilding(ref mesh_instances) = *mesh_instances else { - error!( - "`write_batch_indirect_parameters_index` should never be called in CPU mesh uniform \ - building mode" - ); - return; - }; - - let Some(mesh_instance) = mesh_instances.get(&main_entity) else { - return; - }; - let Some(mesh) = meshes.get(mesh_instance.mesh_asset_id) else { - return; - }; - let Some(vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) - else { - return; - }; - - // Note that `IndirectParameters` covers both of these structures, even - // though they actually have distinct layouts. See the comment above that - // type for more information. - let indirect_parameters = match mesh.buffer_info { - RenderMeshBufferInfo::Indexed { - count: index_count, .. - } => { - let Some(index_buffer_slice) = - mesh_allocator.mesh_index_slice(&mesh_instance.mesh_asset_id) - else { - return; - }; - IndirectParameters { - vertex_or_index_count: index_count, - instance_count: 0, - first_vertex_or_first_index: index_buffer_slice.range.start, - base_vertex_or_first_instance: vertex_buffer_slice.range.start, - first_instance: 0, - } - } - RenderMeshBufferInfo::NonIndexed => IndirectParameters { - vertex_or_index_count: mesh.vertex_count, + let indirect_parameters = IndirectParametersMetadata { + mesh_index, + base_output_index, + batch_set_index: match batch_set_index { + Some(batch_set_index) => u32::from(batch_set_index), + None => !0, + }, instance_count: 0, - first_vertex_or_first_index: vertex_buffer_slice.range.start, - base_vertex_or_first_instance: 0, - // Use `0xffffffff` as a placeholder to tell the mesh preprocessing - // shader that this is a non-indexed mesh. - first_instance: !0, - }, - }; + }; - indirect_parameters_buffer.set(indirect_parameters_offset, indirect_parameters); + if indexed { + indirect_parameters_buffer.set_indexed(indirect_parameters_offset, indirect_parameters); + } else { + indirect_parameters_buffer + .set_non_indexed(indirect_parameters_offset, indirect_parameters); + } + } } bitflags::bitflags! { @@ -2687,12 +2668,12 @@ impl RenderCommand

for DrawMesh { type Param = ( SRes>, SRes, - SRes, + SRes, SRes, SRes, Option>, ); - type ViewQuery = Has; + type ViewQuery = Has; type ItemQuery = (); #[inline] fn render<'w>( @@ -2735,26 +2716,6 @@ impl RenderCommand

for DrawMesh { return RenderCommandResult::Skip; }; - // Calculate the indirect offset, and look up the buffer. - let indirect_parameters = match item.extra_index() { - PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => None, - PhaseItemExtraIndex::IndirectParametersIndex(indices) => { - match indirect_parameters_buffer.buffer() { - None => { - warn!( - "Not rendering mesh because indirect parameters buffer wasn't present" - ); - return RenderCommandResult::Skip; - } - Some(buffer) => Some(( - indices.start as u64 * size_of::() as u64, - indices.end - indices.start, - buffer, - )), - } - } - }; - pass.set_vertex_buffer(0, vertex_buffer_slice.buffer.slice(..)); let batch_range = item.batch_range(); @@ -2774,8 +2735,8 @@ impl RenderCommand

for DrawMesh { pass.set_index_buffer(index_buffer_slice.buffer.slice(..), 0, *index_format); - match indirect_parameters { - None => { + match item.extra_index() { + PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => { pass.draw_indexed( index_buffer_slice.range.start ..(index_buffer_slice.range.start + *count), @@ -2783,33 +2744,112 @@ impl RenderCommand

for DrawMesh { batch_range.clone(), ); } - Some(( - indirect_parameters_offset, - indirect_parameters_count, - indirect_parameters_buffer, - )) => { - pass.multi_draw_indexed_indirect( - indirect_parameters_buffer, - indirect_parameters_offset, - indirect_parameters_count, - ); + PhaseItemExtraIndex::IndirectParametersIndex { + range: indirect_parameters_range, + batch_set_index, + } => { + // Look up the indirect parameters buffer, as well as + // the buffer we're going to use for + // `multi_draw_indexed_indirect_count` (if available). + let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = ( + indirect_parameters_buffer.indexed_data_buffer(), + indirect_parameters_buffer.indexed_batch_sets_buffer(), + ) else { + warn!( + "Not rendering mesh because indexed indirect parameters buffer \ + wasn't present", + ); + return RenderCommandResult::Skip; + }; + + // Calculate the location of the indirect parameters + // within the buffer. + let indirect_parameters_offset = indirect_parameters_range.start as u64 + * size_of::() as u64; + let indirect_parameters_count = + indirect_parameters_range.end - indirect_parameters_range.start; + + // If we're using `multi_draw_indirect_count`, take the + // number of batches from the appropriate position in + // the batch sets buffer. Otherwise, supply the size of + // the batch set. + match batch_set_index { + Some(batch_set_index) => { + let count_offset = u32::from(batch_set_index) + * (size_of::() as u32); + pass.multi_draw_indexed_indirect_count( + indirect_parameters_buffer, + indirect_parameters_offset, + batch_sets_buffer, + count_offset as u64, + indirect_parameters_count, + ); + } + None => { + pass.multi_draw_indexed_indirect( + indirect_parameters_buffer, + indirect_parameters_offset, + indirect_parameters_count, + ); + } + } } } } - RenderMeshBufferInfo::NonIndexed => match indirect_parameters { - None => { + + RenderMeshBufferInfo::NonIndexed => match item.extra_index() { + PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => { pass.draw(vertex_buffer_slice.range, batch_range.clone()); } - Some(( - indirect_parameters_offset, - indirect_parameters_count, - indirect_parameters_buffer, - )) => { - pass.multi_draw_indirect( - indirect_parameters_buffer, - indirect_parameters_offset, - indirect_parameters_count, - ); + PhaseItemExtraIndex::IndirectParametersIndex { + range: indirect_parameters_range, + batch_set_index, + } => { + // Look up the indirect parameters buffer, as well as the + // buffer we're going to use for + // `multi_draw_indirect_count` (if available). + let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = ( + indirect_parameters_buffer.non_indexed_data_buffer(), + indirect_parameters_buffer.non_indexed_batch_sets_buffer(), + ) else { + warn!( + "Not rendering mesh because non-indexed indirect parameters buffer \ + wasn't present" + ); + return RenderCommandResult::Skip; + }; + + // Calculate the location of the indirect parameters within + // the buffer. + let indirect_parameters_offset = indirect_parameters_range.start as u64 + * size_of::() as u64; + let indirect_parameters_count = + indirect_parameters_range.end - indirect_parameters_range.start; + + // If we're using `multi_draw_indirect_count`, take the + // number of batches from the appropriate position in the + // batch sets buffer. Otherwise, supply the size of the + // batch set. + match batch_set_index { + Some(batch_set_index) => { + let count_offset = + u32::from(batch_set_index) * (size_of::() as u32); + pass.multi_draw_indirect_count( + indirect_parameters_buffer, + indirect_parameters_offset, + batch_sets_buffer, + count_offset as u64, + indirect_parameters_count, + ); + } + None => { + pass.multi_draw_indirect( + indirect_parameters_buffer, + indirect_parameters_offset, + indirect_parameters_count, + ); + } + } } }, } diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index 74f527a374949..df73454a3e880 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -8,29 +8,10 @@ // so that TAA works. #import bevy_pbr::mesh_types::{Mesh, MESH_FLAGS_NO_FRUSTUM_CULLING_BIT} -#import bevy_pbr::mesh_preprocess_types::IndirectParameters +#import bevy_pbr::mesh_preprocess_types::{MeshInput, IndirectParametersMetadata} #import bevy_render::maths #import bevy_render::view::View -// Per-frame data that the CPU supplies to the GPU. -struct MeshInput { - // The model transform. - world_from_local: mat3x4, - // The lightmap UV rect, packed into 64 bits. - lightmap_uv_rect: vec2, - // Various flags. - flags: u32, - // The index of this mesh's `MeshInput` in the `previous_input` array, if - // applicable. If not present, this is `u32::MAX`. - previous_input_index: u32, - first_vertex_index: u32, - current_skin_index: u32, - previous_skin_index: u32, - // Low 16 bits: index of the material inside the bind group data. - // High 16 bits: index of the lightmap in the binding array. - material_and_lightmap_bind_group_slot: u32, -} - // Information about each mesh instance needed to cull it on GPU. // // At the moment, this just consists of its axis-aligned bounding box (AABB). @@ -68,7 +49,8 @@ struct PreprocessWorkItem { #ifdef INDIRECT // The array of indirect parameters for drawcalls. -@group(0) @binding(4) var indirect_parameters: array; +@group(0) @binding(4) var indirect_parameters_metadata: + array; #endif #ifdef FRUSTUM_CULLING @@ -167,28 +149,15 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { } // Figure out the output index. In indirect mode, this involves bumping the - // instance index in the indirect parameters structure. Otherwise, this - // index was directly supplied to us. + // instance index in the indirect parameters metadata, which + // `build_indirect_params.wgsl` will use to generate the actual indirect + // parameters. Otherwise, this index was directly supplied to us. #ifdef INDIRECT let batch_output_index = - atomicAdd(&indirect_parameters[indirect_parameters_index].instance_count, 1u); - let mesh_output_index = output_index + batch_output_index; - - // If this is the first mesh in the batch, write the first instance index - // into the indirect parameters. - // - // We could have done this on CPU, but when we start retaining indirect - // parameters that will no longer be desirable, as the index of the first - // instance will change from frame to frame and we won't want the CPU to - // have to keep updating it. - if (batch_output_index == 0u) { - if (indirect_parameters[indirect_parameters_index].first_instance == 0xffffffffu) { - indirect_parameters[indirect_parameters_index].base_vertex_or_first_instance = - mesh_output_index; - } else { - indirect_parameters[indirect_parameters_index].first_instance = mesh_output_index; - } - } + atomicAdd(&indirect_parameters_metadata[indirect_parameters_index].instance_count, 1u); + let mesh_output_index = + indirect_parameters_metadata[indirect_parameters_index].base_output_index + + batch_output_index; #else // INDIRECT let mesh_output_index = output_index; #endif // INDIRECT diff --git a/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl index 5314e75ce34de..974a9d303aa6d 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl @@ -2,18 +2,97 @@ #define_import_path bevy_pbr::mesh_preprocess_types -// The `wgpu` indirect parameters structure. This is a union of two structures. -// For more information, see the corresponding comment in -// `gpu_preprocessing.rs`. -struct IndirectParameters { - // `vertex_count` or `index_count`. - vertex_count_or_index_count: u32, - // `instance_count` in both structures. - instance_count: atomic, - // `first_vertex` or `first_index`. - first_vertex_or_first_index: u32, - // `base_vertex` or `first_instance`. - base_vertex_or_first_instance: u32, - // A read-only copy of `instance_index`. +// Per-frame data that the CPU supplies to the GPU. +struct MeshInput { + // The model transform. + world_from_local: mat3x4, + // The lightmap UV rect, packed into 64 bits. + lightmap_uv_rect: vec2, + // A set of bitflags corresponding to `MeshFlags` on the Rust side. See the + // `MESH_FLAGS_` flags in `mesh_types.wgsl` for a list of these. + flags: u32, + // The index of this mesh's `MeshInput` in the `previous_input` array, if + // applicable. If not present, this is `u32::MAX`. + previous_input_index: u32, + // The index of the first vertex in the vertex slab. + first_vertex_index: u32, + // The index of the first vertex index in the index slab. + // + // If this mesh isn't indexed, this value is ignored. + first_index_index: u32, + // For indexed meshes, the number of indices that this mesh has; for + // non-indexed meshes, the number of vertices that this mesh consists of. + index_count: u32, + current_skin_index: u32, + previous_skin_index: u32, + // Low 16 bits: index of the material inside the bind group data. + // High 16 bits: index of the lightmap in the binding array. + material_and_lightmap_bind_group_slot: u32, +} + +// The `wgpu` indirect parameters structure for indexed meshes. +// +// The `build_indirect_params.wgsl` shader generates these. +struct IndirectParametersIndexed { + // The number of indices that this mesh has. + index_count: u32, + // The number of instances we are to draw. + instance_count: u32, + // The offset of the first index for this mesh in the index buffer slab. + first_index: u32, + // The offset of the first vertex for this mesh in the vertex buffer slab. + base_vertex: u32, + // The index of the first mesh instance in the `Mesh` buffer. + first_instance: u32, +} + +// The `wgpu` indirect parameters structure for non-indexed meshes. +// +// The `build_indirect_params.wgsl` shader generates these. +struct IndirectParametersNonIndexed { + // The number of vertices that this mesh has. + vertex_count: u32, + // The number of instances we are to draw. + instance_count: u32, + // The offset of the first vertex for this mesh in the vertex buffer slab. + base_vertex: u32, + // The index of the first mesh instance in the `Mesh` buffer. first_instance: u32, } + +// Information needed to generate the `IndirectParametersIndexed` and +// `IndirectParametersNonIndexed` draw commands. +struct IndirectParametersMetadata { + // The index of the mesh in the `MeshInput` buffer. + mesh_index: u32, + // The index of the first instance corresponding to this batch in the `Mesh` + // buffer. + base_output_index: u32, + // The index of the batch set in the `IndirectBatchSet` buffer. + batch_set_index: u32, + // The number of instances that are to be drawn. + // + // The `mesh_preprocess.wgsl` shader determines this, and the + // `build_indirect_params.wgsl` shader copies this value into the indirect + // draw command. + instance_count: atomic, +} + +// Information about each batch set. +// +// A *batch set* is a set of meshes that might be multi-drawn together. +// +// The CPU creates this structure, and the `build_indirect_params.wgsl` shader +// modifies it. If `multi_draw_indirect_count` is in use, the GPU reads this +// value when multi-drawing a batch set in order to determine how many commands +// make up the batch set. +struct IndirectBatchSet { + // The number of commands that make up this batch set. + // + // The CPU initializes this value to zero. The `build_indirect_params.wgsl` + // shader increments this value as it processes batches. + indirect_parameters_count: atomic, + // The offset of the first batch corresponding to this batch set within the + // `IndirectParametersIndexed` or `IndirectParametersNonIndexed` arrays. + indirect_parameters_base: u32, +} diff --git a/crates/bevy_render/src/batching/gpu_preprocessing.rs b/crates/bevy_render/src/batching/gpu_preprocessing.rs index a98afc02de48a..2e893616f9294 100644 --- a/crates/bevy_render/src/batching/gpu_preprocessing.rs +++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs @@ -1,5 +1,7 @@ //! Batching functionality when GPU preprocessing is in use. +use core::any::TypeId; + use bevy_app::{App, Plugin}; use bevy_ecs::{ entity::{Entity, EntityHashMap}, @@ -9,7 +11,7 @@ use bevy_ecs::{ world::{FromWorld, World}, }; use bevy_encase_derive::ShaderType; -use bevy_utils::default; +use bevy_utils::{default, TypeIdMap}; use bytemuck::{Pod, Zeroable}; use nonmax::NonMaxU32; use tracing::error; @@ -18,9 +20,9 @@ use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features}; use crate::{ render_phase::{ BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet, - BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItemExtraIndex, - SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, - ViewSortedRenderPhases, + BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItemBatchSetKey as _, + PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices, + ViewBinnedRenderPhases, ViewSortedRenderPhases, }, render_resource::{Buffer, BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec}, renderer::{RenderAdapter, RenderDevice, RenderQueue}, @@ -39,10 +41,14 @@ impl Plugin for BatchingPlugin { }; render_app - .insert_resource(IndirectParametersBuffer::new()) + .insert_resource(IndirectParametersBuffers::new()) + .add_systems( + Render, + write_indirect_parameters_buffers.in_set(RenderSet::PrepareResourcesFlush), + ) .add_systems( Render, - write_indirect_parameters_buffer.in_set(RenderSet::PrepareResourcesFlush), + clear_indirect_parameters_buffers.in_set(RenderSet::ManageViews), ); } @@ -137,7 +143,7 @@ where /// corresponds to each instance. /// /// This is keyed off each view. Each view has a separate buffer. - pub work_item_buffers: EntityHashMap, + pub work_item_buffers: EntityHashMap>, /// The uniform data inputs for the current frame. /// @@ -265,16 +271,68 @@ where } /// The buffer of GPU preprocessing work items for a single view. -pub struct PreprocessWorkItemBuffer { - /// The buffer of work items. - pub buffer: BufferVec, - /// True if we're drawing directly instead of indirectly. - pub no_indirect_drawing: bool, +pub enum PreprocessWorkItemBuffers { + /// The work items we use if we aren't using indirect drawing. + /// + /// Because we don't have to separate indexed from non-indexed meshes in + /// direct mode, we only have a single buffer here. + Direct(BufferVec), + + /// The buffer of work items we use if we are using indirect drawing. + /// + /// We need to separate out indexed meshes from non-indexed meshes in this + /// case because the indirect parameters for these two types of meshes have + /// different sizes. + Indirect { + /// The buffer of work items corresponding to indexed meshes. + indexed: BufferVec, + /// The buffer of work items corresponding to non-indexed meshes. + non_indexed: BufferVec, + }, +} + +impl PreprocessWorkItemBuffers { + /// Creates a new set of buffers. + /// + /// `no_indirect_drawing` specifies whether we're drawing directly or + /// indirectly. + pub fn new(no_indirect_drawing: bool) -> Self { + if no_indirect_drawing { + PreprocessWorkItemBuffers::Direct(BufferVec::new(BufferUsages::STORAGE)) + } else { + PreprocessWorkItemBuffers::Indirect { + indexed: BufferVec::new(BufferUsages::STORAGE), + non_indexed: BufferVec::new(BufferUsages::STORAGE), + } + } + } + + /// Adds a new work item to the appropriate buffer. + /// + /// `indexed` specifies whether the work item corresponds to an indexed + /// mesh. + pub fn push(&mut self, indexed: bool, preprocess_work_item: PreprocessWorkItem) { + match *self { + PreprocessWorkItemBuffers::Direct(ref mut buffer) => { + buffer.push(preprocess_work_item); + } + PreprocessWorkItemBuffers::Indirect { + indexed: ref mut indexed_buffer, + non_indexed: ref mut non_indexed_buffer, + } => { + if indexed { + indexed_buffer.push(preprocess_work_item); + } else { + non_indexed_buffer.push(preprocess_work_item); + } + } + } + } } /// One invocation of the preprocessing shader: i.e. one mesh instance in a /// view. -#[derive(Clone, Copy, Pod, Zeroable, ShaderType)] +#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)] #[repr(C)] pub struct PreprocessWorkItem { /// The index of the batch input data in the input buffer that the shader @@ -284,112 +342,378 @@ pub struct PreprocessWorkItem { /// In direct mode, this is the index of the uniform. In indirect mode, this /// is the first index uniform in the batch set. pub output_index: u32, - /// The index of the [`IndirectParameters`] in the - /// [`IndirectParametersBuffer`]. + /// The index of the [`IndirectParametersMetadata`] in the + /// `IndirectParametersBuffers::indexed_metadata` or + /// `IndirectParametersBuffers::non_indexed_metadata`. pub indirect_parameters_index: u32, } -/// The `wgpu` indirect parameters structure. +/// The `wgpu` indirect parameters structure that specifies a GPU draw command. /// -/// This is actually a union of the two following structures: +/// This is the variant for indexed meshes. We generate the instances of this +/// structure in the `build_indirect_params.wgsl` compute shader. +#[derive(Clone, Copy, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct IndirectParametersIndexed { + /// The number of indices that this mesh has. + pub index_count: u32, + /// The number of instances we are to draw. + pub instance_count: u32, + /// The offset of the first index for this mesh in the index buffer slab. + pub first_index: u32, + /// The offset of the first vertex for this mesh in the vertex buffer slab. + pub base_vertex: u32, + /// The index of the first mesh instance in the `MeshUniform` buffer. + pub first_instance: u32, +} + +/// The `wgpu` indirect parameters structure that specifies a GPU draw command. /// -/// ``` -/// #[repr(C)] -/// struct ArrayIndirectParameters { -/// vertex_count: u32, -/// instance_count: u32, -/// first_vertex: u32, -/// first_instance: u32, -/// } +/// This is the variant for non-indexed meshes. We generate the instances of +/// this structure in the `build_indirect_params.wgsl` compute shader. +#[derive(Clone, Copy, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct IndirectParametersNonIndexed { + /// The number of vertices that this mesh has. + pub vertex_count: u32, + /// The number of instances we are to draw. + pub instance_count: u32, + /// The offset of the first vertex for this mesh in the vertex buffer slab. + pub base_vertex: u32, + /// The index of the first mesh instance in the `Mesh` buffer. + pub first_instance: u32, +} + +/// A structure, shared between CPU and GPU, that records how many instances of +/// each mesh are actually to be drawn. /// -/// #[repr(C)] -/// struct ElementIndirectParameters { -/// index_count: u32, -/// instance_count: u32, -/// first_vertex: u32, -/// base_vertex: u32, -/// first_instance: u32, -/// } -/// ``` +/// The CPU writes to this structure in order to initialize the fields other +/// than [`Self::instance_count`]. The GPU mesh preprocessing shader increments +/// the [`Self::instance_count`] as it determines that meshes are visible. The +/// indirect parameter building shader reads this metadata in order to construct +/// the indirect draw parameters. /// -/// We actually generally treat these two variants identically in code. To do -/// that, we make the following two observations: +/// Each batch will have one instance of this structure. +#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct IndirectParametersMetadata { + /// The index of the mesh in the array of `MeshInputUniform`s. + pub mesh_index: u32, + + /// The index of the first instance of this mesh in the array of + /// `MeshUniform`s. + /// + /// Note that this is the *first* output index in this batch. Since each + /// instance of this structure refers to arbitrarily many instances, the + /// `MeshUniform`s corresponding to this batch span the indices + /// `base_output_index..(base_output_index + instance_count)`. + pub base_output_index: u32, + + /// The index of the batch set that this batch belongs to in the + /// [`IndirectBatchSet`] buffer. + /// + /// A *batch set* is a set of meshes that may be multi-drawn together. + /// Multiple batches (and therefore multiple instances of + /// [`IndirectParametersMetadata`] structures) can be part of the same batch + /// set. + pub batch_set_index: u32, + + /// The number of instances that have been judged potentially visible. + /// + /// The CPU sets this value to 0, and the GPU mesh preprocessing shader + /// increments it as it culls mesh instances. + pub instance_count: u32, +} + +/// A structure, shared between CPU and GPU, that holds the number of on-GPU +/// indirect draw commands for each *batch set*. /// -/// 1. `instance_count` is in the same place in both structures. So we can -/// access it regardless of the structure we're looking at. +/// A *batch set* is a set of meshes that may be multi-drawn together. /// -/// 2. The second structure is one word larger than the first. Thus we need to -/// pad out the first structure by one word in order to place both structures in -/// an array. If we pad out `ArrayIndirectParameters` by copying the -/// `first_instance` field into the padding, then the resulting union structure -/// will always have a read-only copy of `first_instance` in the final word. We -/// take advantage of this in the shader to reduce branching. -#[derive(Clone, Copy, Pod, Zeroable, ShaderType)] +/// If the current hardware and driver support `multi_draw_indirect_count`, the +/// indirect parameters building shader increments +/// [`Self::indirect_parameters_count`] as it generates indirect parameters. The +/// `multi_draw_indirect_count` command reads +/// [`Self::indirect_parameters_count`] in order to determine how many commands +/// belong to each batch set. +#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)] #[repr(C)] -pub struct IndirectParameters { - /// For `ArrayIndirectParameters`, `vertex_count`; for - /// `ElementIndirectParameters`, `index_count`. - pub vertex_or_index_count: u32, +pub struct IndirectBatchSet { + /// The number of indirect parameter commands (i.e. batches) in this batch + /// set. + /// + /// The CPU sets this value to 0 before uploading this structure to GPU. The + /// indirect parameters building shader increments this value as it creates + /// indirect parameters. Then the `multi_draw_indirect_count` command reads + /// this value in order to determine how many indirect draw commands to + /// process. + pub indirect_parameters_count: u32, + + /// The offset within the `IndirectParametersBuffers::indexed_data` or + /// `IndirectParametersBuffers::non_indexed_data` of the first indirect draw + /// command for this batch set. + /// + /// The CPU fills out this value. + pub indirect_parameters_base: u32, +} - /// The number of instances we're going to draw. +/// The buffers containing all the information that indirect draw commands +/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene. +/// +/// In addition to the indirect draw buffers themselves, this structure contains +/// the buffers that store [`IndirectParametersMetadata`], which are the +/// structures that culling writes to so that the indirect parameter building +/// pass can determine how many meshes are actually to be drawn. +/// +/// These buffers will remain empty if indirect drawing isn't in use. +#[derive(Resource)] +pub struct IndirectParametersBuffers { + /// The GPU buffer that stores the indirect draw parameters for non-indexed + /// meshes. /// - /// This field is in the same place in both structures. - pub instance_count: u32, + /// The indirect parameters building shader writes to this buffer, while the + /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from + /// it to perform the draws. + non_indexed_data: UninitBufferVec, - /// For `ArrayIndirectParameters`, `first_vertex`; for - /// `ElementIndirectParameters`, `first_index`. - pub first_vertex_or_first_index: u32, + /// The GPU buffer that holds the data used to construct indirect draw + /// parameters for non-indexed meshes. + /// + /// The GPU mesh preprocessing shader writes to this buffer, and the + /// indirect parameters building shader reads this buffer to construct the + /// indirect draw parameters. + non_indexed_metadata: RawBufferVec, - /// For `ArrayIndirectParameters`, `first_instance`; for - /// `ElementIndirectParameters`, `base_vertex`. - pub base_vertex_or_first_instance: u32, + /// The GPU buffer that holds the number of indirect draw commands for each + /// phase of each view, for non-indexed meshes. + /// + /// The indirect parameters building shader writes to this buffer, and the + /// `multi_draw_indirect_count` command reads from it in order to know how + /// many indirect draw commands to process. + non_indexed_batch_sets: RawBufferVec, - /// For `ArrayIndirectParameters`, this is padding; for - /// `ElementIndirectParameters`, this is `first_instance`. + /// The GPU buffer that stores the indirect draw parameters for indexed + /// meshes. /// - /// Conventionally, we copy `first_instance` into this field when padding - /// out `ArrayIndirectParameters`. That way, shader code can read this value - /// at the same place, regardless of the specific structure this represents. - pub first_instance: u32, -} + /// The indirect parameters building shader writes to this buffer, while the + /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from + /// it to perform the draws. + indexed_data: UninitBufferVec, -/// The buffer containing the list of [`IndirectParameters`], for draw commands. -#[derive(Resource)] -pub struct IndirectParametersBuffer { - /// The actual buffer. - buffer: RawBufferVec, + /// The GPU buffer that holds the data used to construct indirect draw + /// parameters for indexed meshes. + /// + /// The GPU mesh preprocessing shader writes to this buffer, and the + /// indirect parameters building shader reads this buffer to construct the + /// indirect draw parameters. + indexed_metadata: RawBufferVec, + + /// The GPU buffer that holds the number of indirect draw commands for each + /// phase of each view, for indexed meshes. + /// + /// The indirect parameters building shader writes to this buffer, and the + /// `multi_draw_indirect_count` command reads from it in order to know how + /// many indirect draw commands to process. + indexed_batch_sets: RawBufferVec, } -impl IndirectParametersBuffer { - /// Creates the indirect parameters buffer. - pub fn new() -> IndirectParametersBuffer { - IndirectParametersBuffer { - buffer: RawBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT), +impl IndirectParametersBuffers { + /// Creates the indirect parameters buffers. + pub fn new() -> IndirectParametersBuffers { + IndirectParametersBuffers { + non_indexed_data: UninitBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT), + non_indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE), + non_indexed_batch_sets: RawBufferVec::new( + BufferUsages::STORAGE | BufferUsages::INDIRECT, + ), + indexed_data: UninitBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT), + indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE), + indexed_batch_sets: RawBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT), } } - /// Returns the underlying GPU buffer. + /// Returns the GPU buffer that stores the indirect draw parameters for + /// indexed meshes. + /// + /// The indirect parameters building shader writes to this buffer, while the + /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from + /// it to perform the draws. + #[inline] + pub fn indexed_data_buffer(&self) -> Option<&Buffer> { + self.indexed_data.buffer() + } + + /// Returns the GPU buffer that holds the data used to construct indirect + /// draw parameters for indexed meshes. + /// + /// The GPU mesh preprocessing shader writes to this buffer, and the + /// indirect parameters building shader reads this buffer to construct the + /// indirect draw parameters. + #[inline] + pub fn indexed_metadata_buffer(&self) -> Option<&Buffer> { + self.indexed_metadata.buffer() + } + + /// Returns the GPU buffer that holds the number of indirect draw commands + /// for each phase of each view, for indexed meshes. + /// + /// The indirect parameters building shader writes to this buffer, and the + /// `multi_draw_indirect_count` command reads from it in order to know how + /// many indirect draw commands to process. + #[inline] + pub fn indexed_batch_sets_buffer(&self) -> Option<&Buffer> { + self.indexed_batch_sets.buffer() + } + + /// Returns the GPU buffer that stores the indirect draw parameters for + /// non-indexed meshes. + /// + /// The indirect parameters building shader writes to this buffer, while the + /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from + /// it to perform the draws. + #[inline] + pub fn non_indexed_data_buffer(&self) -> Option<&Buffer> { + self.non_indexed_data.buffer() + } + + /// Returns the GPU buffer that holds the data used to construct indirect + /// draw parameters for non-indexed meshes. + /// + /// The GPU mesh preprocessing shader writes to this buffer, and the + /// indirect parameters building shader reads this buffer to construct the + /// indirect draw parameters. + #[inline] + pub fn non_indexed_metadata_buffer(&self) -> Option<&Buffer> { + self.non_indexed_metadata.buffer() + } + + /// Returns the GPU buffer that holds the number of indirect draw commands + /// for each phase of each view, for non-indexed meshes. + /// + /// The indirect parameters building shader writes to this buffer, and the + /// `multi_draw_indirect_count` command reads from it in order to know how + /// many indirect draw commands to process. #[inline] - pub fn buffer(&self) -> Option<&Buffer> { - self.buffer.buffer() + pub fn non_indexed_batch_sets_buffer(&self) -> Option<&Buffer> { + self.non_indexed_batch_sets.buffer() } - /// Adds a new set of indirect parameters to the buffer. - pub fn allocate(&mut self, count: u32) -> u32 { - let length = self.buffer.len(); - self.buffer.reserve_internal(count as usize); + /// Reserves space for `count` new batches corresponding to indexed meshes. + /// + /// This allocates in both the [`Self::indexed_metadata`] and + /// [`Self::indexed_data`] buffers. + fn allocate_indexed(&mut self, count: u32) -> u32 { + let length = self.indexed_data.len(); + self.indexed_metadata.reserve_internal(count as usize); for _ in 0..count { - self.buffer.push(Zeroable::zeroed()); + self.indexed_data.add(); + self.indexed_metadata + .push(IndirectParametersMetadata::default()); } length as u32 } - pub fn set(&mut self, index: u32, value: IndirectParameters) { - self.buffer.set(index, value); + /// Reserves space for `count` new batches corresponding to non-indexed + /// meshes. + /// + /// This allocates in both the [`Self::non_indexed_metadata`] and + /// [`Self::non_indexed_data`] buffers. + fn allocate_non_indexed(&mut self, count: u32) -> u32 { + let length = self.non_indexed_data.len(); + self.non_indexed_metadata.reserve_internal(count as usize); + for _ in 0..count { + self.non_indexed_data.add(); + self.non_indexed_metadata + .push(IndirectParametersMetadata::default()); + } + length as u32 + } + + /// Reserves space for `count` new batches. + /// + /// The `indexed` parameter specifies whether the meshes that these batches + /// correspond to are indexed or not. + pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 { + if indexed { + self.allocate_indexed(count) + } else { + self.allocate_non_indexed(count) + } + } + + /// Initializes the batch corresponding to an indexed mesh at the given + /// index with the given [`IndirectParametersMetadata`]. + pub fn set_indexed(&mut self, index: u32, value: IndirectParametersMetadata) { + self.indexed_metadata.set(index, value); + } + + /// Initializes the batch corresponding to a non-indexed mesh at the given + /// index with the given [`IndirectParametersMetadata`]. + pub fn set_non_indexed(&mut self, index: u32, value: IndirectParametersMetadata) { + self.non_indexed_metadata.set(index, value); + } + + /// Returns the number of batches currently allocated. + /// + /// The `indexed` parameter specifies whether the meshes that these batches + /// correspond to are indexed or not. + fn batch_count(&self, indexed: bool) -> usize { + if indexed { + self.indexed_batch_count() + } else { + self.non_indexed_batch_count() + } + } + + /// Returns the number of batches corresponding to indexed meshes that are + /// currently allocated. + #[inline] + pub fn indexed_batch_count(&self) -> usize { + self.indexed_data.len() + } + + /// Returns the number of batches corresponding to non-indexed meshes that + /// are currently allocated. + #[inline] + pub fn non_indexed_batch_count(&self) -> usize { + self.non_indexed_data.len() + } + + /// Returns the number of batch sets currently allocated. + /// + /// The `indexed` parameter specifies whether the meshes that these batch + /// sets correspond to are indexed or not. + pub fn batch_set_count(&self, indexed: bool) -> usize { + if indexed { + self.indexed_batch_sets.len() + } else { + self.non_indexed_batch_sets.len() + } + } + + /// Adds a new batch set to `Self::indexed_batch_sets` or + /// `Self::non_indexed_batch_sets` as appropriate. + /// + /// `indexed` specifies whether the meshes that these batch sets correspond + /// to are indexed or not. `indirect_parameters_base` specifies the offset + /// within `Self::indexed_data` or `Self::non_indexed_data` of the first + /// batch in this batch set. + pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) { + if indexed { + self.indexed_batch_sets.push(IndirectBatchSet { + indirect_parameters_base, + indirect_parameters_count: 0, + }); + } else { + self.non_indexed_batch_sets.push(IndirectBatchSet { + indirect_parameters_base, + indirect_parameters_count: 0, + }); + } } } -impl Default for IndirectParametersBuffer { +impl Default for IndirectParametersBuffers { fn default() -> Self { Self::new() } @@ -454,8 +778,20 @@ where /// Clears out the buffers in preparation for a new frame. pub fn clear(&mut self) { self.data_buffer.clear(); - for work_item_buffer in self.work_item_buffers.values_mut() { - work_item_buffer.buffer.clear(); + + for view_work_item_buffers in self.work_item_buffers.values_mut() { + for phase_work_item_buffers in view_work_item_buffers.values_mut() { + match *phase_work_item_buffers { + PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => buffer_vec.clear(), + PreprocessWorkItemBuffers::Indirect { + ref mut indexed, + ref mut non_indexed, + } => { + indexed.clear(); + non_indexed.clear(); + } + } + } } } } @@ -483,8 +819,11 @@ where /// The index of the first instance in this batch in the instance buffer. instance_start_index: u32, + /// True if the mesh in question has an index buffer; false otherwise. + indexed: bool, + /// The index of the indirect parameters for this batch in the - /// [`IndirectParametersBuffer`]. + /// [`IndirectParametersBuffers`]. /// /// If CPU culling is being used, then this will be `None`. indirect_parameters_index: Option, @@ -505,8 +844,12 @@ where /// /// `instance_end_index` is the index of the last instance in this batch /// plus one. - fn flush(self, instance_end_index: u32, phase: &mut SortedRenderPhase) - where + fn flush( + self, + instance_end_index: u32, + phase: &mut SortedRenderPhase, + indirect_parameters_buffers: &mut IndirectParametersBuffers, + ) where I: CachedRenderPipelinePhaseItem + SortedPhaseItem, { let (batch_range, batch_extra_index) = @@ -514,6 +857,11 @@ where *batch_range = self.instance_start_index..instance_end_index; *batch_extra_index = PhaseItemExtraIndex::maybe_indirect_parameters_index(self.indirect_parameters_index); + + if let Some(indirect_parameters_index) = self.indirect_parameters_index { + indirect_parameters_buffers + .add_batch_set(self.indexed, indirect_parameters_index.into()); + } } } @@ -559,7 +907,7 @@ pub fn delete_old_work_item_buffers( /// trying to combine the draws into a batch. pub fn batch_and_prepare_sorted_render_phase( gpu_array_buffer: ResMut>, - mut indirect_parameters_buffer: ResMut, + mut indirect_parameters_buffers: ResMut, mut sorted_render_phases: ResMut>, mut views: Query<(Entity, &ExtractedView, Has)>, system_param_item: StaticSystemParam, @@ -580,24 +928,15 @@ pub fn batch_and_prepare_sorted_render_phase( }; // Create the work item buffer if necessary. - let work_item_buffer = - work_item_buffers - .entry(view) - .or_insert_with(|| PreprocessWorkItemBuffer { - buffer: BufferVec::new(BufferUsages::STORAGE), - no_indirect_drawing, - }); + let work_item_buffer = work_item_buffers + .entry(view) + .or_insert_with(TypeIdMap::default) + .entry(TypeId::of::()) + .or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing)); // Walk through the list of phase items, building up batches as we go. let mut batch: Option> = None; - // Allocate the indirect parameters if necessary. - let mut indirect_parameters_offset = if no_indirect_drawing { - None - } else { - Some(indirect_parameters_buffer.allocate(phase.items.len() as u32)) - }; - let mut first_output_index = data_buffer.len() as u32; for current_index in 0..phase.items.len() { @@ -605,6 +944,7 @@ pub fn batch_and_prepare_sorted_render_phase( // this entity. let item = &phase.items[current_index]; let entity = item.main_entity(); + let item_is_indexed = item.indexed(); let current_batch_input_index = GFBD::get_index_and_compare_data(&system_param_item, entity); @@ -615,7 +955,11 @@ pub fn batch_and_prepare_sorted_render_phase( let Some((current_input_index, current_meta)) = current_batch_input_index else { // Break a batch if we need to. if let Some(batch) = batch.take() { - batch.flush(data_buffer.len() as u32, phase); + batch.flush( + data_buffer.len() as u32, + phase, + &mut indirect_parameters_buffers, + ); } continue; @@ -634,62 +978,74 @@ pub fn batch_and_prepare_sorted_render_phase( }); // Make space in the data buffer for this instance. - let item = &phase.items[current_index]; - let entity = item.main_entity(); let output_index = data_buffer.add() as u32; // If we can't batch, break the existing batch and make a new one. if !can_batch { // Break a batch if we need to. if let Some(batch) = batch.take() { - batch.flush(output_index, phase); + batch.flush(output_index, phase, &mut indirect_parameters_buffers); } + let indirect_parameters_index = if no_indirect_drawing { + None + } else if item_is_indexed { + Some(indirect_parameters_buffers.allocate_indexed(1)) + } else { + Some(indirect_parameters_buffers.allocate_non_indexed(1)) + }; + // Start a new batch. - if let Some(indirect_parameters_offset) = indirect_parameters_offset { - GFBD::write_batch_indirect_parameters( - &system_param_item, - &mut indirect_parameters_buffer, - indirect_parameters_offset, - entity, + if let Some(indirect_parameters_index) = indirect_parameters_index { + GFBD::write_batch_indirect_parameters_metadata( + current_input_index.into(), + item_is_indexed, + output_index, + None, + &mut indirect_parameters_buffers, + indirect_parameters_index, ); }; batch = Some(SortedRenderBatch { phase_item_start_index: current_index as u32, instance_start_index: output_index, - indirect_parameters_index: indirect_parameters_offset.and_then(NonMaxU32::new), + indexed: item_is_indexed, + indirect_parameters_index: indirect_parameters_index.and_then(NonMaxU32::new), meta: current_meta, }); - if let Some(ref mut indirect_parameters_offset) = indirect_parameters_offset { - *indirect_parameters_offset += 1; - } - first_output_index = output_index; } // Add a new preprocessing work item so that the preprocessing // shader will copy the per-instance data over. if let Some(batch) = batch.as_ref() { - work_item_buffer.buffer.push(PreprocessWorkItem { - input_index: current_input_index.into(), - output_index: if no_indirect_drawing { - output_index - } else { - first_output_index - }, - indirect_parameters_index: match batch.indirect_parameters_index { - Some(indirect_parameters_index) => indirect_parameters_index.into(), - None => 0, + work_item_buffer.push( + item_is_indexed, + PreprocessWorkItem { + input_index: current_input_index.into(), + output_index: if no_indirect_drawing { + output_index + } else { + first_output_index + }, + indirect_parameters_index: match batch.indirect_parameters_index { + Some(indirect_parameters_index) => indirect_parameters_index.into(), + None => 0, + }, }, - }); + ); } } // Flush the final batch if necessary. if let Some(batch) = batch.take() { - batch.flush(data_buffer.len() as u32, phase); + batch.flush( + data_buffer.len() as u32, + phase, + &mut indirect_parameters_buffers, + ); } } } @@ -697,7 +1053,7 @@ pub fn batch_and_prepare_sorted_render_phase( /// Creates batches for a render phase that uses bins. pub fn batch_and_prepare_binned_render_phase( gpu_array_buffer: ResMut>, - mut indirect_parameters_buffer: ResMut, + mut indirect_parameters_buffers: ResMut, mut binned_render_phases: ResMut>, mut views: Query<(Entity, &ExtractedView, Has)>, param: StaticSystemParam, @@ -720,18 +1076,18 @@ pub fn batch_and_prepare_binned_render_phase( // Create the work item buffer if necessary; otherwise, just mark it as // used this frame. - let work_item_buffer = - work_item_buffers - .entry(view) - .or_insert_with(|| PreprocessWorkItemBuffer { - buffer: BufferVec::new(BufferUsages::STORAGE), - no_indirect_drawing, - }); + let work_item_buffer = work_item_buffers + .entry(view) + .or_insert_with(TypeIdMap::default) + .entry(TypeId::of::()) + .or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing)); // Prepare multidrawables. for batch_set_key in &phase.multidrawable_mesh_keys { let mut batch_set = None; + let indirect_parameters_base = + indirect_parameters_buffers.batch_count(batch_set_key.indexed()) as u32; for (bin_key, bin) in &phase.multidrawable_mesh_values[batch_set_key] { let first_output_index = data_buffer.len() as u32; let mut batch: Option = None; @@ -747,33 +1103,47 @@ pub fn batch_and_prepare_binned_render_phase( Some(ref mut batch) => { // Append to the current batch. batch.instance_range.end = output_index + 1; - work_item_buffer.buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index: first_output_index, - indirect_parameters_index: match batch.extra_index { - PhaseItemExtraIndex::IndirectParametersIndex(ref range) => { - range.start - } - PhaseItemExtraIndex::DynamicOffset(_) - | PhaseItemExtraIndex::None => 0, + work_item_buffer.push( + batch_set_key.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_index: first_output_index, + indirect_parameters_index: match batch.extra_index { + PhaseItemExtraIndex::IndirectParametersIndex { + ref range, + .. + } => range.start, + PhaseItemExtraIndex::DynamicOffset(_) + | PhaseItemExtraIndex::None => 0, + }, }, - }); + ); } None => { // Start a new batch, in indirect mode. - let indirect_parameters_index = indirect_parameters_buffer.allocate(1); - GFBD::write_batch_indirect_parameters( - &system_param_item, - &mut indirect_parameters_buffer, - indirect_parameters_index, - main_entity, + let indirect_parameters_index = + indirect_parameters_buffers.allocate(batch_set_key.indexed(), 1); + let batch_set_index = NonMaxU32::new( + indirect_parameters_buffers.batch_set_count(batch_set_key.indexed()) + as u32, ); - work_item_buffer.buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index: first_output_index, + GFBD::write_batch_indirect_parameters_metadata( + input_index.into(), + batch_set_key.indexed(), + output_index, + batch_set_index, + &mut indirect_parameters_buffers, indirect_parameters_index, - }); + ); + work_item_buffer.push( + batch_set_key.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_index: first_output_index, + indirect_parameters_index, + }, + ); batch = Some(BinnedRenderPhaseBatch { representative_entity: (entity, main_entity), instance_range: output_index..output_index + 1, @@ -791,6 +1161,9 @@ pub fn batch_and_prepare_binned_render_phase( batch_set = Some(BinnedRenderPhaseBatchSet { batches: vec![batch], bin_key: bin_key.clone(), + index: indirect_parameters_buffers + .batch_set_count(batch_set_key.indexed()) + as u32, }); } Some(ref mut batch_set) => { @@ -805,6 +1178,8 @@ pub fn batch_and_prepare_binned_render_phase( { if let Some(batch_set) = batch_set { batch_sets.push(batch_set); + indirect_parameters_buffers + .add_batch_set(batch_set_key.indexed(), indirect_parameters_base); } } } @@ -833,37 +1208,50 @@ pub fn batch_and_prepare_binned_render_phase( // tightly-packed buffer if GPU culling discards some of // the instances. Otherwise, we can just write the // output index directly. - work_item_buffer.buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index: if no_indirect_drawing { - output_index - } else { - first_output_index - }, - indirect_parameters_index: match batch.extra_index { - PhaseItemExtraIndex::IndirectParametersIndex(ref range) => { - range.start - } - PhaseItemExtraIndex::DynamicOffset(_) - | PhaseItemExtraIndex::None => 0, + work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_index: if no_indirect_drawing { + output_index + } else { + first_output_index + }, + indirect_parameters_index: match batch.extra_index { + PhaseItemExtraIndex::IndirectParametersIndex { + range: ref indirect_parameters_range, + .. + } => indirect_parameters_range.start, + PhaseItemExtraIndex::DynamicOffset(_) + | PhaseItemExtraIndex::None => 0, + }, }, - }); + ); } None if !no_indirect_drawing => { // Start a new batch, in indirect mode. - let indirect_parameters_index = indirect_parameters_buffer.allocate(1); - GFBD::write_batch_indirect_parameters( - &system_param_item, - &mut indirect_parameters_buffer, - indirect_parameters_index, - main_entity, + let indirect_parameters_index = + indirect_parameters_buffers.allocate(key.0.indexed(), 1); + let batch_set_index = NonMaxU32::new( + indirect_parameters_buffers.batch_set_count(key.0.indexed()) as u32, ); - work_item_buffer.buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index: first_output_index, + GFBD::write_batch_indirect_parameters_metadata( + input_index.into(), + key.0.indexed(), + output_index, + batch_set_index, + &mut indirect_parameters_buffers, indirect_parameters_index, - }); + ); + work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_index: first_output_index, + indirect_parameters_index, + }, + ); batch = Some(BinnedRenderPhaseBatch { representative_entity: (entity, main_entity), instance_range: output_index..output_index + 1, @@ -875,11 +1263,14 @@ pub fn batch_and_prepare_binned_render_phase( None => { // Start a new batch, in direct mode. - work_item_buffer.buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index, - indirect_parameters_index: 0, - }); + work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_index, + indirect_parameters_index: 0, + }, + ); batch = Some(BinnedRenderPhaseBatch { representative_entity: (entity, main_entity), instance_range: output_index..output_index + 1, @@ -905,6 +1296,8 @@ pub fn batch_and_prepare_binned_render_phase( vec.push(BinnedRenderPhaseBatchSet { batches: vec![batch], bin_key: key.1.clone(), + index: indirect_parameters_buffers.batch_set_count(key.0.indexed()) + as u32, }); } } @@ -918,8 +1311,16 @@ pub fn batch_and_prepare_binned_render_phase( // Allocate the indirect parameters if necessary. let mut indirect_parameters_offset = if no_indirect_drawing { None + } else if key.0.indexed() { + Some( + indirect_parameters_buffers + .allocate_indexed(unbatchables.entities.len() as u32), + ) } else { - Some(indirect_parameters_buffer.allocate(unbatchables.entities.len() as u32)) + Some( + indirect_parameters_buffers + .allocate_non_indexed(unbatchables.entities.len() as u32), + ) }; for &(_, main_entity) in &unbatchables.entities { @@ -932,32 +1333,43 @@ pub fn batch_and_prepare_binned_render_phase( if let Some(ref mut indirect_parameters_index) = indirect_parameters_offset { // We're in indirect mode, so add an indirect parameters // index. - GFBD::write_batch_indirect_parameters( - &system_param_item, - &mut indirect_parameters_buffer, + GFBD::write_batch_indirect_parameters_metadata( + input_index.into(), + key.0.indexed(), + output_index, + None, + &mut indirect_parameters_buffers, *indirect_parameters_index, - main_entity, ); - work_item_buffer.buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index, - indirect_parameters_index: *indirect_parameters_index, - }); + work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_index, + indirect_parameters_index: *indirect_parameters_index, + }, + ); unbatchables .buffer_indices .add(UnbatchableBinnedEntityIndices { instance_index: *indirect_parameters_index, - extra_index: PhaseItemExtraIndex::IndirectParametersIndex( - *indirect_parameters_index..(*indirect_parameters_index + 1), - ), + extra_index: PhaseItemExtraIndex::IndirectParametersIndex { + range: *indirect_parameters_index..(*indirect_parameters_index + 1), + batch_set_index: None, + }, }); + indirect_parameters_buffers + .add_batch_set(key.0.indexed(), *indirect_parameters_index); *indirect_parameters_index += 1; } else { - work_item_buffer.buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index, - indirect_parameters_index: 0, - }); + work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_index, + indirect_parameters_index: 0, + }, + ); unbatchables .buffer_indices .add(UnbatchableBinnedEntityIndices { @@ -980,7 +1392,7 @@ pub fn write_batched_instance_buffers( { let BatchedInstanceBuffers { ref mut data_buffer, - work_item_buffers: ref mut index_buffers, + ref mut work_item_buffers, ref mut current_input_buffer, ref mut previous_input_buffer, } = gpu_array_buffer.into_inner(); @@ -993,22 +1405,60 @@ pub fn write_batched_instance_buffers( .buffer .write_buffer(&render_device, &render_queue); - for index_buffer in index_buffers.values_mut() { - index_buffer - .buffer - .write_buffer(&render_device, &render_queue); + for view_work_item_buffers in work_item_buffers.values_mut() { + for phase_work_item_buffers in view_work_item_buffers.values_mut() { + match *phase_work_item_buffers { + PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => { + buffer_vec.write_buffer(&render_device, &render_queue); + } + PreprocessWorkItemBuffers::Indirect { + ref mut indexed, + ref mut non_indexed, + } => { + indexed.write_buffer(&render_device, &render_queue); + non_indexed.write_buffer(&render_device, &render_queue); + } + } + } } } -pub fn write_indirect_parameters_buffer( +pub fn clear_indirect_parameters_buffers( + mut indirect_parameters_buffers: ResMut, +) { + indirect_parameters_buffers.indexed_data.clear(); + indirect_parameters_buffers.indexed_metadata.clear(); + indirect_parameters_buffers.indexed_batch_sets.clear(); + indirect_parameters_buffers.non_indexed_data.clear(); + indirect_parameters_buffers.non_indexed_metadata.clear(); + indirect_parameters_buffers.non_indexed_batch_sets.clear(); +} + +pub fn write_indirect_parameters_buffers( render_device: Res, render_queue: Res, - mut indirect_parameters_buffer: ResMut, + mut indirect_parameters_buffers: ResMut, ) { - indirect_parameters_buffer - .buffer + indirect_parameters_buffers + .indexed_data + .write_buffer(&render_device); + indirect_parameters_buffers + .non_indexed_data + .write_buffer(&render_device); + + indirect_parameters_buffers + .indexed_metadata + .write_buffer(&render_device, &render_queue); + indirect_parameters_buffers + .non_indexed_metadata + .write_buffer(&render_device, &render_queue); + + indirect_parameters_buffers + .indexed_batch_sets + .write_buffer(&render_device, &render_queue); + indirect_parameters_buffers + .non_indexed_batch_sets .write_buffer(&render_device, &render_queue); - indirect_parameters_buffer.buffer.clear(); } #[cfg(test)] diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs index 590ed94293290..214fdda13644e 100644 --- a/crates/bevy_render/src/batching/mod.rs +++ b/crates/bevy_render/src/batching/mod.rs @@ -6,7 +6,7 @@ use bevy_ecs::{ use bytemuck::Pod; use nonmax::NonMaxU32; -use self::gpu_preprocessing::IndirectParametersBuffer; +use self::gpu_preprocessing::IndirectParametersBuffers; use crate::{render_phase::PhaseItemExtraIndex, sync_world::MainEntity}; use crate::{ render_phase::{ @@ -58,7 +58,9 @@ impl BatchMeta { PhaseItemExtraIndex::DynamicOffset(dynamic_offset) => { NonMaxU32::new(dynamic_offset) } - PhaseItemExtraIndex::None | PhaseItemExtraIndex::IndirectParametersIndex(_) => None, + PhaseItemExtraIndex::None | PhaseItemExtraIndex::IndirectParametersIndex { .. } => { + None + } }, user_data, } @@ -141,17 +143,36 @@ pub trait GetFullBatchData: GetBatchData { query_item: MainEntity, ) -> Option; - /// Writes the [`gpu_preprocessing::IndirectParameters`] necessary to draw - /// this batch into the given [`IndirectParametersBuffer`] at the given - /// index. + /// Writes the [`gpu_preprocessing::IndirectParametersMetadata`] necessary + /// to draw this batch into the given metadata buffer at the given index. /// /// This is only used if GPU culling is enabled (which requires GPU /// preprocessing). - fn write_batch_indirect_parameters( - param: &SystemParamItem, - indirect_parameters_buffer: &mut IndirectParametersBuffer, + /// + /// * `mesh_index` describes the index of the first mesh instance in this + /// batch in the `MeshInputUniform` buffer. + /// + /// * `indexed` is true if the mesh is indexed or false if it's non-indexed. + /// + /// * `base_output_index` is the index of the first mesh instance in this + /// batch in the `MeshUniform` output buffer. + /// + /// * `batch_set_index` is the index of the batch set in the + /// [`gpu_preprocessing::IndirectBatchSet`] buffer, if this batch belongs to + /// a batch set. + /// + /// * `indirect_parameters_buffers` is the buffer in which to write the + /// metadata. + /// + /// * `indirect_parameters_offset` is the index in that buffer at which to + /// write the metadata. + fn write_batch_indirect_parameters_metadata( + mesh_index: u32, + indexed: bool, + base_output_index: u32, + batch_set_index: Option, + indirect_parameters_buffers: &mut IndirectParametersBuffers, indirect_parameters_offset: u32, - entity: MainEntity, ); } diff --git a/crates/bevy_render/src/camera/camera.rs b/crates/bevy_render/src/camera/camera.rs index b3b6b6f7c70aa..1bc4b3737ad81 100644 --- a/crates/bevy_render/src/camera/camera.rs +++ b/crates/bevy_render/src/camera/camera.rs @@ -1153,7 +1153,7 @@ pub fn extract_cameras( hdr: camera.hdr, }, ExtractedView { - retained_view_entity: RetainedViewEntity::new(main_entity.into(), 0), + retained_view_entity: RetainedViewEntity::new(main_entity.into(), None, 0), clip_from_view: camera.clip_from_view(), world_from_view: *transform, clip_from_world: None, diff --git a/crates/bevy_render/src/mesh/components.rs b/crates/bevy_render/src/mesh/components.rs index 10229be41210d..2b887c65d32c5 100644 --- a/crates/bevy_render/src/mesh/components.rs +++ b/crates/bevy_render/src/mesh/components.rs @@ -2,11 +2,15 @@ use crate::{ mesh::Mesh, view::{self, Visibility, VisibilityClass}, }; -use bevy_asset::{AssetId, Handle}; +use bevy_asset::{AssetEvent, AssetId, Handle}; use bevy_derive::{Deref, DerefMut}; -use bevy_ecs::{component::Component, prelude::require, reflect::ReflectComponent}; +use bevy_ecs::{ + change_detection::DetectChangesMut, component::Component, event::EventReader, prelude::require, + reflect::ReflectComponent, system::Query, +}; use bevy_reflect::{std_traits::ReflectDefault, Reflect}; use bevy_transform::components::Transform; +use bevy_utils::{FixedHasher, HashSet}; use derive_more::derive::From; /// A component for 2D meshes. Requires a [`MeshMaterial2d`] to be rendered, commonly using a [`ColorMaterial`]. @@ -101,3 +105,32 @@ impl From<&Mesh3d> for AssetId { mesh.id() } } + +/// A system that marks a [`Mesh3d`] as changed if the associated [`Mesh`] asset +/// has changed. +/// +/// This is needed because the systems that extract meshes, such as +/// `extract_meshes_for_gpu_building`, write some metadata about the mesh (like +/// the location within each slab) into the GPU structures that they build that +/// needs to be kept up to date if the contents of the mesh change. +pub fn mark_3d_meshes_as_changed_if_their_assets_changed( + mut meshes_3d: Query<&mut Mesh3d>, + mut mesh_asset_events: EventReader>, +) { + let mut changed_meshes: HashSet, FixedHasher> = HashSet::default(); + for mesh_asset_event in mesh_asset_events.read() { + if let AssetEvent::Modified { id } = mesh_asset_event { + changed_meshes.insert(*id); + } + } + + if changed_meshes.is_empty() { + return; + } + + for mut mesh_3d in &mut meshes_3d { + if changed_meshes.contains(&mesh_3d.0.id()) { + mesh_3d.set_changed(); + } + } +} diff --git a/crates/bevy_render/src/mesh/mod.rs b/crates/bevy_render/src/mesh/mod.rs index 7a7829e0f4ef1..703333675da74 100644 --- a/crates/bevy_render/src/mesh/mod.rs +++ b/crates/bevy_render/src/mesh/mod.rs @@ -9,6 +9,7 @@ use crate::{ render_asset::{PrepareAssetError, RenderAsset, RenderAssetPlugin, RenderAssets}, render_resource::TextureView, texture::GpuImage, + view::VisibilitySystems, RenderApp, }; use allocator::MeshAllocatorPlugin; @@ -17,6 +18,7 @@ use bevy_asset::{AssetApp, AssetId, RenderAssetUsages}; use bevy_ecs::{ entity::Entity, query::{Changed, With}, + schedule::IntoSystemConfigs, system::Query, }; use bevy_ecs::{ @@ -42,7 +44,12 @@ impl Plugin for MeshPlugin { .register_type::>() // 'Mesh' must be prepared after 'Image' as meshes rely on the morph target image being ready .add_plugins(RenderAssetPlugin::::default()) - .add_plugins(MeshAllocatorPlugin); + .add_plugins(MeshAllocatorPlugin) + .add_systems( + PostUpdate, + components::mark_3d_meshes_as_changed_if_their_assets_changed + .ambiguous_with(VisibilitySystems::CalculateBounds), + ); let Some(render_app) = app.get_sub_app_mut(RenderApp) else { return; @@ -130,6 +137,12 @@ impl RenderMesh { pub fn primitive_topology(&self) -> PrimitiveTopology { self.key_bits.primitive_topology() } + + /// Returns true if this mesh uses an index buffer or false otherwise. + #[inline] + pub fn indexed(&self) -> bool { + matches!(self.buffer_info, RenderMeshBufferInfo::Indexed { .. }) + } } /// The index/vertex buffer info of a [`RenderMesh`]. diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs index d65b46da7cdf6..2b8d0c9e8a362 100644 --- a/crates/bevy_render/src/render_phase/mod.rs +++ b/crates/bevy_render/src/render_phase/mod.rs @@ -36,8 +36,10 @@ pub use draw_state::*; use encase::{internal::WriteInto, ShaderSize}; use nonmax::NonMaxU32; pub use rangefinder::*; +use wgpu::Features; use crate::batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport}; +use crate::renderer::RenderDevice; use crate::sync_world::MainEntity; use crate::view::RetainedViewEntity; use crate::{ @@ -189,6 +191,7 @@ pub enum BinnedRenderPhaseBatchSets { pub struct BinnedRenderPhaseBatchSet { pub(crate) batches: Vec, pub(crate) bin_key: BK, + pub(crate) index: u32, } impl BinnedRenderPhaseBatchSets { @@ -456,6 +459,11 @@ where let draw_functions = world.resource::>(); let mut draw_functions = draw_functions.write(); + let render_device = world.resource::(); + let multi_draw_indirect_count_supported = render_device + .features() + .contains(Features::MULTI_DRAW_INDIRECT_COUNT); + match self.batch_sets { BinnedRenderPhaseBatchSets::DynamicUniforms(ref batch_sets) => { debug_assert_eq!(self.batchable_mesh_keys.len(), batch_sets.len()); @@ -522,6 +530,12 @@ where continue; }; + let batch_set_index = if multi_draw_indirect_count_supported { + NonMaxU32::new(batch_set.index) + } else { + None + }; + let binned_phase_item = BPI::new( batch_set_key.clone(), batch_set.bin_key.clone(), @@ -532,10 +546,12 @@ where PhaseItemExtraIndex::DynamicOffset(ref dynamic_offset) => { PhaseItemExtraIndex::DynamicOffset(*dynamic_offset) } - PhaseItemExtraIndex::IndirectParametersIndex(ref range) => { - PhaseItemExtraIndex::IndirectParametersIndex( - range.start..(range.start + batch_set.batches.len() as u32), - ) + PhaseItemExtraIndex::IndirectParametersIndex { ref range, .. } => { + PhaseItemExtraIndex::IndirectParametersIndex { + range: range.start + ..(range.start + batch_set.batches.len() as u32), + batch_set_index, + } } }, ); @@ -585,10 +601,11 @@ where let first_indirect_parameters_index_for_entity = u32::from(*first_indirect_parameters_index) + entity_index as u32; - PhaseItemExtraIndex::IndirectParametersIndex( - first_indirect_parameters_index_for_entity + PhaseItemExtraIndex::IndirectParametersIndex { + range: first_indirect_parameters_index_for_entity ..(first_indirect_parameters_index_for_entity + 1), - ) + batch_set_index: None, + } } }, }, @@ -725,10 +742,11 @@ impl UnbatchableBinnedEntityIndexSet { u32::from(*first_indirect_parameters_index) + entity_index; Some(UnbatchableBinnedEntityIndices { instance_index: instance_range.start + entity_index, - extra_index: PhaseItemExtraIndex::IndirectParametersIndex( - first_indirect_parameters_index_for_this_batch + extra_index: PhaseItemExtraIndex::IndirectParametersIndex { + range: first_indirect_parameters_index_for_this_batch ..(first_indirect_parameters_index_for_this_batch + 1), - ), + batch_set_index: None, + }, }) } UnbatchableBinnedEntityIndexSet::Dense(ref indices) => { @@ -890,12 +908,17 @@ impl UnbatchableBinnedEntityIndexSet { first_indirect_parameters_index: None, } } - PhaseItemExtraIndex::IndirectParametersIndex(ref range) => { + PhaseItemExtraIndex::IndirectParametersIndex { + range: ref indirect_parameters_index, + .. + } => { // This is the first entity we've seen, and we have compute // shaders. Initialize the fast path. *self = UnbatchableBinnedEntityIndexSet::Sparse { instance_range: indices.instance_index..indices.instance_index + 1, - first_indirect_parameters_index: NonMaxU32::new(range.start), + first_indirect_parameters_index: NonMaxU32::new( + indirect_parameters_index.start, + ), } } } @@ -909,7 +932,10 @@ impl UnbatchableBinnedEntityIndexSet { && indices.extra_index == PhaseItemExtraIndex::None) || first_indirect_parameters_index.is_some_and( |first_indirect_parameters_index| match indices.extra_index { - PhaseItemExtraIndex::IndirectParametersIndex(ref this_range) => { + PhaseItemExtraIndex::IndirectParametersIndex { + range: ref this_range, + .. + } => { u32::from(first_indirect_parameters_index) + instance_range.end - instance_range.start == this_range.start @@ -1129,7 +1155,22 @@ pub enum PhaseItemExtraIndex { /// An index into the buffer that specifies the indirect parameters for this /// [`PhaseItem`]'s drawcall. This is used when indirect mode is on (as used /// for GPU culling). - IndirectParametersIndex(Range), + IndirectParametersIndex { + /// The range of indirect parameters within the indirect parameters array. + /// + /// If we're using `multi_draw_indirect_count`, this specifies the + /// maximum range of indirect parameters within that array. If batches + /// are ultimately culled out on the GPU, the actual number of draw + /// commands might be lower than the length of this range. + range: Range, + /// If `multi_draw_indirect_count` is in use, and this phase item is + /// part of a batch set, specifies the index of the batch set that this + /// phase item is a part of. + /// + /// If `multi_draw_indirect_count` isn't in use, or this phase item + /// isn't part of a batch set, this is `None`. + batch_set_index: Option, + }, } impl PhaseItemExtraIndex { @@ -1139,9 +1180,11 @@ impl PhaseItemExtraIndex { indirect_parameters_index: Option, ) -> PhaseItemExtraIndex { match indirect_parameters_index { - Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex( - u32::from(indirect_parameters_index)..(u32::from(indirect_parameters_index) + 1), - ), + Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex { + range: u32::from(indirect_parameters_index) + ..(u32::from(indirect_parameters_index) + 1), + batch_set_index: None, + }, None => PhaseItemExtraIndex::None, } } @@ -1172,7 +1215,11 @@ pub trait BinnedPhaseItem: PhaseItem { /// reduces the need for rebinding between bins and improves performance. type BinKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash; - type BatchSetKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash; + /// The key used to combine batches into batch sets. + /// + /// A *batch set* is a set of meshes that can potentially be multi-drawn + /// together. + type BatchSetKey: PhaseItemBatchSetKey; /// Creates a new binned phase item from the key and per-entity data. /// @@ -1188,6 +1235,19 @@ pub trait BinnedPhaseItem: PhaseItem { ) -> Self; } +/// A key used to combine batches into batch sets. +/// +/// A *batch set* is a set of meshes that can potentially be multi-drawn +/// together. +pub trait PhaseItemBatchSetKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash { + /// Returns true if this batch set key describes indexed meshes or false if + /// it describes non-indexed meshes. + /// + /// Bevy uses this in order to determine which kind of indirect draw + /// parameters to use, if indirect drawing is enabled. + fn indexed(&self) -> bool; +} + /// Represents phase items that must be sorted. The `SortKey` specifies the /// order that these items are drawn in. These are placed into a single array, /// and the array as a whole is then sorted. @@ -1219,6 +1279,17 @@ pub trait SortedPhaseItem: PhaseItem { fn sort(items: &mut [Self]) { items.sort_unstable_by_key(Self::sort_key); } + + /// Whether this phase item targets indexed meshes (those with both vertex + /// and index buffers as opposed to just vertex buffers). + /// + /// Bevy needs this information in order to properly group phase items + /// together for multi-draw indirect, because the GPU layout of indirect + /// commands differs between indexed and non-indexed meshes. + /// + /// If you're implementing a custom phase item that doesn't describe a mesh, + /// you can safely return false here. + fn indexed(&self) -> bool; } /// A [`PhaseItem`] item, that automatically sets the appropriate render pipeline, diff --git a/crates/bevy_render/src/view/mod.rs b/crates/bevy_render/src/view/mod.rs index 3eb0df75e2f2c..b5ad7a541e297 100644 --- a/crates/bevy_render/src/view/mod.rs +++ b/crates/bevy_render/src/view/mod.rs @@ -191,14 +191,25 @@ impl Msaa { /// stable, and we can't use just [`MainEntity`] because some main world views /// extract to multiple render world views. For example, a directional light /// extracts to one render world view per cascade, and a point light extracts to -/// one render world view per cubemap face. So we pair the main entity with a -/// *subview index*, which *together* uniquely identify a view in the render -/// world in a way that's stable from frame to frame. +/// one render world view per cubemap face. So we pair the main entity with an +/// *auxiliary entity* and a *subview index*, which *together* uniquely identify +/// a view in the render world in a way that's stable from frame to frame. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct RetainedViewEntity { /// The main entity that this view corresponds to. pub main_entity: MainEntity, + /// Another entity associated with the view entity. + /// + /// This is currently used for shadow cascades. If there are multiple + /// cameras, each camera needs to have its own set of shadow cascades. Thus + /// the light and subview index aren't themselves enough to uniquely + /// identify a shadow cascade: we need the camera that the cascade is + /// associated with as well. This entity stores that camera. + /// + /// If not present, this will be `MainEntity(Entity::PLACEHOLDER)`. + pub auxiliary_entity: MainEntity, + /// The index of the view corresponding to the entity. /// /// For example, for point lights that cast shadows, this is the index of @@ -208,14 +219,19 @@ pub struct RetainedViewEntity { } impl RetainedViewEntity { - /// Creates a new [`RetainedViewEntity`] from the given main world entity - /// and subview index. + /// Creates a new [`RetainedViewEntity`] from the given main world entity, + /// auxiliary main world entity, and subview index. /// /// See [`RetainedViewEntity::subview_index`] for an explanation of what - /// `subview_index` is. - pub fn new(main_entity: MainEntity, subview_index: u32) -> Self { + /// `auxiliary_entity` and `subview_index` are. + pub fn new( + main_entity: MainEntity, + auxiliary_entity: Option, + subview_index: u32, + ) -> Self { Self { main_entity, + auxiliary_entity: auxiliary_entity.unwrap_or(Entity::PLACEHOLDER.into()), subview_index, } } diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs index 39b6b15f1a6ea..a4dfc376682c4 100644 --- a/crates/bevy_sprite/src/mesh2d/material.rs +++ b/crates/bevy_sprite/src/mesh2d/material.rs @@ -5,7 +5,9 @@ use crate::{ use bevy_app::{App, Plugin}; use bevy_asset::{Asset, AssetApp, AssetId, AssetServer, Handle}; use bevy_core_pipeline::{ - core_2d::{AlphaMask2d, AlphaMask2dBinKey, Opaque2d, Opaque2dBinKey, Transparent2d}, + core_2d::{ + AlphaMask2d, AlphaMask2dBinKey, BatchSetKey2d, Opaque2d, Opaque2dBinKey, Transparent2d, + }, tonemapping::{DebandDither, Tonemapping}, }; use bevy_derive::{Deref, DerefMut}; @@ -584,7 +586,9 @@ pub fn queue_material2d_meshes( material_bind_group_id: material_2d.get_bind_group_id().0, }; opaque_phase.add( - (), + BatchSetKey2d { + indexed: mesh.indexed(), + }, bin_key, (*render_entity, *visible_entity), binned_render_phase_type, @@ -598,7 +602,9 @@ pub fn queue_material2d_meshes( material_bind_group_id: material_2d.get_bind_group_id().0, }; alpha_mask_phase.add( - (), + BatchSetKey2d { + indexed: mesh.indexed(), + }, bin_key, (*render_entity, *visible_entity), binned_render_phase_type, @@ -617,6 +623,7 @@ pub fn queue_material2d_meshes( // Batching is done in batch_and_prepare_render_phase batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: mesh.indexed(), }); } } diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs index aa7e41dbcbd4f..52309fd492aab 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh.rs +++ b/crates/bevy_sprite/src/mesh2d/mesh.rs @@ -18,7 +18,7 @@ use bevy_image::{BevyDefault, Image, ImageSampler, TextureFormatPixelInfo}; use bevy_math::{Affine3, Vec4}; use bevy_render::{ batching::{ - gpu_preprocessing::IndirectParameters, + gpu_preprocessing::IndirectParametersMetadata, no_gpu_preprocessing::{ self, batch_and_prepare_binned_render_phase, batch_and_prepare_sorted_render_phase, write_batched_instance_buffer, BatchedInstanceBuffer, @@ -403,56 +403,33 @@ impl GetFullBatchData for Mesh2dPipeline { None } - fn write_batch_indirect_parameters( - (mesh_instances, meshes, mesh_allocator): &SystemParamItem, - indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffer, + fn write_batch_indirect_parameters_metadata( + input_index: u32, + indexed: bool, + base_output_index: u32, + batch_set_index: Option, + indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffers, indirect_parameters_offset: u32, - main_entity: MainEntity, ) { - let Some(mesh_instance) = mesh_instances.get(&main_entity) else { - return; - }; - let Some(mesh) = meshes.get(mesh_instance.mesh_asset_id) else { - return; - }; - let Some(vertex_buffer_slice) = - mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) - else { - return; - }; - // Note that `IndirectParameters` covers both of these structures, even // though they actually have distinct layouts. See the comment above that // type for more information. - let indirect_parameters = match mesh.buffer_info { - RenderMeshBufferInfo::Indexed { - count: index_count, .. - } => { - let Some(index_buffer_slice) = - mesh_allocator.mesh_index_slice(&mesh_instance.mesh_asset_id) - else { - return; - }; - IndirectParameters { - vertex_or_index_count: index_count, - instance_count: 0, - first_vertex_or_first_index: index_buffer_slice.range.start, - base_vertex_or_first_instance: vertex_buffer_slice.range.start, - first_instance: 0, - } - } - RenderMeshBufferInfo::NonIndexed => IndirectParameters { - vertex_or_index_count: mesh.vertex_count, - instance_count: 0, - first_vertex_or_first_index: vertex_buffer_slice.range.start, - base_vertex_or_first_instance: 0, - // Use `0xffffffff` as a placeholder to tell the mesh - // preprocessing shader that this is a non-indexed mesh. - first_instance: !0, + let indirect_parameters = IndirectParametersMetadata { + mesh_index: input_index, + base_output_index, + batch_set_index: match batch_set_index { + None => !0, + Some(batch_set_index) => u32::from(batch_set_index), }, + instance_count: 0, }; - indirect_parameters_buffer.set(indirect_parameters_offset, indirect_parameters); + if indexed { + indirect_parameters_buffer.set_indexed(indirect_parameters_offset, indirect_parameters); + } else { + indirect_parameters_buffer + .set_non_indexed(indirect_parameters_offset, indirect_parameters); + } } } diff --git a/crates/bevy_sprite/src/render/mod.rs b/crates/bevy_sprite/src/render/mod.rs index 3dae793d323dd..585229052e04f 100644 --- a/crates/bevy_sprite/src/render/mod.rs +++ b/crates/bevy_sprite/src/render/mod.rs @@ -574,6 +574,7 @@ pub fn queue_sprites( // batch_range and dynamic_offset will be calculated in prepare_sprites batch_range: 0..0, extra_index: PhaseItemExtraIndex::None, + indexed: true, }); } } diff --git a/crates/bevy_ui/src/render/box_shadow.rs b/crates/bevy_ui/src/render/box_shadow.rs index 7f18e935ab8eb..f33a4ed4ded2d 100644 --- a/crates/bevy_ui/src/render/box_shadow.rs +++ b/crates/bevy_ui/src/render/box_shadow.rs @@ -387,6 +387,7 @@ pub fn queue_shadows( ), batch_range: 0..0, extra_index: PhaseItemExtraIndex::None, + indexed: true, }); } } diff --git a/crates/bevy_ui/src/render/mod.rs b/crates/bevy_ui/src/render/mod.rs index bbadf8aef7601..27f94776deae2 100644 --- a/crates/bevy_ui/src/render/mod.rs +++ b/crates/bevy_ui/src/render/mod.rs @@ -626,7 +626,7 @@ pub fn extract_ui_camera_view( // We use `UI_CAMERA_SUBVIEW` here so as not to conflict with the // main 3D or 2D camera, which will have subview index 0. let retained_view_entity = - RetainedViewEntity::new(main_entity.into(), UI_CAMERA_SUBVIEW); + RetainedViewEntity::new(main_entity.into(), None, UI_CAMERA_SUBVIEW); // Creates the UI view. let ui_camera_view = commands .spawn(( @@ -894,6 +894,7 @@ pub fn queue_uinodes( // batch_range will be calculated in prepare_uinodes batch_range: 0..0, extra_index: PhaseItemExtraIndex::None, + indexed: true, }); } } diff --git a/crates/bevy_ui/src/render/render_pass.rs b/crates/bevy_ui/src/render/render_pass.rs index d26844fcb6161..c9f0d3d69c547 100644 --- a/crates/bevy_ui/src/render/render_pass.rs +++ b/crates/bevy_ui/src/render/render_pass.rs @@ -112,6 +112,7 @@ pub struct TransparentUi { pub draw_function: DrawFunctionId, pub batch_range: Range, pub extra_index: PhaseItemExtraIndex, + pub indexed: bool, } impl PhaseItem for TransparentUi { @@ -162,6 +163,11 @@ impl SortedPhaseItem for TransparentUi { fn sort(items: &mut [Self]) { items.sort_by_key(SortedPhaseItem::sort_key); } + + #[inline] + fn indexed(&self) -> bool { + self.indexed + } } impl CachedRenderPipelinePhaseItem for TransparentUi { diff --git a/crates/bevy_ui/src/render/ui_material_pipeline.rs b/crates/bevy_ui/src/render/ui_material_pipeline.rs index c47729105e79a..904a0bd225db4 100644 --- a/crates/bevy_ui/src/render/ui_material_pipeline.rs +++ b/crates/bevy_ui/src/render/ui_material_pipeline.rs @@ -662,6 +662,7 @@ pub fn queue_ui_material_nodes( ), batch_range: 0..0, extra_index: PhaseItemExtraIndex::None, + indexed: false, }); } } diff --git a/crates/bevy_ui/src/render/ui_texture_slice_pipeline.rs b/crates/bevy_ui/src/render/ui_texture_slice_pipeline.rs index 989207b3a2728..869e5f0226c8a 100644 --- a/crates/bevy_ui/src/render/ui_texture_slice_pipeline.rs +++ b/crates/bevy_ui/src/render/ui_texture_slice_pipeline.rs @@ -385,6 +385,7 @@ pub fn queue_ui_slices( ), batch_range: 0..0, extra_index: PhaseItemExtraIndex::None, + indexed: true, }); } } diff --git a/examples/2d/mesh2d_manual.rs b/examples/2d/mesh2d_manual.rs index 09acb6dbb99bc..15d611ce667cf 100644 --- a/examples/2d/mesh2d_manual.rs +++ b/examples/2d/mesh2d_manual.rs @@ -392,10 +392,10 @@ pub fn queue_colored_mesh2d( let mesh2d_transforms = &mesh_instance.transforms; // Get our specialized pipeline let mut mesh2d_key = mesh_key; - if let Some(mesh) = render_meshes.get(mesh2d_handle) { - mesh2d_key |= - Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology()); - } + let Some(mesh) = render_meshes.get(mesh2d_handle) else { + continue; + }; + mesh2d_key |= Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology()); let pipeline_id = pipelines.specialize(&pipeline_cache, &colored_mesh2d_pipeline, mesh2d_key); @@ -411,6 +411,7 @@ pub fn queue_colored_mesh2d( // This material is not batched batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: mesh.indexed(), }); } } diff --git a/examples/shader/custom_shader_instancing.rs b/examples/shader/custom_shader_instancing.rs index 3cdfad774dbae..cd5909c36fff1 100644 --- a/examples/shader/custom_shader_instancing.rs +++ b/examples/shader/custom_shader_instancing.rs @@ -166,6 +166,7 @@ fn queue_custom( distance: rangefinder.distance_translation(&mesh_instance.translation), batch_range: 0..1, extra_index: PhaseItemExtraIndex::None, + indexed: true, }); } } diff --git a/examples/shader/specialized_mesh_pipeline.rs b/examples/shader/specialized_mesh_pipeline.rs index ee5c68bc11a16..bc1ecf113c78b 100644 --- a/examples/shader/specialized_mesh_pipeline.rs +++ b/examples/shader/specialized_mesh_pipeline.rs @@ -6,8 +6,11 @@ //! //! [`SpecializedMeshPipeline`] let's you customize the entire pipeline used when rendering a mesh. +use std::any::TypeId; + use bevy::{ core_pipeline::core_3d::{Opaque3d, Opaque3dBatchSetKey, Opaque3dBinKey, CORE_3D_DEPTH_FORMAT}, + ecs::system::StaticSystemParam, math::{vec3, vec4}, pbr::{ DrawMesh, MeshPipeline, MeshPipelineKey, MeshPipelineViewLayoutKey, RenderMeshInstances, @@ -15,6 +18,14 @@ use bevy::{ }, prelude::*, render::{ + batching::GetFullBatchData, + batching::{ + gpu_preprocessing::{ + BatchedInstanceBuffers, IndirectParametersBuffers, PreprocessWorkItem, + PreprocessWorkItemBuffers, + }, + GetBatchData, + }, extract_component::{ExtractComponent, ExtractComponentPlugin}, mesh::{Indices, MeshVertexBufferLayoutRef, PrimitiveTopology, RenderMesh}, render_asset::{RenderAssetUsages, RenderAssets}, @@ -28,9 +39,11 @@ use bevy::{ RenderPipelineDescriptor, SpecializedMeshPipeline, SpecializedMeshPipelineError, SpecializedMeshPipelines, TextureFormat, VertexState, }, + view::NoIndirectDrawing, view::{self, ExtractedView, RenderVisibleEntities, ViewTarget, VisibilityClass}, Render, RenderApp, RenderSet, }, + utils::TypeIdMap, }; const SHADER_ASSET_PATH: &str = "shaders/specialized_mesh_pipeline.wgsl"; @@ -264,13 +277,39 @@ impl SpecializedMeshPipeline for CustomMeshPipeline { fn queue_custom_mesh_pipeline( pipeline_cache: Res, custom_mesh_pipeline: Res, - mut opaque_render_phases: ResMut>, - opaque_draw_functions: Res>, + (mut opaque_render_phases, opaque_draw_functions): ( + ResMut>, + Res>, + ), mut specialized_mesh_pipelines: ResMut>, - views: Query<(&RenderVisibleEntities, &ExtractedView, &Msaa), With>, - render_meshes: Res>, - render_mesh_instances: Res, + views: Query<( + Entity, + &RenderVisibleEntities, + &ExtractedView, + &Msaa, + Has, + )>, + (render_meshes, render_mesh_instances): ( + Res>, + Res, + ), + param: StaticSystemParam<::Param>, + gpu_array_buffer: ResMut< + BatchedInstanceBuffers< + ::BufferData, + ::BufferInputData, + >, + >, + mut indirect_parameters_buffers: ResMut, ) { + let system_param_item = param.into_inner(); + + let BatchedInstanceBuffers { + ref mut data_buffer, + ref mut work_item_buffers, + .. + } = gpu_array_buffer.into_inner(); + // Get the id for our custom draw function let draw_function_id = opaque_draw_functions .read() @@ -279,15 +318,29 @@ fn queue_custom_mesh_pipeline( // Render phases are per-view, so we need to iterate over all views so that // the entity appears in them. (In this example, we have only one view, but // it's good practice to loop over all views anyway.) - for (view_visible_entities, view, msaa) in views.iter() { + for (view_entity, view_visible_entities, view, msaa, no_indirect_drawing) in views.iter() { let Some(opaque_phase) = opaque_render_phases.get_mut(&view.retained_view_entity) else { continue; }; + // Create a *work item buffer* if necessary. Work item buffers store the + // indices of meshes that are to be rendered when indirect drawing is + // enabled. + let work_item_buffer = work_item_buffers + .entry(view_entity) + .or_insert_with(TypeIdMap::default) + .entry(TypeId::of::()) + .or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing)); + // Create the key based on the view. In this case we only care about MSAA and HDR let view_key = MeshPipelineKey::from_msaa_samples(msaa.samples()) | MeshPipelineKey::from_hdr(view.hdr); + // Set up a slot to hold information about the batch set we're going to + // create. If there are any of our custom meshes in the scene, we'll + // need this information in order for Bevy to kick off the rendering. + let mut mesh_batch_set_info = None; + // Find all the custom rendered entities that are visible from this // view. for &(render_entity, visible_entity) in @@ -310,6 +363,27 @@ fn queue_custom_mesh_pipeline( let mut mesh_key = view_key; mesh_key |= MeshPipelineKey::from_primitive_topology(mesh.primitive_topology()); + // Initialize the batch set information if this was the first custom + // mesh we saw. We'll need that information later to create the + // batch set. + if mesh_batch_set_info.is_none() { + mesh_batch_set_info = Some(MeshBatchSetInfo { + indirect_parameters_index: indirect_parameters_buffers + .allocate(mesh.indexed(), 1), + is_indexed: mesh.indexed(), + }); + } + let mesh_info = mesh_batch_set_info.unwrap(); + + // Allocate some input and output indices. We'll need these to + // create the *work item* below. + let Some(input_index) = + MeshPipeline::get_binned_index(&system_param_item, visible_entity) + else { + continue; + }; + let output_index = data_buffer.add() as u32; + // Finally, we can specialize the pipeline based on the key let pipeline_id = specialized_mesh_pipelines .specialize( @@ -343,6 +417,35 @@ fn queue_custom_mesh_pipeline( // support it you can use `BinnedRenderPhaseType::UnbatchableMesh` BinnedRenderPhaseType::BatchableMesh, ); + + // Create a *work item*. A work item tells the Bevy renderer to + // transform the mesh on GPU. + work_item_buffer.push( + mesh.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_index, + indirect_parameters_index: mesh_info.indirect_parameters_index, + }, + ); + } + + // Now if there were any meshes, we need to add a command to the + // indirect parameters buffer, so that the renderer will end up + // enqueuing a command to draw the mesh. + if let Some(mesh_info) = mesh_batch_set_info { + indirect_parameters_buffers + .add_batch_set(mesh_info.is_indexed, mesh_info.indirect_parameters_index); } } } + +// If we end up having any custom meshes to draw, this contains information +// needed to create the batch set. +#[derive(Clone, Copy)] +struct MeshBatchSetInfo { + /// The first index of the mesh batch in the indirect parameters buffer. + indirect_parameters_index: u32, + /// Whether the mesh is indexed (has an index buffer). + is_indexed: bool, +}