From 79ff607b7c700c5f752b4abc3694a76e830fa56f Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Thu, 19 Sep 2024 17:30:02 +0200 Subject: [PATCH 01/13] First working version -- API needs to be improved --- Cargo.lock | 92 ++ Cargo.toml | 2 + .../src/types/from_register_input.rs | 5 + compositor_api/src/types/register_input.rs | 3 + compositor_pipeline/Cargo.toml | 1 + compositor_pipeline/src/error.rs | 16 + compositor_pipeline/src/pipeline.rs | 77 +- compositor_pipeline/src/pipeline/decoder.rs | 2 + .../src/pipeline/decoder/video.rs | 17 +- .../pipeline/decoder/video/vulkan_video.rs | 108 ++ compositor_pipeline/src/pipeline/input.rs | 1 + .../src/pipeline/input/mp4/mp4_file_reader.rs | 3 +- compositor_pipeline/src/pipeline/types.rs | 6 + compositor_render/src/error.rs | 9 - compositor_render/src/wgpu/ctx.rs | 17 +- compositor_render/src/wgpu/texture/nv12.rs | 4 +- .../examples/raw_channel_input.rs | 16 +- .../examples/raw_channel_output.rs | 17 +- integration_tests/examples/vulkan.rs | 109 ++ src/state.rs | 7 +- vk-video/.gitignore | 5 + vk-video/Cargo.toml | 23 + vk-video/LICENSE | 21 + vk-video/examples/basic.rs | 39 + vk-video/examples/wgpu.rs | 157 ++ vk-video/src/lib.rs | 68 + vk-video/src/parser.rs | 761 +++++++++ vk-video/src/parser/au_splitter.rs | 136 ++ vk-video/src/vulkan_decoder.rs | 1406 +++++++++++++++++ vk-video/src/vulkan_decoder/parameter_sets.rs | 262 +++ vk-video/src/vulkan_decoder/vulkan_ctx.rs | 644 ++++++++ vk-video/src/vulkan_decoder/wrappers.rs | 59 + .../src/vulkan_decoder/wrappers/command.rs | 132 ++ vk-video/src/vulkan_decoder/wrappers/debug.rs | 185 +++ vk-video/src/vulkan_decoder/wrappers/mem.rs | 249 +++ vk-video/src/vulkan_decoder/wrappers/sync.rs | 85 + vk-video/src/vulkan_decoder/wrappers/video.rs | 298 ++++ .../vulkan_decoder/wrappers/vk_extensions.rs | 228 +++ 38 files changed, 5230 insertions(+), 40 deletions(-) create mode 100644 compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs create mode 100644 
integration_tests/examples/vulkan.rs create mode 100644 vk-video/.gitignore create mode 100644 vk-video/Cargo.toml create mode 100644 vk-video/LICENSE create mode 100644 vk-video/examples/basic.rs create mode 100644 vk-video/examples/wgpu.rs create mode 100644 vk-video/src/lib.rs create mode 100644 vk-video/src/parser.rs create mode 100644 vk-video/src/parser/au_splitter.rs create mode 100644 vk-video/src/vulkan_decoder.rs create mode 100644 vk-video/src/vulkan_decoder/parameter_sets.rs create mode 100644 vk-video/src/vulkan_decoder/vulkan_ctx.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/command.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/debug.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/mem.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/sync.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/video.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs diff --git a/Cargo.lock b/Cargo.lock index fad06cac6..2cfcc0e00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -305,6 +305,12 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "bitstream-io" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b81e1519b0d82120d2fd469d5bfb2919a9361c48b02d82d04befc1cdd2002452" + [[package]] name = "block" version = "0.1.6" @@ -532,6 +538,7 @@ dependencies = [ "socket2", "thiserror", "tracing", + "vk-video", "webrtc-util", "wgpu", ] @@ -801,6 +808,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = 
"digest" version = "0.10.7" @@ -1073,6 +1091,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "four-cc" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "795cbfc56d419a7ce47ccbb7504dd9a5b7c484c083c356e797de08bd988d9629" + [[package]] name = "fs_extra" version = "1.3.0" @@ -1328,6 +1352,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "h264-reader" +version = "0.7.1-dev" +source = "git+https://github.com/membraneframework-labs/h264-reader.git?branch=@jerzywilczek/scaling-lists#7c982f1089558640021ff8a70a2fa253e3e881c7" +dependencies = [ + "bitstream-io", + "hex-slice", + "log", + "memchr", + "rfc6381-codec", +] + [[package]] name = "half" version = "2.2.1" @@ -1374,6 +1410,12 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +[[package]] +name = "hex-slice" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5491a308e0214554f07a81d8944abe45f552871c12e3c3c6e7e5d354039a6c4c" + [[package]] name = "hexf-parse" version = "0.2.1" @@ -1978,6 +2020,21 @@ dependencies = [ "thiserror", ] +[[package]] +name = "mp4ra-rust" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdbc3d3867085d66ac6270482e66f3dd2c5a18451a3dc9ad7269e94844a536b7" +dependencies = [ + "four-cc", +] + +[[package]] +name = "mpeg4-audio-const" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a1fe2275b68991faded2c80aa4a33dba398b77d276038b8f50701a22e55918" + [[package]] name = "naga" version = "22.1.0" @@ -2653,6 +2710,16 @@ dependencies = [ "usvg", ] +[[package]] +name = "rfc6381-codec" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed54c20f5c3ec82eab6d998b313dc75ec5d5650d4f57675e61d72489040297fd" +dependencies = [ + "mp4ra-rust", + 
"mpeg4-audio-const", +] + [[package]] name = "rgb" version = "0.8.36" @@ -3886,6 +3953,31 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vk-mem" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cb12b79bcec57a3334d0284f1364c1846f378bb47df9779c6dbfcfc245c9404" +dependencies = [ + "ash", + "bitflags 2.6.0", + "cc", +] + +[[package]] +name = "vk-video" +version = "0.1.0" +dependencies = [ + "ash", + "derivative", + "h264-reader", + "thiserror", + "tracing", + "tracing-subscriber 0.3.18", + "vk-mem", + "wgpu", +] + [[package]] name = "want" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index c9be7ba7f..d6710df22 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ members = [ "decklink", "compositor_api", "compositor_web", + "vk-video", ] resolver = "2" @@ -56,6 +57,7 @@ schemars = { git = "https://github.com/membraneframework-labs/schemars", rev = " "preserve_order", ] } shared_memory = "0.12.4" +vk-video = { path = "vk-video" } wgpu = { version = "22.1.0", default-features = false, features = [ "wgsl", "dx12", diff --git a/compositor_api/src/types/from_register_input.rs b/compositor_api/src/types/from_register_input.rs index 7c493e8bc..c3daf2995 100644 --- a/compositor_api/src/types/from_register_input.rs +++ b/compositor_api/src/types/from_register_input.rs @@ -106,6 +106,11 @@ impl TryFrom for pipeline::RegisterInputOptions { options: match video { InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { codec: pipeline::VideoCodec::H264, + decoder: pipeline::VideoDecoder::FFmpegH264, + }, + InputRtpVideoOptions::VulkanVideo => decoder::VideoDecoderOptions { + decoder: pipeline::VideoDecoder::VulkanVideo, + codec: pipeline::VideoCodec::H264, }, }, }), diff --git a/compositor_api/src/types/register_input.rs b/compositor_api/src/types/register_input.rs index 
e66f14fc4..635386e61 100644 --- a/compositor_api/src/types/register_input.rs +++ b/compositor_api/src/types/register_input.rs @@ -126,4 +126,7 @@ pub enum InputRtpAudioOptions { pub enum InputRtpVideoOptions { #[serde(rename = "ffmpeg_h264")] FfmepgH264, + + #[serde(rename = "vulkan_video")] + VulkanVideo, } diff --git a/compositor_pipeline/Cargo.toml b/compositor_pipeline/Cargo.toml index 08a259c3a..b7f6c193c 100644 --- a/compositor_pipeline/Cargo.toml +++ b/compositor_pipeline/Cargo.toml @@ -27,6 +27,7 @@ reqwest = { workspace = true } tracing = { workspace = true } fdk-aac-sys = "0.5.0" rubato = "0.15.0" +vk-video = { workspace = true } wgpu = { workspace = true } glyphon = { workspace = true } diff --git a/compositor_pipeline/src/error.rs b/compositor_pipeline/src/error.rs index 1516fd085..591f4fa96 100644 --- a/compositor_pipeline/src/error.rs +++ b/compositor_pipeline/src/error.rs @@ -9,6 +9,18 @@ use compositor_render::{ use crate::pipeline::{decoder::AacDecoderError, VideoCodec}; use fdk_aac_sys as fdk; +#[derive(Debug, thiserror::Error)] +pub enum InitPipelineError { + #[error(transparent)] + InitRendererEngine(#[from] InitRendererEngineError), + + #[error("Failed to create a download directory.")] + CreateDownloadDir(#[source] std::io::Error), + + #[error(transparent)] + VulkanCtxError(#[from] vk_video::VulkanCtxError), +} + #[derive(Debug, thiserror::Error)] pub enum RegisterInputError { #[error("Failed to register input stream. 
Stream \"{0}\" is already registered.")] @@ -120,6 +132,10 @@ pub enum InputInitError { #[error("Couldn't read decoder init result.")] CannotReadInitResult, + + #[cfg(target_os = "linux")] + #[error(transparent)] + VulkanDecoderError(#[from] vk_video::DecoderError), } pub enum ErrorType { diff --git a/compositor_pipeline/src/pipeline.rs b/compositor_pipeline/src/pipeline.rs index 34da59cb9..da570e7de 100644 --- a/compositor_pipeline/src/pipeline.rs +++ b/compositor_pipeline/src/pipeline.rs @@ -7,8 +7,7 @@ use std::thread; use std::time::Duration; use compositor_render::error::{ - ErrorStack, InitPipelineError, RegisterRendererError, RequestKeyframeError, - UnregisterRendererError, + ErrorStack, RegisterRendererError, RequestKeyframeError, UnregisterRendererError, }; use compositor_render::scene::Component; use compositor_render::web_renderer::WebRendererInitOptions; @@ -32,6 +31,7 @@ use types::RawDataSender; use crate::audio_mixer::AudioMixer; use crate::audio_mixer::MixingStrategy; use crate::audio_mixer::{AudioChannels, AudioMixingParams}; +use crate::error::InitPipelineError; use crate::error::{ RegisterInputError, RegisterOutputError, UnregisterInputError, UnregisterOutputError, }; @@ -61,6 +61,7 @@ use self::pipeline_output::PipelineOutput; pub use self::types::{ AudioCodec, EncodedChunk, EncodedChunkKind, EncoderOutputEvent, RawDataReceiver, VideoCodec, + VideoDecoder, }; pub use pipeline_output::PipelineOutputEndCondition; @@ -109,7 +110,36 @@ pub struct Pipeline { is_started: bool, } -#[derive(Debug, Clone)] +pub struct PreinitializedContext { + pub device: Arc, + pub queue: Arc, + + #[cfg(target_os = "linux")] + pub vulkan_ctx: Arc, +} + +impl PreinitializedContext { + #[cfg(target_os = "linux")] + pub fn new(features: wgpu::Features, limits: wgpu::Limits) -> Result { + let vulkan_ctx = Arc::new(vk_video::VulkanCtx::new(features, limits)?); + Ok(PreinitializedContext { + device: vulkan_ctx.wgpu_ctx.device.clone(), + queue: 
vulkan_ctx.wgpu_ctx.queue.clone(), + vulkan_ctx, + }) + } +} + +impl std::fmt::Debug for PreinitializedContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PreinitializedContext") + .field("device", &self.device) + .field("queue", &self.queue) + .finish() + } +} + +#[derive(Debug)] pub struct Options { pub queue_options: QueueOptions, pub stream_fallback_timeout: Duration, @@ -118,28 +148,59 @@ pub struct Options { pub download_root: PathBuf, pub output_sample_rate: u32, pub wgpu_features: WgpuFeatures, - pub wgpu_ctx: Option<(Arc, Arc)>, pub load_system_fonts: Option, + pub wgpu_ctx: Option, } -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct PipelineCtx { pub output_sample_rate: u32, pub output_framerate: Framerate, pub download_dir: Arc, pub event_emitter: Arc, + #[cfg(target_os = "linux")] + pub vulkan_ctx: Arc, +} + +impl std::fmt::Debug for PipelineCtx { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PipelineCtx") + .field("output_sample_rate", &self.output_sample_rate) + .field("output_framerate", &self.output_framerate) + .field("download_dir", &self.download_dir) + .field("event_emitter", &self.event_emitter) + .finish() + } } impl Pipeline { pub fn new(opts: Options) -> Result<(Self, Arc), InitPipelineError> { + let preinitialized_ctx = match opts.wgpu_ctx { + Some(ctx) => Some(ctx), + None => { + if cfg!(target_os = "linux") { + Some(PreinitializedContext::new(opts.wgpu_features | wgpu::Features::PUSH_CONSTANTS | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + })?) 
+ } else { + None + } + } + }; + + let wgpu_ctx = preinitialized_ctx + .as_ref() + .map(|ctx| (ctx.device.clone(), ctx.queue.clone())); + let (renderer, event_loop) = Renderer::new(RendererOptions { web_renderer: opts.web_renderer, framerate: opts.queue_options.output_framerate, stream_fallback_timeout: opts.stream_fallback_timeout, force_gpu: opts.force_gpu, wgpu_features: opts.wgpu_features, - wgpu_ctx: opts.wgpu_ctx, load_system_fonts: opts.load_system_fonts.unwrap_or(true), + wgpu_ctx, })?; let download_dir = opts @@ -160,6 +221,10 @@ impl Pipeline { output_framerate: opts.queue_options.output_framerate, download_dir: download_dir.into(), event_emitter, + #[cfg(target_os = "linux")] + vulkan_ctx: preinitialized_ctx + .map(|ctx| ctx.vulkan_ctx) + .expect("This should not fail on linux"), }, }; diff --git a/compositor_pipeline/src/pipeline/decoder.rs b/compositor_pipeline/src/pipeline/decoder.rs index 87f26addc..8dd84cd09 100644 --- a/compositor_pipeline/src/pipeline/decoder.rs +++ b/compositor_pipeline/src/pipeline/decoder.rs @@ -1,6 +1,7 @@ use crate::{audio_mixer::InputSamples, queue::PipelineEvent}; use super::types::VideoCodec; +use super::types::VideoDecoder; use bytes::Bytes; use compositor_render::Frame; @@ -17,6 +18,7 @@ pub(super) use video::start_video_decoder_thread; #[derive(Debug, Clone, PartialEq, Eq)] pub struct VideoDecoderOptions { + pub decoder: VideoDecoder, pub codec: VideoCodec, } diff --git a/compositor_pipeline/src/pipeline/decoder/video.rs b/compositor_pipeline/src/pipeline/decoder/video.rs index 52475dae8..2a817461c 100644 --- a/compositor_pipeline/src/pipeline/decoder/video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video.rs @@ -3,23 +3,34 @@ use crossbeam_channel::{Receiver, Sender}; use crate::{ error::InputInitError, - pipeline::{types::EncodedChunk, VideoCodec}, + pipeline::{types::EncodedChunk, PipelineCtx, VideoCodec, VideoDecoder}, queue::PipelineEvent, }; use super::VideoDecoderOptions; mod ffmpeg_h264; +mod vulkan_video; 
pub fn start_video_decoder_thread( options: VideoDecoderOptions, + pipeline_ctx: &PipelineCtx, chunks_receiver: Receiver>, frame_sender: Sender>, input_id: InputId, ) -> Result<(), InputInitError> { - match options.codec { - VideoCodec::H264 => { + match (options.codec, options.decoder) { + (VideoCodec::H264, VideoDecoder::FFmpegH264) => { ffmpeg_h264::start_ffmpeg_decoder_thread(chunks_receiver, frame_sender, input_id) } + + (VideoCodec::H264, VideoDecoder::VulkanVideo) => { + vulkan_video::start_vulkan_video_decoder_thread( + pipeline_ctx.vulkan_ctx.clone(), + chunks_receiver, + frame_sender, + input_id, + ) + } } } diff --git a/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs new file mode 100644 index 000000000..241cdf929 --- /dev/null +++ b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs @@ -0,0 +1,108 @@ +use std::sync::Arc; + +use compositor_render::{Frame, FrameData, InputId, Resolution}; +use crossbeam_channel::{Receiver, Sender}; +use tracing::{debug, error, span, trace, warn, Level}; +use vk_video::{Decoder, VulkanCtx}; + +use crate::{ + error::InputInitError, + pipeline::{EncodedChunk, EncodedChunkKind, VideoCodec}, + queue::PipelineEvent, +}; + +pub fn start_vulkan_video_decoder_thread( + vulkan_ctx: Arc, + chunks_receiver: Receiver>, + frame_sender: Sender>, + input_id: InputId, +) -> Result<(), InputInitError> { + let (init_result_sender, init_result_receiver) = crossbeam_channel::bounded(0); + + std::thread::Builder::new() + .name(format!("h264 vulkan video decoder {}", input_id.0)) + .spawn(move || { + let _span = span!( + Level::INFO, + "h264 vulkan video decoder", + input_id = input_id.to_string() + ) + .entered(); + run_decoder_thread( + vulkan_ctx, + init_result_sender, + chunks_receiver, + frame_sender, + ) + }) + .unwrap(); + + init_result_receiver.recv().unwrap()?; + + Ok(()) +} + +fn run_decoder_thread( + vulkan_ctx: Arc, + init_result_sender: 
Sender>, + chunks_receiver: Receiver>, + frame_sender: Sender>, +) { + let mut decoder = match Decoder::new(vulkan_ctx) { + Ok(decoder) => { + init_result_sender.send(Ok(())).unwrap(); + decoder + } + Err(err) => { + init_result_sender.send(Err(err.into())).unwrap(); + return; + } + }; + + for chunk in chunks_receiver { + let chunk = match chunk { + PipelineEvent::Data(chunk) => chunk, + PipelineEvent::EOS => { + break; + } + }; + + if chunk.kind != EncodedChunkKind::Video(VideoCodec::H264) { + error!( + "H264 decoder received chunk of wrong kind: {:?}", + chunk.kind + ); + continue; + } + + let result = match decoder.decode_to_wgpu_textures(&chunk.data) { + Ok(res) => res, + Err(err) => { + warn!("Failed to decode frame: {err}"); + continue; + } + }; + + for frame in result { + let resolution = Resolution { + width: frame.width() as usize, + height: frame.height() as usize, + }; + + let frame = Frame { + data: FrameData::Nv12WgpuTexture(frame.into()), + pts: chunk.pts, + resolution, + }; + + trace!(pts=?frame.pts, "H264 decoder produced a frame."); + if frame_sender.send(PipelineEvent::Data(frame)).is_err() { + debug!("Failed to send frame from H264 decoder. Channel closed."); + return; + } + } + } + if frame_sender.send(PipelineEvent::EOS).is_err() { + debug!("Failed to send EOS from H264 decoder. 
Channel closed.") + } +} diff --git a/compositor_pipeline/src/pipeline/input.rs b/compositor_pipeline/src/pipeline/input.rs index e0342df4d..baba3159a 100644 --- a/compositor_pipeline/src/pipeline/input.rs +++ b/compositor_pipeline/src/pipeline/input.rs @@ -162,6 +162,7 @@ fn start_input_threads( let (sender, receiver) = bounded(10); start_video_decoder_thread( decoder_options, + pipeline_ctx, chunk_receiver, sender, input_id.clone(), diff --git a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs index 37ddfb837..3811ea005 100644 --- a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs +++ b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs @@ -15,7 +15,7 @@ use crate::{ pipeline::{ decoder::{AacDecoderOptions, AudioDecoderOptions, VideoDecoderOptions}, types::{EncodedChunk, EncodedChunkKind}, - AudioCodec, VideoCodec, + AudioCodec, VideoCodec, VideoDecoder, }, queue::PipelineEvent, }; @@ -235,6 +235,7 @@ impl Mp4FileReader { let decoder_options = VideoDecoderOptions { codec: VideoCodec::H264, + decoder: VideoDecoder::FFmpegH264, }; Some(TrackInfo { diff --git a/compositor_pipeline/src/pipeline/types.rs b/compositor_pipeline/src/pipeline/types.rs index a30361073..6f028d402 100644 --- a/compositor_pipeline/src/pipeline/types.rs +++ b/compositor_pipeline/src/pipeline/types.rs @@ -55,6 +55,12 @@ pub struct RawDataSender { pub audio: Option>>, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VideoDecoder { + FFmpegH264, + VulkanVideo, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum VideoCodec { H264, diff --git a/compositor_render/src/error.rs b/compositor_render/src/error.rs index 5f6474355..49884c28e 100644 --- a/compositor_render/src/error.rs +++ b/compositor_render/src/error.rs @@ -13,15 +13,6 @@ use crate::{OutputId, RendererId}; pub use crate::registry::RegisterError; pub use crate::wgpu::WgpuError; -#[derive(Debug, thiserror::Error)] -pub enum 
InitPipelineError { - #[error(transparent)] - InitRendererEngine(#[from] InitRendererEngineError), - - #[error("Failed to create a download directory.")] - CreateDownloadDir(#[source] std::io::Error), -} - #[derive(Debug, thiserror::Error)] pub enum InitRendererEngineError { #[error("Failed to initialize a wgpu context.")] diff --git a/compositor_render/src/wgpu/ctx.rs b/compositor_render/src/wgpu/ctx.rs index 40207b134..e6ee5e984 100644 --- a/compositor_render/src/wgpu/ctx.rs +++ b/compositor_render/src/wgpu/ctx.rs @@ -42,12 +42,7 @@ impl WgpuCtx { } fn check_wgpu_ctx(device: &wgpu::Device, features: wgpu::Features) { - let expected_features = match cfg!(target_arch = "wasm32") { - false => { - features | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS - } - true => features | wgpu::Features::PUSH_CONSTANTS, - }; + let expected_features = features | required_wgpu_features(); let missing_features = expected_features.difference(device.features()); if !missing_features.is_empty() { @@ -92,6 +87,13 @@ impl WgpuCtx { } } +pub fn required_wgpu_features() -> wgpu::Features { + match cfg!(target_arch = "wasm32") { + false => wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, + true => wgpu::Features::PUSH_CONSTANTS, + } +} + pub fn create_wgpu_ctx( force_gpu: bool, features: wgpu::Features, @@ -120,8 +122,7 @@ pub fn create_wgpu_ctx( error!("Selected adapter is CPU based. 
Aborting."); return Err(CreateWgpuCtxError::NoAdapter); } - let required_features = - features | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS; + let required_features = features | required_wgpu_features(); let missing_features = required_features.difference(adapter.features()); if !missing_features.is_empty() { diff --git a/compositor_render/src/wgpu/texture/nv12.rs b/compositor_render/src/wgpu/texture/nv12.rs index 442702742..85c3f6dca 100644 --- a/compositor_render/src/wgpu/texture/nv12.rs +++ b/compositor_render/src/wgpu/texture/nv12.rs @@ -31,7 +31,7 @@ impl<'a> NV12TextureView<'a> { let view_y = texture.create_view(&wgpu::TextureViewDescriptor { label: Some("y plane nv12 texture view"), dimension: Some(wgpu::TextureViewDimension::D2), - format: Some(wgpu::TextureFormat::NV12), + format: Some(wgpu::TextureFormat::R8Unorm), aspect: wgpu::TextureAspect::Plane0, ..Default::default() }); @@ -39,7 +39,7 @@ impl<'a> NV12TextureView<'a> { let view_uv = texture.create_view(&wgpu::TextureViewDescriptor { label: Some("uv plane nv12 texture view"), dimension: Some(wgpu::TextureViewDimension::D2), - format: Some(wgpu::TextureFormat::NV12), + format: Some(wgpu::TextureFormat::Rg8Unorm), aspect: wgpu::TextureAspect::Plane1, ..Default::default() }); diff --git a/integration_tests/examples/raw_channel_input.rs b/integration_tests/examples/raw_channel_input.rs index 2fec88abd..5bb660079 100644 --- a/integration_tests/examples/raw_channel_input.rs +++ b/integration_tests/examples/raw_channel_input.rs @@ -17,12 +17,12 @@ use compositor_pipeline::{ OutputOptions, OutputProtocolOptions, }, rtp::RequestedPort, - Options, Pipeline, PipelineOutputEndCondition, RegisterOutputOptions, VideoCodec, + Options, Pipeline, PipelineOutputEndCondition, PreinitializedContext, + RegisterOutputOptions, VideoCodec, }, queue::{PipelineEvent, QueueInputOptions}, }; use compositor_render::{ - create_wgpu_ctx, error::ErrorStack, scene::{Component, InputStreamComponent}, Frame, 
FrameData, InputId, OutputId, Resolution, @@ -44,7 +44,15 @@ fn main() { level: "info,wgpu_hal=warn,wgpu_core=warn".to_string(), }); let config = read_config(); - let (wgpu_device, wgpu_queue) = create_wgpu_ctx(false, Default::default()).unwrap(); + let ctx = PreinitializedContext::new( + wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + .unwrap(); + let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { queue_options: config.queue_options, @@ -54,8 +62,8 @@ fn main() { download_root: config.download_root, output_sample_rate: config.output_sample_rate, wgpu_features: config.required_wgpu_features, - wgpu_ctx: Some((wgpu_device.clone(), wgpu_queue.clone())), load_system_fonts: Some(true), + wgpu_ctx: Some(ctx), }) .unwrap_or_else(|err| { panic!( diff --git a/integration_tests/examples/raw_channel_output.rs b/integration_tests/examples/raw_channel_output.rs index a22c655a8..1fa196f5f 100644 --- a/integration_tests/examples/raw_channel_output.rs +++ b/integration_tests/examples/raw_channel_output.rs @@ -16,14 +16,13 @@ use compositor_pipeline::{ InputOptions, }, output::{RawAudioOptions, RawDataOutputOptions, RawVideoOptions}, - Options, PipelineOutputEndCondition, RawDataReceiver, RegisterInputOptions, - RegisterOutputOptions, + Options, PipelineOutputEndCondition, PreinitializedContext, RawDataReceiver, + RegisterInputOptions, RegisterOutputOptions, }, queue::{PipelineEvent, QueueInputOptions}, Pipeline, }; use compositor_render::{ - create_wgpu_ctx, error::ErrorStack, scene::{Component, InputStreamComponent}, Frame, FrameData, InputId, OutputId, Resolution, @@ -58,7 +57,15 @@ fn main() { }); let mut config = read_config(); config.queue_options.ahead_of_time_processing = true; - let (wgpu_device, wgpu_queue) = create_wgpu_ctx(false, 
Default::default()).unwrap(); + let ctx = PreinitializedContext::new( + wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + .unwrap(); + let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { queue_options: config.queue_options, @@ -68,8 +75,8 @@ fn main() { download_root: config.download_root, output_sample_rate: config.output_sample_rate, wgpu_features: config.required_wgpu_features, - wgpu_ctx: Some((wgpu_device.clone(), wgpu_queue.clone())), load_system_fonts: Some(true), + wgpu_ctx: Some(ctx), }) .unwrap_or_else(|err| { panic!( diff --git a/integration_tests/examples/vulkan.rs b/integration_tests/examples/vulkan.rs new file mode 100644 index 000000000..675985901 --- /dev/null +++ b/integration_tests/examples/vulkan.rs @@ -0,0 +1,109 @@ +use anyhow::Result; +use compositor_api::types::Resolution; +use serde_json::json; +use std::time::Duration; + +use integration_tests::{ + examples::{self, run_example, TestSample}, + ffmpeg::{start_ffmpeg_receive, start_ffmpeg_send}, +}; + +const VIDEO_RESOLUTION: Resolution = Resolution { + width: 1280, + height: 720, +}; + +const IP: &str = "127.0.0.1"; +const INPUT_PORT: u16 = 8002; +const OUTPUT_PORT: u16 = 8004; + +const VIDEOS: u16 = 6; + +fn main() { + run_example(client_code); +} + +fn client_code() -> Result<()> { + start_ffmpeg_receive(Some(OUTPUT_PORT), None)?; + + let mut children = Vec::new(); + + for i in 1..VIDEOS + 1 { + let input_name = format!("input_{i}"); + + examples::post( + &format!("input/{input_name}/register"), + &json!({ + "type": "rtp_stream", + "port": INPUT_PORT + 2 + 2 * i, + "video": { + "decoder": "vulkan_video" + } + }), + )?; + + children.push(json!({ + "type": "input_stream", + "input_id": input_name, + })); + } + + let scene = json!({ + "type": "tiles", + "id": 
"tile", + "padding": 5, + "background_color_rgba": "#444444FF", + "children": children, + "transition": { + "duration_ms": 700, + "easing_function": { + "function_name": "cubic_bezier", + "points": [0.35, 0.22, 0.1, 0.8] + } + }, + }); + + let shader_source = include_str!("./silly.wgsl"); + examples::post( + "shader/shader_example_1/register", + &json!({ + "source": shader_source, + }), + )?; + + examples::post( + "output/output_1/register", + &json!({ + "type": "rtp_stream", + "port": OUTPUT_PORT, + "ip": IP, + "video": { + "resolution": { + "width": VIDEO_RESOLUTION.width, + "height": VIDEO_RESOLUTION.height, + }, + "encoder": { + "type": "ffmpeg_h264", + "preset": "ultrafast" + }, + "initial": { + "root": scene + } + } + }), + )?; + + std::thread::sleep(Duration::from_millis(500)); + + examples::post("start", &json!({}))?; + + for i in 1..VIDEOS + 1 { + start_ffmpeg_send( + IP, + Some(INPUT_PORT + 2 + 2 * i), + None, + TestSample::BigBuckBunny, + )?; + } + Ok(()) +} diff --git a/src/state.rs b/src/state.rs index e3f735155..236f4bebc 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,8 +1,11 @@ use std::sync::{Arc, Mutex, MutexGuard}; use axum::response::IntoResponse; -use compositor_pipeline::pipeline::{self}; -use compositor_render::{error::InitPipelineError, EventLoop}; +use compositor_pipeline::{ + error::InitPipelineError, + pipeline::{self}, +}; +use compositor_render::EventLoop; use serde::Serialize; diff --git a/vk-video/.gitignore b/vk-video/.gitignore new file mode 100644 index 000000000..dde3e786e --- /dev/null +++ b/vk-video/.gitignore @@ -0,0 +1,5 @@ +/target +Cargo.lock +*.h264 +*.jpeg +*.mp4 diff --git a/vk-video/Cargo.toml b/vk-video/Cargo.toml new file mode 100644 index 000000000..31b60e970 --- /dev/null +++ b/vk-video/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "vk-video" +version = "0.1.0" +edition = "2021" +authors = ["Software Mansion "] +readme = "README.md" +license = "MIT" +repository = 
"https://github.com/software-mansion/live-compositor" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +ash = "0.38.0" +derivative = "2.2.0" +h264-reader = { git = "https://github.com/membraneframework-labs/h264-reader.git", branch = "@jerzywilczek/scaling-lists" } +thiserror = "1.0.59" +tracing = "0.1.40" +vk-mem = "0.4.0" +wgpu = "22.1.0" + +[dev-dependencies] +tracing-subscriber = "0.3.18" + diff --git a/vk-video/LICENSE b/vk-video/LICENSE new file mode 100644 index 000000000..f9b288684 --- /dev/null +++ b/vk-video/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 Software Mansion + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/vk-video/examples/basic.rs b/vk-video/examples/basic.rs new file mode 100644 index 000000000..d75452d81 --- /dev/null +++ b/vk-video/examples/basic.rs @@ -0,0 +1,39 @@ +use std::io::Write; + +fn main() { + let subscriber = tracing_subscriber::FmtSubscriber::builder() + .with_max_level(tracing::Level::INFO) + .finish(); + + tracing::subscriber::set_global_default(subscriber).expect("Failed to initialize tracing"); + + let args = std::env::args().collect::>(); + if args.len() != 2 { + println!("usage: {} FILENAME", args[0]); + return; + } + + let h264_bytestream = std::fs::read(&args[1]).unwrap_or_else(|_| panic!("read {}", args[1])); + + let vulkan_ctx = std::sync::Arc::new( + vk_video::VulkanCtx::new( + wgpu::Features::empty(), + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + .unwrap(), + ); + let mut decoder = vk_video::Decoder::new(vulkan_ctx).unwrap(); + + let mut output_file = std::fs::File::create("output.nv12").unwrap(); + + for chunk in h264_bytestream.chunks(256) { + let frames = decoder.decode_to_bytes(chunk).unwrap(); + + for frame in frames { + output_file.write_all(&frame).unwrap(); + } + } +} diff --git a/vk-video/examples/wgpu.rs b/vk-video/examples/wgpu.rs new file mode 100644 index 000000000..a5a8f03bd --- /dev/null +++ b/vk-video/examples/wgpu.rs @@ -0,0 +1,157 @@ +use std::io::Write; + +fn main() { + let subscriber = tracing_subscriber::FmtSubscriber::builder() + .with_max_level(tracing::Level::INFO) + .finish(); + + tracing::subscriber::set_global_default(subscriber).expect("Failed to initialize tracing"); + + let args = std::env::args().collect::>(); + if args.len() != 2 { + println!("usage: {} FILENAME", args[0]); + return; + } + let h264_bytestream = std::fs::read(&args[1]).unwrap_or_else(|_| panic!("read {}", args[1])); + + let vulkan_ctx = std::sync::Arc::new( + vk_video::VulkanCtx::new( + wgpu::Features::empty(), + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + 
.unwrap(), + ); + let mut decoder = vk_video::Decoder::new(vulkan_ctx.clone()).unwrap(); + + let mut output_file = std::fs::File::create("output.nv12").unwrap(); + + for chunk in h264_bytestream.chunks(256) { + let frames = decoder.decode_to_wgpu_textures(chunk).unwrap(); + + let device = &vulkan_ctx.wgpu_ctx.device; + let queue = &vulkan_ctx.wgpu_ctx.queue; + for frame in frames { + let decoded_frame = download_wgpu_texture(device, queue, frame); + output_file.write_all(&decoded_frame).unwrap(); + } + } +} + +fn download_wgpu_texture( + device: &wgpu::Device, + queue: &wgpu::Queue, + frame: wgpu::Texture, +) -> Vec { + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + let y_plane_bytes_per_row = (frame.width() as u64 + 255) / 256 * 256; + let y_plane_size = y_plane_bytes_per_row * frame.height() as u64; + + let uv_plane_bytes_per_row = y_plane_bytes_per_row; + let uv_plane_size = uv_plane_bytes_per_row * frame.height() as u64 / 2; + + let buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: y_plane_size + uv_plane_size, + usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + aspect: wgpu::TextureAspect::Plane0, + origin: wgpu::Origin3d { x: 0, y: 0, z: 0 }, + texture: &frame, + mip_level: 0, + }, + wgpu::ImageCopyBuffer { + buffer: &buffer, + layout: wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: Some(y_plane_bytes_per_row as u32), + rows_per_image: None, + }, + }, + wgpu::Extent3d { + width: frame.width(), + height: frame.height(), + depth_or_array_layers: 1, + }, + ); + + encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + aspect: wgpu::TextureAspect::Plane1, + origin: wgpu::Origin3d { x: 0, y: 0, z: 0 }, + texture: &frame, + mip_level: 0, + }, + wgpu::ImageCopyBuffer { + buffer: &buffer, + layout: wgpu::ImageDataLayout { + offset: y_plane_size, + bytes_per_row: 
Some(uv_plane_bytes_per_row as u32), + rows_per_image: None, + }, + }, + wgpu::Extent3d { + width: frame.width() / 2, + height: frame.height() / 2, + depth_or_array_layers: 1, + }, + ); + + queue.submit(Some(encoder.finish())); + + let (y_tx, y_rx) = std::sync::mpsc::channel(); + let (uv_tx, uv_rx) = std::sync::mpsc::channel(); + let width = frame.width() as usize; + + wgpu::util::DownloadBuffer::read_buffer( + device, + queue, + &buffer.slice(..y_plane_size), + move |buf| { + let buf = buf.unwrap(); + let mut result = Vec::new(); + + for chunk in buf + .chunks(y_plane_bytes_per_row as usize) + .map(|chunk| &chunk[..width]) + { + result.write_all(chunk).unwrap(); + } + + y_tx.send(result).unwrap(); + }, + ); + + wgpu::util::DownloadBuffer::read_buffer( + device, + queue, + &buffer.slice(y_plane_size..), + move |buf| { + let buf = buf.unwrap(); + let mut result = Vec::new(); + + for chunk in buf + .chunks(uv_plane_bytes_per_row as usize) + .map(|chunk| &chunk[..width]) + { + result.write_all(chunk).unwrap(); + } + + uv_tx.send(result).unwrap(); + }, + ); + + device.poll(wgpu::Maintain::Wait); + + let mut result = Vec::new(); + result.append(&mut y_rx.recv().unwrap()); + result.append(&mut uv_rx.recv().unwrap()); + + result +} diff --git a/vk-video/src/lib.rs b/vk-video/src/lib.rs new file mode 100644 index 000000000..e47c7deda --- /dev/null +++ b/vk-video/src/lib.rs @@ -0,0 +1,68 @@ +mod parser; +mod vulkan_decoder; + +use parser::Parser; +use vulkan_decoder::VulkanDecoder; + +pub use parser::ParserError; +pub use vulkan_decoder::{VulkanCtx, VulkanCtxError, VulkanDecoderError}; + +pub use vulkan_decoder::WgpuCtx; + +pub struct Decoder<'a> { + vulkan_decoder: VulkanDecoder<'a>, + parser: Parser, +} + +#[derive(Debug, thiserror::Error)] +pub enum DecoderError { + #[error("Error originating in the decoder: {0}")] + VulkanDecoderError(#[from] VulkanDecoderError), + + #[error("Error originating in the h264 parser: {0}")] + ParserError(#[from] ParserError), +} + +impl<'a> 
Decoder<'a> { + pub fn new(vulkan_ctx: std::sync::Arc) -> Result { + let parser = Parser::default(); + let vulkan_decoder = VulkanDecoder::new(vulkan_ctx)?; + + Ok(Self { + parser, + vulkan_decoder, + }) + } +} + +impl Decoder<'_> { + /// The result is a [`Vec`] of [`Vec`]. Each [`Vec`] contains a single frame in the + /// NV12 format. + pub fn decode_to_bytes( + &mut self, + h264_bytestream: &[u8], + ) -> Result>, DecoderError> { + let instructions = self + .parser + .parse(h264_bytestream) + .into_iter() + .collect::, _>>()?; + + Ok(self.vulkan_decoder.decode_to_bytes(&instructions)?) + } + + // TODO: the below hasn't been verified. + /// The produced textures have the [`wgpu::TextureFormat::NV12`] format and can be used as a copy source or a texture binding. + pub fn decode_to_wgpu_textures( + &mut self, + h264_bytestream: &[u8], + ) -> Result, DecoderError> { + let instructions = self + .parser + .parse(h264_bytestream) + .into_iter() + .collect::, _>>()?; + + Ok(self.vulkan_decoder.decode_to_wgpu_textures(&instructions)?) 
+ } +} diff --git a/vk-video/src/parser.rs b/vk-video/src/parser.rs new file mode 100644 index 000000000..0b11a640e --- /dev/null +++ b/vk-video/src/parser.rs @@ -0,0 +1,761 @@ +use std::{ + io::Read, + sync::{mpsc, Arc}, +}; + +use h264_reader::{ + annexb::AnnexBReader, + nal::{ + pps::PicParameterSet, + slice::{DecRefPicMarking, NumRefIdxActive, RefPicListModifications, SliceHeader}, + sps::SeqParameterSet, + Nal, RefNal, + }, + push::{AccumulatedNalHandler, NalAccumulator, NalInterest}, +}; +use tracing::trace; + +mod au_splitter; + +#[derive(Debug, thiserror::Error)] +pub enum ReferenceManagementError { + #[error("B frames are not supported")] + BFramesNotSupported, + + #[error("Long-term references are not supported")] + LongTermRefsNotSupported, + + #[error("SI frames are not supported")] + SIFramesNotSupported, + + #[error("SP frames are not supported")] + SPFramesNotSupported, + + #[error("Adaptive memory control decoded reference picture marking process is not supported")] + AdaptiveMemCtlNotSupported, + + #[error("Reference picture list modifications are not supported")] + RefPicListModificationsNotSupported, + + #[error("PicOrderCntType {0} is not supperted")] + PicOrderCntTypeNotSupported(u8), + + #[error("pic_order_cnt_lsb is not present in a slice header, but is required for decoding")] + PicOrderCntLsbNotPresent, +} + +#[derive(Debug, Default, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct ReferenceId(usize); + +#[derive(Clone, derivative::Derivative)] +#[derivative(Debug)] +#[allow(non_snake_case)] +pub struct DecodeInformation { + pub(crate) reference_list: Option>, + #[derivative(Debug = "ignore")] + pub(crate) rbsp_bytes: Vec, + pub(crate) slice_indices: Vec, + #[derivative(Debug = "ignore")] + pub(crate) header: Arc, + pub(crate) sps_id: u8, + pub(crate) pps_id: u8, + pub(crate) picture_info: PictureInfo, +} + +#[derive(Debug, Clone)] +pub(crate) struct ReferencePictureInfo { + pub(crate) id: ReferenceId, + pub(crate) 
picture_info: PictureInfo, +} + +#[derive(Debug, Clone, Copy)] +#[allow(non_snake_case)] +pub(crate) struct PictureInfo { + pub(crate) used_for_long_term_reference: bool, + pub(crate) non_existing: bool, + pub(crate) FrameNum: u16, + pub(crate) PicOrderCnt: [i32; 2], +} + +#[derive(Debug, Clone)] +pub enum DecoderInstruction { + Decode { + decode_info: DecodeInformation, + }, + + DecodeAndStoreAs { + decode_info: DecodeInformation, + reference_id: ReferenceId, + }, + + Idr { + decode_info: DecodeInformation, + reference_id: ReferenceId, + }, + + Drop { + reference_ids: Vec, + }, + + Sps(SeqParameterSet), + + Pps(PicParameterSet), +} + +#[derive(Debug, Default)] +struct ReferenceContext { + pictures: ReferencePictures, + next_reference_id: ReferenceId, + _previous_frame_num: usize, + prev_pic_order_cnt_msb: i32, + prev_pic_order_cnt_lsb: i32, +} + +impl ReferenceContext { + fn get_next_reference_id(&mut self) -> ReferenceId { + let result = self.next_reference_id; + self.next_reference_id = ReferenceId(result.0 + 1); + result + } + + fn reset_state(&mut self) { + *self = Self { + pictures: ReferencePictures::default(), + next_reference_id: ReferenceId::default(), + _previous_frame_num: 0, + prev_pic_order_cnt_msb: 0, + prev_pic_order_cnt_lsb: 0, + }; + } + + fn add_short_term_reference( + &mut self, + header: Arc, + pic_order_cnt: [i32; 2], + ) -> ReferenceId { + let id = self.get_next_reference_id(); + self.pictures.short_term.push(ShortTermReferencePicture { + header, + id, + pic_order_cnt, + }); + id + } + + fn put_picture( + &mut self, + mut slices: Vec, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result, ParserError> { + let header = slices.last().unwrap().header.clone(); + let mut rbsp_bytes = Vec::new(); + let mut slice_indices = Vec::new(); + for slice in &mut slices { + if slice.rbsp_bytes.is_empty() { + continue; + } + slice_indices.push(rbsp_bytes.len()); + rbsp_bytes.append(&mut slice.rbsp_bytes); + } + + match header.dec_ref_pic_marking { + 
Some(DecRefPicMarking::Idr { + long_term_reference_flag, + .. + }) => { + if long_term_reference_flag { + Err(ReferenceManagementError::LongTermRefsNotSupported)?; + } + + let decode_info = self.decode_information_for_frame( + header.clone(), + slice_indices, + rbsp_bytes, + sps, + pps, + )?; + + self.reset_state(); + + let reference_id = + self.add_short_term_reference(header, decode_info.picture_info.PicOrderCnt); + + Ok(vec![DecoderInstruction::Idr { + decode_info, + reference_id, + }]) + } + + Some(DecRefPicMarking::SlidingWindow) => { + let num_short_term = self.pictures.short_term.len(); + let num_long_term = self.pictures.long_term.len(); + + let decode_info = self.decode_information_for_frame( + header.clone(), + slice_indices, + rbsp_bytes, + sps, + pps, + )?; + let reference_id = self + .add_short_term_reference(header.clone(), decode_info.picture_info.PicOrderCnt); + + let mut decoder_instructions = vec![DecoderInstruction::DecodeAndStoreAs { + decode_info, + reference_id, + }]; + + if num_short_term + num_long_term == sps.max_num_ref_frames.max(1) as usize + && !self.pictures.short_term.is_empty() + { + let (idx, _) = self + .pictures + .short_term + .iter() + .enumerate() + .min_by_key(|(_, reference)| { + reference + .decode_picture_numbers(header.frame_num as i64, sps) + .unwrap() + .FrameNumWrap + }) + .unwrap(); + + decoder_instructions.push(DecoderInstruction::Drop { + reference_ids: vec![self.pictures.short_term.remove(idx).id], + }) + } + + Ok(decoder_instructions) + } + + Some(DecRefPicMarking::Adaptive(_)) => { + Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
+ } + + // this picture is not a reference + None => Ok(vec![DecoderInstruction::Decode { + decode_info: self.decode_information_for_frame( + header, + slice_indices, + rbsp_bytes, + sps, + pps, + )?, + }]), + } + } + + fn decode_information_for_frame( + &mut self, + header: Arc, + slice_indices: Vec, + rbsp_bytes: Vec, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result { + let reference_list = match header.slice_type.family { + h264_reader::nal::slice::SliceFamily::P => { + let reference_list = + self.initialize_reference_picture_list_for_frame(&header, sps, pps)?; + + match &header.ref_pic_list_modification { + Some(RefPicListModifications::P { + ref_pic_list_modification_l0, + }) => { + if !ref_pic_list_modification_l0.is_empty() { + Err(ReferenceManagementError::RefPicListModificationsNotSupported)?; + } + } + + None + | Some(RefPicListModifications::I) + | Some(RefPicListModifications::B { .. }) => unreachable!(), + } + + Some(reference_list) + } + h264_reader::nal::slice::SliceFamily::I => None, + h264_reader::nal::slice::SliceFamily::B => { + return Err(ReferenceManagementError::BFramesNotSupported)? + } + h264_reader::nal::slice::SliceFamily::SP => { + return Err(ReferenceManagementError::SPFramesNotSupported)? + } + h264_reader::nal::slice::SliceFamily::SI => { + return Err(ReferenceManagementError::SIFramesNotSupported)? 
+ } + }; + + let pic_order_cnt = match sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeZero { + log2_max_pic_order_cnt_lsb_minus4, + } => { + // this section is very hard to read, but all of this code is just copied from the + // h.264 spec, where it looks almost exactly like this + + let max_pic_order_cnt_lsb = 2_i32.pow(log2_max_pic_order_cnt_lsb_minus4 as u32 + 4); + + let (prev_pic_order_cnt_msb, prev_pic_order_cnt_lsb) = + if header.idr_pic_id.is_some() { + (0, 0) + } else { + (self.prev_pic_order_cnt_msb, self.prev_pic_order_cnt_lsb) + }; + + let (pic_order_cnt_lsb, delta_pic_order_cnt_bottom) = match header + .pic_order_cnt_lsb + .as_ref() + .ok_or(ReferenceManagementError::PicOrderCntLsbNotPresent)? + { + h264_reader::nal::slice::PicOrderCountLsb::Frame(pic_order_cnt_lsb) => { + (*pic_order_cnt_lsb, 0) + } + h264_reader::nal::slice::PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + } => (*pic_order_cnt_lsb, *delta_pic_order_cnt_bottom), + h264_reader::nal::slice::PicOrderCountLsb::FieldsDelta(_) => { + Err(ReferenceManagementError::PicOrderCntLsbNotPresent)? 
+ } + }; + + let pic_order_cnt_lsb = pic_order_cnt_lsb as i32; + + let pic_order_cnt_msb = if pic_order_cnt_lsb < prev_pic_order_cnt_lsb + && prev_pic_order_cnt_lsb - pic_order_cnt_lsb >= max_pic_order_cnt_lsb / 2 + { + prev_pic_order_cnt_msb + max_pic_order_cnt_lsb + } else if pic_order_cnt_lsb > prev_pic_order_cnt_lsb + && pic_order_cnt_lsb - prev_pic_order_cnt_lsb > max_pic_order_cnt_lsb / 2 + { + prev_pic_order_cnt_msb - max_pic_order_cnt_lsb + } else { + prev_pic_order_cnt_msb + }; + + let pic_order_cnt = if header.field_pic == h264_reader::nal::slice::FieldPic::Frame + { + let top_field_order_cnt = pic_order_cnt_msb + pic_order_cnt_lsb; + + let bottom_field_order_cnt = top_field_order_cnt + delta_pic_order_cnt_bottom; + + top_field_order_cnt.min(bottom_field_order_cnt) + } else { + pic_order_cnt_msb + pic_order_cnt_lsb + }; + + self.prev_pic_order_cnt_msb = pic_order_cnt_msb; + self.prev_pic_order_cnt_lsb = pic_order_cnt_lsb; + + pic_order_cnt + } + + h264_reader::nal::sps::PicOrderCntType::TypeOne { .. } => { + Err(ReferenceManagementError::PicOrderCntTypeNotSupported(1))? + } + + h264_reader::nal::sps::PicOrderCntType::TypeTwo => match header.dec_ref_pic_marking { + None => 2 * header.frame_num as i32 - 1, + Some(DecRefPicMarking::Idr { .. }) | Some(DecRefPicMarking::SlidingWindow) => { + 2 * header.frame_num as i32 + } + Some(DecRefPicMarking::Adaptive(..)) => { + Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
+ } + }, + }; + + let pic_order_cnt = [pic_order_cnt; 2]; + + Ok(DecodeInformation { + reference_list, + header: header.clone(), + slice_indices, + rbsp_bytes, + sps_id: sps.id().id(), + pps_id: pps.pic_parameter_set_id.id(), + picture_info: PictureInfo { + non_existing: false, + used_for_long_term_reference: false, + PicOrderCnt: pic_order_cnt, + FrameNum: header.frame_num, + }, + }) + } + + fn initialize_short_term_reference_picture_list_for_frame( + &self, + header: &SliceHeader, + sps: &SeqParameterSet, + ) -> Result, ParserError> { + let mut short_term_reference_list = self + .pictures + .short_term + .iter() + .map(|reference| { + Ok(( + reference, + reference.decode_picture_numbers(header.frame_num.into(), sps)?, + )) + }) + .collect::, ParserError>>()?; + + short_term_reference_list.sort_by_key(|(_, numbers)| -numbers.PicNum); + + let short_term_reference_list = short_term_reference_list + .into_iter() + .map(|(reference, numbers)| ReferencePictureInfo { + id: reference.id, + picture_info: PictureInfo { + FrameNum: numbers.FrameNum as u16, + used_for_long_term_reference: false, + non_existing: false, + PicOrderCnt: reference.pic_order_cnt, + }, + }) + .collect::>(); + + Ok(short_term_reference_list) + } + + fn initialize_long_term_reference_picture_list_for_frame( + &self, + ) -> Result, ReferenceManagementError> { + if !self.pictures.long_term.is_empty() { + panic!("long-term references are not supported!"); + } + + Ok(Vec::new()) + } + + fn initialize_reference_picture_list_for_frame( + &self, + header: &SliceHeader, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result, ParserError> { + let num_ref_idx_l0_active = header + .num_ref_idx_active + .as_ref() + .map(|num| match num { + NumRefIdxActive::P { + num_ref_idx_l0_active_minus1, + } => Ok(*num_ref_idx_l0_active_minus1), + NumRefIdxActive::B { .. } => Err(ReferenceManagementError::BFramesNotSupported), + }) + .unwrap_or(Ok(pps.num_ref_idx_l0_default_active_minus1))? 
+ + 1; + + let short_term_reference_list = + self.initialize_short_term_reference_picture_list_for_frame(header, sps)?; + + let long_term_reference_list = + self.initialize_long_term_reference_picture_list_for_frame()?; + + let mut reference_list = short_term_reference_list + .into_iter() + .chain(long_term_reference_list) + .collect::>(); + + reference_list.truncate(num_ref_idx_l0_active as usize); + + Ok(reference_list) + } +} + +#[derive(Debug)] +struct ShortTermReferencePicture { + header: Arc, + id: ReferenceId, + pic_order_cnt: [i32; 2], +} + +impl ShortTermReferencePicture { + #[allow(non_snake_case)] + fn decode_picture_numbers( + &self, + current_frame_num: i64, + sps: &SeqParameterSet, + ) -> Result { + if self.header.field_pic != h264_reader::nal::slice::FieldPic::Frame { + return Err(ParserError::FieldsNotSupported); + } + + let MaxFrameNum = sps.max_frame_num(); + + let FrameNum = self.header.frame_num as i64; + + let FrameNumWrap = if FrameNum > current_frame_num { + FrameNum - MaxFrameNum + } else { + FrameNum + }; + + // this assumes we're dealing with a short-term reference frame + let PicNum = FrameNumWrap; + + Ok(ShortTermReferencePictureNumbers { + FrameNum, + FrameNumWrap, + PicNum, + }) + } +} + +#[derive(Debug)] +struct LongTermReferencePicture { + _header: Arc, + _id: ReferenceId, +} + +#[allow(non_snake_case)] +struct ShortTermReferencePictureNumbers { + FrameNum: i64, + + FrameNumWrap: i64, + + PicNum: i64, +} + +#[derive(Debug, Default)] +struct ReferencePictures { + long_term: Vec, + short_term: Vec, +} + +#[derive(Debug, thiserror::Error)] +pub enum ParserError { + #[error(transparent)] + ReferenceManagementError(#[from] ReferenceManagementError), + + #[error("Bitstreams that allow gaps in frame_num are not supported")] + GapsInFrameNumNotSupported, + + #[error("Streams containing fields instead of frames are not supported")] + FieldsNotSupported, + + #[error("Error while parsing a NAL header: {0:?}")] + 
NalHeaderParseError(h264_reader::nal::NalHeaderError), + + #[error("Error while parsing SPS: {0:?}")] + SpsParseError(h264_reader::nal::sps::SpsError), + + #[error("Error while parsing PPS: {0:?}")] + PpsParseError(h264_reader::nal::pps::PpsError), + + #[error("Error while parsing a slice: {0:?}")] + SliceParseError(h264_reader::nal::slice::SliceHeaderError), +} + +struct NalReceiver { + parser_ctx: h264_reader::Context, + au_splitter: au_splitter::AUSplitter, + reference_ctx: ReferenceContext, + debug_channel: mpsc::Sender, + decoder_channel: mpsc::Sender>, +} + +impl AccumulatedNalHandler for NalReceiver { + fn nal(&mut self, nal: RefNal<'_>) -> NalInterest { + if !nal.is_complete() { + return NalInterest::Buffer; + } + + match self.handle_nal(nal) { + Ok((debug_nalu, instructions)) => { + self.debug_channel.send(debug_nalu).unwrap(); + for instruction in instructions { + self.decoder_channel.send(Ok(instruction)).unwrap(); + } + } + + Err(err) => { + self.decoder_channel.send(Err(err)).unwrap(); + } + } + + NalInterest::Ignore + } +} + +impl NalReceiver { + fn handle_nal( + &mut self, + nal: RefNal<'_>, + ) -> Result<(NaluDebugInfo, Vec), ParserError> { + let nal_unit_type = nal + .header() + .map_err(ParserError::NalHeaderParseError)? + .nal_unit_type(); + + match nal_unit_type { + h264_reader::nal::UnitType::SeqParameterSet => { + let parsed = h264_reader::nal::sps::SeqParameterSet::from_bits(nal.rbsp_bits()) + .map_err(ParserError::SpsParseError)?; + + // Perhaps this shouldn't be here, but this is the only place we process sps + // before sending them to the decoder. It also seems that this is the only thing we + // need to check about the sps. + if parsed.gaps_in_frame_num_value_allowed_flag { + // TODO: what else to do here? sure we'll throw an error, but shouldn't we also + // terminate the parser somehow? 
+ // perhaps this should be considered in other places we throw errors too + Err(ParserError::GapsInFrameNumNotSupported) + } else { + self.parser_ctx.put_seq_param_set(parsed.clone()); + Ok(( + NaluDebugInfo::Sps(parsed.clone()), + vec![DecoderInstruction::Sps(parsed)], + )) + } + } + + h264_reader::nal::UnitType::PicParameterSet => { + let parsed = h264_reader::nal::pps::PicParameterSet::from_bits( + &self.parser_ctx, + nal.rbsp_bits(), + ) + .map_err(ParserError::PpsParseError)?; + + self.parser_ctx.put_pic_param_set(parsed.clone()); + + Ok(( + NaluDebugInfo::Pps(parsed.clone()), + vec![DecoderInstruction::Pps(parsed)], + )) + } + + h264_reader::nal::UnitType::SliceLayerWithoutPartitioningNonIdr + | h264_reader::nal::UnitType::SliceLayerWithoutPartitioningIdr => { + let (header, sps, pps) = h264_reader::nal::slice::SliceHeader::from_bits( + &self.parser_ctx, + &mut nal.rbsp_bits(), + nal.header().unwrap(), + ) + .map_err(ParserError::SliceParseError)?; + + let header = Arc::new(header); + + let debug_nalu = match nal_unit_type { + h264_reader::nal::UnitType::SliceLayerWithoutPartitioningIdr => { + NaluDebugInfo::SliceWithoutPartitioningHeaderIdr(header.clone()) + } + h264_reader::nal::UnitType::SliceLayerWithoutPartitioningNonIdr => { + NaluDebugInfo::SliceWithoutPartitioningHeaderNonIdr(header.clone()) + } + _ => unreachable!(), + }; + + let mut rbsp_bytes = vec![0, 0, 0, 1]; + nal.reader().read_to_end(&mut rbsp_bytes).unwrap(); + let slice = Slice { + nal_header: nal.header().unwrap(), + header, + pps_id: pps.pic_parameter_set_id, + rbsp_bytes, + }; + + let Some(slices) = self.au_splitter.put_slice(slice) else { + return Ok((debug_nalu, Vec::new())); + }; + + let instructions = self.reference_ctx.put_picture(slices, sps, pps)?; + + Ok((debug_nalu, instructions)) + } + + h264_reader::nal::UnitType::Unspecified(_) + | h264_reader::nal::UnitType::SliceDataPartitionALayer + | h264_reader::nal::UnitType::SliceDataPartitionBLayer + | 
h264_reader::nal::UnitType::SliceDataPartitionCLayer + | h264_reader::nal::UnitType::SEI + | h264_reader::nal::UnitType::AccessUnitDelimiter + | h264_reader::nal::UnitType::EndOfSeq + | h264_reader::nal::UnitType::EndOfStream + | h264_reader::nal::UnitType::FillerData + | h264_reader::nal::UnitType::SeqParameterSetExtension + | h264_reader::nal::UnitType::PrefixNALUnit + | h264_reader::nal::UnitType::SubsetSeqParameterSet + | h264_reader::nal::UnitType::DepthParameterSet + | h264_reader::nal::UnitType::SliceLayerWithoutPartitioningAux + | h264_reader::nal::UnitType::SliceExtension + | h264_reader::nal::UnitType::SliceExtensionViewComponent + | h264_reader::nal::UnitType::Reserved(_) => Ok(( + NaluDebugInfo::Other(format!("{:?}", nal.header().unwrap().nal_unit_type())), + Vec::new(), + )), + } + } +} + +trait SpsExt { + fn max_frame_num(&self) -> i64; +} + +impl SpsExt for SeqParameterSet { + fn max_frame_num(&self) -> i64 { + 1 << self.log2_max_frame_num() + } +} + +#[derive(Debug)] +// this struct is only ever printed out in debug mode, but clippy detects this as it not being +// used. 
+#[allow(dead_code)] +pub enum NaluDebugInfo { + Sps(SeqParameterSet), + Pps(PicParameterSet), + SliceWithoutPartitioningHeaderNonIdr(Arc), + SliceWithoutPartitioningHeaderIdr(Arc), + Other(String), +} + +pub struct Slice { + pub nal_header: h264_reader::nal::NalHeader, + pub pps_id: h264_reader::nal::pps::PicParamSetId, + pub header: Arc, + pub rbsp_bytes: Vec, +} + +pub struct Parser { + reader: AnnexBReader>, + debug_channel: mpsc::Receiver, + decoder_channel: mpsc::Receiver>, +} + +impl Default for Parser { + fn default() -> Self { + let (debug_tx, debug_rx) = mpsc::channel(); + let (decoder_tx, decoder_rx) = mpsc::channel(); + + Parser { + reader: AnnexBReader::accumulate(NalReceiver { + reference_ctx: ReferenceContext::default(), + au_splitter: au_splitter::AUSplitter::default(), + debug_channel: debug_tx, + decoder_channel: decoder_tx, + parser_ctx: h264_reader::Context::new(), + }), + debug_channel: debug_rx, + decoder_channel: decoder_rx, + } + } +} + +impl Parser { + pub fn parse(&mut self, bytes: &[u8]) -> Vec> { + self.reader.push(bytes); + + let mut instructions = Vec::new(); + while let Ok(instruction) = self.decoder_channel.try_recv() { + instructions.push(instruction); + } + while let Ok(nalu) = self.debug_channel.try_recv() { + trace!("parsed nalu: {nalu:#?}"); + } + + instructions + } +} diff --git a/vk-video/src/parser/au_splitter.rs b/vk-video/src/parser/au_splitter.rs new file mode 100644 index 000000000..29a84d964 --- /dev/null +++ b/vk-video/src/parser/au_splitter.rs @@ -0,0 +1,136 @@ +use h264_reader::nal::slice::PicOrderCountLsb; + +use super::Slice; + +#[derive(Default)] +pub(crate) struct AUSplitter { + buffered_nals: Vec, +} + +impl AUSplitter { + pub(crate) fn put_slice(&mut self, slice: Slice) -> Option> { + if self.is_new_au(&slice) { + let au = std::mem::take(&mut self.buffered_nals); + self.buffered_nals.push(slice); + if !au.is_empty() { + Some(au) + } else { + None + } + } else { + self.buffered_nals.push(slice); + None + } + } + 
+ /// returns `true` if `slice` is a first slice in an Access Unit + fn is_new_au(&self, slice: &Slice) -> bool { + let Some(last) = self.buffered_nals.last() else { + return true; + }; + + first_mb_in_slice_zero(slice) + || frame_num_differs(last, slice) + || pps_id_differs(last, slice) + || field_pic_flag_differs(last, slice) + || nal_ref_idc_differs_one_zero(last, slice) + || pic_order_cnt_zero_check(last, slice) + || idr_and_non_idr(last, slice) + || idrs_where_idr_pic_id_differs(last, slice) + } +} + +// defguardp first_mb_in_slice_zero(a) +// when a.first_mb_in_slice == 0 and +// a.nal_unit_type in [1, 2, 5] +// +fn first_mb_in_slice_zero(slice: &Slice) -> bool { + slice.header.first_mb_in_slice == 0 +} + +// defguardp frame_num_differs(a, b) when a.frame_num != b.frame_num +// +fn frame_num_differs(last: &Slice, curr: &Slice) -> bool { + last.header.frame_num != curr.header.frame_num +} + +// defguardp pic_parameter_set_id_differs(a, b) +// when a.pic_parameter_set_id != b.pic_parameter_set_id +// +fn pps_id_differs(last: &Slice, curr: &Slice) -> bool { + last.pps_id != curr.pps_id +} + +// defguardp field_pic_flag_differs(a, b) when a.field_pic_flag != b.field_pic_flag +// +// defguardp bottom_field_flag_differs(a, b) when a.bottom_field_flag != b.bottom_field_flag +// +fn field_pic_flag_differs(last: &Slice, curr: &Slice) -> bool { + last.header.field_pic != curr.header.field_pic +} + +// defguardp nal_ref_idc_differs_one_zero(a, b) +// when (a.nal_ref_idc == 0 or b.nal_ref_idc == 0) and +// a.nal_ref_idc != b.nal_ref_idc +// +fn nal_ref_idc_differs_one_zero(last: &Slice, curr: &Slice) -> bool { + (last.nal_header.nal_ref_idc() == 0 || curr.nal_header.nal_ref_idc() == 0) + && last.nal_header.nal_ref_idc() != curr.nal_header.nal_ref_idc() +} + +// defguardp pic_order_cnt_zero_check(a, b) +// when a.pic_order_cnt_type == 0 and b.pic_order_cnt_type == 0 and +// (a.pic_order_cnt_lsb != b.pic_order_cnt_lsb or +// a.delta_pic_order_cnt_bottom != 
b.delta_pic_order_cnt_bottom) +// +fn pic_order_cnt_zero_check(last: &Slice, curr: &Slice) -> bool { + let (last_pic_order_cnt_lsb, last_delta_pic_order_cnt_bottom) = + match last.header.pic_order_cnt_lsb { + Some(PicOrderCountLsb::Frame(pic_order_cnt_lsb)) => (pic_order_cnt_lsb, 0), + Some(PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + }) => (pic_order_cnt_lsb, delta_pic_order_cnt_bottom), + _ => return false, + }; + + let (curr_pic_order_cnt_lsb, curr_delta_pic_order_cnt_bottom) = + match curr.header.pic_order_cnt_lsb { + Some(PicOrderCountLsb::Frame(pic_order_cnt_lsb)) => (pic_order_cnt_lsb, 0), + Some(PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + }) => (pic_order_cnt_lsb, delta_pic_order_cnt_bottom), + _ => return false, + }; + + last_pic_order_cnt_lsb != curr_pic_order_cnt_lsb + || last_delta_pic_order_cnt_bottom != curr_delta_pic_order_cnt_bottom +} + +// defguardp pic_order_cnt_one_check_zero(a, b) +// when a.pic_order_cnt_type == 1 and b.pic_order_cnt_type == 1 and +// hd(a.delta_pic_order_cnt) != hd(b.delta_pic_order_cnt) +// TODO + +// defguardp pic_order_cnt_one_check_one(a, b) +// when a.pic_order_cnt_type == 1 and b.pic_order_cnt_type == 1 and +// hd(hd(a.delta_pic_order_cnt)) != hd(hd(b.delta_pic_order_cnt)) +// TODO + +// defguardp idr_and_non_idr(a, b) +// when (a.nal_unit_type == 5 or b.nal_unit_type == 5) and +// a.nal_unit_type != b.nal_unit_type +// +fn idr_and_non_idr(last: &Slice, curr: &Slice) -> bool { + (last.nal_header.nal_unit_type().id() == 5) ^ (curr.nal_header.nal_unit_type().id() == 5) +} + +// defguardp idrs_with_idr_pic_id_differ(a, b) +// when a.nal_unit_type == 5 and b.nal_unit_type == 5 and a.idr_pic_id != b.idr_pic_id +fn idrs_where_idr_pic_id_differs(last: &Slice, curr: &Slice) -> bool { + match (last.header.idr_pic_id, curr.header.idr_pic_id) { + (Some(last), Some(curr)) => last != curr, + _ => false, + } +} diff --git 
a/vk-video/src/vulkan_decoder.rs b/vk-video/src/vulkan_decoder.rs new file mode 100644 index 000000000..9b78b74c4 --- /dev/null +++ b/vk-video/src/vulkan_decoder.rs @@ -0,0 +1,1406 @@ +use std::sync::Arc; + +use ash::vk; + +use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; +use tracing::error; +use wrappers::*; + +use crate::parser::{DecodeInformation, DecoderInstruction, ReferenceId}; + +mod parameter_sets; +mod vulkan_ctx; +mod wrappers; + +pub use vulkan_ctx::*; + +const MACROBLOCK_SIZE: u32 = 16; + +pub struct VulkanDecoder<'a> { + vulkan_ctx: Arc, + video_session_resources: Option>, + command_buffers: CommandBuffers, + _command_pools: CommandPools, + sync_structures: SyncStructures, + reference_id_to_dpb_slot_index: std::collections::HashMap, + decode_query_pool: Option, +} + +struct SyncStructures { + sem_decode_done: Semaphore, + fence_transfer_done: Fence, + fence_memory_barrier_completed: Fence, +} + +struct CommandBuffers { + decode_buffer: CommandBuffer, + gpu_to_mem_transfer_buffer: CommandBuffer, + vulkan_to_wgpu_transfer_buffer: CommandBuffer, +} + +struct VideoSessionResources<'a> { + video_session: VideoSession, + parameters_manager: VideoSessionParametersManager, + decoding_images: DecodingImages<'a>, +} + +/// this cannot outlive the image and semaphore it borrows, but it seems impossible to encode that +/// in the lifetimes +struct DecodeOutput { + image: vk::Image, + dimensions: vk::Extent2D, + current_layout: vk::ImageLayout, + layer: u32, + wait_semaphore: vk::Semaphore, + _input_buffer: Buffer, +} + +#[derive(Debug, thiserror::Error)] +pub enum VulkanDecoderError { + #[error("Vulkan error: {0}")] + VkError(#[from] vk::Result), + + #[error("Cannot find enough memory of the right type on the deivce")] + NoMemory, + + #[error("The decoder instruction is not supported: {0:?}")] + DecoderInstructionNotSupported(Box), + + #[error("Setting the frame cropping flag in sps is not supported")] + FrameCroppingNotSupported, + + 
#[error("Bitstreams that contain fields rather than frames are not supported")] + FieldsNotSupported, + + #[error("Scaling lists are not supported")] + ScalingListsNotSupported, + + #[error("A NALU requiring a session received before a session was created (probably before receiving first SPS)")] + NoSession, + + #[error("A slot in the Decoded Pictures Buffer was requested, but all slots are taken")] + NoFreeSlotsInDpb, + + #[error("A picture which is not in the decoded pictures buffer was requested as a reference picture")] + NonExistantReferenceRequested, + + #[error("A vulkan decode operation failed with code {0:?}")] + DecodeOperationFailed(vk::QueryResultStatusKHR), + + #[error(transparent)] + VulkanCtxError(#[from] VulkanCtxError), +} + +impl<'a> VulkanDecoder<'a> { + pub fn new(vulkan_ctx: Arc) -> Result { + let decode_pool = Arc::new(CommandPool::new( + vulkan_ctx.device.clone(), + vulkan_ctx.queues.h264_decode.idx, + )?); + + let transfer_pool = Arc::new(CommandPool::new( + vulkan_ctx.device.clone(), + vulkan_ctx.queues.transfer.idx, + )?); + + let decode_buffer = CommandBuffer::new_primary(decode_pool.clone())?; + + let gpu_to_mem_transfer_buffer = CommandBuffer::new_primary(transfer_pool.clone())?; + + let vulkan_to_wgpu_transfer_buffer = CommandBuffer::new_primary(transfer_pool.clone())?; + + let command_pools = CommandPools { + _decode_pool: decode_pool, + _transfer_pool: transfer_pool, + }; + + let sync_structures = SyncStructures { + sem_decode_done: Semaphore::new(vulkan_ctx.device.clone())?, + fence_transfer_done: Fence::new(vulkan_ctx.device.clone(), false)?, + fence_memory_barrier_completed: Fence::new(vulkan_ctx.device.clone(), false)?, + }; + + let decode_query_pool = if vulkan_ctx + .queues + .h264_decode + .supports_result_status_queries() + { + Some(DecodeQueryPool::new( + vulkan_ctx.device.clone(), + H264ProfileInfo::decode_h264_yuv420().profile_info, + )?) 
+ } else { + None + }; + + Ok(Self { + vulkan_ctx, + video_session_resources: None, + _command_pools: command_pools, + command_buffers: CommandBuffers { + decode_buffer, + gpu_to_mem_transfer_buffer, + vulkan_to_wgpu_transfer_buffer, + }, + sync_structures, + decode_query_pool, + reference_id_to_dpb_slot_index: Default::default(), + }) + } +} + +impl VulkanDecoder<'_> { + pub fn decode_to_bytes( + &mut self, + decoder_instructions: &[DecoderInstruction], + ) -> Result>, VulkanDecoderError> { + let mut result = Vec::new(); + for instruction in decoder_instructions { + if let Some(output) = self.decode(instruction)? { + result.push(self.download_output(output)?) + } + } + + Ok(result) + } + + pub fn decode_to_wgpu_textures( + &mut self, + decoder_instructions: &[DecoderInstruction], + ) -> Result, VulkanDecoderError> { + let mut result = Vec::new(); + for instruction in decoder_instructions { + if let Some(output) = self.decode(instruction)? { + result.push(self.output_to_wgpu_texture(output)?) + } + } + + Ok(result) + } + + fn decode( + &mut self, + instruction: &DecoderInstruction, + ) -> Result, VulkanDecoderError> { + match instruction { + DecoderInstruction::Decode { .. 
} => { + return Err(VulkanDecoderError::DecoderInstructionNotSupported( + Box::new(instruction.clone()), + )) + } + + DecoderInstruction::DecodeAndStoreAs { + decode_info, + reference_id, + } => { + return self + .process_reference_p_frame(decode_info, *reference_id) + .map(Option::Some) + } + + DecoderInstruction::Idr { + decode_info, + reference_id, + } => { + return self + .process_idr(decode_info, *reference_id) + .map(Option::Some) + } + + DecoderInstruction::Drop { reference_ids } => { + for reference_id in reference_ids { + match self.reference_id_to_dpb_slot_index.remove(reference_id) { + Some(dpb_idx) => self + .video_session_resources + .as_mut() + .map(|s| s.decoding_images.free_reference_picture(dpb_idx)), + None => return Err(VulkanDecoderError::NonExistantReferenceRequested), + }; + } + } + + DecoderInstruction::Sps(sps) => self.process_sps(sps)?, + + DecoderInstruction::Pps(pps) => self.process_pps(pps)?, + } + + Ok(None) + } + + fn process_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { + let profile = H264ProfileInfo::decode_h264_yuv420(); + + let width = match sps.frame_cropping { + None => (sps.pic_width_in_mbs_minus1 + 1) * MACROBLOCK_SIZE, + Some(_) => return Err(VulkanDecoderError::FrameCroppingNotSupported), + }; + + let height = match sps.frame_mbs_flags { + h264_reader::nal::sps::FrameMbsFlags::Frames => { + (sps.pic_height_in_map_units_minus1 + 1) * MACROBLOCK_SIZE + } + h264_reader::nal::sps::FrameMbsFlags::Fields { .. } => { + return Err(VulkanDecoderError::FieldsNotSupported) + } + }; + + let max_coded_extent = vk::Extent2D { width, height }; + // +1 for current frame + let max_dpb_slots = sps.max_num_ref_frames + 1; + let max_active_references = sps.max_num_ref_frames; + + if let Some(VideoSessionResources { + video_session, + parameters_manager: parameters, + .. 
+ }) = &mut self.video_session_resources + { + if video_session.max_coded_extent.width >= width + && video_session.max_coded_extent.height >= height + && video_session.max_dpb_slots >= max_dpb_slots + { + // no need to change the session + parameters.put_sps(sps)?; + return Ok(()); + } + } + + let video_session = VideoSession::new( + &self.vulkan_ctx, + &profile.profile_info, + max_coded_extent, + max_dpb_slots, + max_active_references, + &self.vulkan_ctx.video_capabilities.std_header_version, + )?; + + let parameters = self + .video_session_resources + .take() + .map(|r| r.parameters_manager); + + let mut parameters = match parameters { + Some(mut parameters) => { + parameters.change_session(video_session.session)?; + parameters + } + None => VideoSessionParametersManager::new(&self.vulkan_ctx, video_session.session)?, + }; + + parameters.put_sps(sps)?; + + // FIXME: usually, sps arrives either at the start of the stream (when all spses are sent + // at the begginning of the stream) or right before an IDR. It is however possible for an + // sps nal to arrive in between P-frames. This would cause us to loose the reference + // pictures we need to decode the stream until we receive a new IDR. Don't know if this is + // an issue worth fixing, I don't think I ever saw a stream like this. 
+ let (decoding_images, memory_barrier) = DecodingImages::new( + &self.vulkan_ctx, + profile, + &self.vulkan_ctx.h264_dpb_format_properties, + &self.vulkan_ctx.h264_dst_format_properties, + max_coded_extent, + max_dpb_slots, + )?; + + self.command_buffers.decode_buffer.begin()?; + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.decode_buffer, + &vk::DependencyInfo::default().image_memory_barriers(&memory_barrier), + ); + } + + self.command_buffers.decode_buffer.end()?; + + self.command_buffers.decode_buffer.submit( + *self.vulkan_ctx.queues.h264_decode.queue.lock().unwrap(), + &[], + &[], + Some(*self.sync_structures.fence_memory_barrier_completed), + )?; + + // TODO: this shouldn't be a fence + self.sync_structures + .fence_memory_barrier_completed + .wait_and_reset(u64::MAX)?; + + self.video_session_resources = Some(VideoSessionResources { + video_session, + parameters_manager: parameters, + decoding_images, + }); + + Ok(()) + } + + fn process_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { + self.video_session_resources + .as_mut() + .map(|r| &mut r.parameters_manager) + .ok_or(VulkanDecoderError::NoSession)? 
+ .put_pps(pps)?; + + Ok(()) + } + + fn pad_size_to_alignment(size: u64, align: u64) -> u64 { + if size % align == 0 { + size + } else { + (size + align) / align * align + } + } + + fn process_idr( + &mut self, + decode_information: &DecodeInformation, + reference_id: ReferenceId, + ) -> Result { + self.do_decode(decode_information, reference_id, true, true) + } + + fn process_reference_p_frame( + &mut self, + decode_information: &DecodeInformation, + reference_id: ReferenceId, + ) -> Result { + self.do_decode(decode_information, reference_id, false, true) + } + + fn do_decode( + &mut self, + decode_information: &DecodeInformation, + reference_id: ReferenceId, + is_idr: bool, + is_reference: bool, + ) -> Result { + // upload data to a buffer + let size = Self::pad_size_to_alignment( + decode_information.rbsp_bytes.len() as u64, + self.vulkan_ctx + .video_capabilities + .min_bitstream_buffer_offset_alignment, + ); + + let decode_buffer = + self.upload_decode_data_to_buffer(&decode_information.rbsp_bytes, size)?; + + // decode + let video_session_resources = self + .video_session_resources + .as_mut() + .ok_or(VulkanDecoderError::NoSession)?; + + // IDR - remove all reference picures + if is_idr { + video_session_resources + .decoding_images + .reset_all_allocations(); + + self.reference_id_to_dpb_slot_index = Default::default(); + } + + // begin video coding + self.command_buffers.decode_buffer.begin()?; + + let memory_barrier = vk::MemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::VIDEO_DECODE_KHR) + .src_access_mask(vk::AccessFlags2::VIDEO_DECODE_WRITE_KHR) + .dst_stage_mask(vk::PipelineStageFlags2::VIDEO_DECODE_KHR) + .dst_access_mask( + vk::AccessFlags2::VIDEO_DECODE_READ_KHR | vk::AccessFlags2::VIDEO_DECODE_WRITE_KHR, + ); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.decode_buffer, + &vk::DependencyInfo::default().memory_barriers(&[memory_barrier]), + ) + }; + + if let Some(pool) = 
self.decode_query_pool.as_ref() { + pool.reset(*self.command_buffers.decode_buffer); + } + + let reference_slots = video_session_resources + .decoding_images + .reference_slot_info(); + + let begin_info = vk::VideoBeginCodingInfoKHR::default() + .video_session(video_session_resources.video_session.session) + .video_session_parameters(video_session_resources.parameters_manager.parameters()) + .reference_slots(&reference_slots); + + unsafe { + self.vulkan_ctx + .device + .video_queue_ext + .cmd_begin_video_coding_khr(*self.command_buffers.decode_buffer, &begin_info) + }; + + // IDR - issue the reset command to the video session + if is_idr { + let control_info = vk::VideoCodingControlInfoKHR::default() + .flags(vk::VideoCodingControlFlagsKHR::RESET); + + unsafe { + self.vulkan_ctx + .device + .video_queue_ext + .cmd_control_video_coding_khr( + *self.command_buffers.decode_buffer, + &control_info, + ) + }; + } + + // allocate a new reference picture and fill out the forms to get it set up + let new_reference_slot_index = video_session_resources + .decoding_images + .allocate_reference_picture()?; + + let new_reference_slot_std_reference_info = decode_information.picture_info.into(); + let mut new_reference_slot_dpb_slot_info = vk::VideoDecodeH264DpbSlotInfoKHR::default() + .std_reference_info(&new_reference_slot_std_reference_info); + + let new_reference_slot_video_picture_resource_info = video_session_resources + .decoding_images + .video_resource_info(new_reference_slot_index) + .unwrap(); + + let setup_reference_slot = vk::VideoReferenceSlotInfoKHR::default() + .picture_resource(new_reference_slot_video_picture_resource_info) + .slot_index(new_reference_slot_index as i32) + .push_next(&mut new_reference_slot_dpb_slot_info); + + // prepare the reference list + let reference_slots = video_session_resources + .decoding_images + .reference_slot_info(); + + let references_std_ref_info = Self::prepare_references_std_ref_info(decode_information); + + let mut 
references_dpb_slot_info = + Self::prepare_references_dpb_slot_info(&references_std_ref_info); + + let pic_reference_slots = Self::prepare_reference_list_slot_info( + &self.reference_id_to_dpb_slot_index, + &reference_slots, + &mut references_dpb_slot_info, + decode_information, + )?; + + // prepare the decode target picture + let std_picture_info = vk::native::StdVideoDecodeH264PictureInfo { + flags: vk::native::StdVideoDecodeH264PictureInfoFlags { + _bitfield_align_1: [], + __bindgen_padding_0: [0; 3], + _bitfield_1: vk::native::StdVideoDecodeH264PictureInfoFlags::new_bitfield_1( + matches!( + decode_information.header.field_pic, + h264_reader::nal::slice::FieldPic::Field(..) + ) + .into(), + is_idr.into(), + is_idr.into(), + 0, + is_reference.into(), + 0, + ), + }, + PicOrderCnt: decode_information.picture_info.PicOrderCnt, + seq_parameter_set_id: decode_information.sps_id, + pic_parameter_set_id: decode_information.pps_id, + frame_num: decode_information.header.frame_num, + idr_pic_id: decode_information + .header + .idr_pic_id + .map(|a| a as u16) + .unwrap_or(0), + reserved1: 0, + reserved2: 0, + }; + + let slice_offsets = decode_information + .slice_indices + .iter() + .map(|&x| x as u32) + .collect::>(); + + let mut decode_h264_picture_info = vk::VideoDecodeH264PictureInfoKHR::default() + .std_picture_info(&std_picture_info) + .slice_offsets(&slice_offsets); + + let dst_picture_resource_info = match &video_session_resources.decoding_images.dst_image { + Some(image) => image.video_resource_info[0], + None => *new_reference_slot_video_picture_resource_info, + }; + + // these 3 veriables are for copying the result later + let (dst_image, dst_image_layout, dst_layer) = + match &video_session_resources.decoding_images.dst_image { + Some(image) => (**image.image, vk::ImageLayout::VIDEO_DECODE_DST_KHR, 0), + None => ( + **video_session_resources.decoding_images.dpb_image.image, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + new_reference_slot_index, + ), + }; + + // 
fill out the final struct and issue the command + let decode_info = vk::VideoDecodeInfoKHR::default() + .src_buffer(*decode_buffer) + .src_buffer_offset(0) + .src_buffer_range(size) + .dst_picture_resource(dst_picture_resource_info) + .setup_reference_slot(&setup_reference_slot) + .reference_slots(&pic_reference_slots) + .push_next(&mut decode_h264_picture_info); + + if let Some(pool) = self.decode_query_pool.as_ref() { + pool.begin_query(*self.command_buffers.decode_buffer); + } + + unsafe { + self.vulkan_ctx + .device + .video_decode_queue_ext + .cmd_decode_video_khr(*self.command_buffers.decode_buffer, &decode_info) + }; + + if let Some(pool) = self.decode_query_pool.as_ref() { + pool.end_query(*self.command_buffers.decode_buffer); + } + + unsafe { + self.vulkan_ctx + .device + .video_queue_ext + .cmd_end_video_coding_khr( + *self.command_buffers.decode_buffer, + &vk::VideoEndCodingInfoKHR::default(), + ) + }; + + self.command_buffers.decode_buffer.end()?; + + self.command_buffers.decode_buffer.submit( + *self.vulkan_ctx.queues.h264_decode.queue.lock().unwrap(), + &[], + &[( + *self.sync_structures.sem_decode_done, + vk::PipelineStageFlags2::VIDEO_DECODE_KHR, + )], + None, + )?; + + // after the decode save the new reference picture + self.reference_id_to_dpb_slot_index + .insert(reference_id, new_reference_slot_index); + + // TODO: those are not the real dimensions of the image. 
the real dimensions should be + // calculated from the sps + let dimensions = video_session_resources.video_session.max_coded_extent; + + Ok(DecodeOutput { + image: dst_image, + wait_semaphore: *self.sync_structures.sem_decode_done, + layer: dst_layer as u32, + current_layout: dst_image_layout, + dimensions, + _input_buffer: decode_buffer, + }) + } + + fn output_to_wgpu_texture( + &self, + decode_output: DecodeOutput, + ) -> Result { + let copy_extent = vk::Extent3D { + width: decode_output.dimensions.width, + height: decode_output.dimensions.height, + depth: 1, + }; + + let queue_indices = [ + self.vulkan_ctx.queues.transfer.idx as u32, + self.vulkan_ctx.queues.wgpu.idx as u32, + ]; + + let create_info = vk::ImageCreateInfo::default() + .flags(vk::ImageCreateFlags::MUTABLE_FORMAT) + .image_type(vk::ImageType::TYPE_2D) + .format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .extent(copy_extent) + .mip_levels(1) + .array_layers(1) + .samples(vk::SampleCountFlags::TYPE_1) + .tiling(vk::ImageTiling::OPTIMAL) + .usage( + vk::ImageUsageFlags::SAMPLED + | vk::ImageUsageFlags::TRANSFER_DST + | vk::ImageUsageFlags::TRANSFER_SRC, + ) + .sharing_mode(vk::SharingMode::CONCURRENT) + .queue_family_indices(&queue_indices) + .initial_layout(vk::ImageLayout::UNDEFINED); + + let image = Arc::new(Image::new(self.vulkan_ctx.allocator.clone(), &create_info)?); + + self.command_buffers + .vulkan_to_wgpu_transfer_buffer + .begin()?; + + let memory_barrier_src = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::COPY) + .dst_access_mask(vk::AccessFlags2::TRANSFER_READ) + .old_layout(decode_output.current_layout) + .new_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(decode_output.image) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: 
vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: decode_output.layer, + layer_count: 1, + }); + + let memory_barrier_dst = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::COPY) + .dst_access_mask(vk::AccessFlags2::TRANSFER_WRITE) + .old_layout(vk::ImageLayout::UNDEFINED) + .new_layout(vk::ImageLayout::TRANSFER_DST_OPTIMAL) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(**image) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: 1, + }); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.vulkan_to_wgpu_transfer_buffer, + &vk::DependencyInfo::default() + .image_memory_barriers(&[memory_barrier_src, memory_barrier_dst]), + ) + }; + + let copy_info = [ + vk::ImageCopy::default() + .src_subresource(vk::ImageSubresourceLayers { + base_array_layer: decode_output.layer, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_0, + }) + .src_offset(vk::Offset3D::default()) + .dst_subresource(vk::ImageSubresourceLayers { + base_array_layer: 0, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_0, + }) + .dst_offset(vk::Offset3D::default()) + .extent(copy_extent), + vk::ImageCopy::default() + .src_subresource(vk::ImageSubresourceLayers { + base_array_layer: decode_output.layer, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_1, + }) + .src_offset(vk::Offset3D::default()) + .dst_subresource(vk::ImageSubresourceLayers { + base_array_layer: 0, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_1, + }) + .dst_offset(vk::Offset3D::default()) + .extent(vk::Extent3D { + width: copy_extent.width / 2, + height: 
copy_extent.height / 2, + ..copy_extent + }), + ]; + + unsafe { + self.vulkan_ctx.device.cmd_copy_image( + *self.command_buffers.vulkan_to_wgpu_transfer_buffer, + decode_output.image, + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + **image, + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + ©_info, + ); + } + + let memory_barrier_src = memory_barrier_src + .src_stage_mask(vk::PipelineStageFlags2::COPY) + .src_access_mask(vk::AccessFlags2::TRANSFER_READ) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .new_layout(decode_output.current_layout); + + let memory_barrier_dst = memory_barrier_dst + .src_stage_mask(vk::PipelineStageFlags2::COPY) + .src_access_mask(vk::AccessFlags2::TRANSFER_WRITE) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::TRANSFER_DST_OPTIMAL) + .new_layout(vk::ImageLayout::GENERAL); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.vulkan_to_wgpu_transfer_buffer, + &vk::DependencyInfo::default() + .image_memory_barriers(&[memory_barrier_src, memory_barrier_dst]), + ) + }; + + self.command_buffers.vulkan_to_wgpu_transfer_buffer.end()?; + + self.command_buffers.vulkan_to_wgpu_transfer_buffer.submit( + *self.vulkan_ctx.queues.transfer.queue.lock().unwrap(), + &[( + decode_output.wait_semaphore, + vk::PipelineStageFlags2::TOP_OF_PIPE, + )], + &[], + Some(*self.sync_structures.fence_transfer_done), + )?; + + self.sync_structures + .fence_transfer_done + .wait_and_reset(u64::MAX)?; + + let result = self + .decode_query_pool + .as_ref() + .map(|pool| pool.get_result_blocking()); + + if let Some(result) = result { + let result = result?; + if result.as_raw() < 0 { + return Err(VulkanDecoderError::DecodeOperationFailed(result)); + } + } + + let hal_texture = unsafe { + wgpu::hal::vulkan::Device::texture_from_raw( + **image, + &wgpu::hal::TextureDescriptor { + label: 
Some("vulkan video output texture"), + usage: wgpu::hal::TextureUses::RESOURCE + | wgpu::hal::TextureUses::COPY_DST + | wgpu::hal::TextureUses::COPY_SRC, + memory_flags: wgpu::hal::MemoryFlags::empty(), + size: wgpu::Extent3d { + width: copy_extent.width, + height: copy_extent.height, + depth_or_array_layers: copy_extent.depth, + }, + dimension: wgpu::TextureDimension::D2, + sample_count: 1, + view_formats: Vec::new(), + format: wgpu::TextureFormat::NV12, + mip_level_count: 1, + }, + Some(Box::new(image.clone())), + ) + }; + + let wgpu_texture = unsafe { + self.vulkan_ctx + .wgpu_ctx + .device + .create_texture_from_hal::( + hal_texture, + &wgpu::TextureDescriptor { + label: Some("vulkan video output texture"), + usage: wgpu::TextureUsages::COPY_DST + | wgpu::TextureUsages::TEXTURE_BINDING + | wgpu::TextureUsages::COPY_SRC, + size: wgpu::Extent3d { + width: copy_extent.width, + height: copy_extent.height, + depth_or_array_layers: copy_extent.depth, + }, + dimension: wgpu::TextureDimension::D2, + sample_count: 1, + view_formats: &[], + format: wgpu::TextureFormat::NV12, + mip_level_count: 1, + }, + ) + }; + + Ok(wgpu_texture) + } + + fn download_output(&self, decode_output: DecodeOutput) -> Result, VulkanDecoderError> { + let mut dst_buffer = self.copy_image_to_buffer( + decode_output.image, + decode_output.dimensions, + decode_output.current_layout, + decode_output.layer, + &[(decode_output.wait_semaphore, vk::PipelineStageFlags2::COPY)], + &[], + Some(*self.sync_structures.fence_transfer_done), + )?; + + self.sync_structures + .fence_transfer_done + .wait_and_reset(u64::MAX)?; + + let output = unsafe { + self.download_data_from_buffer( + &mut dst_buffer, + decode_output.dimensions.width as usize + * decode_output.dimensions.height as usize + * 3 + / 2, + )? 
+ }; + + Ok(output) + } + + fn prepare_references_std_ref_info( + decode_information: &DecodeInformation, + ) -> Vec { + decode_information + .reference_list + .iter() + .flatten() + .map(|ref_info| ref_info.picture_info.into()) + .collect::>() + } + + fn prepare_references_dpb_slot_info( + references_std_ref_info: &[vk::native::StdVideoDecodeH264ReferenceInfo], + ) -> Vec { + references_std_ref_info + .iter() + .map(|info| vk::VideoDecodeH264DpbSlotInfoKHR::default().std_reference_info(info)) + .collect::>() + } + + fn prepare_reference_list_slot_info<'a>( + reference_id_to_dpb_slot_index: &std::collections::HashMap, + reference_slots: &'a [vk::VideoReferenceSlotInfoKHR<'a>], + references_dpb_slot_info: &'a mut [vk::VideoDecodeH264DpbSlotInfoKHR<'a>], + decode_information: &'a DecodeInformation, + ) -> Result>, VulkanDecoderError> { + let mut pic_reference_slots = Vec::new(); + for (ref_info, dpb_slot_info) in decode_information + .reference_list + .iter() + .flatten() + .zip(references_dpb_slot_info.iter_mut()) + { + let i = *reference_id_to_dpb_slot_index + .get(&ref_info.id) + .ok_or(VulkanDecoderError::NonExistantReferenceRequested)?; + + let reference = *reference_slots + .get(i) + .ok_or(VulkanDecoderError::NonExistantReferenceRequested)?; + + if reference.slot_index < 0 || reference.p_picture_resource.is_null() { + return Err(VulkanDecoderError::NonExistantReferenceRequested); + } + + let reference = reference.push_next(dpb_slot_info); + + pic_reference_slots.push(reference); + } + + Ok(pic_reference_slots) + } + + /// ## Safety + /// the buffer has to be mappable and readable + unsafe fn download_data_from_buffer( + &self, + buffer: &mut Buffer, + size: usize, + ) -> Result, VulkanDecoderError> { + let mut output = Vec::new(); + unsafe { + let memory = self + .vulkan_ctx + .allocator + .map_memory(&mut buffer.allocation)?; + let memory_slice = std::slice::from_raw_parts_mut(memory, size); + output.extend_from_slice(memory_slice); + self.vulkan_ctx + 
.allocator + .unmap_memory(&mut buffer.allocation); + } + + Ok(output) + } + + fn upload_decode_data_to_buffer( + &self, + data: &[u8], + buffer_size: u64, + ) -> Result { + let mut decode_buffer = Buffer::new_decode( + self.vulkan_ctx.allocator.clone(), + buffer_size, + &H264ProfileInfo::decode_h264_yuv420(), + )?; + + unsafe { + let mem = self + .vulkan_ctx + .allocator + .map_memory(&mut decode_buffer.allocation)?; + let slice = std::slice::from_raw_parts_mut(mem.cast(), data.len()); + slice.copy_from_slice(data); + self.vulkan_ctx + .allocator + .unmap_memory(&mut decode_buffer.allocation); + } + + Ok(decode_buffer) + } + + #[allow(clippy::too_many_arguments)] + fn copy_image_to_buffer( + &self, + image: vk::Image, + dimensions: vk::Extent2D, + current_image_layout: vk::ImageLayout, + layer: u32, + wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], + signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], + fence: Option, + ) -> Result { + self.command_buffers.gpu_to_mem_transfer_buffer.begin()?; + + let memory_barrier = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::COPY) + .dst_access_mask(vk::AccessFlags2::TRANSFER_READ) + .old_layout(current_image_layout) + .new_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(image) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: layer, + layer_count: 1, + }); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.gpu_to_mem_transfer_buffer, + &vk::DependencyInfo::default().image_memory_barriers(&[memory_barrier]), + ) + }; + + // TODO: in this section, we shouldn't be using `max_coded_extent` and use the real frame + // resolution + let 
y_plane_size = dimensions.width as u64 * dimensions.height as u64; + + let dst_buffer = Buffer::new_transfer( + self.vulkan_ctx.allocator.clone(), + y_plane_size * 3 / 2, + TransferDirection::GpuToMem, + )?; + + let copy_info = [ + vk::BufferImageCopy::default() + .image_subresource(vk::ImageSubresourceLayers { + mip_level: 0, + layer_count: 1, + base_array_layer: layer, + aspect_mask: vk::ImageAspectFlags::PLANE_0, + }) + .image_offset(vk::Offset3D { x: 0, y: 0, z: 0 }) + .image_extent(vk::Extent3D { + width: dimensions.width, + height: dimensions.height, + depth: 1, + }) + .buffer_offset(0) + .buffer_row_length(0) + .buffer_image_height(0), + vk::BufferImageCopy::default() + .image_subresource(vk::ImageSubresourceLayers { + mip_level: 0, + layer_count: 1, + base_array_layer: layer, + aspect_mask: vk::ImageAspectFlags::PLANE_1, + }) + .image_offset(vk::Offset3D { x: 0, y: 0, z: 0 }) + .image_extent(vk::Extent3D { + width: dimensions.width / 2, + height: dimensions.height / 2, + depth: 1, + }) + .buffer_offset(y_plane_size) + .buffer_row_length(0) + .buffer_image_height(0), + ]; + + unsafe { + self.vulkan_ctx.device.cmd_copy_image_to_buffer( + *self.command_buffers.gpu_to_mem_transfer_buffer, + image, + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + *dst_buffer, + ©_info, + ) + }; + + let memory_barrier = memory_barrier + .src_stage_mask(vk::PipelineStageFlags2::COPY) + .src_access_mask(vk::AccessFlags2::TRANSFER_READ) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .new_layout(current_image_layout); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.gpu_to_mem_transfer_buffer, + &vk::DependencyInfo::default().image_memory_barriers(&[memory_barrier]), + ) + }; + + self.command_buffers.gpu_to_mem_transfer_buffer.end()?; + + self.command_buffers.gpu_to_mem_transfer_buffer.submit( + *self.vulkan_ctx.queues.transfer.queue.lock().unwrap(), + 
wait_semaphores, + signal_semaphores, + fence, + )?; + + Ok(dst_buffer) + } +} + +impl From for vk::native::StdVideoDecodeH264ReferenceInfo { + fn from(picture_info: crate::parser::PictureInfo) -> Self { + vk::native::StdVideoDecodeH264ReferenceInfo { + flags: vk::native::StdVideoDecodeH264ReferenceInfoFlags { + __bindgen_padding_0: [0; 3], + _bitfield_align_1: [], + _bitfield_1: vk::native::StdVideoDecodeH264ReferenceInfoFlags::new_bitfield_1( + 0, + 0, + picture_info.used_for_long_term_reference.into(), + picture_info.non_existing.into(), + ), + }, + FrameNum: picture_info.FrameNum, + PicOrderCnt: picture_info.PicOrderCnt, + reserved: 0, + } + } +} + +pub(crate) struct DecodingImages<'a> { + pub(crate) dpb_image: DecodingImageBundle<'a>, + pub(crate) dpb_slot_active: Vec, + pub(crate) dst_image: Option>, +} + +pub(crate) struct DecodingImageBundle<'a> { + pub(crate) image: Arc, + pub(crate) _image_view: ImageView, + pub(crate) video_resource_info: Vec>, +} + +impl<'a> DecodingImageBundle<'a> { + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + format: &vk::VideoFormatPropertiesKHR<'a>, + dimensions: vk::Extent2D, + image_usage: vk::ImageUsageFlags, + profile_info: &H264ProfileInfo, + array_layer_count: u32, + queue_indices: Option<&[u32]>, + layout: vk::ImageLayout, + ) -> Result<(Self, vk::ImageMemoryBarrier2<'a>), VulkanDecoderError> { + let mut profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(&profile_info.profile_info)); + + let mut image_create_info = vk::ImageCreateInfo::default() + .flags(format.image_create_flags) + .image_type(format.image_type) + .format(format.format) + .extent(vk::Extent3D { + width: dimensions.width, + height: dimensions.height, + depth: 1, + }) + .mip_levels(1) + .array_layers(array_layer_count) + .samples(vk::SampleCountFlags::TYPE_1) + .tiling(format.image_tiling) + .usage(image_usage) + .initial_layout(vk::ImageLayout::UNDEFINED) + .push_next(&mut 
profile_list_info); + + match queue_indices { + Some(indices) => { + image_create_info = image_create_info + .sharing_mode(vk::SharingMode::CONCURRENT) + .queue_family_indices(indices); + } + None => { + image_create_info = image_create_info.sharing_mode(vk::SharingMode::EXCLUSIVE); + } + } + + let image = Arc::new(Image::new( + vulkan_ctx.allocator.clone(), + &image_create_info, + )?); + + let subresource_range = vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: vk::REMAINING_ARRAY_LAYERS, + }; + + let image_view_create_info = vk::ImageViewCreateInfo::default() + .flags(vk::ImageViewCreateFlags::empty()) + .image(**image) + .view_type(if array_layer_count == 1 { + vk::ImageViewType::TYPE_2D + } else { + vk::ImageViewType::TYPE_2D_ARRAY + }) + .format(format.format) + .components(vk::ComponentMapping::default()) + .subresource_range(subresource_range); + + let image_view = ImageView::new( + vulkan_ctx.device.clone(), + image.clone(), + &image_view_create_info, + )?; + + let video_resource_info = (0..array_layer_count) + .map(|i| { + vk::VideoPictureResourceInfoKHR::default() + .coded_offset(vk::Offset2D { x: 0, y: 0 }) + .coded_extent(dimensions) + .base_array_layer(i) + .image_view_binding(image_view.view) + }) + .collect(); + + let image_memory_barrier = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::UNDEFINED) + .new_layout(layout) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(**image) + .subresource_range(subresource_range); + + Ok(( + Self { + image, + _image_view: image_view, + video_resource_info, + }, + image_memory_barrier, + )) + } +} + +impl<'a> DecodingImages<'a> { + pub(crate) fn new( + 
vulkan_ctx: &VulkanCtx, + profile: H264ProfileInfo, + dpb_format: &vk::VideoFormatPropertiesKHR<'a>, + dst_format: &Option>, + dimensions: vk::Extent2D, + max_dpb_slots: u32, + ) -> Result<(Self, Vec>), VulkanDecoderError> { + let dpb_image_usage = if dst_format.is_some() { + dpb_format.image_usage_flags & vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + } else { + dpb_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + | vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC) + }; + + let queue_indices = [ + vulkan_ctx.queues.transfer.idx as u32, + vulkan_ctx.queues.h264_decode.idx as u32, + ]; + + let (dpb_image, dpb_memory_barrier) = DecodingImageBundle::new( + vulkan_ctx, + dpb_format, + dimensions, + dpb_image_usage, + &profile, + max_dpb_slots, + if dst_format.is_some() { + None + } else { + Some(&queue_indices) + }, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + )?; + + let output = dst_format + .map(|dst_format| { + let dst_image_usage = dst_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC); + DecodingImageBundle::new( + vulkan_ctx, + &dst_format, + dimensions, + dst_image_usage, + &profile, + 1, + Some(&queue_indices), + vk::ImageLayout::VIDEO_DECODE_DST_KHR, + ) + }) + .transpose()?; + + let (dst_image, dst_memory_barrier) = match output { + Some((output_images, output_memory_barrier)) => { + (Some(output_images), Some(output_memory_barrier)) + } + None => (None, None), + }; + + let barriers = [dpb_memory_barrier] + .into_iter() + .chain(dst_memory_barrier) + .collect::>(); + + Ok(( + Self { + dpb_image, + dpb_slot_active: vec![false; max_dpb_slots as usize], + dst_image, + }, + barriers, + )) + } + + fn reference_slot_info(&self) -> Vec { + self.dpb_image + .video_resource_info + .iter() + .enumerate() + .map(|(i, info)| { + vk::VideoReferenceSlotInfoKHR::default() + .picture_resource(info) + .slot_index(if self.dpb_slot_active[i] { + i as i32 + } else { + 
-1 + }) + }) + .collect() + } + + fn allocate_reference_picture(&mut self) -> Result { + let i = self + .dpb_slot_active + .iter() + .enumerate() + .find(|(_, &v)| !v) + .map(|(i, _)| i) + .ok_or(VulkanDecoderError::NoFreeSlotsInDpb)?; + + self.dpb_slot_active[i] = true; + + Ok(i) + } + + fn video_resource_info(&self, i: usize) -> Option<&vk::VideoPictureResourceInfoKHR> { + self.dpb_image.video_resource_info.get(i) + } + + fn free_reference_picture(&mut self, i: usize) -> Result<(), VulkanDecoderError> { + self.dpb_slot_active[i] = false; + + Ok(()) + } + + fn reset_all_allocations(&mut self) { + self.dpb_slot_active + .iter_mut() + .for_each(|slot| *slot = false); + } +} + +pub(crate) struct H264ProfileInfo<'a> { + profile_info: vk::VideoProfileInfoKHR<'a>, + h264_info_ptr: *mut vk::VideoDecodeH264ProfileInfoKHR<'a>, +} + +impl H264ProfileInfo<'_> { + fn decode_h264_yuv420() -> Self { + let h264_profile_info = Box::leak(Box::new( + vk::VideoDecodeH264ProfileInfoKHR::default() + .std_profile_idc( + vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_BASELINE, + ) + .picture_layout(vk::VideoDecodeH264PictureLayoutFlagsKHR::PROGRESSIVE), + )); + + let h264_info_ptr = h264_profile_info as *mut _; + let profile_info = vk::VideoProfileInfoKHR::default() + .video_codec_operation(vk::VideoCodecOperationFlagsKHR::DECODE_H264) + .chroma_subsampling(vk::VideoChromaSubsamplingFlagsKHR::TYPE_420) + .luma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .chroma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .push_next(h264_profile_info); + + Self { + profile_info, + h264_info_ptr, + } + } +} + +impl<'a> Drop for H264ProfileInfo<'a> { + fn drop(&mut self) { + unsafe { + let _ = Box::from_raw(self.h264_info_ptr); + } + } +} diff --git a/vk-video/src/vulkan_decoder/parameter_sets.rs b/vk-video/src/vulkan_decoder/parameter_sets.rs new file mode 100644 index 000000000..ebf4267c5 --- /dev/null +++ b/vk-video/src/vulkan_decoder/parameter_sets.rs @@ -0,0 
+1,262 @@ +use ash::vk; +use h264_reader::nal::sps::SeqParameterSet; + +use super::VulkanDecoderError; + +pub(crate) struct VkSequenceParameterSet { + pub(crate) sps: vk::native::StdVideoH264SequenceParameterSet, + // in the future, heap-allocated VUI and HRD parameters can be put here to have everything + // together +} + +impl TryFrom<&'_ SeqParameterSet> for VkSequenceParameterSet { + type Error = VulkanDecoderError; + + #[allow(non_snake_case)] + fn try_from(sps: &SeqParameterSet) -> Result { + let flags = vk::native::StdVideoH264SpsFlags { + _bitfield_1: vk::native::StdVideoH264SpsFlags::new_bitfield_1( + sps.constraint_flags.flag0().into(), + sps.constraint_flags.flag1().into(), + sps.constraint_flags.flag2().into(), + sps.constraint_flags.flag3().into(), + sps.constraint_flags.flag4().into(), + sps.constraint_flags.flag5().into(), + sps.direct_8x8_inference_flag.into(), + match sps.frame_mbs_flags { + h264_reader::nal::sps::FrameMbsFlags::Frames => 0, + h264_reader::nal::sps::FrameMbsFlags::Fields { + mb_adaptive_frame_field_flag, + } => mb_adaptive_frame_field_flag.into(), + }, + matches!( + sps.frame_mbs_flags, + h264_reader::nal::sps::FrameMbsFlags::Frames + ) + .into(), + match sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeOne { + delta_pic_order_always_zero_flag, + .. + } => delta_pic_order_always_zero_flag.into(), + // The spec doesn't say what to do if this flag is not present... + h264_reader::nal::sps::PicOrderCntType::TypeZero { .. 
} + | h264_reader::nal::sps::PicOrderCntType::TypeTwo => 0, + }, + sps.chroma_info.separate_colour_plane_flag.into(), + sps.gaps_in_frame_num_value_allowed_flag.into(), + sps.chroma_info.qpprime_y_zero_transform_bypass_flag.into(), + sps.frame_cropping.is_some().into(), + sps.chroma_info.scaling_matrix.is_some().into(), + 0, + ), + _bitfield_align_1: [], + __bindgen_padding_0: 0, + }; + + let profile_idc: u8 = sps.profile_idc.into(); + + let pic_order_cnt_type = match sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeZero { .. } => 0, + h264_reader::nal::sps::PicOrderCntType::TypeOne { .. } => 1, + h264_reader::nal::sps::PicOrderCntType::TypeTwo => 2, + }; + + let ( + offset_for_non_ref_pic, + offset_for_top_to_bottom_field, + num_ref_frames_in_pic_order_cnt_cycle, + ) = match &sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeOne { + offset_for_non_ref_pic, + offset_for_top_to_bottom_field, + offsets_for_ref_frame, + .. + } => ( + *offset_for_non_ref_pic, + *offset_for_top_to_bottom_field, + offsets_for_ref_frame.len() as u8, + ), + h264_reader::nal::sps::PicOrderCntType::TypeZero { .. } => (0, 0, 0), + h264_reader::nal::sps::PicOrderCntType::TypeTwo => (0, 0, 0), + }; + + let log2_max_pic_order_cnt_lsb_minus4 = match &sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeZero { + log2_max_pic_order_cnt_lsb_minus4, + } => *log2_max_pic_order_cnt_lsb_minus4, + h264_reader::nal::sps::PicOrderCntType::TypeOne { .. 
} + | h264_reader::nal::sps::PicOrderCntType::TypeTwo => 0, + }; + + let ( + frame_crop_left_offset, + frame_crop_right_offset, + frame_crop_top_offset, + frame_crop_bottom_offset, + ) = match sps.frame_cropping { + Some(h264_reader::nal::sps::FrameCropping { + left_offset, + right_offset, + top_offset, + bottom_offset, + }) => (left_offset, right_offset, top_offset, bottom_offset), + None => (0, 0, 0, 0), + }; + + let pOffsetForRefFrame = match &sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeOne { + offsets_for_ref_frame, + .. + } => offsets_for_ref_frame.as_ptr(), + h264_reader::nal::sps::PicOrderCntType::TypeZero { .. } + | h264_reader::nal::sps::PicOrderCntType::TypeTwo => std::ptr::null(), + }; + + let pScalingLists = match sps.chroma_info.scaling_matrix { + Some(_) => return Err(VulkanDecoderError::ScalingListsNotSupported), + None => std::ptr::null(), + }; + + // TODO: this is not necessary to reconstruct samples. I don't know why the decoder would + // need this. Maybe we can do this in the future. 
+ let pSequenceParameterSetVui = std::ptr::null(); + + Ok(Self { + sps: vk::native::StdVideoH264SequenceParameterSet { + flags, + profile_idc: profile_idc as u32, + level_idc: h264_level_idc_to_vk(sps.level_idc), + chroma_format_idc: sps.chroma_info.chroma_format.to_chroma_format_idc(), + seq_parameter_set_id: sps.seq_parameter_set_id.id(), + bit_depth_luma_minus8: sps.chroma_info.bit_depth_luma_minus8, + bit_depth_chroma_minus8: sps.chroma_info.bit_depth_chroma_minus8, + log2_max_frame_num_minus4: sps.log2_max_frame_num_minus4, + pic_order_cnt_type, + offset_for_non_ref_pic, + offset_for_top_to_bottom_field, + num_ref_frames_in_pic_order_cnt_cycle, + log2_max_pic_order_cnt_lsb_minus4, + max_num_ref_frames: sps.max_num_ref_frames as u8, + reserved1: 0, + pic_width_in_mbs_minus1: sps.pic_width_in_mbs_minus1, + pic_height_in_map_units_minus1: sps.pic_height_in_map_units_minus1, + frame_crop_left_offset, + frame_crop_right_offset, + frame_crop_top_offset, + frame_crop_bottom_offset, + reserved2: 0, + pOffsetForRefFrame, + pScalingLists, + pSequenceParameterSetVui, + }, + }) + } +} + +trait ChromaFormatExt { + fn to_chroma_format_idc(&self) -> u32; +} + +impl ChromaFormatExt for h264_reader::nal::sps::ChromaFormat { + fn to_chroma_format_idc(&self) -> u32 { + match self { + h264_reader::nal::sps::ChromaFormat::Monochrome => 0, + h264_reader::nal::sps::ChromaFormat::YUV420 => 1, + h264_reader::nal::sps::ChromaFormat::YUV422 => 2, + h264_reader::nal::sps::ChromaFormat::YUV444 => 3, + h264_reader::nal::sps::ChromaFormat::Invalid(v) => *v, + } + } +} + +fn h264_level_idc_to_vk(level_idc: u8) -> u32 { + match level_idc { + 10 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_0, + 11 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_1, + 12 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_2, + 13 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_3, + 20 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_0, + 21 
=> vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_1, + 22 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_2, + 30 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_0, + 31 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_1, + 32 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_2, + 40 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_0, + 41 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_1, + 42 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_2, + 50 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_0, + 51 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_1, + 52 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_2, + 60 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_0, + 61 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_1, + 62 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_2, + _ => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_INVALID, + } +} + +pub(super) struct VkPictureParameterSet { + pub(super) pps: vk::native::StdVideoH264PictureParameterSet, +} + +impl TryFrom<&'_ h264_reader::nal::pps::PicParameterSet> for VkPictureParameterSet { + type Error = VulkanDecoderError; + + #[allow(non_snake_case)] + fn try_from(pps: &h264_reader::nal::pps::PicParameterSet) -> Result { + let flags = vk::native::StdVideoH264PpsFlags { + _bitfield_align_1: [], + __bindgen_padding_0: [0; 3], + _bitfield_1: vk::native::StdVideoH264PpsFlags::new_bitfield_1( + pps.extension + .as_ref() + .map(|ext| ext.transform_8x8_mode_flag.into()) + .unwrap_or(0), + pps.redundant_pic_cnt_present_flag.into(), + pps.constrained_intra_pred_flag.into(), + pps.deblocking_filter_control_present_flag.into(), + pps.weighted_pred_flag.into(), + pps.bottom_field_pic_order_in_frame_present_flag.into(), + pps.entropy_coding_mode_flag.into(), + pps.extension + .as_ref() + .map(|ext| 
ext.pic_scaling_matrix.is_some().into()) + .unwrap_or(0), + ), + }; + + let chroma_qp_index_offset = pps.chroma_qp_index_offset as i8; + + let second_chroma_qp_index_offset = pps + .extension + .as_ref() + .map(|ext| ext.second_chroma_qp_index_offset as i8) + .unwrap_or(chroma_qp_index_offset); + + let pScalingLists = match pps.extension { + Some(h264_reader::nal::pps::PicParameterSetExtra { + pic_scaling_matrix: Some(_), + .. + }) => return Err(VulkanDecoderError::ScalingListsNotSupported), + _ => std::ptr::null(), + }; + + Ok(Self { + pps: vk::native::StdVideoH264PictureParameterSet { + flags, + seq_parameter_set_id: pps.seq_parameter_set_id.id(), + pic_parameter_set_id: pps.pic_parameter_set_id.id(), + num_ref_idx_l0_default_active_minus1: pps.num_ref_idx_l0_default_active_minus1 + as u8, + num_ref_idx_l1_default_active_minus1: pps.num_ref_idx_l1_default_active_minus1 + as u8, + weighted_bipred_idc: pps.weighted_bipred_idc.into(), + pic_init_qp_minus26: pps.pic_init_qp_minus26 as i8, + pic_init_qs_minus26: pps.pic_init_qs_minus26 as i8, + chroma_qp_index_offset, + second_chroma_qp_index_offset, + pScalingLists, + }, + }) + } +} diff --git a/vk-video/src/vulkan_decoder/vulkan_ctx.rs b/vk-video/src/vulkan_decoder/vulkan_ctx.rs new file mode 100644 index 000000000..a6481f721 --- /dev/null +++ b/vk-video/src/vulkan_decoder/vulkan_ctx.rs @@ -0,0 +1,644 @@ +use std::{ + ffi::{c_void, CStr}, + sync::Arc, +}; + +use ash::{vk, Entry}; +use tracing::{error, info}; + +use super::{Allocator, CommandPool, DebugMessenger, Device, H264ProfileInfo, Instance}; + +const REQUIRED_EXTENSIONS: &[&CStr] = &[ + vk::KHR_VIDEO_QUEUE_NAME, + vk::KHR_VIDEO_DECODE_QUEUE_NAME, + vk::KHR_VIDEO_DECODE_H264_NAME, +]; + +#[derive(thiserror::Error, Debug)] +pub enum VulkanCtxError { + #[error("Error loading vulkan: {0}")] + LoadingError(#[from] ash::LoadingError), + + #[error("Vulkan error: {0}")] + VkError(#[from] vk::Result), + + #[error("wgpu instance error: {0}")] + WgpuInstanceError(#[from] 
wgpu::hal::InstanceError), + + #[error("wgpu device error: {0}")] + WgpuDeviceError(#[from] wgpu::hal::DeviceError), + + #[error("wgpu request device error: {0}")] + WgpuRequestDeviceError(#[from] wgpu::RequestDeviceError), + + #[error("cannot create a wgpu adapter")] + WgpuAdapterNotCreated, + + #[error("Cannot find a suitable physical device")] + NoDevice, + + #[error("String conversion error: {0}")] + StringConversionError(#[from] std::ffi::FromBytesUntilNulError), +} + +pub struct VulkanCtx { + _entry: Arc, + _instance: Arc, + _physical_device: vk::PhysicalDevice, + pub(crate) device: Arc, + pub(crate) allocator: Arc, + pub(crate) queues: Queues, + _debug_messenger: Option, + pub(crate) video_capabilities: vk::VideoCapabilitiesKHR<'static>, + pub(crate) h264_dpb_format_properties: vk::VideoFormatPropertiesKHR<'static>, + pub(crate) h264_dst_format_properties: Option>, + pub wgpu_ctx: WgpuCtx, +} + +pub struct WgpuCtx { + pub instance: Arc, + pub adapter: Arc, + pub device: Arc, + pub queue: Arc, +} + +pub(crate) struct CommandPools { + pub(crate) _decode_pool: Arc, + pub(crate) _transfer_pool: Arc, +} + +pub(crate) struct Queue { + pub(crate) queue: std::sync::Mutex, + pub(crate) idx: usize, + _video_properties: vk::QueueFamilyVideoPropertiesKHR<'static>, + pub(crate) query_result_status_properties: + vk::QueueFamilyQueryResultStatusPropertiesKHR<'static>, +} + +impl Queue { + pub(crate) fn supports_result_status_queries(&self) -> bool { + self.query_result_status_properties + .query_result_status_support + == vk::TRUE + } +} + +pub(crate) struct Queues { + pub(crate) transfer: Queue, + pub(crate) h264_decode: Queue, + pub(crate) wgpu: Queue, +} + +impl VulkanCtx { + pub fn new( + wgpu_features: wgpu::Features, + wgpu_limits: wgpu::Limits, + ) -> Result { + let entry = Arc::new(unsafe { Entry::load()? }); + + let instance_extension_properties = + unsafe { entry.enumerate_instance_extension_properties(None)? 
}; + info!( + "instance_extension_properties amount: {}", + instance_extension_properties.len() + ); + + let api_version = vk::make_api_version(0, 1, 3, 0); + let app_info = vk::ApplicationInfo { + api_version, + ..Default::default() + }; + + let layers = if cfg!(debug_assertions) { + vec![c"VK_LAYER_KHRONOS_validation".as_ptr()] + } else { + Vec::new() + }; + + let extensions = if cfg!(debug_assertions) { + vec![vk::EXT_DEBUG_UTILS_NAME] + } else { + Vec::new() + }; + + let wgpu_extensions = wgpu::hal::vulkan::Instance::desired_extensions( + &entry, + api_version, + wgpu::InstanceFlags::empty(), + )?; + + let extensions = extensions + .into_iter() + .chain(wgpu_extensions) + .collect::>(); + + let extension_ptrs = extensions.iter().map(|e| e.as_ptr()).collect::>(); + + let create_info = vk::InstanceCreateInfo::default() + .application_info(&app_info) + .enabled_layer_names(&layers) + .enabled_extension_names(&extension_ptrs); + + let instance = unsafe { entry.create_instance(&create_info, None) }?; + let video_queue_instance_ext = ash::khr::video_queue::Instance::new(&entry, &instance); + let debug_utils_instance_ext = ash::ext::debug_utils::Instance::new(&entry, &instance); + + let instance = Arc::new(Instance { + instance, + _entry: entry.clone(), + video_queue_instance_ext, + debug_utils_instance_ext, + }); + + let debug_messenger = if cfg!(debug_assertions) { + Some(DebugMessenger::new(instance.clone())?) + } else { + None + }; + + let wgpu_instance = unsafe { + wgpu::hal::vulkan::Instance::from_raw( + (*entry).clone(), + instance.instance.clone(), + api_version, + 0, + None, + extensions, + wgpu::InstanceFlags::empty(), + false, + None, + )? + }; + + let physical_devices = unsafe { instance.enumerate_physical_devices()? 
}; + + let ChosenDevice { + physical_device, + queue_indices, + h264_dpb_format_properties, + h264_dst_format_properties, + video_capabilities, + } = find_device(&physical_devices, &instance, REQUIRED_EXTENSIONS)?; + + let wgpu_adapter = wgpu_instance + .expose_adapter(physical_device) + .ok_or(VulkanCtxError::WgpuAdapterNotCreated)?; + + let wgpu_features = wgpu_features | wgpu::Features::TEXTURE_FORMAT_NV12; + + // TODO: we can only get the required extensions after exposing the adapter; the creation + // of the adapter and verification of whether the device supports all extensions should + // happen while picking the device. + let wgpu_extensions = wgpu_adapter + .adapter + .required_device_extensions(wgpu_features); + + let required_extensions = REQUIRED_EXTENSIONS + .iter() + .copied() + .chain(wgpu_extensions) + .collect::>(); + + let required_extensions_as_ptrs = required_extensions + .iter() + .map(|e| e.as_ptr()) + .collect::>(); + + let queue_create_infos = queue_indices.queue_create_infos(); + + let mut wgpu_physical_device_features = wgpu_adapter + .adapter + .physical_device_features(&required_extensions, wgpu_features); + + let mut vk_synch_2_feature = + vk::PhysicalDeviceSynchronization2Features::default().synchronization2(true); + + let device_create_info = vk::DeviceCreateInfo::default() + .queue_create_infos(&queue_create_infos) + .enabled_extension_names(&required_extensions_as_ptrs); + + let device_create_info = wgpu_physical_device_features + .add_to_device_create(device_create_info) + .push_next(&mut vk_synch_2_feature); + + let device = unsafe { instance.create_device(physical_device, &device_create_info, None)? 
}; + let h264_decode_queue = + unsafe { device.get_device_queue(queue_indices.h264_decode.idx as u32, 0) }; + let transfer_queue = + unsafe { device.get_device_queue(queue_indices.transfer.idx as u32, 0) }; + let wgpu_queue = unsafe { + device.get_device_queue(queue_indices.graphics_transfer_compute.idx as u32, 0) + }; + let queues = Queues { + transfer: Queue { + queue: transfer_queue.into(), + idx: queue_indices.transfer.idx, + _video_properties: queue_indices.transfer.video_properties, + query_result_status_properties: queue_indices + .transfer + .query_result_status_properties, + }, + h264_decode: Queue { + queue: h264_decode_queue.into(), + idx: queue_indices.h264_decode.idx, + _video_properties: queue_indices.h264_decode.video_properties, + query_result_status_properties: queue_indices + .h264_decode + .query_result_status_properties, + }, + wgpu: Queue { + queue: wgpu_queue.into(), + idx: queue_indices.graphics_transfer_compute.idx, + _video_properties: queue_indices.graphics_transfer_compute.video_properties, + query_result_status_properties: queue_indices + .graphics_transfer_compute + .query_result_status_properties, + }, + }; + + let video_queue_ext = ash::khr::video_queue::Device::new(&instance, &device); + let video_decode_queue_ext = ash::khr::video_decode_queue::Device::new(&instance, &device); + + let device = Arc::new(Device { + device, + video_queue_ext, + video_decode_queue_ext, + _instance: instance.clone(), + }); + + let wgpu_device = unsafe { + wgpu_adapter.adapter.device_from_raw( + device.device.clone(), + false, + &required_extensions, + wgpu_features, + &wgpu::MemoryHints::default(), + queue_indices.graphics_transfer_compute.idx as u32, + 0, + )? 
+ }; + + let allocator = Arc::new(Allocator::new( + instance.clone(), + physical_device, + device.clone(), + )?); + + let wgpu_instance = + unsafe { wgpu::Instance::from_hal::(wgpu_instance) }; + let wgpu_adapter = unsafe { wgpu_instance.create_adapter_from_hal(wgpu_adapter) }; + let (wgpu_device, wgpu_queue) = unsafe { + wgpu_adapter.create_device_from_hal( + wgpu_device, + &wgpu::DeviceDescriptor { + label: Some("wgpu device created by the vulkan video decoder"), + memory_hints: wgpu::MemoryHints::default(), + required_limits: wgpu_limits, + required_features: wgpu_features, + }, + None, + )? + }; + + let wgpu_ctx = WgpuCtx { + instance: Arc::new(wgpu_instance), + adapter: Arc::new(wgpu_adapter), + device: Arc::new(wgpu_device), + queue: Arc::new(wgpu_queue), + }; + + Ok(Self { + _entry: entry, + _instance: instance, + _physical_device: physical_device, + device, + allocator, + queues, + _debug_messenger: debug_messenger, + video_capabilities, + h264_dpb_format_properties, + h264_dst_format_properties, + wgpu_ctx, + }) + } +} + +struct ChosenDevice<'a> { + physical_device: vk::PhysicalDevice, + queue_indices: QueueIndices<'a>, + h264_dpb_format_properties: vk::VideoFormatPropertiesKHR<'a>, + h264_dst_format_properties: Option>, + video_capabilities: vk::VideoCapabilitiesKHR<'a>, +} + +fn find_device<'a>( + devices: &[vk::PhysicalDevice], + instance: &Instance, + required_extension_names: &[&CStr], +) -> Result, VulkanCtxError> { + for &device in devices { + let properties = unsafe { instance.get_physical_device_properties(device) }; + + let mut vk_13_features = vk::PhysicalDeviceVulkan13Features::default(); + let mut features = vk::PhysicalDeviceFeatures2::default().push_next(&mut vk_13_features); + + unsafe { instance.get_physical_device_features2(device, &mut features) }; + let extensions = unsafe { instance.enumerate_device_extension_properties(device)? 
}; + + if vk_13_features.synchronization2 == 0 { + error!( + "device {:?} does not support the required synchronization2 feature", + properties.device_name_as_c_str()? + ); + } + + if !required_extension_names.iter().all(|&extension_name| { + extensions.iter().any(|ext| { + let Ok(name) = ext.extension_name_as_c_str() else { + return false; + }; + + if name != extension_name { + return false; + }; + + true + }) + }) { + error!( + "device {:?} does not support the required extensions", + properties.device_name_as_c_str()? + ); + continue; + } + + let queues_len = + unsafe { instance.get_physical_device_queue_family_properties2_len(device) }; + let mut queues = vec![vk::QueueFamilyProperties2::default(); queues_len]; + let mut video_properties = vec![vk::QueueFamilyVideoPropertiesKHR::default(); queues_len]; + let mut query_result_status_properties = + vec![vk::QueueFamilyQueryResultStatusPropertiesKHR::default(); queues_len]; + + for ((queue, video_properties), query_result_properties) in queues + .iter_mut() + .zip(video_properties.iter_mut()) + .zip(query_result_status_properties.iter_mut()) + { + *queue = queue + .push_next(video_properties) + .push_next(query_result_properties); + } + + unsafe { instance.get_physical_device_queue_family_properties2(device, &mut queues) }; + + let profile_info = H264ProfileInfo::decode_h264_yuv420(); + + let mut h264_caps = vk::VideoDecodeH264CapabilitiesKHR::default(); + let mut decode_caps = vk::VideoDecodeCapabilitiesKHR { + p_next: (&mut h264_caps as *mut _) as *mut c_void, // why does this not have `.push_next()`? wtf + ..Default::default() + }; + + let mut caps = vk::VideoCapabilitiesKHR::default().push_next(&mut decode_caps); + + unsafe { + (instance + .video_queue_instance_ext + .fp() + .get_physical_device_video_capabilities_khr)( + device, + &profile_info.profile_info, + &mut caps, + ) + .result()? 
+ }; + + let video_capabilities = vk::VideoCapabilitiesKHR::default() + .flags(caps.flags) + .min_bitstream_buffer_size_alignment(caps.min_bitstream_buffer_size_alignment) + .min_bitstream_buffer_offset_alignment(caps.min_bitstream_buffer_offset_alignment) + .picture_access_granularity(caps.picture_access_granularity) + .min_coded_extent(caps.min_coded_extent) + .max_coded_extent(caps.max_coded_extent) + .max_dpb_slots(caps.max_dpb_slots) + .max_active_reference_pictures(caps.max_active_reference_pictures) + .std_header_version(caps.std_header_version); + info!("caps: {caps:#?}"); + + let flags = decode_caps.flags; + + let h264_dpb_format_properties = + if flags.contains(vk::VideoDecodeCapabilityFlagsKHR::DPB_AND_OUTPUT_COINCIDE) { + query_video_format_properties( + device, + &instance.video_queue_instance_ext, + &profile_info, + vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + | vk::ImageUsageFlags::TRANSFER_SRC, + )? + } else { + query_video_format_properties( + device, + &instance.video_queue_instance_ext, + &profile_info, + vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR, + )? + }; + + let h264_dst_format_properties = + if flags.contains(vk::VideoDecodeCapabilityFlagsKHR::DPB_AND_OUTPUT_COINCIDE) { + None + } else { + Some(query_video_format_properties( + device, + &instance.video_queue_instance_ext, + &profile_info, + vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR | vk::ImageUsageFlags::TRANSFER_SRC, + )?) 
+ }; + + let h264_dpb_format_properties = + if flags.contains(vk::VideoDecodeCapabilityFlagsKHR::DPB_AND_OUTPUT_COINCIDE) { + match h264_dpb_format_properties + .into_iter() + .find(|f| f.format == vk::Format::G8_B8R8_2PLANE_420_UNORM) + { + Some(f) => f, + None => continue, + } + } else { + h264_dpb_format_properties[0] + }; + + let h264_dst_format_properties = match h264_dst_format_properties { + Some(format_properties) => match format_properties + .into_iter() + .find(|f| f.format == vk::Format::G8_B8R8_2PLANE_420_UNORM) + { + Some(f) => Some(f), + None => continue, + }, + None => None, + }; + + let video_queues = queues + .iter() + .enumerate() + .filter(|(_, q)| { + q.queue_family_properties + .queue_flags + .contains(vk::QueueFlags::VIDEO_DECODE_KHR) + }) + .map(|(i, _)| i) + .collect::>(); // TODO: have to split the queues + + let Some(transfer_queue_idx) = queues + .iter() + .enumerate() + .find(|(_, q)| { + q.queue_family_properties + .queue_flags + .contains(vk::QueueFlags::TRANSFER) + && !q + .queue_family_properties + .queue_flags + .intersects(vk::QueueFlags::GRAPHICS) + }) + .map(|(i, _)| i) + else { + continue; + }; + + let Some(graphics_transfer_compute_queue_idx) = queues + .iter() + .enumerate() + .find(|(_, q)| { + q.queue_family_properties.queue_flags.contains( + vk::QueueFlags::GRAPHICS | vk::QueueFlags::TRANSFER | vk::QueueFlags::COMPUTE, + ) + }) + .map(|(i, _)| i) + else { + continue; + }; + + let Some(decode_queue_idx) = video_queues.into_iter().find(|&i| { + video_properties[i] + .video_codec_operations + .contains(vk::VideoCodecOperationFlagsKHR::DECODE_H264) + }) else { + continue; + }; + + info!("deocde_caps: {decode_caps:#?}"); + info!("h264_caps: {h264_caps:#?}"); + info!("dpb_format_properties: {h264_dpb_format_properties:#?}"); + info!("dst_format_properties: {h264_dst_format_properties:#?}"); + + return Ok(ChosenDevice { + physical_device: device, + queue_indices: QueueIndices { + transfer: QueueIndex { + idx: transfer_queue_idx, + 
video_properties: video_properties[transfer_queue_idx], + query_result_status_properties: query_result_status_properties + [transfer_queue_idx], + }, + h264_decode: QueueIndex { + idx: decode_queue_idx, + video_properties: video_properties[decode_queue_idx], + query_result_status_properties: query_result_status_properties + [decode_queue_idx], + }, + graphics_transfer_compute: QueueIndex { + idx: graphics_transfer_compute_queue_idx, + video_properties: video_properties[graphics_transfer_compute_queue_idx], + query_result_status_properties: query_result_status_properties + [graphics_transfer_compute_queue_idx], + }, + }, + h264_dpb_format_properties, + h264_dst_format_properties, + video_capabilities, + }); + } + + Err(VulkanCtxError::NoDevice) +} + +fn query_video_format_properties<'a>( + device: vk::PhysicalDevice, + video_queue_instance_ext: &ash::khr::video_queue::Instance, + profile_info: &H264ProfileInfo, + image_usage: vk::ImageUsageFlags, +) -> Result>, VulkanCtxError> { + let mut profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(&profile_info.profile_info)); + + let format_info = vk::PhysicalDeviceVideoFormatInfoKHR::default() + .image_usage(image_usage) + .push_next(&mut profile_list_info); + + let mut format_info_length = 0; + + unsafe { + (video_queue_instance_ext + .fp() + .get_physical_device_video_format_properties_khr)( + device, + &format_info, + &mut format_info_length, + std::ptr::null_mut(), + ) + .result()?; + } + + let mut format_properties = + vec![vk::VideoFormatPropertiesKHR::default(); format_info_length as usize]; + + unsafe { + (video_queue_instance_ext + .fp() + .get_physical_device_video_format_properties_khr)( + device, + &format_info, + &mut format_info_length, + format_properties.as_mut_ptr(), + ) + .result()?; + } + + Ok(format_properties) +} + +struct QueueIndex<'a> { + idx: usize, + video_properties: vk::QueueFamilyVideoPropertiesKHR<'a>, + query_result_status_properties: 
vk::QueueFamilyQueryResultStatusPropertiesKHR<'a>, +} + +pub(crate) struct QueueIndices<'a> { + transfer: QueueIndex<'a>, + h264_decode: QueueIndex<'a>, + graphics_transfer_compute: QueueIndex<'a>, +} + +impl QueueIndices<'_> { + fn queue_create_infos(&self) -> Vec { + [ + self.h264_decode.idx, + self.transfer.idx, + self.graphics_transfer_compute.idx, + ] + .into_iter() + .collect::>() + .into_iter() + .map(|i| { + vk::DeviceQueueCreateInfo::default() + .queue_family_index(i as u32) + .queue_priorities(&[1.0]) + }) + .collect::>() + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers.rs b/vk-video/src/vulkan_decoder/wrappers.rs new file mode 100644 index 000000000..2d02da14c --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers.rs @@ -0,0 +1,59 @@ +use std::sync::Arc; + +use ash::Entry; + +mod command; +mod debug; +mod mem; +mod sync; +mod video; +mod vk_extensions; + +pub(crate) use command::*; +pub(crate) use debug::*; +pub(crate) use mem::*; +pub(crate) use sync::*; +pub(crate) use video::*; +pub(crate) use vk_extensions::*; + +pub(crate) struct Instance { + pub(crate) instance: ash::Instance, + pub(crate) _entry: Arc, + pub(crate) video_queue_instance_ext: ash::khr::video_queue::Instance, + pub(crate) debug_utils_instance_ext: ash::ext::debug_utils::Instance, +} + +impl Drop for Instance { + fn drop(&mut self) { + unsafe { self.destroy_instance(None) }; + } +} + +impl std::ops::Deref for Instance { + type Target = ash::Instance; + + fn deref(&self) -> &Self::Target { + &self.instance + } +} + +pub(crate) struct Device { + pub(crate) device: ash::Device, + pub(crate) video_queue_ext: ash::khr::video_queue::Device, + pub(crate) video_decode_queue_ext: ash::khr::video_decode_queue::Device, + pub(crate) _instance: Arc, +} + +impl std::ops::Deref for Device { + type Target = ash::Device; + + fn deref(&self) -> &Self::Target { + &self.device + } +} + +impl Drop for Device { + fn drop(&mut self) { + unsafe { self.destroy_device(None) }; + } +} diff --git 
// --- vk-video/src/vulkan_decoder/wrappers/command.rs ---

use std::sync::Arc;

use ash::vk;

use crate::vulkan_decoder::{VulkanCtxError, VulkanDecoderError};

use super::Device;

/// RAII wrapper around a [`vk::CommandPool`]; the pool is destroyed on drop,
/// so it must outlive every command buffer allocated from it.
pub(crate) struct CommandPool {
    pub(crate) command_pool: vk::CommandPool,
    device: Arc<Device>,
}

impl CommandPool {
    /// Creates a pool for `queue_family_index` with `RESET_COMMAND_BUFFER`, so
    /// individual buffers can be reset and reused.
    pub(crate) fn new(
        device: Arc<Device>,
        queue_family_index: usize,
    ) -> Result<Self, VulkanCtxError> {
        let create_info = vk::CommandPoolCreateInfo::default()
            .flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER)
            .queue_family_index(queue_family_index as u32);

        let command_pool = unsafe { device.create_command_pool(&create_info, None)? };

        Ok(Self {
            device,
            command_pool,
        })
    }
}

impl Drop for CommandPool {
    fn drop(&mut self) {
        unsafe {
            self.device.destroy_command_pool(self.command_pool, None);
        }
    }
}

impl std::ops::Deref for CommandPool {
    type Target = vk::CommandPool;

    fn deref(&self) -> &Self::Target {
        &self.command_pool
    }
}

/// A primary command buffer allocated from a [`CommandPool`]. Holds the pool
/// via `Arc`, so the buffer is freed together with the pool.
pub(crate) struct CommandBuffer {
    pool: Arc<CommandPool>,
    pub(crate) buffer: vk::CommandBuffer,
}

impl CommandBuffer {
    /// Allocates a single primary-level command buffer from `pool`.
    pub(crate) fn new_primary(pool: Arc<CommandPool>) -> Result<Self, VulkanDecoderError> {
        let allocate_info = vk::CommandBufferAllocateInfo::default()
            .command_pool(**pool)
            .level(vk::CommandBufferLevel::PRIMARY)
            .command_buffer_count(1);

        let buffer = unsafe { pool.device.allocate_command_buffers(&allocate_info)?[0] };

        Ok(Self { pool, buffer })
    }

    /// Submits this buffer to `queue` via `vkQueueSubmit2`, waiting on and
    /// signalling the given `(semaphore, stage)` pairs. `fence` (if any) is
    /// signalled when execution completes.
    pub(crate) fn submit(
        &self,
        queue: vk::Queue,
        wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)],
        signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)],
        fence: Option<vk::Fence>,
    ) -> Result<(), VulkanDecoderError> {
        // Converts `(semaphore, stage)` pairs into the submit-info structs
        // expected by synchronization2.
        fn to_sem_submit_info(
            submits: &[(vk::Semaphore, vk::PipelineStageFlags2)],
        ) -> Vec<vk::SemaphoreSubmitInfo> {
            submits
                .iter()
                .map(|&(sem, stage)| {
                    vk::SemaphoreSubmitInfo::default()
                        .semaphore(sem)
                        .stage_mask(stage)
                })
                .collect::<Vec<_>>()
        }

        let wait_semaphores = to_sem_submit_info(wait_semaphores);
        let signal_semaphores = to_sem_submit_info(signal_semaphores);

        let buffer_submit_info =
            [vk::CommandBufferSubmitInfo::default().command_buffer(self.buffer)];

        let submit_info = [vk::SubmitInfo2::default()
            .wait_semaphore_infos(&wait_semaphores)
            .signal_semaphore_infos(&signal_semaphores)
            .command_buffer_infos(&buffer_submit_info)];

        unsafe {
            self.device()
                .queue_submit2(queue, &submit_info, fence.unwrap_or(vk::Fence::null()))?
        };

        Ok(())
    }

    /// Begins recording with the `ONE_TIME_SUBMIT` usage flag.
    pub(crate) fn begin(&self) -> Result<(), VulkanDecoderError> {
        unsafe {
            self.device().begin_command_buffer(
                self.buffer,
                &vk::CommandBufferBeginInfo::default()
                    .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT),
            )?
        };
        Ok(())
    }

    /// Ends recording of this command buffer.
    pub(crate) fn end(&self) -> Result<(), VulkanDecoderError> {
        unsafe { self.device().end_command_buffer(self.buffer)? };

        Ok(())
    }

    fn device(&self) -> &Device {
        &self.pool.device
    }
}

impl std::ops::Deref for CommandBuffer {
    type Target = vk::CommandBuffer;

    fn deref(&self) -> &Self::Target {
        &self.buffer
    }
}

// --- vk-video/src/vulkan_decoder/wrappers/debug.rs ---

use std::{ffi::c_void, sync::Arc};

use ash::vk::{self, QueryType};
use tracing::{error, info, trace, warn};

use crate::vulkan_decoder::{VulkanCtxError, VulkanDecoderError};

use super::{Device, Instance};

/// RAII wrapper over a `VK_EXT_debug_utils` messenger; forwards validation
/// messages to `tracing` and destroys the messenger on drop.
pub(crate) struct DebugMessenger {
    messenger: vk::DebugUtilsMessengerEXT,
    instance: Arc<Instance>,
}

impl DebugMessenger {
    /// Registers a messenger listening to all severities and message types.
    pub(crate) fn new(instance: Arc<Instance>) -> Result<Self, VulkanCtxError> {
        let debug_messenger_create_info = vk::DebugUtilsMessengerCreateInfoEXT::default()
            .message_severity(
                vk::DebugUtilsMessageSeverityFlagsEXT::ERROR
                    | vk::DebugUtilsMessageSeverityFlagsEXT::WARNING
                    | vk::DebugUtilsMessageSeverityFlagsEXT::INFO
                    | vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE,
            )
            .message_type(
                vk::DebugUtilsMessageTypeFlagsEXT::GENERAL
                    | vk::DebugUtilsMessageTypeFlagsEXT::VALIDATION
                    | vk::DebugUtilsMessageTypeFlagsEXT::PERFORMANCE,
            )
            .pfn_user_callback(Some(debug_messenger_callback));

        let messenger = unsafe {
            instance
                .debug_utils_instance_ext
                .create_debug_utils_messenger(&debug_messenger_create_info, None)?
+ }; + + Ok(Self { + instance, + messenger, + }) + } +} + +impl Drop for DebugMessenger { + fn drop(&mut self) { + unsafe { + self.instance + .debug_utils_instance_ext + .destroy_debug_utils_messenger(self.messenger, None) + }; + } +} + +unsafe extern "system" fn debug_messenger_callback( + message_severity: vk::DebugUtilsMessageSeverityFlagsEXT, + message_types: vk::DebugUtilsMessageTypeFlagsEXT, + p_callback_data: *const vk::DebugUtilsMessengerCallbackDataEXT<'_>, + _p_user_data: *mut c_void, +) -> vk::Bool32 { + let callback_data = unsafe { *p_callback_data }; + let message_id = callback_data + .message_id_name_as_c_str() + .unwrap_or(c"") + .to_str() + .unwrap(); + let message = callback_data + .message_as_c_str() + .unwrap_or(c"") + .to_str() + .unwrap(); + let t = format!("{:?}", message_types); + match message_severity { + vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE => { + trace!("[{t}][{message_id}] {message}"); + } + + vk::DebugUtilsMessageSeverityFlagsEXT::INFO => { + info!("[{t}][{message_id}] {message}"); + } + + vk::DebugUtilsMessageSeverityFlagsEXT::WARNING => { + warn!("[{t}][{message_id}] {message}"); + } + + vk::DebugUtilsMessageSeverityFlagsEXT::ERROR => { + error!("[{t}][{message_id}] {message}"); + } + _ => {} + } + + vk::FALSE +} + +pub(crate) struct DecodeQueryPool { + pool: QueryPool, +} + +impl DecodeQueryPool { + pub(crate) fn new( + device: Arc, + profile: vk::VideoProfileInfoKHR, + ) -> Result { + let pool = QueryPool::new(device, QueryType::RESULT_STATUS_ONLY_KHR, 1, Some(profile))?; + Ok(Self { pool }) + } + + pub(crate) fn reset(&self, buffer: vk::CommandBuffer) { + unsafe { + self.pool + .device + .cmd_reset_query_pool(buffer, self.pool.pool, 0, 1) + }; + } + + // if we want to switch to inline queries we can use this, but we need to check how many + // implementations support them + pub(crate) fn _inline_query(&self) -> vk::VideoInlineQueryInfoKHR { + vk::VideoInlineQueryInfoKHR::default() + .query_pool(self.pool.pool) + 
.first_query(0) + .query_count(1) + } + + pub(crate) fn begin_query(&self, buffer: vk::CommandBuffer) { + unsafe { + self.pool.device.cmd_begin_query( + buffer, + self.pool.pool, + 0, + vk::QueryControlFlags::empty(), + ) + } + } + + pub(crate) fn end_query(&self, buffer: vk::CommandBuffer) { + unsafe { self.pool.device.cmd_end_query(buffer, self.pool.pool, 0) } + } + + pub(crate) fn get_result_blocking( + &self, + ) -> Result { + let mut result = vk::QueryResultStatusKHR::NOT_READY; + unsafe { + self.pool.device.get_query_pool_results( + self.pool.pool, + 0, + std::slice::from_mut(&mut result), + vk::QueryResultFlags::WAIT | vk::QueryResultFlags::WITH_STATUS_KHR, + )? + }; + + Ok(result) + } +} + +pub(crate) struct QueryPool { + pool: vk::QueryPool, + device: Arc, +} + +impl QueryPool { + pub(crate) fn new( + device: Arc, + ty: vk::QueryType, + count: u32, + mut p_next: Option, + ) -> Result { + let mut create_info = vk::QueryPoolCreateInfo::default() + .query_type(ty) + .query_count(count); + + if let Some(p_next) = p_next.as_mut() { + create_info = create_info.push_next(p_next) + } + let pool = unsafe { device.create_query_pool(&create_info, None)? 
}; + + Ok(Self { pool, device }) + } +} + +impl Drop for QueryPool { + fn drop(&mut self) { + unsafe { self.device.destroy_query_pool(self.pool, None) }; + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers/mem.rs b/vk-video/src/vulkan_decoder/wrappers/mem.rs new file mode 100644 index 000000000..37f8e83bf --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers/mem.rs @@ -0,0 +1,249 @@ +use std::sync::Arc; + +use ash::vk; +use vk_mem::Alloc; + +use crate::vulkan_decoder::{H264ProfileInfo, VulkanCtxError, VulkanDecoderError}; + +use super::{Device, Instance}; + +pub(crate) struct Allocator { + allocator: vk_mem::Allocator, + _instance: Arc, + _device: Arc, +} + +impl Allocator { + pub(crate) fn new( + instance: Arc, + physical_device: vk::PhysicalDevice, + device: Arc, + ) -> Result { + let mut allocator_create_info = + vk_mem::AllocatorCreateInfo::new(&instance, &device, physical_device); + allocator_create_info.vulkan_api_version = vk::API_VERSION_1_3; + + let allocator = unsafe { vk_mem::Allocator::new(allocator_create_info)? }; + + Ok(Self { + allocator, + _device: device, + _instance: instance, + }) + } +} + +impl std::ops::Deref for Allocator { + type Target = vk_mem::Allocator; + + fn deref(&self) -> &Self::Target { + &self.allocator + } +} + +pub(crate) struct MemoryAllocation { + pub(crate) allocation: vk_mem::Allocation, + allocator: Arc, +} + +impl MemoryAllocation { + pub(crate) fn new( + allocator: Arc, + memory_requirements: &vk::MemoryRequirements, + alloc_info: &vk_mem::AllocationCreateInfo, + ) -> Result { + let allocation = unsafe { allocator.allocate_memory(memory_requirements, alloc_info)? 
}; + + Ok(Self { + allocation, + allocator, + }) + } + + pub(crate) fn allocation_info(&self) -> vk_mem::AllocationInfo { + self.allocator.get_allocation_info(&self.allocation) + } +} + +impl std::ops::Deref for MemoryAllocation { + type Target = vk_mem::Allocation; + + fn deref(&self) -> &Self::Target { + &self.allocation + } +} + +impl Drop for MemoryAllocation { + fn drop(&mut self) { + unsafe { self.allocator.free_memory(&mut self.allocation) }; + } +} + +pub(crate) struct Buffer { + pub(crate) buffer: vk::Buffer, + pub(crate) allocation: vk_mem::Allocation, + allocator: Arc, +} + +#[derive(Debug, Clone, Copy)] +pub(crate) enum TransferDirection { + GpuToMem, +} + +impl Buffer { + pub(crate) fn new_decode( + allocator: Arc, + size: u64, + profile: &H264ProfileInfo, + ) -> Result { + let mut profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(&profile.profile_info)); + + let buffer_create_info = vk::BufferCreateInfo::default() + .size(size) + .usage(vk::BufferUsageFlags::VIDEO_DECODE_SRC_KHR) + .sharing_mode(vk::SharingMode::EXCLUSIVE) + .push_next(&mut profile_list_info); + + let allocation_create_info = vk_mem::AllocationCreateInfo { + usage: vk_mem::MemoryUsage::Auto, + required_flags: vk::MemoryPropertyFlags::HOST_COHERENT, + flags: vk_mem::AllocationCreateFlags::HOST_ACCESS_SEQUENTIAL_WRITE, + ..Default::default() + }; + + Self::new(allocator, buffer_create_info, allocation_create_info) + } + + pub(crate) fn new_transfer( + allocator: Arc, + size: u64, + direction: TransferDirection, + ) -> Result { + let usage = match direction { + TransferDirection::GpuToMem => vk::BufferUsageFlags::TRANSFER_DST, + }; + + let allocation_flags = match direction { + TransferDirection::GpuToMem => vk_mem::AllocationCreateFlags::HOST_ACCESS_RANDOM, + }; + + let buffer_create_info = vk::BufferCreateInfo::default() + .size(size) + .usage(usage) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + let allocation_create_info = 
vk_mem::AllocationCreateInfo { + usage: vk_mem::MemoryUsage::Auto, + required_flags: vk::MemoryPropertyFlags::HOST_COHERENT, + flags: allocation_flags, + ..Default::default() + }; + + Self::new(allocator, buffer_create_info, allocation_create_info) + } + + fn new( + allocator: Arc, + create_info: vk::BufferCreateInfo, + allocation_create_info: vk_mem::AllocationCreateInfo, + ) -> Result { + let (buffer, allocation) = + unsafe { allocator.create_buffer(&create_info, &allocation_create_info)? }; + + Ok(Self { + buffer, + allocation, + allocator, + }) + } +} + +impl Drop for Buffer { + fn drop(&mut self) { + unsafe { + self.allocator + .destroy_buffer(self.buffer, &mut self.allocation) + } + } +} + +impl std::ops::Deref for Buffer { + type Target = vk::Buffer; + + fn deref(&self) -> &Self::Target { + &self.buffer + } +} + +pub(crate) struct Image { + pub(crate) image: vk::Image, + allocation: vk_mem::Allocation, + allocator: Arc, +} + +impl Image { + pub(crate) fn new( + allocator: Arc, + image_create_info: &vk::ImageCreateInfo, + ) -> Result { + let alloc_info = vk_mem::AllocationCreateInfo { + usage: vk_mem::MemoryUsage::Auto, + ..Default::default() + }; + + let (image, allocation) = + unsafe { allocator.create_image(image_create_info, &alloc_info)? }; + + Ok(Image { + image, + allocation, + allocator, + }) + } +} + +impl std::ops::Deref for Image { + type Target = vk::Image; + + fn deref(&self) -> &Self::Target { + &self.image + } +} + +impl Drop for Image { + fn drop(&mut self) { + unsafe { + self.allocator + .destroy_image(self.image, &mut self.allocation) + }; + } +} + +pub(crate) struct ImageView { + pub(crate) view: vk::ImageView, + pub(crate) _image: Arc, + pub(crate) device: Arc, +} + +impl ImageView { + pub(crate) fn new( + device: Arc, + image: Arc, + create_info: &vk::ImageViewCreateInfo, + ) -> Result { + let view = unsafe { device.create_image_view(create_info, None)? 
}; + + Ok(ImageView { + view, + _image: image, + device: device.clone(), + }) + } +} + +impl Drop for ImageView { + fn drop(&mut self) { + unsafe { self.device.destroy_image_view(self.view, None) }; + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers/sync.rs b/vk-video/src/vulkan_decoder/wrappers/sync.rs new file mode 100644 index 000000000..b0a3061e2 --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers/sync.rs @@ -0,0 +1,85 @@ +use std::sync::Arc; + +use ash::vk; + +use crate::vulkan_decoder::VulkanDecoderError; + +use super::Device; + +pub(crate) struct Fence { + pub(crate) fence: vk::Fence, + device: Arc, +} + +impl Fence { + pub(crate) fn new(device: Arc, signaled: bool) -> Result { + let flags = if signaled { + vk::FenceCreateFlags::SIGNALED + } else { + vk::FenceCreateFlags::empty() + }; + let create_info = vk::FenceCreateInfo::default().flags(flags); + let fence = unsafe { device.create_fence(&create_info, None)? }; + + Ok(Self { device, fence }) + } + + pub(crate) fn wait(&self, timeout: u64) -> Result<(), VulkanDecoderError> { + unsafe { self.device.wait_for_fences(&[self.fence], true, timeout)? }; + Ok(()) + } + + pub(crate) fn reset(&self) -> Result<(), VulkanDecoderError> { + unsafe { self.device.reset_fences(&[self.fence])? }; + Ok(()) + } + + pub(crate) fn wait_and_reset(&self, timeout: u64) -> Result<(), VulkanDecoderError> { + self.wait(timeout)?; + self.reset()?; + + Ok(()) + } +} + +impl Drop for Fence { + fn drop(&mut self) { + unsafe { self.device.destroy_fence(self.fence, None) }; + } +} + +impl std::ops::Deref for Fence { + type Target = vk::Fence; + + fn deref(&self) -> &Self::Target { + &self.fence + } +} + +pub(crate) struct Semaphore { + pub(crate) semaphore: vk::Semaphore, + device: Arc, +} + +impl Semaphore { + pub(crate) fn new(device: Arc) -> Result { + let create_info = vk::SemaphoreCreateInfo::default(); + let semaphore = unsafe { device.create_semaphore(&create_info, None)? 
}; + + Ok(Self { device, semaphore }) + } +} + +impl Drop for Semaphore { + fn drop(&mut self) { + unsafe { self.device.destroy_semaphore(self.semaphore, None) }; + } +} + +impl std::ops::Deref for Semaphore { + type Target = vk::Semaphore; + + fn deref(&self) -> &Self::Target { + &self.semaphore + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers/video.rs b/vk-video/src/vulkan_decoder/wrappers/video.rs new file mode 100644 index 000000000..7dc745c1b --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers/video.rs @@ -0,0 +1,298 @@ +use std::{collections::HashMap, sync::Arc}; + +use ash::vk; +use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; + +use crate::{ + vulkan_decoder::{ + parameter_sets::{VkPictureParameterSet, VkSequenceParameterSet}, + VulkanDecoderError, + }, + VulkanCtx, +}; + +use super::{Device, MemoryAllocation, VideoQueueExt}; + +/// Since `VideoSessionParameters` can only add sps and pps values (inserting sps or pps with an +/// existing id is prohibited), this is an abstraction which provides the capability to replace an +/// existing sps or pps. 
+pub(crate) struct VideoSessionParametersManager { + pub(crate) parameters: VideoSessionParameters, + sps: HashMap, + pps: HashMap<(u8, u8), VkPictureParameterSet>, + device: Arc, + session: vk::VideoSessionKHR, +} + +impl VideoSessionParametersManager { + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + session: vk::VideoSessionKHR, + ) -> Result { + Ok(Self { + parameters: VideoSessionParameters::new( + vulkan_ctx.device.clone(), + session, + &[], + &[], + None, + )?, + sps: HashMap::new(), + pps: HashMap::new(), + device: vulkan_ctx.device.clone(), + session, + }) + } + + pub(crate) fn parameters(&self) -> vk::VideoSessionParametersKHR { + self.parameters.parameters + } + + pub(crate) fn change_session( + &mut self, + session: vk::VideoSessionKHR, + ) -> Result<(), VulkanDecoderError> { + if self.session == session { + return Ok(()); + } + self.session = session; + + let sps = self.sps.values().map(|sps| sps.sps).collect::>(); + let pps = self.pps.values().map(|pps| pps.pps).collect::>(); + + self.parameters = + VideoSessionParameters::new(self.device.clone(), session, &sps, &pps, None)?; + + Ok(()) + } + + // it is probably not optimal to insert sps and pps searately. this could be optimized, so that + // the insertion happens lazily when the parameters are bound to a session. + pub(crate) fn put_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { + let key = sps.seq_parameter_set_id.id(); + match self.sps.entry(key) { + std::collections::hash_map::Entry::Occupied(mut e) => { + e.insert(sps.try_into()?); + + self.parameters = VideoSessionParameters::new( + self.device.clone(), + self.session, + &[self.sps[&key].sps], + &[], + Some(&self.parameters), + )? 
+ } + std::collections::hash_map::Entry::Vacant(e) => { + e.insert(sps.try_into()?); + + self.parameters.add(&[self.sps[&key].sps], &[])?; + } + } + + Ok(()) + } + + pub(crate) fn put_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { + let key = (pps.seq_parameter_set_id.id(), pps.pic_parameter_set_id.id()); + match self.pps.entry(key) { + std::collections::hash_map::Entry::Occupied(mut e) => { + e.insert(pps.try_into()?); + + self.parameters = VideoSessionParameters::new( + self.device.clone(), + self.session, + &[], + &[self.pps[&key].pps], + Some(&self.parameters), + )?; + } + + std::collections::hash_map::Entry::Vacant(e) => { + e.insert(pps.try_into()?); + + self.parameters.add(&[], &[self.pps[&key].pps])?; + } + } + + Ok(()) + } +} + +pub(crate) struct VideoSessionParameters { + pub(crate) parameters: vk::VideoSessionParametersKHR, + update_sequence_count: u32, + device: Arc, +} + +impl VideoSessionParameters { + pub(crate) fn new( + device: Arc, + session: vk::VideoSessionKHR, + initial_sps: &[vk::native::StdVideoH264SequenceParameterSet], + initial_pps: &[vk::native::StdVideoH264PictureParameterSet], + template: Option<&Self>, + ) -> Result { + let parameters_add_info = vk::VideoDecodeH264SessionParametersAddInfoKHR::default() + .std_sp_ss(initial_sps) + .std_pp_ss(initial_pps); + + let mut h264_create_info = vk::VideoDecodeH264SessionParametersCreateInfoKHR::default() + .max_std_sps_count(32) + .max_std_pps_count(32) + .parameters_add_info(¶meters_add_info); + + let create_info = vk::VideoSessionParametersCreateInfoKHR::default() + .flags(vk::VideoSessionParametersCreateFlagsKHR::empty()) + .video_session_parameters_template( + template + .map(|t| t.parameters) + .unwrap_or_else(vk::VideoSessionParametersKHR::null), + ) + .video_session(session) + .push_next(&mut h264_create_info); + + let parameters = unsafe { + device + .video_queue_ext + .create_video_session_parameters_khr(&create_info, None)? 
+ }; + + Ok(Self { + parameters, + update_sequence_count: 0, + device: device.clone(), + }) + } + + pub(crate) fn add( + &mut self, + sps: &[vk::native::StdVideoH264SequenceParameterSet], + pps: &[vk::native::StdVideoH264PictureParameterSet], + ) -> Result<(), VulkanDecoderError> { + let mut parameters_add_info = vk::VideoDecodeH264SessionParametersAddInfoKHR::default() + .std_sp_ss(sps) + .std_pp_ss(pps); + + self.update_sequence_count += 1; + let update_info = vk::VideoSessionParametersUpdateInfoKHR::default() + .update_sequence_count(self.update_sequence_count) + .push_next(&mut parameters_add_info); + + unsafe { + self.device + .video_queue_ext + .update_video_session_parameters_khr(self.parameters, &update_info)? + }; + + Ok(()) + } +} + +impl Drop for VideoSessionParameters { + fn drop(&mut self) { + unsafe { + self.device + .video_queue_ext + .destroy_video_session_parameters_khr(self.parameters, None) + } + } +} + +pub(crate) struct VideoSession { + pub(crate) session: vk::VideoSessionKHR, + pub(crate) device: Arc, + pub(crate) _allocations: Vec, + pub(crate) max_coded_extent: vk::Extent2D, + pub(crate) max_dpb_slots: u32, +} + +impl VideoSession { + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + profile_info: &vk::VideoProfileInfoKHR, + max_coded_extent: vk::Extent2D, + max_dpb_slots: u32, + max_active_references: u32, + std_header_version: &vk::ExtensionProperties, + ) -> Result { + // TODO: this probably works, but this format needs to be detected and set + // based on what the GPU supports + let format = vk::Format::G8_B8R8_2PLANE_420_UNORM; + + let session_create_info = vk::VideoSessionCreateInfoKHR::default() + .queue_family_index(vulkan_ctx.queues.h264_decode.idx as u32) + .video_profile(profile_info) + .picture_format(format) + .max_coded_extent(max_coded_extent) + .reference_picture_format(format) + .max_dpb_slots(max_dpb_slots) + .max_active_reference_pictures(max_active_references) + .std_header_version(std_header_version); + + let video_session 
= unsafe { + vulkan_ctx + .device + .video_queue_ext + .create_video_session_khr(&session_create_info, None)? + }; + + let memory_requirements = unsafe { + vulkan_ctx + .device + .video_queue_ext + .get_video_session_memory_requirements_khr(video_session)? + }; + + let allocations = memory_requirements + .iter() + .map(|req| { + MemoryAllocation::new( + vulkan_ctx.allocator.clone(), + &req.memory_requirements, + &vk_mem::AllocationCreateInfo { + usage: vk_mem::MemoryUsage::Unknown, + ..Default::default() + }, + ) + }) + .collect::, _>>()?; + + let memory_bind_infos = memory_requirements + .into_iter() + .zip(allocations.iter()) + .map(|(req, allocation)| { + let allocation_info = allocation.allocation_info(); + vk::BindVideoSessionMemoryInfoKHR::default() + .memory_bind_index(req.memory_bind_index) + .memory(allocation_info.device_memory) + .memory_offset(allocation_info.offset) + .memory_size(allocation_info.size) + }) + .collect::>(); + + unsafe { + vulkan_ctx + .device + .video_queue_ext + .bind_video_session_memory_khr(video_session, &memory_bind_infos)? 
+ }; + + Ok(VideoSession { + session: video_session, + _allocations: allocations, + device: vulkan_ctx.device.clone(), + max_coded_extent, + max_dpb_slots, + }) + } +} + +impl Drop for VideoSession { + fn drop(&mut self) { + unsafe { + self.device + .video_queue_ext + .destroy_video_session_khr(self.session, None) + }; + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs b/vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs new file mode 100644 index 000000000..8384443e3 --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs @@ -0,0 +1,228 @@ +use ash::{prelude::VkResult, vk, RawPtr}; + +pub(crate) trait VideoQueueExt { + unsafe fn cmd_begin_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + begin_info: &vk::VideoBeginCodingInfoKHR, + ); + + unsafe fn cmd_end_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + end_info: &vk::VideoEndCodingInfoKHR, + ); + + unsafe fn cmd_control_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + control_info: &vk::VideoCodingControlInfoKHR, + ); + + unsafe fn get_video_session_memory_requirements_khr( + &self, + video_session: vk::VideoSessionKHR, + ) -> VkResult>; + + unsafe fn create_video_session_khr( + &self, + create_info: &vk::VideoSessionCreateInfoKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) -> VkResult; + + unsafe fn bind_video_session_memory_khr( + &self, + video_session: vk::VideoSessionKHR, + memory_bind_infos: &[vk::BindVideoSessionMemoryInfoKHR], + ) -> VkResult<()>; + + unsafe fn destroy_video_session_khr( + &self, + video_session: vk::VideoSessionKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ); + + unsafe fn create_video_session_parameters_khr( + &self, + create_info: &vk::VideoSessionParametersCreateInfoKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) -> VkResult; + + unsafe fn destroy_video_session_parameters_khr( + &self, + parameters: vk::VideoSessionParametersKHR, + 
allocation_callbacks: Option<&vk::AllocationCallbacks>, + ); + + unsafe fn update_video_session_parameters_khr( + &self, + parameters: vk::VideoSessionParametersKHR, + update_info: &vk::VideoSessionParametersUpdateInfoKHR, + ) -> VkResult<()>; +} + +impl VideoQueueExt for ash::khr::video_queue::Device { + unsafe fn cmd_begin_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + begin_info: &vk::VideoBeginCodingInfoKHR, + ) { + unsafe { (self.fp().cmd_begin_video_coding_khr)(command_buffer, begin_info) } + } + + unsafe fn cmd_end_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + end_info: &vk::VideoEndCodingInfoKHR, + ) { + unsafe { (self.fp().cmd_end_video_coding_khr)(command_buffer, end_info) } + } + + unsafe fn cmd_control_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + control_info: &vk::VideoCodingControlInfoKHR, + ) { + unsafe { (self.fp().cmd_control_video_coding_khr)(command_buffer, control_info) } + } + + unsafe fn get_video_session_memory_requirements_khr( + &self, + video_session: vk::VideoSessionKHR, + ) -> VkResult> { + let mut memory_requirements_len = 0; + unsafe { + (self.fp().get_video_session_memory_requirements_khr)( + self.device(), + video_session, + &mut memory_requirements_len, + std::ptr::null_mut(), + ) + .result()?; + } + + let mut memory_requirements = vec![ + vk::VideoSessionMemoryRequirementsKHR::default(); + memory_requirements_len as usize + ]; + + unsafe { + (self.fp().get_video_session_memory_requirements_khr)( + self.device(), + video_session, + &mut memory_requirements_len, + memory_requirements.as_mut_ptr(), + ) + .result_with_success(memory_requirements) + } + } + + unsafe fn create_video_session_khr( + &self, + create_info: &vk::VideoSessionCreateInfoKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) -> VkResult { + let mut video_session = vk::VideoSessionKHR::default(); + + unsafe { + (self.fp().create_video_session_khr)( + self.device(), + create_info, + 
allocation_callbacks.as_raw_ptr(), + &mut video_session, + ) + .result_with_success(video_session) + } + } + + unsafe fn bind_video_session_memory_khr( + &self, + video_session: vk::VideoSessionKHR, + memory_bind_infos: &[vk::BindVideoSessionMemoryInfoKHR], + ) -> VkResult<()> { + unsafe { + (self.fp().bind_video_session_memory_khr)( + self.device(), + video_session, + memory_bind_infos.len() as u32, + memory_bind_infos.as_ptr(), + ) + .result() + } + } + + unsafe fn destroy_video_session_khr( + &self, + video_session: vk::VideoSessionKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) { + unsafe { + (self.fp().destroy_video_session_khr)( + self.device(), + video_session, + allocation_callbacks.as_raw_ptr(), + ) + } + } + + unsafe fn create_video_session_parameters_khr( + &self, + create_info: &vk::VideoSessionParametersCreateInfoKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) -> VkResult { + let mut parameters = vk::VideoSessionParametersKHR::default(); + + unsafe { + (self.fp().create_video_session_parameters_khr)( + self.device(), + create_info, + allocation_callbacks.as_raw_ptr(), + &mut parameters, + ) + .result_with_success(parameters) + } + } + + unsafe fn destroy_video_session_parameters_khr( + &self, + parameters: vk::VideoSessionParametersKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) { + unsafe { + (self.fp().destroy_video_session_parameters_khr)( + self.device(), + parameters, + allocation_callbacks.as_raw_ptr(), + ) + } + } + + unsafe fn update_video_session_parameters_khr( + &self, + parameters: vk::VideoSessionParametersKHR, + update_info: &vk::VideoSessionParametersUpdateInfoKHR, + ) -> VkResult<()> { + unsafe { + (self.fp().update_video_session_parameters_khr)(self.device(), parameters, update_info) + .result() + } + } +} + +pub(crate) trait VideoDecodeQueueExt { + unsafe fn cmd_decode_video_khr( + &self, + command_buffer: vk::CommandBuffer, + decode_info: &vk::VideoDecodeInfoKHR, + ); +} + 
+impl VideoDecodeQueueExt for ash::khr::video_decode_queue::Device { + unsafe fn cmd_decode_video_khr( + &self, + command_buffer: vk::CommandBuffer, + decode_info: &vk::VideoDecodeInfoKHR, + ) { + unsafe { (self.fp().cmd_decode_video_khr)(command_buffer, decode_info) } + } +} From 579ca0c1deb85048b918067578025f54015a49cc Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Wed, 25 Sep 2024 14:12:51 +0200 Subject: [PATCH 02/13] Improve the API so that it is similar to what we had before --- compositor_pipeline/src/error.rs | 3 + compositor_pipeline/src/pipeline.rs | 73 ++++++++++++++----- .../src/pipeline/decoder/video.rs | 6 +- .../examples/raw_channel_input.rs | 13 +--- .../examples/raw_channel_output.rs | 11 +-- 5 files changed, 66 insertions(+), 40 deletions(-) diff --git a/compositor_pipeline/src/error.rs b/compositor_pipeline/src/error.rs index 591f4fa96..6a18be2f2 100644 --- a/compositor_pipeline/src/error.rs +++ b/compositor_pipeline/src/error.rs @@ -136,6 +136,9 @@ pub enum InputInitError { #[cfg(target_os = "linux")] #[error(transparent)] VulkanDecoderError(#[from] vk_video::DecoderError), + + #[error("Vulkan context is not available. 
Cannot create a vulkan video decoder")] + VulkanContextRequiredForVulkanDecoder, } pub enum ErrorType { diff --git a/compositor_pipeline/src/pipeline.rs b/compositor_pipeline/src/pipeline.rs index da570e7de..2e07e7370 100644 --- a/compositor_pipeline/src/pipeline.rs +++ b/compositor_pipeline/src/pipeline.rs @@ -110,27 +110,61 @@ pub struct Pipeline { is_started: bool, } -pub struct PreinitializedContext { +pub struct GraphicsContext { pub device: Arc, pub queue: Arc, #[cfg(target_os = "linux")] - pub vulkan_ctx: Arc, + pub vulkan_ctx: Option>, } -impl PreinitializedContext { +impl GraphicsContext { #[cfg(target_os = "linux")] - pub fn new(features: wgpu::Features, limits: wgpu::Limits) -> Result { - let vulkan_ctx = Arc::new(vk_video::VulkanCtx::new(features, limits)?); - Ok(PreinitializedContext { - device: vulkan_ctx.wgpu_ctx.device.clone(), - queue: vulkan_ctx.wgpu_ctx.queue.clone(), - vulkan_ctx, - }) + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + use compositor_render::{create_wgpu_ctx, error::InitRendererEngineError}; + + let vulkan_features = features + | wgpu::Features::TEXTURE_BINDING_ARRAY + | wgpu::Features::PUSH_CONSTANTS + | wgpu::Features::TEXTURE_FORMAT_NV12; + + let limits = if limits.max_push_constant_size < 128 { + wgpu::Limits { + max_push_constant_size: 128, + ..limits + } + } else { + limits + }; + + match vk_video::VulkanCtx::new(vulkan_features, limits) { + Ok(ctx) => Ok(GraphicsContext { + device: ctx.wgpu_ctx.device.clone(), + queue: ctx.wgpu_ctx.queue.clone(), + vulkan_ctx: Some(ctx.into()), + }), + + Err(err) => { + info!("Cannot initialize vulkan video decoding context. Reason: {err}. 
Initializing without vulkan video support."); + + let (device, queue) = create_wgpu_ctx(force_gpu, features) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { + device, + queue, + vulkan_ctx: None, + }) + } + } } } -impl std::fmt::Debug for PreinitializedContext { +impl std::fmt::Debug for GraphicsContext { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("PreinitializedContext") .field("device", &self.device) @@ -149,7 +183,7 @@ pub struct Options { pub output_sample_rate: u32, pub wgpu_features: WgpuFeatures, pub load_system_fonts: Option, - pub wgpu_ctx: Option, + pub wgpu_ctx: Option, } #[derive(Clone)] @@ -159,7 +193,7 @@ pub struct PipelineCtx { pub download_dir: Arc, pub event_emitter: Arc, #[cfg(target_os = "linux")] - pub vulkan_ctx: Arc, + pub vulkan_ctx: Option>, } impl std::fmt::Debug for PipelineCtx { @@ -179,10 +213,11 @@ impl Pipeline { Some(ctx) => Some(ctx), None => { if cfg!(target_os = "linux") { - Some(PreinitializedContext::new(opts.wgpu_features | wgpu::Features::PUSH_CONSTANTS | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, wgpu::Limits { - max_push_constant_size: 128, - ..Default::default() - })?) + Some(GraphicsContext::new( + opts.force_gpu, + opts.wgpu_features, + Default::default(), + )?) 
} else { None } @@ -222,9 +257,7 @@ impl Pipeline { download_dir: download_dir.into(), event_emitter, #[cfg(target_os = "linux")] - vulkan_ctx: preinitialized_ctx - .map(|ctx| ctx.vulkan_ctx) - .expect("This should not fail on linux"), + vulkan_ctx: preinitialized_ctx.and_then(|ctx| ctx.vulkan_ctx), }, }; diff --git a/compositor_pipeline/src/pipeline/decoder/video.rs b/compositor_pipeline/src/pipeline/decoder/video.rs index 2a817461c..fad0da153 100644 --- a/compositor_pipeline/src/pipeline/decoder/video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video.rs @@ -25,8 +25,12 @@ pub fn start_video_decoder_thread( } (VideoCodec::H264, VideoDecoder::VulkanVideo) => { + let Some(vulkan_ctx) = pipeline_ctx.vulkan_ctx.as_ref().map(|ctx| ctx.clone()) else { + return Err(InputInitError::VulkanContextRequiredForVulkanDecoder); + }; + vulkan_video::start_vulkan_video_decoder_thread( - pipeline_ctx.vulkan_ctx.clone(), + vulkan_ctx, chunks_receiver, frame_sender, input_id, diff --git a/integration_tests/examples/raw_channel_input.rs b/integration_tests/examples/raw_channel_input.rs index 5bb660079..9a4272098 100644 --- a/integration_tests/examples/raw_channel_input.rs +++ b/integration_tests/examples/raw_channel_input.rs @@ -17,8 +17,8 @@ use compositor_pipeline::{ OutputOptions, OutputProtocolOptions, }, rtp::RequestedPort, - Options, Pipeline, PipelineOutputEndCondition, PreinitializedContext, - RegisterOutputOptions, VideoCodec, + GraphicsContext, Options, Pipeline, PipelineOutputEndCondition, RegisterOutputOptions, + VideoCodec, }, queue::{PipelineEvent, QueueInputOptions}, }; @@ -44,14 +44,7 @@ fn main() { level: "info,wgpu_hal=warn,wgpu_core=warn".to_string(), }); let config = read_config(); - let ctx = PreinitializedContext::new( - wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, - wgpu::Limits { - max_push_constant_size: 128, - ..Default::default() - }, - ) - .unwrap(); + let ctx = GraphicsContext::new(false, Default::default(), 
Default::default()).unwrap(); let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { diff --git a/integration_tests/examples/raw_channel_output.rs b/integration_tests/examples/raw_channel_output.rs index 1fa196f5f..8d18ca3b3 100644 --- a/integration_tests/examples/raw_channel_output.rs +++ b/integration_tests/examples/raw_channel_output.rs @@ -16,7 +16,7 @@ use compositor_pipeline::{ InputOptions, }, output::{RawAudioOptions, RawDataOutputOptions, RawVideoOptions}, - Options, PipelineOutputEndCondition, PreinitializedContext, RawDataReceiver, + GraphicsContext, Options, PipelineOutputEndCondition, RawDataReceiver, RegisterInputOptions, RegisterOutputOptions, }, queue::{PipelineEvent, QueueInputOptions}, @@ -57,14 +57,7 @@ fn main() { }); let mut config = read_config(); config.queue_options.ahead_of_time_processing = true; - let ctx = PreinitializedContext::new( - wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, - wgpu::Limits { - max_push_constant_size: 128, - ..Default::default() - }, - ) - .unwrap(); + let ctx = GraphicsContext::new(false, Default::default(), Default::default()).unwrap(); let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { From 47fb257e2e5c7c256c61c7f896aa7cc62374bba8 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Fri, 27 Sep 2024 11:12:30 +0200 Subject: [PATCH 03/13] Add proper handling for compilation on macos. 
--- Cargo.toml | 3 + compositor_api/Cargo.toml | 1 + .../src/types/from_register_input.rs | 11 +++- compositor_pipeline/Cargo.toml | 5 +- compositor_pipeline/src/error.rs | 5 +- compositor_pipeline/src/pipeline.rs | 61 ++++++++++--------- .../src/pipeline/decoder/video.rs | 17 +++--- .../src/pipeline/decoder/video/ffmpeg_h264.rs | 6 +- .../pipeline/decoder/video/vulkan_video.rs | 8 ++- compositor_pipeline/src/pipeline/types.rs | 1 + compositor_render/src/lib.rs | 2 +- compositor_render/src/wgpu.rs | 2 +- compositor_render/src/wgpu/ctx.rs | 15 +++-- src/snapshot_tests/utils.rs | 2 +- vk-video/examples/basic.rs | 24 +++++++- vk-video/examples/wgpu.rs | 33 +++++++++- vk-video/src/lib.rs | 1 + 17 files changed, 138 insertions(+), 59 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d6710df22..bea6c7e76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,6 +102,9 @@ http-body-util = "0.1.2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] shared_memory = { workspace = true } +[target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] +compositor_api = { workspace = true, features = ["vk-video"] } + [[bin]] name = "process_helper" path = "src/bin/process_helper/main.rs" diff --git a/compositor_api/Cargo.toml b/compositor_api/Cargo.toml index 5a8911708..69af23089 100644 --- a/compositor_api/Cargo.toml +++ b/compositor_api/Cargo.toml @@ -7,6 +7,7 @@ license = "BUSL-1.1" [features] decklink = ["compositor_pipeline/decklink"] web_renderer = ["compositor_render/web_renderer"] +vk-video = ["compositor_pipeline/vk-video"] [dependencies] compositor_render = { workspace = true } diff --git a/compositor_api/src/types/from_register_input.rs b/compositor_api/src/types/from_register_input.rs index c3daf2995..e3d705a2b 100644 --- a/compositor_api/src/types/from_register_input.rs +++ b/compositor_api/src/types/from_register_input.rs @@ -102,18 +102,23 @@ impl TryFrom for pipeline::RegisterInputOptions 
{ } let rtp_stream = input::rtp::RtpStream { - video: video.as_ref().map(|video| input::rtp::InputVideoStream { + video: video.as_ref().map(|video| Ok(input::rtp::InputVideoStream { options: match video { InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { codec: pipeline::VideoCodec::H264, decoder: pipeline::VideoDecoder::FFmpegH264, }, + #[cfg(feature = "vk-video")] InputRtpVideoOptions::VulkanVideo => decoder::VideoDecoderOptions { decoder: pipeline::VideoDecoder::VulkanVideo, codec: pipeline::VideoCodec::H264, }, - }, - }), + #[cfg(not(feature = "vk-video"))] + InputRtpVideoOptions::VulkanVideo => return Err(TypeError::new( + "This Live Compositor binary was build without Vulkan Video support. Rebuilt it on a platform which supports Vulkan Video." + )), + } + })).transpose()?, audio: audio.map(TryFrom::try_from).transpose()?, }; diff --git a/compositor_pipeline/Cargo.toml b/compositor_pipeline/Cargo.toml index b7f6c193c..199f9e224 100644 --- a/compositor_pipeline/Cargo.toml +++ b/compositor_pipeline/Cargo.toml @@ -7,6 +7,7 @@ license = "BUSL-1.1" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] decklink = ["dep:decklink"] +vk-video = ["dep:vk-video"] [dependencies] compositor_render = { workspace = true } @@ -27,9 +28,11 @@ reqwest = { workspace = true } tracing = { workspace = true } fdk-aac-sys = "0.5.0" rubato = "0.15.0" -vk-video = { workspace = true } wgpu = { workspace = true } glyphon = { workspace = true } [target.x86_64-unknown-linux-gnu.dependencies] decklink = { path = "../decklink", optional = true } + +[target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] +vk-video = { path = "../vk-video/", optional = true } diff --git a/compositor_pipeline/src/error.rs b/compositor_pipeline/src/error.rs index 6a18be2f2..03dfcace6 100644 --- a/compositor_pipeline/src/error.rs +++ 
b/compositor_pipeline/src/error.rs @@ -17,6 +17,7 @@ pub enum InitPipelineError { #[error("Failed to create a download directory.")] CreateDownloadDir(#[source] std::io::Error), + #[cfg(feature = "vk-video")] #[error(transparent)] VulkanCtxError(#[from] vk_video::VulkanCtxError), } @@ -133,11 +134,11 @@ pub enum InputInitError { #[error("Couldn't read decoder init result.")] CannotReadInitResult, - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] #[error(transparent)] VulkanDecoderError(#[from] vk_video::DecoderError), - #[error("Vulkan context is not available. Cannot create a vulkan video decoder")] + #[error("Pipeline couldn't detect a vulkan video compatible device when it was being initialized. Cannot create a vulkan video decoder")] VulkanContextRequiredForVulkanDecoder, } diff --git a/compositor_pipeline/src/pipeline.rs b/compositor_pipeline/src/pipeline.rs index 2e07e7370..d3e4e58d8 100644 --- a/compositor_pipeline/src/pipeline.rs +++ b/compositor_pipeline/src/pipeline.rs @@ -63,6 +63,7 @@ pub use self::types::{ AudioCodec, EncodedChunk, EncodedChunkKind, EncoderOutputEvent, RawDataReceiver, VideoCodec, VideoDecoder, }; +use compositor_render::{create_wgpu_ctx, error::InitRendererEngineError}; pub use pipeline_output::PipelineOutputEndCondition; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -114,34 +115,25 @@ pub struct GraphicsContext { pub device: Arc, pub queue: Arc, - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] pub vulkan_ctx: Option>, } impl GraphicsContext { - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] pub fn new( force_gpu: bool, features: wgpu::Features, limits: wgpu::Limits, ) -> Result { - use compositor_render::{create_wgpu_ctx, error::InitRendererEngineError}; + use compositor_render::{required_wgpu_features, set_required_wgpu_limits}; - let vulkan_features = features - | wgpu::Features::TEXTURE_BINDING_ARRAY - | wgpu::Features::PUSH_CONSTANTS - | wgpu::Features::TEXTURE_FORMAT_NV12; + let 
vulkan_features = + features | required_wgpu_features() | wgpu::Features::TEXTURE_FORMAT_NV12; - let limits = if limits.max_push_constant_size < 128 { - wgpu::Limits { - max_push_constant_size: 128, - ..limits - } - } else { - limits - }; + let limits = set_required_wgpu_limits(limits); - match vk_video::VulkanCtx::new(vulkan_features, limits) { + match vk_video::VulkanCtx::new(vulkan_features, limits.clone()) { Ok(ctx) => Ok(GraphicsContext { device: ctx.wgpu_ctx.device.clone(), queue: ctx.wgpu_ctx.queue.clone(), @@ -151,7 +143,7 @@ impl GraphicsContext { Err(err) => { info!("Cannot initialize vulkan video decoding context. Reason: {err}. Initializing without vulkan video support."); - let (device, queue) = create_wgpu_ctx(force_gpu, features) + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; Ok(GraphicsContext { @@ -162,6 +154,18 @@ impl GraphicsContext { } } } + + #[cfg(not(feature = "vk-video"))] + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { device, queue }) + } } impl std::fmt::Debug for GraphicsContext { @@ -192,7 +196,7 @@ pub struct PipelineCtx { pub output_framerate: Framerate, pub download_dir: Arc, pub event_emitter: Arc, - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] pub vulkan_ctx: Option>, } @@ -211,17 +215,14 @@ impl Pipeline { pub fn new(opts: Options) -> Result<(Self, Arc), InitPipelineError> { let preinitialized_ctx = match opts.wgpu_ctx { Some(ctx) => Some(ctx), - None => { - if cfg!(target_os = "linux") { - Some(GraphicsContext::new( - opts.force_gpu, - opts.wgpu_features, - Default::default(), - )?) 
- } else { - None - } - } + #[cfg(feature = "vk-video")] + None => Some(GraphicsContext::new( + opts.force_gpu, + opts.wgpu_features, + Default::default(), + )?), + #[cfg(not(feature = "vk-video"))] + None => None, }; let wgpu_ctx = preinitialized_ctx @@ -256,7 +257,7 @@ impl Pipeline { output_framerate: opts.queue_options.output_framerate, download_dir: download_dir.into(), event_emitter, - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] vulkan_ctx: preinitialized_ctx.and_then(|ctx| ctx.vulkan_ctx), }, }; diff --git a/compositor_pipeline/src/pipeline/decoder/video.rs b/compositor_pipeline/src/pipeline/decoder/video.rs index fad0da153..5129ee06d 100644 --- a/compositor_pipeline/src/pipeline/decoder/video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video.rs @@ -10,6 +10,7 @@ use crate::{ use super::VideoDecoderOptions; mod ffmpeg_h264; +#[cfg(feature = "vk-video")] mod vulkan_video; pub fn start_video_decoder_thread( @@ -20,17 +21,17 @@ pub fn start_video_decoder_thread( input_id: InputId, ) -> Result<(), InputInitError> { match (options.codec, options.decoder) { - (VideoCodec::H264, VideoDecoder::FFmpegH264) => { - ffmpeg_h264::start_ffmpeg_decoder_thread(chunks_receiver, frame_sender, input_id) - } + (VideoCodec::H264, VideoDecoder::FFmpegH264) => ffmpeg_h264::start_ffmpeg_decoder_thread( + pipeline_ctx, + chunks_receiver, + frame_sender, + input_id, + ), + #[cfg(feature = "vk-video")] (VideoCodec::H264, VideoDecoder::VulkanVideo) => { - let Some(vulkan_ctx) = pipeline_ctx.vulkan_ctx.as_ref().map(|ctx| ctx.clone()) else { - return Err(InputInitError::VulkanContextRequiredForVulkanDecoder); - }; - vulkan_video::start_vulkan_video_decoder_thread( - vulkan_ctx, + pipeline_ctx, chunks_receiver, frame_sender, input_id, diff --git a/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs b/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs index 04df9e654..8f6c2cdd1 100644 --- 
a/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs +++ b/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs @@ -2,7 +2,10 @@ use std::time::Duration; use crate::{ error::InputInitError, - pipeline::types::{EncodedChunk, EncodedChunkKind, VideoCodec}, + pipeline::{ + types::{EncodedChunk, EncodedChunkKind, VideoCodec}, + PipelineCtx, + }, queue::PipelineEvent, }; @@ -18,6 +21,7 @@ use ffmpeg_next::{ use tracing::{debug, error, span, trace, warn, Level}; pub fn start_ffmpeg_decoder_thread( + _pipeline_ctx: &PipelineCtx, chunks_receiver: Receiver>, frame_sender: Sender>, input_id: InputId, diff --git a/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs index 241cdf929..8e2363412 100644 --- a/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs @@ -7,16 +7,20 @@ use vk_video::{Decoder, VulkanCtx}; use crate::{ error::InputInitError, - pipeline::{EncodedChunk, EncodedChunkKind, VideoCodec}, + pipeline::{EncodedChunk, EncodedChunkKind, PipelineCtx, VideoCodec}, queue::PipelineEvent, }; pub fn start_vulkan_video_decoder_thread( - vulkan_ctx: Arc, + pipeline_ctx: &PipelineCtx, chunks_receiver: Receiver>, frame_sender: Sender>, input_id: InputId, ) -> Result<(), InputInitError> { + let Some(vulkan_ctx) = pipeline_ctx.vulkan_ctx.as_ref().map(|ctx| ctx.clone()) else { + return Err(InputInitError::VulkanContextRequiredForVulkanDecoder); + }; + let (init_result_sender, init_result_receiver) = crossbeam_channel::bounded(0); std::thread::Builder::new() diff --git a/compositor_pipeline/src/pipeline/types.rs b/compositor_pipeline/src/pipeline/types.rs index 6f028d402..680097626 100644 --- a/compositor_pipeline/src/pipeline/types.rs +++ b/compositor_pipeline/src/pipeline/types.rs @@ -58,6 +58,7 @@ pub struct RawDataSender { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum VideoDecoder { FFmpegH264, + 
#[cfg(feature = "vk-video")] VulkanVideo, } diff --git a/compositor_render/src/lib.rs b/compositor_render/src/lib.rs index 5c6d93694..5d598473f 100644 --- a/compositor_render/src/lib.rs +++ b/compositor_render/src/lib.rs @@ -19,8 +19,8 @@ pub use state::Renderer; pub use state::RendererOptions; pub use state::RendererSpec; -pub use wgpu::create_wgpu_ctx; pub use wgpu::WgpuFeatures; +pub use wgpu::{create_wgpu_ctx, required_wgpu_features, set_required_wgpu_limits}; pub mod image { pub use crate::transformations::image_renderer::{ImageSource, ImageSpec, ImageType}; diff --git a/compositor_render/src/wgpu.rs b/compositor_render/src/wgpu.rs index d3814eb13..ff9921a65 100644 --- a/compositor_render/src/wgpu.rs +++ b/compositor_render/src/wgpu.rs @@ -6,8 +6,8 @@ pub(crate) mod format; pub(crate) mod texture; pub(crate) mod utils; -pub use ctx::create_wgpu_ctx; pub(crate) use ctx::WgpuCtx; +pub use ctx::{create_wgpu_ctx, required_wgpu_features, set_required_wgpu_limits}; pub use wgpu::Features as WgpuFeatures; #[must_use] diff --git a/compositor_render/src/wgpu/ctx.rs b/compositor_render/src/wgpu/ctx.rs index e6ee5e984..4bf23706f 100644 --- a/compositor_render/src/wgpu/ctx.rs +++ b/compositor_render/src/wgpu/ctx.rs @@ -34,7 +34,7 @@ impl WgpuCtx { Self::new_from_device_queue(device, queue)? } None => { - let (device, queue) = create_wgpu_ctx(force_gpu, features)?; + let (device, queue) = create_wgpu_ctx(force_gpu, features, Default::default())?; Self::new_from_device_queue(device, queue)? 
} }; @@ -94,9 +94,17 @@ pub fn required_wgpu_features() -> wgpu::Features { } } +pub fn set_required_wgpu_limits(limits: wgpu::Limits) -> wgpu::Limits { + wgpu::Limits { + max_push_constant_size: limits.max_push_constant_size.max(128), + ..limits + } +} + pub fn create_wgpu_ctx( force_gpu: bool, features: wgpu::Features, + limits: wgpu::Limits, ) -> Result<(Arc, Arc), CreateWgpuCtxError> { let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { backends: wgpu::Backends::all(), @@ -134,10 +142,7 @@ pub fn create_wgpu_ctx( let (device, queue) = pollster::block_on(adapter.request_device( &wgpu::DeviceDescriptor { label: None, - required_limits: wgpu::Limits { - max_push_constant_size: 128, - ..Default::default() - }, + required_limits: set_required_wgpu_limits(limits), required_features, memory_hints: wgpu::MemoryHints::default(), }, diff --git a/src/snapshot_tests/utils.rs b/src/snapshot_tests/utils.rs index 99379b35b..d79b8cb67 100644 --- a/src/snapshot_tests/utils.rs +++ b/src/snapshot_tests/utils.rs @@ -22,7 +22,7 @@ fn global_wgpu_ctx( ) -> (Arc, Arc) { static CTX: OnceLock<(Arc, Arc)> = OnceLock::new(); - CTX.get_or_init(|| create_wgpu_ctx(force_gpu, features).unwrap()) + CTX.get_or_init(|| create_wgpu_ctx(force_gpu, features, Default::default()).unwrap()) .clone() } diff --git a/vk-video/examples/basic.rs b/vk-video/examples/basic.rs index d75452d81..d57ab696c 100644 --- a/vk-video/examples/basic.rs +++ b/vk-video/examples/basic.rs @@ -1,6 +1,13 @@ -use std::io::Write; - +#[cfg(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +))] fn main() { + use std::io::Write; + let subscriber = tracing_subscriber::FmtSubscriber::builder() .with_max_level(tracing::Level::INFO) .finish(); @@ -37,3 +44,16 @@ fn main() { } } } + +#[cfg(not(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +)))] +fn main() { + println!( + "This crate doesn't work on 
your operating system, because it does not support vulkan" + ); +} diff --git a/vk-video/examples/wgpu.rs b/vk-video/examples/wgpu.rs index a5a8f03bd..c535b0744 100644 --- a/vk-video/examples/wgpu.rs +++ b/vk-video/examples/wgpu.rs @@ -1,6 +1,13 @@ -use std::io::Write; - +#[cfg(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +))] fn main() { + use std::io::Write; + let subscriber = tracing_subscriber::FmtSubscriber::builder() .with_max_level(tracing::Level::INFO) .finish(); @@ -40,11 +47,33 @@ fn main() { } } +#[cfg(not(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +)))] +fn main() { + println!( + "This crate doesn't work on your operating system, because it does not support vulkan" + ); +} + +#[cfg(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +))] fn download_wgpu_texture( device: &wgpu::Device, queue: &wgpu::Queue, frame: wgpu::Texture, ) -> Vec { + use std::io::Write; + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); let y_plane_bytes_per_row = (frame.width() as u64 + 255) / 256 * 256; let y_plane_size = y_plane_bytes_per_row * frame.height() as u64; diff --git a/vk-video/src/lib.rs b/vk-video/src/lib.rs index e47c7deda..1b0fff581 100644 --- a/vk-video/src/lib.rs +++ b/vk-video/src/lib.rs @@ -1,3 +1,4 @@ +#![cfg(not(target_os = "macos"))] mod parser; mod vulkan_decoder; From 0bc4e492d6d10c3348c7bc4bc1e35105b9adc0bf Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 30 Sep 2024 13:17:44 +0200 Subject: [PATCH 04/13] Refactor video session resources to a separate module --- vk-video/src/vulkan_decoder.rs | 495 ++---------------- .../src/vulkan_decoder/session_resources.rs | 170 ++++++ .../session_resources/images.rs | 291 ++++++++++ .../session_resources/parameters.rs | 116 ++++ vk-video/src/vulkan_decoder/vulkan_ctx.rs | 
72 ++- vk-video/src/vulkan_decoder/wrappers.rs | 2 + .../src/vulkan_decoder/wrappers/command.rs | 39 -- vk-video/src/vulkan_decoder/wrappers/mem.rs | 41 ++ .../{ => wrappers}/parameter_sets.rs | 33 +- vk-video/src/vulkan_decoder/wrappers/video.rs | 136 +---- 10 files changed, 774 insertions(+), 621 deletions(-) create mode 100644 vk-video/src/vulkan_decoder/session_resources.rs create mode 100644 vk-video/src/vulkan_decoder/session_resources/images.rs create mode 100644 vk-video/src/vulkan_decoder/session_resources/parameters.rs rename vk-video/src/vulkan_decoder/{ => wrappers}/parameter_sets.rs (91%) diff --git a/vk-video/src/vulkan_decoder.rs b/vk-video/src/vulkan_decoder.rs index 9b78b74c4..c2b10aa09 100644 --- a/vk-video/src/vulkan_decoder.rs +++ b/vk-video/src/vulkan_decoder.rs @@ -3,19 +3,18 @@ use std::sync::Arc; use ash::vk; use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; +use session_resources::VideoSessionResources; use tracing::error; use wrappers::*; use crate::parser::{DecodeInformation, DecoderInstruction, ReferenceId}; -mod parameter_sets; +mod session_resources; mod vulkan_ctx; mod wrappers; pub use vulkan_ctx::*; -const MACROBLOCK_SIZE: u32 = 16; - pub struct VulkanDecoder<'a> { vulkan_ctx: Arc, video_session_resources: Option>, @@ -38,12 +37,6 @@ struct CommandBuffers { vulkan_to_wgpu_transfer_buffer: CommandBuffer, } -struct VideoSessionResources<'a> { - video_session: VideoSession, - parameters_manager: VideoSessionParametersManager, - decoding_images: DecodingImages<'a>, -} - /// this cannot outlive the image and semaphore it borrows, but it seems impossible to encode that /// in the lifetimes struct DecodeOutput { @@ -213,7 +206,7 @@ impl VulkanDecoder<'_> { Some(dpb_idx) => self .video_session_resources .as_mut() - .map(|s| s.decoding_images.free_reference_picture(dpb_idx)), + .map(|s| s.free_reference_picture(dpb_idx)), None => return Err(VulkanDecoderError::NonExistantReferenceRequested), }; } @@ -228,119 +221,31 @@ impl 
VulkanDecoder<'_> { } fn process_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { - let profile = H264ProfileInfo::decode_h264_yuv420(); - - let width = match sps.frame_cropping { - None => (sps.pic_width_in_mbs_minus1 + 1) * MACROBLOCK_SIZE, - Some(_) => return Err(VulkanDecoderError::FrameCroppingNotSupported), - }; - - let height = match sps.frame_mbs_flags { - h264_reader::nal::sps::FrameMbsFlags::Frames => { - (sps.pic_height_in_map_units_minus1 + 1) * MACROBLOCK_SIZE - } - h264_reader::nal::sps::FrameMbsFlags::Fields { .. } => { - return Err(VulkanDecoderError::FieldsNotSupported) - } - }; - - let max_coded_extent = vk::Extent2D { width, height }; - // +1 for current frame - let max_dpb_slots = sps.max_num_ref_frames + 1; - let max_active_references = sps.max_num_ref_frames; - - if let Some(VideoSessionResources { - video_session, - parameters_manager: parameters, - .. - }) = &mut self.video_session_resources - { - if video_session.max_coded_extent.width >= width - && video_session.max_coded_extent.height >= height - && video_session.max_dpb_slots >= max_dpb_slots - { - // no need to change the session - parameters.put_sps(sps)?; - return Ok(()); - } - } - - let video_session = VideoSession::new( - &self.vulkan_ctx, - &profile.profile_info, - max_coded_extent, - max_dpb_slots, - max_active_references, - &self.vulkan_ctx.video_capabilities.std_header_version, - )?; - - let parameters = self - .video_session_resources - .take() - .map(|r| r.parameters_manager); - - let mut parameters = match parameters { - Some(mut parameters) => { - parameters.change_session(video_session.session)?; - parameters + match self.video_session_resources.as_mut() { + Some(session) => session.process_sps( + &self.vulkan_ctx, + &self.command_buffers.decode_buffer, + sps, + &self.sync_structures.fence_memory_barrier_completed, + )?, + None => { + self.video_session_resources = Some(VideoSessionResources::new_from_sps( + &self.vulkan_ctx, + 
&self.command_buffers.decode_buffer, + sps, + &self.sync_structures.fence_memory_barrier_completed, + )?) } - None => VideoSessionParametersManager::new(&self.vulkan_ctx, video_session.session)?, - }; - - parameters.put_sps(sps)?; - - // FIXME: usually, sps arrives either at the start of the stream (when all spses are sent - // at the begginning of the stream) or right before an IDR. It is however possible for an - // sps nal to arrive in between P-frames. This would cause us to loose the reference - // pictures we need to decode the stream until we receive a new IDR. Don't know if this is - // an issue worth fixing, I don't think I ever saw a stream like this. - let (decoding_images, memory_barrier) = DecodingImages::new( - &self.vulkan_ctx, - profile, - &self.vulkan_ctx.h264_dpb_format_properties, - &self.vulkan_ctx.h264_dst_format_properties, - max_coded_extent, - max_dpb_slots, - )?; - - self.command_buffers.decode_buffer.begin()?; - - unsafe { - self.vulkan_ctx.device.cmd_pipeline_barrier2( - *self.command_buffers.decode_buffer, - &vk::DependencyInfo::default().image_memory_barriers(&memory_barrier), - ); } - self.command_buffers.decode_buffer.end()?; - - self.command_buffers.decode_buffer.submit( - *self.vulkan_ctx.queues.h264_decode.queue.lock().unwrap(), - &[], - &[], - Some(*self.sync_structures.fence_memory_barrier_completed), - )?; - - // TODO: this shouldn't be a fence - self.sync_structures - .fence_memory_barrier_completed - .wait_and_reset(u64::MAX)?; - - self.video_session_resources = Some(VideoSessionResources { - video_session, - parameters_manager: parameters, - decoding_images, - }); - Ok(()) } fn process_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { self.video_session_resources .as_mut() - .map(|r| &mut r.parameters_manager) .ok_or(VulkanDecoderError::NoSession)? 
- .put_pps(pps)?; + .process_pps(pps)?; Ok(()) } @@ -384,8 +289,11 @@ impl VulkanDecoder<'_> { .min_bitstream_buffer_offset_alignment, ); - let decode_buffer = - self.upload_decode_data_to_buffer(&decode_information.rbsp_bytes, size)?; + let decode_buffer = Buffer::new_with_decode_data( + self.vulkan_ctx.allocator.clone(), + &decode_information.rbsp_bytes, + size, + )?; // decode let video_session_resources = self @@ -533,28 +441,22 @@ impl VulkanDecoder<'_> { .std_picture_info(&std_picture_info) .slice_offsets(&slice_offsets); - let dst_picture_resource_info = match &video_session_resources.decoding_images.dst_image { - Some(image) => image.video_resource_info[0], - None => *new_reference_slot_video_picture_resource_info, - }; + let dst_picture_resource_info = &video_session_resources + .decoding_images + .target_picture_resource_info(new_reference_slot_index) + .unwrap(); // these 3 veriables are for copying the result later - let (dst_image, dst_image_layout, dst_layer) = - match &video_session_resources.decoding_images.dst_image { - Some(image) => (**image.image, vk::ImageLayout::VIDEO_DECODE_DST_KHR, 0), - None => ( - **video_session_resources.decoding_images.dpb_image.image, - vk::ImageLayout::VIDEO_DECODE_DPB_KHR, - new_reference_slot_index, - ), - }; + let (target_image, target_image_layout, target_layer) = video_session_resources + .decoding_images + .target_info(new_reference_slot_index); // fill out the final struct and issue the command let decode_info = vk::VideoDecodeInfoKHR::default() .src_buffer(*decode_buffer) .src_buffer_offset(0) .src_buffer_range(size) - .dst_picture_resource(dst_picture_resource_info) + .dst_picture_resource(*dst_picture_resource_info) .setup_reference_slot(&setup_reference_slot) .reference_slots(&pic_reference_slots) .push_next(&mut decode_h264_picture_info); @@ -586,8 +488,8 @@ impl VulkanDecoder<'_> { self.command_buffers.decode_buffer.end()?; - self.command_buffers.decode_buffer.submit( - 
*self.vulkan_ctx.queues.h264_decode.queue.lock().unwrap(), + self.vulkan_ctx.queues.h264_decode.submit( + &self.command_buffers.decode_buffer, &[], &[( *self.sync_structures.sem_decode_done, @@ -605,10 +507,10 @@ impl VulkanDecoder<'_> { let dimensions = video_session_resources.video_session.max_coded_extent; Ok(DecodeOutput { - image: dst_image, + image: target_image, wait_semaphore: *self.sync_structures.sem_decode_done, - layer: dst_layer as u32, - current_layout: dst_image_layout, + layer: target_layer as u32, + current_layout: target_image_layout, dimensions, _input_buffer: decode_buffer, }) @@ -773,8 +675,8 @@ impl VulkanDecoder<'_> { self.command_buffers.vulkan_to_wgpu_transfer_buffer.end()?; - self.command_buffers.vulkan_to_wgpu_transfer_buffer.submit( - *self.vulkan_ctx.queues.transfer.queue.lock().unwrap(), + self.vulkan_ctx.queues.transfer.submit( + &self.command_buffers.vulkan_to_wgpu_transfer_buffer, &[( decode_output.wait_semaphore, vk::PipelineStageFlags2::TOP_OF_PIPE, @@ -867,8 +769,7 @@ impl VulkanDecoder<'_> { .wait_and_reset(u64::MAX)?; let output = unsafe { - self.download_data_from_buffer( - &mut dst_buffer, + dst_buffer.download_data_from_buffer( decode_output.dimensions.width as usize * decode_output.dimensions.height as usize * 3 @@ -932,55 +833,6 @@ impl VulkanDecoder<'_> { Ok(pic_reference_slots) } - /// ## Safety - /// the buffer has to be mappable and readable - unsafe fn download_data_from_buffer( - &self, - buffer: &mut Buffer, - size: usize, - ) -> Result, VulkanDecoderError> { - let mut output = Vec::new(); - unsafe { - let memory = self - .vulkan_ctx - .allocator - .map_memory(&mut buffer.allocation)?; - let memory_slice = std::slice::from_raw_parts_mut(memory, size); - output.extend_from_slice(memory_slice); - self.vulkan_ctx - .allocator - .unmap_memory(&mut buffer.allocation); - } - - Ok(output) - } - - fn upload_decode_data_to_buffer( - &self, - data: &[u8], - buffer_size: u64, - ) -> Result { - let mut decode_buffer = 
Buffer::new_decode( - self.vulkan_ctx.allocator.clone(), - buffer_size, - &H264ProfileInfo::decode_h264_yuv420(), - )?; - - unsafe { - let mem = self - .vulkan_ctx - .allocator - .map_memory(&mut decode_buffer.allocation)?; - let slice = std::slice::from_raw_parts_mut(mem.cast(), data.len()); - slice.copy_from_slice(data); - self.vulkan_ctx - .allocator - .unmap_memory(&mut decode_buffer.allocation); - } - - Ok(decode_buffer) - } - #[allow(clippy::too_many_arguments)] fn copy_image_to_buffer( &self, @@ -1091,8 +943,8 @@ impl VulkanDecoder<'_> { self.command_buffers.gpu_to_mem_transfer_buffer.end()?; - self.command_buffers.gpu_to_mem_transfer_buffer.submit( - *self.vulkan_ctx.queues.transfer.queue.lock().unwrap(), + self.vulkan_ctx.queues.transfer.submit( + &self.command_buffers.gpu_to_mem_transfer_buffer, wait_semaphores, signal_semaphores, fence, @@ -1102,271 +954,6 @@ impl VulkanDecoder<'_> { } } -impl From for vk::native::StdVideoDecodeH264ReferenceInfo { - fn from(picture_info: crate::parser::PictureInfo) -> Self { - vk::native::StdVideoDecodeH264ReferenceInfo { - flags: vk::native::StdVideoDecodeH264ReferenceInfoFlags { - __bindgen_padding_0: [0; 3], - _bitfield_align_1: [], - _bitfield_1: vk::native::StdVideoDecodeH264ReferenceInfoFlags::new_bitfield_1( - 0, - 0, - picture_info.used_for_long_term_reference.into(), - picture_info.non_existing.into(), - ), - }, - FrameNum: picture_info.FrameNum, - PicOrderCnt: picture_info.PicOrderCnt, - reserved: 0, - } - } -} - -pub(crate) struct DecodingImages<'a> { - pub(crate) dpb_image: DecodingImageBundle<'a>, - pub(crate) dpb_slot_active: Vec, - pub(crate) dst_image: Option>, -} - -pub(crate) struct DecodingImageBundle<'a> { - pub(crate) image: Arc, - pub(crate) _image_view: ImageView, - pub(crate) video_resource_info: Vec>, -} - -impl<'a> DecodingImageBundle<'a> { - #[allow(clippy::too_many_arguments)] - pub(crate) fn new( - vulkan_ctx: &VulkanCtx, - format: &vk::VideoFormatPropertiesKHR<'a>, - dimensions: 
vk::Extent2D, - image_usage: vk::ImageUsageFlags, - profile_info: &H264ProfileInfo, - array_layer_count: u32, - queue_indices: Option<&[u32]>, - layout: vk::ImageLayout, - ) -> Result<(Self, vk::ImageMemoryBarrier2<'a>), VulkanDecoderError> { - let mut profile_list_info = vk::VideoProfileListInfoKHR::default() - .profiles(std::slice::from_ref(&profile_info.profile_info)); - - let mut image_create_info = vk::ImageCreateInfo::default() - .flags(format.image_create_flags) - .image_type(format.image_type) - .format(format.format) - .extent(vk::Extent3D { - width: dimensions.width, - height: dimensions.height, - depth: 1, - }) - .mip_levels(1) - .array_layers(array_layer_count) - .samples(vk::SampleCountFlags::TYPE_1) - .tiling(format.image_tiling) - .usage(image_usage) - .initial_layout(vk::ImageLayout::UNDEFINED) - .push_next(&mut profile_list_info); - - match queue_indices { - Some(indices) => { - image_create_info = image_create_info - .sharing_mode(vk::SharingMode::CONCURRENT) - .queue_family_indices(indices); - } - None => { - image_create_info = image_create_info.sharing_mode(vk::SharingMode::EXCLUSIVE); - } - } - - let image = Arc::new(Image::new( - vulkan_ctx.allocator.clone(), - &image_create_info, - )?); - - let subresource_range = vk::ImageSubresourceRange { - aspect_mask: vk::ImageAspectFlags::COLOR, - base_mip_level: 0, - level_count: 1, - base_array_layer: 0, - layer_count: vk::REMAINING_ARRAY_LAYERS, - }; - - let image_view_create_info = vk::ImageViewCreateInfo::default() - .flags(vk::ImageViewCreateFlags::empty()) - .image(**image) - .view_type(if array_layer_count == 1 { - vk::ImageViewType::TYPE_2D - } else { - vk::ImageViewType::TYPE_2D_ARRAY - }) - .format(format.format) - .components(vk::ComponentMapping::default()) - .subresource_range(subresource_range); - - let image_view = ImageView::new( - vulkan_ctx.device.clone(), - image.clone(), - &image_view_create_info, - )?; - - let video_resource_info = (0..array_layer_count) - .map(|i| { - 
vk::VideoPictureResourceInfoKHR::default() - .coded_offset(vk::Offset2D { x: 0, y: 0 }) - .coded_extent(dimensions) - .base_array_layer(i) - .image_view_binding(image_view.view) - }) - .collect(); - - let image_memory_barrier = vk::ImageMemoryBarrier2::default() - .src_stage_mask(vk::PipelineStageFlags2::NONE) - .src_access_mask(vk::AccessFlags2::NONE) - .dst_stage_mask(vk::PipelineStageFlags2::NONE) - .dst_access_mask(vk::AccessFlags2::NONE) - .old_layout(vk::ImageLayout::UNDEFINED) - .new_layout(layout) - .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .image(**image) - .subresource_range(subresource_range); - - Ok(( - Self { - image, - _image_view: image_view, - video_resource_info, - }, - image_memory_barrier, - )) - } -} - -impl<'a> DecodingImages<'a> { - pub(crate) fn new( - vulkan_ctx: &VulkanCtx, - profile: H264ProfileInfo, - dpb_format: &vk::VideoFormatPropertiesKHR<'a>, - dst_format: &Option>, - dimensions: vk::Extent2D, - max_dpb_slots: u32, - ) -> Result<(Self, Vec>), VulkanDecoderError> { - let dpb_image_usage = if dst_format.is_some() { - dpb_format.image_usage_flags & vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR - } else { - dpb_format.image_usage_flags - & (vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR - | vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR - | vk::ImageUsageFlags::TRANSFER_SRC) - }; - - let queue_indices = [ - vulkan_ctx.queues.transfer.idx as u32, - vulkan_ctx.queues.h264_decode.idx as u32, - ]; - - let (dpb_image, dpb_memory_barrier) = DecodingImageBundle::new( - vulkan_ctx, - dpb_format, - dimensions, - dpb_image_usage, - &profile, - max_dpb_slots, - if dst_format.is_some() { - None - } else { - Some(&queue_indices) - }, - vk::ImageLayout::VIDEO_DECODE_DPB_KHR, - )?; - - let output = dst_format - .map(|dst_format| { - let dst_image_usage = dst_format.image_usage_flags - & (vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR - | vk::ImageUsageFlags::TRANSFER_SRC); - DecodingImageBundle::new( - 
vulkan_ctx, - &dst_format, - dimensions, - dst_image_usage, - &profile, - 1, - Some(&queue_indices), - vk::ImageLayout::VIDEO_DECODE_DST_KHR, - ) - }) - .transpose()?; - - let (dst_image, dst_memory_barrier) = match output { - Some((output_images, output_memory_barrier)) => { - (Some(output_images), Some(output_memory_barrier)) - } - None => (None, None), - }; - - let barriers = [dpb_memory_barrier] - .into_iter() - .chain(dst_memory_barrier) - .collect::>(); - - Ok(( - Self { - dpb_image, - dpb_slot_active: vec![false; max_dpb_slots as usize], - dst_image, - }, - barriers, - )) - } - - fn reference_slot_info(&self) -> Vec { - self.dpb_image - .video_resource_info - .iter() - .enumerate() - .map(|(i, info)| { - vk::VideoReferenceSlotInfoKHR::default() - .picture_resource(info) - .slot_index(if self.dpb_slot_active[i] { - i as i32 - } else { - -1 - }) - }) - .collect() - } - - fn allocate_reference_picture(&mut self) -> Result { - let i = self - .dpb_slot_active - .iter() - .enumerate() - .find(|(_, &v)| !v) - .map(|(i, _)| i) - .ok_or(VulkanDecoderError::NoFreeSlotsInDpb)?; - - self.dpb_slot_active[i] = true; - - Ok(i) - } - - fn video_resource_info(&self, i: usize) -> Option<&vk::VideoPictureResourceInfoKHR> { - self.dpb_image.video_resource_info.get(i) - } - - fn free_reference_picture(&mut self, i: usize) -> Result<(), VulkanDecoderError> { - self.dpb_slot_active[i] = false; - - Ok(()) - } - - fn reset_all_allocations(&mut self) { - self.dpb_slot_active - .iter_mut() - .for_each(|slot| *slot = false); - } -} - pub(crate) struct H264ProfileInfo<'a> { profile_info: vk::VideoProfileInfoKHR<'a>, h264_info_ptr: *mut vk::VideoDecodeH264ProfileInfoKHR<'a>, diff --git a/vk-video/src/vulkan_decoder/session_resources.rs b/vk-video/src/vulkan_decoder/session_resources.rs new file mode 100644 index 000000000..e59c71e09 --- /dev/null +++ b/vk-video/src/vulkan_decoder/session_resources.rs @@ -0,0 +1,170 @@ +use ash::vk; +use h264_reader::nal::{pps::PicParameterSet, 
sps::SeqParameterSet}; +use images::DecodingImages; +use parameters::VideoSessionParametersManager; + +use super::{ + CommandBuffer, Fence, H264ProfileInfo, SeqParameterSetExt, VideoSession, VulkanCtx, + VulkanDecoderError, +}; + +mod images; +mod parameters; + +pub(super) struct VideoSessionResources<'a> { + pub(crate) video_session: VideoSession, + pub(crate) parameters_manager: VideoSessionParametersManager, + pub(crate) decoding_images: DecodingImages<'a>, +} + +impl VideoSessionResources<'_> { + pub(crate) fn new_from_sps( + vulkan_ctx: &VulkanCtx, + decode_buffer: &CommandBuffer, + sps: &SeqParameterSet, + fence_memory_barrier_completed: &Fence, + ) -> Result { + let profile = H264ProfileInfo::decode_h264_yuv420(); + + let width = sps.width()?; + let height = sps.height()?; + + let max_coded_extent = vk::Extent2D { width, height }; + // +1 for current frame + let max_dpb_slots = sps.max_num_ref_frames + 1; + let max_active_references = sps.max_num_ref_frames; + + let video_session = VideoSession::new( + vulkan_ctx, + &profile.profile_info, + max_coded_extent, + max_dpb_slots, + max_active_references, + &vulkan_ctx.video_capabilities.std_header_version, + )?; + + let mut parameters_manager = + VideoSessionParametersManager::new(vulkan_ctx, video_session.session)?; + + parameters_manager.put_sps(sps)?; + + let decoding_images = Self::new_decoding_images( + vulkan_ctx, + max_coded_extent, + max_dpb_slots, + decode_buffer, + fence_memory_barrier_completed, + )?; + + Ok(VideoSessionResources { + video_session, + parameters_manager, + decoding_images, + }) + } + + pub(crate) fn process_sps( + &mut self, + vulkan_ctx: &VulkanCtx, + decode_buffer: &CommandBuffer, + sps: &SeqParameterSet, + fence_memory_barrier_completed: &Fence, + ) -> Result<(), VulkanDecoderError> { + let profile = H264ProfileInfo::decode_h264_yuv420(); + + let width = sps.width()?; + let height = sps.height()?; + + let max_coded_extent = vk::Extent2D { width, height }; + // +1 for current frame + 
let max_dpb_slots = sps.max_num_ref_frames + 1; + let max_active_references = sps.max_num_ref_frames; + + if self.video_session.max_coded_extent.width >= width + && self.video_session.max_coded_extent.height >= height + && self.video_session.max_dpb_slots >= max_dpb_slots + { + // no need to change the session + self.parameters_manager.put_sps(sps)?; + return Ok(()); + } + + self.video_session = VideoSession::new( + vulkan_ctx, + &profile.profile_info, + max_coded_extent, + max_dpb_slots, + max_active_references, + &vulkan_ctx.video_capabilities.std_header_version, + )?; + + self.parameters_manager + .change_session(self.video_session.session)?; + self.parameters_manager.put_sps(sps)?; + + self.decoding_images = Self::new_decoding_images( + vulkan_ctx, + max_coded_extent, + max_dpb_slots, + decode_buffer, + fence_memory_barrier_completed, + )?; + + Ok(()) + } + + pub(crate) fn process_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { + self.parameters_manager.put_pps(pps) + } + + fn new_decoding_images<'a>( + vulkan_ctx: &VulkanCtx, + max_coded_extent: vk::Extent2D, + max_dpb_slots: u32, + decode_buffer: &CommandBuffer, + fence_memory_barrier_completed: &Fence, + ) -> Result, VulkanDecoderError> { + let profile = H264ProfileInfo::decode_h264_yuv420(); + + // FIXME: usually, sps arrives either at the start of the stream (when all spses are sent + // at the begginning of the stream) or right before an IDR. It is however possible for an + // sps nal to arrive in between P-frames. This would cause us to loose the reference + // pictures we need to decode the stream until we receive a new IDR. Don't know if this is + // an issue worth fixing, I don't think I ever saw a stream like this. 
+ let (decoding_images, memory_barrier) = DecodingImages::new( + vulkan_ctx, + profile, + &vulkan_ctx.h264_dpb_format_properties, + &vulkan_ctx.h264_dst_format_properties, + max_coded_extent, + max_dpb_slots, + )?; + + decode_buffer.begin()?; + + unsafe { + vulkan_ctx.device.cmd_pipeline_barrier2( + **decode_buffer, + &vk::DependencyInfo::default().image_memory_barriers(&memory_barrier), + ); + } + + decode_buffer.end()?; + + vulkan_ctx.queues.h264_decode.submit( + decode_buffer, + &[], + &[], + Some(**fence_memory_barrier_completed), + )?; + + // TODO: this shouldn't be a fence + fence_memory_barrier_completed.wait_and_reset(u64::MAX)?; + + Ok(decoding_images) + } + + pub(crate) fn free_reference_picture(&mut self, i: usize) -> Result<(), VulkanDecoderError> { + self.decoding_images.free_reference_picture(i) + } +} diff --git a/vk-video/src/vulkan_decoder/session_resources/images.rs b/vk-video/src/vulkan_decoder/session_resources/images.rs new file mode 100644 index 000000000..c62afbd50 --- /dev/null +++ b/vk-video/src/vulkan_decoder/session_resources/images.rs @@ -0,0 +1,291 @@ +use std::sync::Arc; + +use ash::vk; + +use crate::{ + vulkan_decoder::{H264ProfileInfo, Image, ImageView}, + VulkanCtx, VulkanDecoderError, +}; + +pub(crate) struct DecodingImages<'a> { + pub(crate) dpb_image: DecodingImageBundle<'a>, + pub(crate) dpb_slot_active: Vec, + pub(crate) dst_image: Option>, +} + +pub(crate) struct DecodingImageBundle<'a> { + pub(crate) image: Arc, + pub(crate) _image_view: ImageView, + pub(crate) video_resource_info: Vec>, +} + +impl<'a> DecodingImageBundle<'a> { + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + format: &vk::VideoFormatPropertiesKHR<'a>, + dimensions: vk::Extent2D, + image_usage: vk::ImageUsageFlags, + profile_info: &H264ProfileInfo, + array_layer_count: u32, + queue_indices: Option<&[u32]>, + layout: vk::ImageLayout, + ) -> Result<(Self, vk::ImageMemoryBarrier2<'a>), VulkanDecoderError> { + let mut 
profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(&profile_info.profile_info)); + + let mut image_create_info = vk::ImageCreateInfo::default() + .flags(format.image_create_flags) + .image_type(format.image_type) + .format(format.format) + .extent(vk::Extent3D { + width: dimensions.width, + height: dimensions.height, + depth: 1, + }) + .mip_levels(1) + .array_layers(array_layer_count) + .samples(vk::SampleCountFlags::TYPE_1) + .tiling(format.image_tiling) + .usage(image_usage) + .initial_layout(vk::ImageLayout::UNDEFINED) + .push_next(&mut profile_list_info); + + match queue_indices { + Some(indices) => { + image_create_info = image_create_info + .sharing_mode(vk::SharingMode::CONCURRENT) + .queue_family_indices(indices); + } + None => { + image_create_info = image_create_info.sharing_mode(vk::SharingMode::EXCLUSIVE); + } + } + + let image = Arc::new(Image::new( + vulkan_ctx.allocator.clone(), + &image_create_info, + )?); + + let subresource_range = vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: vk::REMAINING_ARRAY_LAYERS, + }; + + let image_view_create_info = vk::ImageViewCreateInfo::default() + .flags(vk::ImageViewCreateFlags::empty()) + .image(**image) + .view_type(if array_layer_count == 1 { + vk::ImageViewType::TYPE_2D + } else { + vk::ImageViewType::TYPE_2D_ARRAY + }) + .format(format.format) + .components(vk::ComponentMapping::default()) + .subresource_range(subresource_range); + + let image_view = ImageView::new( + vulkan_ctx.device.clone(), + image.clone(), + &image_view_create_info, + )?; + + let video_resource_info = (0..array_layer_count) + .map(|i| { + vk::VideoPictureResourceInfoKHR::default() + .coded_offset(vk::Offset2D { x: 0, y: 0 }) + .coded_extent(dimensions) + .base_array_layer(i) + .image_view_binding(image_view.view) + }) + .collect(); + + let image_memory_barrier = vk::ImageMemoryBarrier2::default() + 
.src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::UNDEFINED) + .new_layout(layout) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(**image) + .subresource_range(subresource_range); + + Ok(( + Self { + image, + _image_view: image_view, + video_resource_info, + }, + image_memory_barrier, + )) + } + + fn extent(&self) -> vk::Extent3D { + self.image.extent + } +} + +impl<'a> DecodingImages<'a> { + pub(crate) fn target_picture_resource_info( + &'a self, + new_reference_slot_index: usize, + ) -> Option> { + match &self.dst_image { + Some(image) => Some(image.video_resource_info[0]), + None => self.video_resource_info(new_reference_slot_index).copied(), + } + } + + pub(crate) fn target_info( + &self, + new_reference_slot_index: usize, + ) -> (vk::Image, vk::ImageLayout, usize) { + match &self.dst_image { + Some(image) => (**image.image, vk::ImageLayout::VIDEO_DECODE_DST_KHR, 0), + None => ( + **self.dpb_image.image, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + new_reference_slot_index, + ), + } + } + + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + profile: H264ProfileInfo, + dpb_format: &vk::VideoFormatPropertiesKHR<'a>, + dst_format: &Option>, + dimensions: vk::Extent2D, + max_dpb_slots: u32, + ) -> Result<(Self, Vec>), VulkanDecoderError> { + let dpb_image_usage = if dst_format.is_some() { + dpb_format.image_usage_flags & vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + } else { + dpb_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + | vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC) + }; + + let queue_indices = [ + vulkan_ctx.queues.transfer.idx as u32, + vulkan_ctx.queues.h264_decode.idx as u32, + ]; + + let (dpb_image, dpb_memory_barrier) = DecodingImageBundle::new( + vulkan_ctx, + dpb_format, 
+ dimensions, + dpb_image_usage, + &profile, + max_dpb_slots, + if dst_format.is_some() { + None + } else { + Some(&queue_indices) + }, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + )?; + + let output = dst_format + .map(|dst_format| { + let dst_image_usage = dst_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC); + DecodingImageBundle::new( + vulkan_ctx, + &dst_format, + dimensions, + dst_image_usage, + &profile, + 1, + Some(&queue_indices), + vk::ImageLayout::VIDEO_DECODE_DST_KHR, + ) + }) + .transpose()?; + + let (dst_image, dst_memory_barrier) = match output { + Some((output_images, output_memory_barrier)) => { + (Some(output_images), Some(output_memory_barrier)) + } + None => (None, None), + }; + + let barriers = [dpb_memory_barrier] + .into_iter() + .chain(dst_memory_barrier) + .collect::>(); + + Ok(( + Self { + dpb_image, + dpb_slot_active: vec![false; max_dpb_slots as usize], + dst_image, + }, + barriers, + )) + } + + #[allow(dead_code)] + pub(crate) fn dbp_extent(&self) -> vk::Extent3D { + self.dpb_image.extent() + } + + #[allow(dead_code)] + pub(crate) fn dst_extent(&self) -> Option { + self.dst_image.as_ref().map(|i| i.extent()) + } + + pub(crate) fn reference_slot_info(&self) -> Vec { + self.dpb_image + .video_resource_info + .iter() + .enumerate() + .map(|(i, info)| { + vk::VideoReferenceSlotInfoKHR::default() + .picture_resource(info) + .slot_index(if self.dpb_slot_active[i] { + i as i32 + } else { + -1 + }) + }) + .collect() + } + + pub(crate) fn allocate_reference_picture(&mut self) -> Result { + let i = self + .dpb_slot_active + .iter() + .enumerate() + .find(|(_, &v)| !v) + .map(|(i, _)| i) + .ok_or(VulkanDecoderError::NoFreeSlotsInDpb)?; + + self.dpb_slot_active[i] = true; + + Ok(i) + } + + pub(crate) fn video_resource_info(&self, i: usize) -> Option<&vk::VideoPictureResourceInfoKHR> { + self.dpb_image.video_resource_info.get(i) + } + + pub(crate) fn free_reference_picture(&mut self, i: 
usize) -> Result<(), VulkanDecoderError> { + self.dpb_slot_active[i] = false; + + Ok(()) + } + + pub(crate) fn reset_all_allocations(&mut self) { + self.dpb_slot_active + .iter_mut() + .for_each(|slot| *slot = false); + } +} diff --git a/vk-video/src/vulkan_decoder/session_resources/parameters.rs b/vk-video/src/vulkan_decoder/session_resources/parameters.rs new file mode 100644 index 000000000..e5f366f67 --- /dev/null +++ b/vk-video/src/vulkan_decoder/session_resources/parameters.rs @@ -0,0 +1,116 @@ +use std::{collections::HashMap, sync::Arc}; + +use ash::vk; +use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; + +use crate::{ + vulkan_decoder::{ + Device, VideoSessionParameters, VkPictureParameterSet, VkSequenceParameterSet, + }, + VulkanCtx, VulkanDecoderError, +}; + +/// Since `VideoSessionParameters` can only add sps and pps values (inserting sps or pps with an +/// existing id is prohibited), this is an abstraction which provides the capability to replace an +/// existing sps or pps. 
+pub(crate) struct VideoSessionParametersManager { + pub(crate) parameters: VideoSessionParameters, + sps: HashMap, + pps: HashMap<(u8, u8), VkPictureParameterSet>, + device: Arc, + session: vk::VideoSessionKHR, +} + +impl VideoSessionParametersManager { + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + session: vk::VideoSessionKHR, + ) -> Result { + Ok(Self { + parameters: VideoSessionParameters::new( + vulkan_ctx.device.clone(), + session, + &[], + &[], + None, + )?, + sps: HashMap::new(), + pps: HashMap::new(), + device: vulkan_ctx.device.clone(), + session, + }) + } + + pub(crate) fn parameters(&self) -> vk::VideoSessionParametersKHR { + self.parameters.parameters + } + + pub(crate) fn change_session( + &mut self, + session: vk::VideoSessionKHR, + ) -> Result<(), VulkanDecoderError> { + if self.session == session { + return Ok(()); + } + self.session = session; + + let sps = self.sps.values().map(|sps| sps.sps).collect::>(); + let pps = self.pps.values().map(|pps| pps.pps).collect::>(); + + self.parameters = + VideoSessionParameters::new(self.device.clone(), session, &sps, &pps, None)?; + + Ok(()) + } + + // it is probably not optimal to insert sps and pps searately. this could be optimized, so that + // the insertion happens lazily when the parameters are bound to a session. + pub(crate) fn put_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { + let key = sps.seq_parameter_set_id.id(); + match self.sps.entry(key) { + std::collections::hash_map::Entry::Occupied(mut e) => { + e.insert(sps.try_into()?); + + self.parameters = VideoSessionParameters::new( + self.device.clone(), + self.session, + &[self.sps[&key].sps], + &[], + Some(&self.parameters), + )? 
+ } + std::collections::hash_map::Entry::Vacant(e) => { + e.insert(sps.try_into()?); + + self.parameters.add(&[self.sps[&key].sps], &[])?; + } + } + + Ok(()) + } + + pub(crate) fn put_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { + let key = (pps.seq_parameter_set_id.id(), pps.pic_parameter_set_id.id()); + match self.pps.entry(key) { + std::collections::hash_map::Entry::Occupied(mut e) => { + e.insert(pps.try_into()?); + + self.parameters = VideoSessionParameters::new( + self.device.clone(), + self.session, + &[], + &[self.pps[&key].pps], + Some(&self.parameters), + )?; + } + + std::collections::hash_map::Entry::Vacant(e) => { + e.insert(pps.try_into()?); + + self.parameters.add(&[], &[self.pps[&key].pps])?; + } + } + + Ok(()) + } +} diff --git a/vk-video/src/vulkan_decoder/vulkan_ctx.rs b/vk-video/src/vulkan_decoder/vulkan_ctx.rs index a6481f721..03683237f 100644 --- a/vk-video/src/vulkan_decoder/vulkan_ctx.rs +++ b/vk-video/src/vulkan_decoder/vulkan_ctx.rs @@ -6,7 +6,10 @@ use std::{ use ash::{vk, Entry}; use tracing::{error, info}; -use super::{Allocator, CommandPool, DebugMessenger, Device, H264ProfileInfo, Instance}; +use super::{ + Allocator, CommandBuffer, CommandPool, DebugMessenger, Device, H264ProfileInfo, Instance, + VulkanDecoderError, +}; const REQUIRED_EXTENSIONS: &[&CStr] = &[ vk::KHR_VIDEO_QUEUE_NAME, @@ -73,6 +76,7 @@ pub(crate) struct Queue { _video_properties: vk::QueueFamilyVideoPropertiesKHR<'static>, pub(crate) query_result_status_properties: vk::QueueFamilyQueryResultStatusPropertiesKHR<'static>, + device: Arc, } impl Queue { @@ -81,6 +85,48 @@ impl Queue { .query_result_status_support == vk::TRUE } + + pub(crate) fn submit( + &self, + buffer: &CommandBuffer, + wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], + signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], + fence: Option, + ) -> Result<(), VulkanDecoderError> { + fn to_sem_submit_info( + submits: &[(vk::Semaphore, 
vk::PipelineStageFlags2)], + ) -> Vec { + submits + .iter() + .map(|&(sem, stage)| { + vk::SemaphoreSubmitInfo::default() + .semaphore(sem) + .stage_mask(stage) + }) + .collect::>() + } + + let wait_semaphores = to_sem_submit_info(wait_semaphores); + let signal_semaphores = to_sem_submit_info(signal_semaphores); + + let buffer_submit_info = + [vk::CommandBufferSubmitInfo::default().command_buffer(buffer.buffer)]; + + let submit_info = [vk::SubmitInfo2::default() + .wait_semaphore_infos(&wait_semaphores) + .signal_semaphore_infos(&signal_semaphores) + .command_buffer_infos(&buffer_submit_info)]; + + unsafe { + self.device.queue_submit2( + *self.queue.lock().unwrap(), + &submit_info, + fence.unwrap_or(vk::Fence::null()), + )? + }; + + Ok(()) + } } pub(crate) struct Queues { @@ -222,6 +268,16 @@ impl VulkanCtx { .push_next(&mut vk_synch_2_feature); let device = unsafe { instance.create_device(physical_device, &device_create_info, None)? }; + let video_queue_ext = ash::khr::video_queue::Device::new(&instance, &device); + let video_decode_queue_ext = ash::khr::video_decode_queue::Device::new(&instance, &device); + + let device = Arc::new(Device { + device, + video_queue_ext, + video_decode_queue_ext, + _instance: instance.clone(), + }); + let h264_decode_queue = unsafe { device.get_device_queue(queue_indices.h264_decode.idx as u32, 0) }; let transfer_queue = @@ -229,6 +285,7 @@ impl VulkanCtx { let wgpu_queue = unsafe { device.get_device_queue(queue_indices.graphics_transfer_compute.idx as u32, 0) }; + let queues = Queues { transfer: Queue { queue: transfer_queue.into(), @@ -237,6 +294,7 @@ impl VulkanCtx { query_result_status_properties: queue_indices .transfer .query_result_status_properties, + device: device.clone(), }, h264_decode: Queue { queue: h264_decode_queue.into(), @@ -245,6 +303,7 @@ impl VulkanCtx { query_result_status_properties: queue_indices .h264_decode .query_result_status_properties, + device: device.clone(), }, wgpu: Queue { queue: wgpu_queue.into(), 
@@ -253,19 +312,10 @@ impl VulkanCtx { query_result_status_properties: queue_indices .graphics_transfer_compute .query_result_status_properties, + device: device.clone(), }, }; - let video_queue_ext = ash::khr::video_queue::Device::new(&instance, &device); - let video_decode_queue_ext = ash::khr::video_decode_queue::Device::new(&instance, &device); - - let device = Arc::new(Device { - device, - video_queue_ext, - video_decode_queue_ext, - _instance: instance.clone(), - }); - let wgpu_device = unsafe { wgpu_adapter.adapter.device_from_raw( device.device.clone(), diff --git a/vk-video/src/vulkan_decoder/wrappers.rs b/vk-video/src/vulkan_decoder/wrappers.rs index 2d02da14c..5067ae512 100644 --- a/vk-video/src/vulkan_decoder/wrappers.rs +++ b/vk-video/src/vulkan_decoder/wrappers.rs @@ -5,6 +5,7 @@ use ash::Entry; mod command; mod debug; mod mem; +mod parameter_sets; mod sync; mod video; mod vk_extensions; @@ -12,6 +13,7 @@ mod vk_extensions; pub(crate) use command::*; pub(crate) use debug::*; pub(crate) use mem::*; +pub(crate) use parameter_sets::*; pub(crate) use sync::*; pub(crate) use video::*; pub(crate) use vk_extensions::*; diff --git a/vk-video/src/vulkan_decoder/wrappers/command.rs b/vk-video/src/vulkan_decoder/wrappers/command.rs index d76eb5249..8add7a1fa 100644 --- a/vk-video/src/vulkan_decoder/wrappers/command.rs +++ b/vk-video/src/vulkan_decoder/wrappers/command.rs @@ -62,45 +62,6 @@ impl CommandBuffer { Ok(Self { pool, buffer }) } - pub(crate) fn submit( - &self, - queue: vk::Queue, - wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], - signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], - fence: Option, - ) -> Result<(), VulkanDecoderError> { - fn to_sem_submit_info( - submits: &[(vk::Semaphore, vk::PipelineStageFlags2)], - ) -> Vec { - submits - .iter() - .map(|&(sem, stage)| { - vk::SemaphoreSubmitInfo::default() - .semaphore(sem) - .stage_mask(stage) - }) - .collect::>() - } - - let wait_semaphores = 
to_sem_submit_info(wait_semaphores); - let signal_semaphores = to_sem_submit_info(signal_semaphores); - - let buffer_submit_info = - [vk::CommandBufferSubmitInfo::default().command_buffer(self.buffer)]; - - let submit_info = [vk::SubmitInfo2::default() - .wait_semaphore_infos(&wait_semaphores) - .signal_semaphore_infos(&signal_semaphores) - .command_buffer_infos(&buffer_submit_info)]; - - unsafe { - self.device() - .queue_submit2(queue, &submit_info, fence.unwrap_or(vk::Fence::null()))? - }; - - Ok(()) - } - pub(crate) fn begin(&self) -> Result<(), VulkanDecoderError> { unsafe { self.device().begin_command_buffer( diff --git a/vk-video/src/vulkan_decoder/wrappers/mem.rs b/vk-video/src/vulkan_decoder/wrappers/mem.rs index 37f8e83bf..5876702ec 100644 --- a/vk-video/src/vulkan_decoder/wrappers/mem.rs +++ b/vk-video/src/vulkan_decoder/wrappers/mem.rs @@ -157,6 +157,44 @@ impl Buffer { allocator, }) } + + /// ## Safety + /// the buffer has to be mappable and readable + pub(crate) unsafe fn download_data_from_buffer( + &mut self, + size: usize, + ) -> Result, VulkanDecoderError> { + let mut output = Vec::new(); + unsafe { + let memory = self.allocator.map_memory(&mut self.allocation)?; + let memory_slice = std::slice::from_raw_parts_mut(memory, size); + output.extend_from_slice(memory_slice); + self.allocator.unmap_memory(&mut self.allocation); + } + + Ok(output) + } + + pub(crate) fn new_with_decode_data( + allocator: Arc, + data: &[u8], + buffer_size: u64, + ) -> Result { + let mut decode_buffer = Buffer::new_decode( + allocator.clone(), + buffer_size, + &H264ProfileInfo::decode_h264_yuv420(), + )?; + + unsafe { + let mem = allocator.map_memory(&mut decode_buffer.allocation)?; + let slice = std::slice::from_raw_parts_mut(mem.cast(), data.len()); + slice.copy_from_slice(data); + allocator.unmap_memory(&mut decode_buffer.allocation); + } + + Ok(decode_buffer) + } } impl Drop for Buffer { @@ -180,6 +218,7 @@ pub(crate) struct Image { pub(crate) image: vk::Image, 
allocation: vk_mem::Allocation, allocator: Arc, + pub(crate) extent: vk::Extent3D, } impl Image { @@ -187,6 +226,7 @@ impl Image { allocator: Arc, image_create_info: &vk::ImageCreateInfo, ) -> Result { + let extent = image_create_info.extent; let alloc_info = vk_mem::AllocationCreateInfo { usage: vk_mem::MemoryUsage::Auto, ..Default::default() @@ -199,6 +239,7 @@ impl Image { image, allocation, allocator, + extent, }) } } diff --git a/vk-video/src/vulkan_decoder/parameter_sets.rs b/vk-video/src/vulkan_decoder/wrappers/parameter_sets.rs similarity index 91% rename from vk-video/src/vulkan_decoder/parameter_sets.rs rename to vk-video/src/vulkan_decoder/wrappers/parameter_sets.rs index ebf4267c5..2229662e3 100644 --- a/vk-video/src/vulkan_decoder/parameter_sets.rs +++ b/vk-video/src/vulkan_decoder/wrappers/parameter_sets.rs @@ -1,7 +1,34 @@ use ash::vk; use h264_reader::nal::sps::SeqParameterSet; -use super::VulkanDecoderError; +use crate::VulkanDecoderError; + +const MACROBLOCK_SIZE: u32 = 16; + +pub(crate) trait SeqParameterSetExt { + fn width(&self) -> Result; + fn height(&self) -> Result; +} + +impl SeqParameterSetExt for SeqParameterSet { + fn width(&self) -> Result { + match self.frame_cropping { + None => Ok((self.pic_width_in_mbs_minus1 + 1) * MACROBLOCK_SIZE), + Some(_) => Err(VulkanDecoderError::FrameCroppingNotSupported), + } + } + + fn height(&self) -> Result { + match self.frame_mbs_flags { + h264_reader::nal::sps::FrameMbsFlags::Frames => { + Ok((self.pic_height_in_map_units_minus1 + 1) * MACROBLOCK_SIZE) + } + h264_reader::nal::sps::FrameMbsFlags::Fields { .. 
} => { + Err(VulkanDecoderError::FieldsNotSupported) + } + } + } +} pub(crate) struct VkSequenceParameterSet { pub(crate) sps: vk::native::StdVideoH264SequenceParameterSet, @@ -195,8 +222,8 @@ fn h264_level_idc_to_vk(level_idc: u8) -> u32 { } } -pub(super) struct VkPictureParameterSet { - pub(super) pps: vk::native::StdVideoH264PictureParameterSet, +pub(crate) struct VkPictureParameterSet { + pub(crate) pps: vk::native::StdVideoH264PictureParameterSet, } impl TryFrom<&'_ h264_reader::nal::pps::PicParameterSet> for VkPictureParameterSet { diff --git a/vk-video/src/vulkan_decoder/wrappers/video.rs b/vk-video/src/vulkan_decoder/wrappers/video.rs index 7dc745c1b..e548f6ea8 100644 --- a/vk-video/src/vulkan_decoder/wrappers/video.rs +++ b/vk-video/src/vulkan_decoder/wrappers/video.rs @@ -1,123 +1,11 @@ -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; use ash::vk; -use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; -use crate::{ - vulkan_decoder::{ - parameter_sets::{VkPictureParameterSet, VkSequenceParameterSet}, - VulkanDecoderError, - }, - VulkanCtx, -}; +use crate::{vulkan_decoder::VulkanDecoderError, VulkanCtx}; use super::{Device, MemoryAllocation, VideoQueueExt}; -/// Since `VideoSessionParameters` can only add sps and pps values (inserting sps or pps with an -/// existing id is prohibited), this is an abstraction which provides the capability to replace an -/// existing sps or pps. 
-pub(crate) struct VideoSessionParametersManager { - pub(crate) parameters: VideoSessionParameters, - sps: HashMap, - pps: HashMap<(u8, u8), VkPictureParameterSet>, - device: Arc, - session: vk::VideoSessionKHR, -} - -impl VideoSessionParametersManager { - pub(crate) fn new( - vulkan_ctx: &VulkanCtx, - session: vk::VideoSessionKHR, - ) -> Result { - Ok(Self { - parameters: VideoSessionParameters::new( - vulkan_ctx.device.clone(), - session, - &[], - &[], - None, - )?, - sps: HashMap::new(), - pps: HashMap::new(), - device: vulkan_ctx.device.clone(), - session, - }) - } - - pub(crate) fn parameters(&self) -> vk::VideoSessionParametersKHR { - self.parameters.parameters - } - - pub(crate) fn change_session( - &mut self, - session: vk::VideoSessionKHR, - ) -> Result<(), VulkanDecoderError> { - if self.session == session { - return Ok(()); - } - self.session = session; - - let sps = self.sps.values().map(|sps| sps.sps).collect::>(); - let pps = self.pps.values().map(|pps| pps.pps).collect::>(); - - self.parameters = - VideoSessionParameters::new(self.device.clone(), session, &sps, &pps, None)?; - - Ok(()) - } - - // it is probably not optimal to insert sps and pps searately. this could be optimized, so that - // the insertion happens lazily when the parameters are bound to a session. - pub(crate) fn put_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { - let key = sps.seq_parameter_set_id.id(); - match self.sps.entry(key) { - std::collections::hash_map::Entry::Occupied(mut e) => { - e.insert(sps.try_into()?); - - self.parameters = VideoSessionParameters::new( - self.device.clone(), - self.session, - &[self.sps[&key].sps], - &[], - Some(&self.parameters), - )? 
- } - std::collections::hash_map::Entry::Vacant(e) => { - e.insert(sps.try_into()?); - - self.parameters.add(&[self.sps[&key].sps], &[])?; - } - } - - Ok(()) - } - - pub(crate) fn put_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { - let key = (pps.seq_parameter_set_id.id(), pps.pic_parameter_set_id.id()); - match self.pps.entry(key) { - std::collections::hash_map::Entry::Occupied(mut e) => { - e.insert(pps.try_into()?); - - self.parameters = VideoSessionParameters::new( - self.device.clone(), - self.session, - &[], - &[self.pps[&key].pps], - Some(&self.parameters), - )?; - } - - std::collections::hash_map::Entry::Vacant(e) => { - e.insert(pps.try_into()?); - - self.parameters.add(&[], &[self.pps[&key].pps])?; - } - } - - Ok(()) - } -} - pub(crate) struct VideoSessionParameters { pub(crate) parameters: vk::VideoSessionParametersKHR, update_sequence_count: u32, @@ -296,3 +184,23 @@ impl Drop for VideoSession { }; } } + +impl From for vk::native::StdVideoDecodeH264ReferenceInfo { + fn from(picture_info: crate::parser::PictureInfo) -> Self { + vk::native::StdVideoDecodeH264ReferenceInfo { + flags: vk::native::StdVideoDecodeH264ReferenceInfoFlags { + __bindgen_padding_0: [0; 3], + _bitfield_align_1: [], + _bitfield_1: vk::native::StdVideoDecodeH264ReferenceInfoFlags::new_bitfield_1( + 0, + 0, + picture_info.used_for_long_term_reference.into(), + picture_info.non_existing.into(), + ), + }, + FrameNum: picture_info.FrameNum, + PicOrderCnt: picture_info.PicOrderCnt, + reserved: 0, + } + } +} From cb8e4966dfc2aff255d1dc524b75ad06553ff893 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Tue, 1 Oct 2024 15:40:36 +0200 Subject: [PATCH 05/13] Refactor the reference manager to a new module. 
--- vk-video/src/parser.rs | 462 +--------------------- vk-video/src/parser/reference_manager.rs | 468 +++++++++++++++++++++++ 2 files changed, 472 insertions(+), 458 deletions(-) create mode 100644 vk-video/src/parser/reference_manager.rs diff --git a/vk-video/src/parser.rs b/vk-video/src/parser.rs index 0b11a640e..622e86187 100644 --- a/vk-video/src/parser.rs +++ b/vk-video/src/parser.rs @@ -5,47 +5,16 @@ use std::{ use h264_reader::{ annexb::AnnexBReader, - nal::{ - pps::PicParameterSet, - slice::{DecRefPicMarking, NumRefIdxActive, RefPicListModifications, SliceHeader}, - sps::SeqParameterSet, - Nal, RefNal, - }, + nal::{pps::PicParameterSet, slice::SliceHeader, sps::SeqParameterSet, Nal, RefNal}, push::{AccumulatedNalHandler, NalAccumulator, NalInterest}, }; +use reference_manager::ReferenceContext; use tracing::trace; mod au_splitter; +mod reference_manager; -#[derive(Debug, thiserror::Error)] -pub enum ReferenceManagementError { - #[error("B frames are not supported")] - BFramesNotSupported, - - #[error("Long-term references are not supported")] - LongTermRefsNotSupported, - - #[error("SI frames are not supported")] - SIFramesNotSupported, - - #[error("SP frames are not supported")] - SPFramesNotSupported, - - #[error("Adaptive memory control decoded reference picture marking process is not supported")] - AdaptiveMemCtlNotSupported, - - #[error("Reference picture list modifications are not supported")] - RefPicListModificationsNotSupported, - - #[error("PicOrderCntType {0} is not supperted")] - PicOrderCntTypeNotSupported(u8), - - #[error("pic_order_cnt_lsb is not present in a slice header, but is required for decoding")] - PicOrderCntLsbNotPresent, -} - -#[derive(Debug, Default, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct ReferenceId(usize); +pub use reference_manager::{ReferenceId, ReferenceManagementError}; #[derive(Clone, derivative::Derivative)] #[derivative(Debug)] @@ -102,429 +71,6 @@ pub enum DecoderInstruction { 
Pps(PicParameterSet), } -#[derive(Debug, Default)] -struct ReferenceContext { - pictures: ReferencePictures, - next_reference_id: ReferenceId, - _previous_frame_num: usize, - prev_pic_order_cnt_msb: i32, - prev_pic_order_cnt_lsb: i32, -} - -impl ReferenceContext { - fn get_next_reference_id(&mut self) -> ReferenceId { - let result = self.next_reference_id; - self.next_reference_id = ReferenceId(result.0 + 1); - result - } - - fn reset_state(&mut self) { - *self = Self { - pictures: ReferencePictures::default(), - next_reference_id: ReferenceId::default(), - _previous_frame_num: 0, - prev_pic_order_cnt_msb: 0, - prev_pic_order_cnt_lsb: 0, - }; - } - - fn add_short_term_reference( - &mut self, - header: Arc, - pic_order_cnt: [i32; 2], - ) -> ReferenceId { - let id = self.get_next_reference_id(); - self.pictures.short_term.push(ShortTermReferencePicture { - header, - id, - pic_order_cnt, - }); - id - } - - fn put_picture( - &mut self, - mut slices: Vec, - sps: &SeqParameterSet, - pps: &PicParameterSet, - ) -> Result, ParserError> { - let header = slices.last().unwrap().header.clone(); - let mut rbsp_bytes = Vec::new(); - let mut slice_indices = Vec::new(); - for slice in &mut slices { - if slice.rbsp_bytes.is_empty() { - continue; - } - slice_indices.push(rbsp_bytes.len()); - rbsp_bytes.append(&mut slice.rbsp_bytes); - } - - match header.dec_ref_pic_marking { - Some(DecRefPicMarking::Idr { - long_term_reference_flag, - .. 
- }) => { - if long_term_reference_flag { - Err(ReferenceManagementError::LongTermRefsNotSupported)?; - } - - let decode_info = self.decode_information_for_frame( - header.clone(), - slice_indices, - rbsp_bytes, - sps, - pps, - )?; - - self.reset_state(); - - let reference_id = - self.add_short_term_reference(header, decode_info.picture_info.PicOrderCnt); - - Ok(vec![DecoderInstruction::Idr { - decode_info, - reference_id, - }]) - } - - Some(DecRefPicMarking::SlidingWindow) => { - let num_short_term = self.pictures.short_term.len(); - let num_long_term = self.pictures.long_term.len(); - - let decode_info = self.decode_information_for_frame( - header.clone(), - slice_indices, - rbsp_bytes, - sps, - pps, - )?; - let reference_id = self - .add_short_term_reference(header.clone(), decode_info.picture_info.PicOrderCnt); - - let mut decoder_instructions = vec![DecoderInstruction::DecodeAndStoreAs { - decode_info, - reference_id, - }]; - - if num_short_term + num_long_term == sps.max_num_ref_frames.max(1) as usize - && !self.pictures.short_term.is_empty() - { - let (idx, _) = self - .pictures - .short_term - .iter() - .enumerate() - .min_by_key(|(_, reference)| { - reference - .decode_picture_numbers(header.frame_num as i64, sps) - .unwrap() - .FrameNumWrap - }) - .unwrap(); - - decoder_instructions.push(DecoderInstruction::Drop { - reference_ids: vec![self.pictures.short_term.remove(idx).id], - }) - } - - Ok(decoder_instructions) - } - - Some(DecRefPicMarking::Adaptive(_)) => { - Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
- } - - // this picture is not a reference - None => Ok(vec![DecoderInstruction::Decode { - decode_info: self.decode_information_for_frame( - header, - slice_indices, - rbsp_bytes, - sps, - pps, - )?, - }]), - } - } - - fn decode_information_for_frame( - &mut self, - header: Arc, - slice_indices: Vec, - rbsp_bytes: Vec, - sps: &SeqParameterSet, - pps: &PicParameterSet, - ) -> Result { - let reference_list = match header.slice_type.family { - h264_reader::nal::slice::SliceFamily::P => { - let reference_list = - self.initialize_reference_picture_list_for_frame(&header, sps, pps)?; - - match &header.ref_pic_list_modification { - Some(RefPicListModifications::P { - ref_pic_list_modification_l0, - }) => { - if !ref_pic_list_modification_l0.is_empty() { - Err(ReferenceManagementError::RefPicListModificationsNotSupported)?; - } - } - - None - | Some(RefPicListModifications::I) - | Some(RefPicListModifications::B { .. }) => unreachable!(), - } - - Some(reference_list) - } - h264_reader::nal::slice::SliceFamily::I => None, - h264_reader::nal::slice::SliceFamily::B => { - return Err(ReferenceManagementError::BFramesNotSupported)? - } - h264_reader::nal::slice::SliceFamily::SP => { - return Err(ReferenceManagementError::SPFramesNotSupported)? - } - h264_reader::nal::slice::SliceFamily::SI => { - return Err(ReferenceManagementError::SIFramesNotSupported)? 
- } - }; - - let pic_order_cnt = match sps.pic_order_cnt { - h264_reader::nal::sps::PicOrderCntType::TypeZero { - log2_max_pic_order_cnt_lsb_minus4, - } => { - // this section is very hard to read, but all of this code is just copied from the - // h.264 spec, where it looks almost exactly like this - - let max_pic_order_cnt_lsb = 2_i32.pow(log2_max_pic_order_cnt_lsb_minus4 as u32 + 4); - - let (prev_pic_order_cnt_msb, prev_pic_order_cnt_lsb) = - if header.idr_pic_id.is_some() { - (0, 0) - } else { - (self.prev_pic_order_cnt_msb, self.prev_pic_order_cnt_lsb) - }; - - let (pic_order_cnt_lsb, delta_pic_order_cnt_bottom) = match header - .pic_order_cnt_lsb - .as_ref() - .ok_or(ReferenceManagementError::PicOrderCntLsbNotPresent)? - { - h264_reader::nal::slice::PicOrderCountLsb::Frame(pic_order_cnt_lsb) => { - (*pic_order_cnt_lsb, 0) - } - h264_reader::nal::slice::PicOrderCountLsb::FieldsAbsolute { - pic_order_cnt_lsb, - delta_pic_order_cnt_bottom, - } => (*pic_order_cnt_lsb, *delta_pic_order_cnt_bottom), - h264_reader::nal::slice::PicOrderCountLsb::FieldsDelta(_) => { - Err(ReferenceManagementError::PicOrderCntLsbNotPresent)? 
- } - }; - - let pic_order_cnt_lsb = pic_order_cnt_lsb as i32; - - let pic_order_cnt_msb = if pic_order_cnt_lsb < prev_pic_order_cnt_lsb - && prev_pic_order_cnt_lsb - pic_order_cnt_lsb >= max_pic_order_cnt_lsb / 2 - { - prev_pic_order_cnt_msb + max_pic_order_cnt_lsb - } else if pic_order_cnt_lsb > prev_pic_order_cnt_lsb - && pic_order_cnt_lsb - prev_pic_order_cnt_lsb > max_pic_order_cnt_lsb / 2 - { - prev_pic_order_cnt_msb - max_pic_order_cnt_lsb - } else { - prev_pic_order_cnt_msb - }; - - let pic_order_cnt = if header.field_pic == h264_reader::nal::slice::FieldPic::Frame - { - let top_field_order_cnt = pic_order_cnt_msb + pic_order_cnt_lsb; - - let bottom_field_order_cnt = top_field_order_cnt + delta_pic_order_cnt_bottom; - - top_field_order_cnt.min(bottom_field_order_cnt) - } else { - pic_order_cnt_msb + pic_order_cnt_lsb - }; - - self.prev_pic_order_cnt_msb = pic_order_cnt_msb; - self.prev_pic_order_cnt_lsb = pic_order_cnt_lsb; - - pic_order_cnt - } - - h264_reader::nal::sps::PicOrderCntType::TypeOne { .. } => { - Err(ReferenceManagementError::PicOrderCntTypeNotSupported(1))? - } - - h264_reader::nal::sps::PicOrderCntType::TypeTwo => match header.dec_ref_pic_marking { - None => 2 * header.frame_num as i32 - 1, - Some(DecRefPicMarking::Idr { .. }) | Some(DecRefPicMarking::SlidingWindow) => { - 2 * header.frame_num as i32 - } - Some(DecRefPicMarking::Adaptive(..)) => { - Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
- } - }, - }; - - let pic_order_cnt = [pic_order_cnt; 2]; - - Ok(DecodeInformation { - reference_list, - header: header.clone(), - slice_indices, - rbsp_bytes, - sps_id: sps.id().id(), - pps_id: pps.pic_parameter_set_id.id(), - picture_info: PictureInfo { - non_existing: false, - used_for_long_term_reference: false, - PicOrderCnt: pic_order_cnt, - FrameNum: header.frame_num, - }, - }) - } - - fn initialize_short_term_reference_picture_list_for_frame( - &self, - header: &SliceHeader, - sps: &SeqParameterSet, - ) -> Result, ParserError> { - let mut short_term_reference_list = self - .pictures - .short_term - .iter() - .map(|reference| { - Ok(( - reference, - reference.decode_picture_numbers(header.frame_num.into(), sps)?, - )) - }) - .collect::, ParserError>>()?; - - short_term_reference_list.sort_by_key(|(_, numbers)| -numbers.PicNum); - - let short_term_reference_list = short_term_reference_list - .into_iter() - .map(|(reference, numbers)| ReferencePictureInfo { - id: reference.id, - picture_info: PictureInfo { - FrameNum: numbers.FrameNum as u16, - used_for_long_term_reference: false, - non_existing: false, - PicOrderCnt: reference.pic_order_cnt, - }, - }) - .collect::>(); - - Ok(short_term_reference_list) - } - - fn initialize_long_term_reference_picture_list_for_frame( - &self, - ) -> Result, ReferenceManagementError> { - if !self.pictures.long_term.is_empty() { - panic!("long-term references are not supported!"); - } - - Ok(Vec::new()) - } - - fn initialize_reference_picture_list_for_frame( - &self, - header: &SliceHeader, - sps: &SeqParameterSet, - pps: &PicParameterSet, - ) -> Result, ParserError> { - let num_ref_idx_l0_active = header - .num_ref_idx_active - .as_ref() - .map(|num| match num { - NumRefIdxActive::P { - num_ref_idx_l0_active_minus1, - } => Ok(*num_ref_idx_l0_active_minus1), - NumRefIdxActive::B { .. } => Err(ReferenceManagementError::BFramesNotSupported), - }) - .unwrap_or(Ok(pps.num_ref_idx_l0_default_active_minus1))? 
- + 1; - - let short_term_reference_list = - self.initialize_short_term_reference_picture_list_for_frame(header, sps)?; - - let long_term_reference_list = - self.initialize_long_term_reference_picture_list_for_frame()?; - - let mut reference_list = short_term_reference_list - .into_iter() - .chain(long_term_reference_list) - .collect::>(); - - reference_list.truncate(num_ref_idx_l0_active as usize); - - Ok(reference_list) - } -} - -#[derive(Debug)] -struct ShortTermReferencePicture { - header: Arc, - id: ReferenceId, - pic_order_cnt: [i32; 2], -} - -impl ShortTermReferencePicture { - #[allow(non_snake_case)] - fn decode_picture_numbers( - &self, - current_frame_num: i64, - sps: &SeqParameterSet, - ) -> Result { - if self.header.field_pic != h264_reader::nal::slice::FieldPic::Frame { - return Err(ParserError::FieldsNotSupported); - } - - let MaxFrameNum = sps.max_frame_num(); - - let FrameNum = self.header.frame_num as i64; - - let FrameNumWrap = if FrameNum > current_frame_num { - FrameNum - MaxFrameNum - } else { - FrameNum - }; - - // this assumes we're dealing with a short-term reference frame - let PicNum = FrameNumWrap; - - Ok(ShortTermReferencePictureNumbers { - FrameNum, - FrameNumWrap, - PicNum, - }) - } -} - -#[derive(Debug)] -struct LongTermReferencePicture { - _header: Arc, - _id: ReferenceId, -} - -#[allow(non_snake_case)] -struct ShortTermReferencePictureNumbers { - FrameNum: i64, - - FrameNumWrap: i64, - - PicNum: i64, -} - -#[derive(Debug, Default)] -struct ReferencePictures { - long_term: Vec, - short_term: Vec, -} - #[derive(Debug, thiserror::Error)] pub enum ParserError { #[error(transparent)] diff --git a/vk-video/src/parser/reference_manager.rs b/vk-video/src/parser/reference_manager.rs new file mode 100644 index 000000000..424a74321 --- /dev/null +++ b/vk-video/src/parser/reference_manager.rs @@ -0,0 +1,468 @@ +use std::sync::Arc; + +use h264_reader::nal::{ + pps::PicParameterSet, + slice::{DecRefPicMarking, NumRefIdxActive, 
RefPicListModifications, SliceHeader}, + sps::SeqParameterSet, +}; + +use super::{ + DecodeInformation, DecoderInstruction, ParserError, PictureInfo, ReferencePictureInfo, Slice, + SpsExt, +}; + +#[derive(Debug, thiserror::Error)] +pub enum ReferenceManagementError { + #[error("B frames are not supported")] + BFramesNotSupported, + + #[error("Long-term references are not supported")] + LongTermRefsNotSupported, + + #[error("SI frames are not supported")] + SIFramesNotSupported, + + #[error("SP frames are not supported")] + SPFramesNotSupported, + + #[error("Adaptive memory control decoded reference picture marking process is not supported")] + AdaptiveMemCtlNotSupported, + + #[error("Reference picture list modifications are not supported")] + RefPicListModificationsNotSupported, + + #[error("PicOrderCntType {0} is not supperted")] + PicOrderCntTypeNotSupported(u8), + + #[error("pic_order_cnt_lsb is not present in a slice header, but is required for decoding")] + PicOrderCntLsbNotPresent, +} + +#[derive(Debug, Default, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct ReferenceId(usize); + +#[derive(Debug, Default)] +pub(crate) struct ReferenceContext { + pictures: ReferencePictures, + next_reference_id: ReferenceId, + _previous_frame_num: usize, + prev_pic_order_cnt_msb: i32, + prev_pic_order_cnt_lsb: i32, +} + +impl ReferenceContext { + fn get_next_reference_id(&mut self) -> ReferenceId { + let result = self.next_reference_id; + self.next_reference_id = ReferenceId(result.0 + 1); + result + } + + fn reset_state(&mut self) { + *self = Self { + pictures: ReferencePictures::default(), + next_reference_id: ReferenceId::default(), + _previous_frame_num: 0, + prev_pic_order_cnt_msb: 0, + prev_pic_order_cnt_lsb: 0, + }; + } + + fn add_short_term_reference( + &mut self, + header: Arc, + pic_order_cnt: [i32; 2], + ) -> ReferenceId { + let id = self.get_next_reference_id(); + self.pictures.short_term.push(ShortTermReferencePicture { + header, + id, + 
pic_order_cnt, + }); + id + } + + pub(crate) fn put_picture( + &mut self, + mut slices: Vec, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result, ParserError> { + let header = slices.last().unwrap().header.clone(); + + // maybe this should be done in a different place, but if you think about it, there's not + // really that many places to put this code in + let mut rbsp_bytes = Vec::new(); + let mut slice_indices = Vec::new(); + for slice in &mut slices { + if slice.rbsp_bytes.is_empty() { + continue; + } + slice_indices.push(rbsp_bytes.len()); + rbsp_bytes.append(&mut slice.rbsp_bytes); + } + + match header.dec_ref_pic_marking { + Some(DecRefPicMarking::Idr { + long_term_reference_flag, + .. + }) => { + if long_term_reference_flag { + Err(ReferenceManagementError::LongTermRefsNotSupported)?; + } + + let decode_info = self.decode_information_for_frame( + header.clone(), + slice_indices, + rbsp_bytes, + sps, + pps, + )?; + + self.reset_state(); + + let reference_id = + self.add_short_term_reference(header, decode_info.picture_info.PicOrderCnt); + + Ok(vec![DecoderInstruction::Idr { + decode_info, + reference_id, + }]) + } + + Some(DecRefPicMarking::SlidingWindow) => { + let num_short_term = self.pictures.short_term.len(); + let num_long_term = self.pictures.long_term.len(); + + let decode_info = self.decode_information_for_frame( + header.clone(), + slice_indices, + rbsp_bytes, + sps, + pps, + )?; + let reference_id = self + .add_short_term_reference(header.clone(), decode_info.picture_info.PicOrderCnt); + + let mut decoder_instructions = vec![DecoderInstruction::DecodeAndStoreAs { + decode_info, + reference_id, + }]; + + if num_short_term + num_long_term == sps.max_num_ref_frames.max(1) as usize + && !self.pictures.short_term.is_empty() + { + let (idx, _) = self + .pictures + .short_term + .iter() + .enumerate() + .min_by_key(|(_, reference)| { + reference + .decode_picture_numbers(header.frame_num as i64, sps) + .unwrap() + .FrameNumWrap + }) + 
.unwrap(); + + decoder_instructions.push(DecoderInstruction::Drop { + reference_ids: vec![self.pictures.short_term.remove(idx).id], + }) + } + + Ok(decoder_instructions) + } + + Some(DecRefPicMarking::Adaptive(_)) => { + Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? + } + + // this picture is not a reference + None => Ok(vec![DecoderInstruction::Decode { + decode_info: self.decode_information_for_frame( + header, + slice_indices, + rbsp_bytes, + sps, + pps, + )?, + }]), + } + } + + fn decode_information_for_frame( + &mut self, + header: Arc, + slice_indices: Vec, + rbsp_bytes: Vec, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result { + let reference_list = match header.slice_type.family { + h264_reader::nal::slice::SliceFamily::P => { + let reference_list = + self.initialize_reference_picture_list_for_frame(&header, sps, pps)?; + + match &header.ref_pic_list_modification { + Some(RefPicListModifications::P { + ref_pic_list_modification_l0, + }) => { + if !ref_pic_list_modification_l0.is_empty() { + Err(ReferenceManagementError::RefPicListModificationsNotSupported)?; + } + } + + None + | Some(RefPicListModifications::I) + | Some(RefPicListModifications::B { .. }) => unreachable!(), + } + + Some(reference_list) + } + h264_reader::nal::slice::SliceFamily::I => None, + h264_reader::nal::slice::SliceFamily::B => { + return Err(ReferenceManagementError::BFramesNotSupported)? + } + h264_reader::nal::slice::SliceFamily::SP => { + return Err(ReferenceManagementError::SPFramesNotSupported)? + } + h264_reader::nal::slice::SliceFamily::SI => { + return Err(ReferenceManagementError::SIFramesNotSupported)? 
+ } + }; + + let pic_order_cnt = match sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeZero { + log2_max_pic_order_cnt_lsb_minus4, + } => { + // this section is very hard to read, but all of this code is just copied from the + // h.264 spec, where it looks almost exactly like this + + let max_pic_order_cnt_lsb = 2_i32.pow(log2_max_pic_order_cnt_lsb_minus4 as u32 + 4); + + let (prev_pic_order_cnt_msb, prev_pic_order_cnt_lsb) = + if header.idr_pic_id.is_some() { + (0, 0) + } else { + (self.prev_pic_order_cnt_msb, self.prev_pic_order_cnt_lsb) + }; + + let (pic_order_cnt_lsb, delta_pic_order_cnt_bottom) = match header + .pic_order_cnt_lsb + .as_ref() + .ok_or(ReferenceManagementError::PicOrderCntLsbNotPresent)? + { + h264_reader::nal::slice::PicOrderCountLsb::Frame(pic_order_cnt_lsb) => { + (*pic_order_cnt_lsb, 0) + } + h264_reader::nal::slice::PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + } => (*pic_order_cnt_lsb, *delta_pic_order_cnt_bottom), + h264_reader::nal::slice::PicOrderCountLsb::FieldsDelta(_) => { + Err(ReferenceManagementError::PicOrderCntLsbNotPresent)? 
+ } + }; + + let pic_order_cnt_lsb = pic_order_cnt_lsb as i32; + + let pic_order_cnt_msb = if pic_order_cnt_lsb < prev_pic_order_cnt_lsb + && prev_pic_order_cnt_lsb - pic_order_cnt_lsb >= max_pic_order_cnt_lsb / 2 + { + prev_pic_order_cnt_msb + max_pic_order_cnt_lsb + } else if pic_order_cnt_lsb > prev_pic_order_cnt_lsb + && pic_order_cnt_lsb - prev_pic_order_cnt_lsb > max_pic_order_cnt_lsb / 2 + { + prev_pic_order_cnt_msb - max_pic_order_cnt_lsb + } else { + prev_pic_order_cnt_msb + }; + + let pic_order_cnt = if header.field_pic == h264_reader::nal::slice::FieldPic::Frame + { + let top_field_order_cnt = pic_order_cnt_msb + pic_order_cnt_lsb; + + let bottom_field_order_cnt = top_field_order_cnt + delta_pic_order_cnt_bottom; + + top_field_order_cnt.min(bottom_field_order_cnt) + } else { + pic_order_cnt_msb + pic_order_cnt_lsb + }; + + self.prev_pic_order_cnt_msb = pic_order_cnt_msb; + self.prev_pic_order_cnt_lsb = pic_order_cnt_lsb; + + pic_order_cnt + } + + h264_reader::nal::sps::PicOrderCntType::TypeOne { .. } => { + Err(ReferenceManagementError::PicOrderCntTypeNotSupported(1))? + } + + h264_reader::nal::sps::PicOrderCntType::TypeTwo => match header.dec_ref_pic_marking { + None => 2 * header.frame_num as i32 - 1, + Some(DecRefPicMarking::Idr { .. }) | Some(DecRefPicMarking::SlidingWindow) => { + 2 * header.frame_num as i32 + } + Some(DecRefPicMarking::Adaptive(..)) => { + Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
+ } + }, + }; + + let pic_order_cnt = [pic_order_cnt; 2]; + + Ok(DecodeInformation { + reference_list, + header: header.clone(), + slice_indices, + rbsp_bytes, + sps_id: sps.id().id(), + pps_id: pps.pic_parameter_set_id.id(), + picture_info: PictureInfo { + non_existing: false, + used_for_long_term_reference: false, + PicOrderCnt: pic_order_cnt, + FrameNum: header.frame_num, + }, + }) + } + + fn initialize_short_term_reference_picture_list_for_frame( + &self, + header: &SliceHeader, + sps: &SeqParameterSet, + ) -> Result, ParserError> { + let mut short_term_reference_list = self + .pictures + .short_term + .iter() + .map(|reference| { + Ok(( + reference, + reference.decode_picture_numbers(header.frame_num.into(), sps)?, + )) + }) + .collect::, ParserError>>()?; + + short_term_reference_list.sort_by_key(|(_, numbers)| -numbers.PicNum); + + let short_term_reference_list = short_term_reference_list + .into_iter() + .map(|(reference, numbers)| ReferencePictureInfo { + id: reference.id, + picture_info: PictureInfo { + FrameNum: numbers.FrameNum as u16, + used_for_long_term_reference: false, + non_existing: false, + PicOrderCnt: reference.pic_order_cnt, + }, + }) + .collect::>(); + + Ok(short_term_reference_list) + } + + fn initialize_long_term_reference_picture_list_for_frame( + &self, + ) -> Result, ReferenceManagementError> { + if !self.pictures.long_term.is_empty() { + panic!("long-term references are not supported!"); + } + + Ok(Vec::new()) + } + + fn initialize_reference_picture_list_for_frame( + &self, + header: &SliceHeader, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result, ParserError> { + let num_ref_idx_l0_active = header + .num_ref_idx_active + .as_ref() + .map(|num| match num { + NumRefIdxActive::P { + num_ref_idx_l0_active_minus1, + } => Ok(*num_ref_idx_l0_active_minus1), + NumRefIdxActive::B { .. } => Err(ReferenceManagementError::BFramesNotSupported), + }) + .unwrap_or(Ok(pps.num_ref_idx_l0_default_active_minus1))? 
+ + 1; + + let short_term_reference_list = + self.initialize_short_term_reference_picture_list_for_frame(header, sps)?; + + let long_term_reference_list = + self.initialize_long_term_reference_picture_list_for_frame()?; + + let mut reference_list = short_term_reference_list + .into_iter() + .chain(long_term_reference_list) + .collect::>(); + + reference_list.truncate(num_ref_idx_l0_active as usize); + + Ok(reference_list) + } +} + +#[derive(Debug)] +struct ShortTermReferencePicture { + header: Arc, + id: ReferenceId, + pic_order_cnt: [i32; 2], +} + +impl ShortTermReferencePicture { + #[allow(non_snake_case)] + fn decode_picture_numbers( + &self, + current_frame_num: i64, + sps: &SeqParameterSet, + ) -> Result { + if self.header.field_pic != h264_reader::nal::slice::FieldPic::Frame { + return Err(ParserError::FieldsNotSupported); + } + + let MaxFrameNum = sps.max_frame_num(); + + let FrameNum = self.header.frame_num as i64; + + let FrameNumWrap = if FrameNum > current_frame_num { + FrameNum - MaxFrameNum + } else { + FrameNum + }; + + // this assumes we're dealing with a short-term reference frame + let PicNum = FrameNumWrap; + + Ok(ShortTermReferencePictureNumbers { + FrameNum, + FrameNumWrap, + PicNum, + }) + } +} + +#[derive(Debug)] +struct LongTermReferencePicture { + _header: Arc, + _id: ReferenceId, +} + +#[allow(non_snake_case)] +struct ShortTermReferencePictureNumbers { + FrameNum: i64, + + FrameNumWrap: i64, + + PicNum: i64, +} + +#[derive(Debug, Default)] +struct ReferencePictures { + long_term: Vec, + short_term: Vec, +} From d3605dc20fa9fdcea55676318cffd3bfa0e52d20 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Wed, 2 Oct 2024 11:54:11 +0200 Subject: [PATCH 06/13] Add a comment. 
--- vk-video/src/parser/au_splitter.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vk-video/src/parser/au_splitter.rs b/vk-video/src/parser/au_splitter.rs index 29a84d964..ad46ffabd 100644 --- a/vk-video/src/parser/au_splitter.rs +++ b/vk-video/src/parser/au_splitter.rs @@ -40,6 +40,9 @@ impl AUSplitter { } } +// The below code is taken from Membrane's AU Splitter in their h264 parser. The comments contain +// elixir versions of the functions below them. + // defguardp first_mb_in_slice_zero(a) // when a.first_mb_in_slice == 0 and // a.nal_unit_type in [1, 2, 5] From 748a75e41ce01d1b24f67a615da3bc2337256da4 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 13:03:30 +0200 Subject: [PATCH 07/13] Review suggestions --- Cargo.lock | 15 +- Cargo.toml | 1 + .../src/types/from_register_input.rs | 30 ++- compositor_api/src/types/register_input.rs | 3 - compositor_pipeline/Cargo.toml | 1 + integration_tests/examples/vulkan.rs | 211 +++++++++++++----- integration_tests/src/examples.rs | 2 +- vk-video/Cargo.toml | 2 + vk-video/build.rs | 13 ++ vk-video/examples/basic.rs | 16 +- vk-video/examples/wgpu.rs | 24 +- 11 files changed, 196 insertions(+), 122 deletions(-) create mode 100644 vk-video/build.rs diff --git a/Cargo.lock b/Cargo.lock index 2cfcc0e00..c9e9d1f48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -395,6 +395,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chromium_sys" version = "0.1.0" @@ -2044,7 +2050,7 @@ dependencies = [ "arrayvec", "bit-set", "bitflags 2.6.0", - "cfg_aliases", + "cfg_aliases 0.1.1", "codespan-reporting", "hexf-parse", "indexmap 2.0.1", @@ -3969,6 +3975,7 @@ name = "vk-video" version = 
"0.1.0" dependencies = [ "ash", + "cfg_aliases 0.2.1", "derivative", "h264-reader", "thiserror", @@ -4114,7 +4121,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d1c4ba43f80542cf63a0a6ed3134629ae73e8ab51e4b765a67f3aa062eb433" dependencies = [ "arrayvec", - "cfg_aliases", + "cfg_aliases 0.1.1", "document-features", "js-sys", "log", @@ -4141,7 +4148,7 @@ dependencies = [ "arrayvec", "bit-vec", "bitflags 2.6.0", - "cfg_aliases", + "cfg_aliases 0.1.1", "document-features", "indexmap 2.0.1", "log", @@ -4169,7 +4176,7 @@ dependencies = [ "bit-set", "bitflags 2.6.0", "block", - "cfg_aliases", + "cfg_aliases 0.1.1", "core-graphics-types", "d3d12", "glow", diff --git a/Cargo.toml b/Cargo.toml index bea6c7e76..c86786cf6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,6 +102,7 @@ http-body-util = "0.1.2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] shared_memory = { workspace = true } +# platforms that support vulkan are: windows and all non-apple unixes. 
emscripten is something for the web, where vulkan is not available either [target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] compositor_api = { workspace = true, features = ["vk-video"] } diff --git a/compositor_api/src/types/from_register_input.rs b/compositor_api/src/types/from_register_input.rs index e3d705a2b..54b092f3f 100644 --- a/compositor_api/src/types/from_register_input.rs +++ b/compositor_api/src/types/from_register_input.rs @@ -102,23 +102,19 @@ impl TryFrom for pipeline::RegisterInputOptions { } let rtp_stream = input::rtp::RtpStream { - video: video.as_ref().map(|video| Ok(input::rtp::InputVideoStream { - options: match video { - InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { - codec: pipeline::VideoCodec::H264, - decoder: pipeline::VideoDecoder::FFmpegH264, - }, - #[cfg(feature = "vk-video")] - InputRtpVideoOptions::VulkanVideo => decoder::VideoDecoderOptions { - decoder: pipeline::VideoDecoder::VulkanVideo, - codec: pipeline::VideoCodec::H264, - }, - #[cfg(not(feature = "vk-video"))] - InputRtpVideoOptions::VulkanVideo => return Err(TypeError::new( - "This Live Compositor binary was build without Vulkan Video support. Rebuilt it on a platform which supports Vulkan Video." 
- )), - } - })).transpose()?, + video: video + .as_ref() + .map(|video| { + Ok(input::rtp::InputVideoStream { + options: match video { + InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { + codec: pipeline::VideoCodec::H264, + decoder: pipeline::VideoDecoder::FFmpegH264, + }, + }, + }) + }) + .transpose()?, audio: audio.map(TryFrom::try_from).transpose()?, }; diff --git a/compositor_api/src/types/register_input.rs b/compositor_api/src/types/register_input.rs index 635386e61..e66f14fc4 100644 --- a/compositor_api/src/types/register_input.rs +++ b/compositor_api/src/types/register_input.rs @@ -126,7 +126,4 @@ pub enum InputRtpAudioOptions { pub enum InputRtpVideoOptions { #[serde(rename = "ffmpeg_h264")] FfmepgH264, - - #[serde(rename = "vulkan_video")] - VulkanVideo, } diff --git a/compositor_pipeline/Cargo.toml b/compositor_pipeline/Cargo.toml index 199f9e224..4a15b344c 100644 --- a/compositor_pipeline/Cargo.toml +++ b/compositor_pipeline/Cargo.toml @@ -34,5 +34,6 @@ glyphon = { workspace = true } [target.x86_64-unknown-linux-gnu.dependencies] decklink = { path = "../decklink", optional = true } +# platforms that support vulkan are: windows and all non-apple unixes. 
emscripten is something for the web, where vulkan is not available either [target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] vk-video = { path = "../vk-video/", optional = true } diff --git a/integration_tests/examples/vulkan.rs b/integration_tests/examples/vulkan.rs index 675985901..2fcc4b70a 100644 --- a/integration_tests/examples/vulkan.rs +++ b/integration_tests/examples/vulkan.rs @@ -1,10 +1,47 @@ use anyhow::Result; use compositor_api::types::Resolution; -use serde_json::json; -use std::time::Duration; +use compositor_pipeline::{ + pipeline::{ + decoder::VideoDecoderOptions, + encoder::{ + ffmpeg_h264::{EncoderPreset, Options as H264Options}, + VideoEncoderOptions, + }, + input::{ + rtp::{InputVideoStream, RtpReceiverOptions, RtpStream}, + InputOptions, + }, + output::{ + rtp::{RtpConnectionOptions, RtpSenderOptions}, + OutputOptions, OutputProtocolOptions, + }, + rtp::{RequestedPort, TransportProtocol}, + Options, OutputVideoOptions, PipelineOutputEndCondition, Port, RegisterInputOptions, + RegisterOutputOptions, VideoCodec, VideoDecoder, + }, + queue::QueueInputOptions, + Pipeline, +}; +use compositor_render::{ + error::ErrorStack, + scene::{ + Component, ComponentId, HorizontalAlign, InputStreamComponent, RGBAColor, TilesComponent, + VerticalAlign, + }, + InputId, OutputId, +}; +use live_compositor::{ + config::{read_config, LoggerConfig, LoggerFormat}, + logger::{self, FfmpegLogLevel}, +}; +use signal_hook::{consts, iterator::Signals}; +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; use integration_tests::{ - examples::{self, run_example, TestSample}, + examples::{download_all_assets, TestSample}, ffmpeg::{start_ffmpeg_receive, start_ffmpeg_send}, }; @@ -20,82 +57,120 @@ const OUTPUT_PORT: u16 = 8004; const VIDEOS: u16 = 6; fn main() { - run_example(client_code); + ffmpeg_next::format::network::init(); + logger::init_logger(LoggerConfig { + ffmpeg_logger_level: 
FfmpegLogLevel::Info, + format: LoggerFormat::Compact, + level: "info,wgpu_hal=warn,wgpu_core=warn".to_string(), + }); + + download_all_assets().unwrap(); + + client_code().unwrap(); } fn client_code() -> Result<()> { start_ffmpeg_receive(Some(OUTPUT_PORT), None)?; + let config = read_config(); + let (pipeline, event_loop) = Pipeline::new(Options { + queue_options: config.queue_options, + stream_fallback_timeout: config.stream_fallback_timeout, + web_renderer: config.web_renderer, + force_gpu: config.force_gpu, + download_root: config.download_root, + output_sample_rate: config.output_sample_rate, + wgpu_features: config.required_wgpu_features, + load_system_fonts: Some(true), + wgpu_ctx: None, + }) + .unwrap_or_else(|err| { + panic!( + "Failed to start compositor.\n{}", + ErrorStack::new(&err).into_string() + ) + }); + + let pipeline = Arc::new(Mutex::new(pipeline)); + let mut children = Vec::new(); for i in 1..VIDEOS + 1 { - let input_name = format!("input_{i}"); - - examples::post( - &format!("input/{input_name}/register"), - &json!({ - "type": "rtp_stream", - "port": INPUT_PORT + 2 + 2 * i, - "video": { - "decoder": "vulkan_video" - } + let input_id = InputId(format!("input_{i}").into()); + + let input_options = RegisterInputOptions { + input_options: InputOptions::Rtp(RtpReceiverOptions { + port: RequestedPort::Exact(INPUT_PORT + 2 + 2 * i), + transport_protocol: TransportProtocol::Udp, + stream: RtpStream { + video: Some(InputVideoStream { + options: VideoDecoderOptions { + codec: VideoCodec::H264, + decoder: VideoDecoder::VulkanVideo, + }, + }), + audio: None, + }, }), - )?; - - children.push(json!({ - "type": "input_stream", - "input_id": input_name, + queue_options: QueueInputOptions { + offset: Some(Duration::ZERO), + required: false, + buffer_duration: None, + }, + }; + + Pipeline::register_input(&pipeline, input_id.clone(), input_options).unwrap(); + + children.push(Component::InputStream(InputStreamComponent { + id: None, + input_id, })); } - let scene 
= json!({ - "type": "tiles", - "id": "tile", - "padding": 5, - "background_color_rgba": "#444444FF", - "children": children, - "transition": { - "duration_ms": 700, - "easing_function": { - "function_name": "cubic_bezier", - "points": [0.35, 0.22, 0.1, 0.8] - } + let output_options = RegisterOutputOptions { + output_options: OutputOptions { + output_protocol: OutputProtocolOptions::Rtp(RtpSenderOptions { + video: Some(VideoCodec::H264), + audio: None, + connection_options: RtpConnectionOptions::Udp { + port: Port(OUTPUT_PORT), + ip: IP.into(), + }, + }), + video: Some(VideoEncoderOptions::H264(H264Options { + preset: EncoderPreset::Ultrafast, + resolution: VIDEO_RESOLUTION.into(), + raw_options: Vec::new(), + })), + audio: None, }, - }); + video: Some(OutputVideoOptions { + initial: Component::Tiles(TilesComponent { + id: Some(ComponentId("tiles".into())), + padding: 5.0, + background_color: RGBAColor(0x44, 0x44, 0x44, 0xff), + children, + width: None, + height: None, + margin: 0.0, + transition: None, + vertical_align: VerticalAlign::Center, + horizontal_align: HorizontalAlign::Center, + tile_aspect_ratio: (16, 9), + }), - let shader_source = include_str!("./silly.wgsl"); - examples::post( - "shader/shader_example_1/register", - &json!({ - "source": shader_source, + end_condition: PipelineOutputEndCondition::Never, }), - )?; - - examples::post( - "output/output_1/register", - &json!({ - "type": "rtp_stream", - "port": OUTPUT_PORT, - "ip": IP, - "video": { - "resolution": { - "width": VIDEO_RESOLUTION.width, - "height": VIDEO_RESOLUTION.height, - }, - "encoder": { - "type": "ffmpeg_h264", - "preset": "ultrafast" - }, - "initial": { - "root": scene - } - } - }), - )?; + audio: None, + }; - std::thread::sleep(Duration::from_millis(500)); + pipeline + .lock() + .unwrap() + .register_output(OutputId("output_1".into()), output_options) + .unwrap(); - examples::post("start", &json!({}))?; + Pipeline::start(&pipeline); for i in 1..VIDEOS + 1 { start_ffmpeg_send( @@ -105,5 
+180,17 @@ fn client_code() -> Result<()> { TestSample::BigBuckBunny, )?; } + + let event_loop_fallback = || { + let mut signals = Signals::new([consts::SIGINT]).unwrap(); + signals.forever().next(); + }; + if let Err(err) = event_loop.run_with_fallback(&event_loop_fallback) { + panic!( + "Failed to start event loop.\n{}", + ErrorStack::new(&err).into_string() + ) + } + Ok(()) } diff --git a/integration_tests/src/examples.rs b/integration_tests/src/examples.rs index 92db7929a..9a3f08d4a 100644 --- a/integration_tests/src/examples.rs +++ b/integration_tests/src/examples.rs @@ -200,7 +200,7 @@ struct AssetData { path: PathBuf, } -fn download_all_assets() -> Result<()> { +pub fn download_all_assets() -> Result<()> { let assets = [AssetData { url: String::from("https://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4"), path: examples_root_dir().join("examples/assets/BigBuckBunny.mp4"), diff --git a/vk-video/Cargo.toml b/vk-video/Cargo.toml index 31b60e970..becd44adf 100644 --- a/vk-video/Cargo.toml +++ b/vk-video/Cargo.toml @@ -21,3 +21,5 @@ wgpu = "22.1.0" [dev-dependencies] tracing-subscriber = "0.3.18" +[build-dependencies] +cfg_aliases = "0.2.1" diff --git a/vk-video/build.rs b/vk-video/build.rs new file mode 100644 index 000000000..6fc1495c2 --- /dev/null +++ b/vk-video/build.rs @@ -0,0 +1,13 @@ +fn main() { + cfg_aliases::cfg_aliases! 
{ + vulkan: { + any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) + ) + }, + } +} diff --git a/vk-video/examples/basic.rs b/vk-video/examples/basic.rs index d57ab696c..388078fb3 100644 --- a/vk-video/examples/basic.rs +++ b/vk-video/examples/basic.rs @@ -1,10 +1,4 @@ -#[cfg(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -))] +#[cfg(vulkan)] fn main() { use std::io::Write; @@ -45,13 +39,7 @@ fn main() { } } -#[cfg(not(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -)))] +#[cfg(not(vulkan))] fn main() { println!( "This crate doesn't work on your operating system, because it does not support vulkan" diff --git a/vk-video/examples/wgpu.rs b/vk-video/examples/wgpu.rs index c535b0744..36f52be03 100644 --- a/vk-video/examples/wgpu.rs +++ b/vk-video/examples/wgpu.rs @@ -1,10 +1,4 @@ -#[cfg(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -))] +#[cfg(vulkan)] fn main() { use std::io::Write; @@ -47,26 +41,14 @@ fn main() { } } -#[cfg(not(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -)))] +#[cfg(not(vulkan))] fn main() { println!( "This crate doesn't work on your operating system, because it does not support vulkan" ); } -#[cfg(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -))] +#[cfg(vulkan)] fn download_wgpu_texture( device: &wgpu::Device, queue: &wgpu::Queue, From 54dedce8da5d9bf73af8aed3d144a2da1e45792d Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 16:38:36 +0200 Subject: [PATCH 08/13] Move GraphicsContext to its own module --- compositor_pipeline/src/pipeline.rs | 70 +----------------- .../src/pipeline/graphics_context.rs | 72 +++++++++++++++++++ 2 files changed, 75 
insertions(+), 67 deletions(-) create mode 100644 compositor_pipeline/src/pipeline/graphics_context.rs diff --git a/compositor_pipeline/src/pipeline.rs b/compositor_pipeline/src/pipeline.rs index d3e4e58d8..6e534d124 100644 --- a/compositor_pipeline/src/pipeline.rs +++ b/compositor_pipeline/src/pipeline.rs @@ -48,6 +48,7 @@ use self::input::InputOptions; pub mod decoder; pub mod encoder; +mod graphics_context; pub mod input; pub mod output; mod pipeline_input; @@ -63,9 +64,10 @@ pub use self::types::{ AudioCodec, EncodedChunk, EncodedChunkKind, EncoderOutputEvent, RawDataReceiver, VideoCodec, VideoDecoder, }; -use compositor_render::{create_wgpu_ctx, error::InitRendererEngineError}; pub use pipeline_output::PipelineOutputEndCondition; +pub use graphics_context::GraphicsContext; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Port(pub u16); @@ -111,72 +113,6 @@ pub struct Pipeline { is_started: bool, } -pub struct GraphicsContext { - pub device: Arc, - pub queue: Arc, - - #[cfg(feature = "vk-video")] - pub vulkan_ctx: Option>, -} - -impl GraphicsContext { - #[cfg(feature = "vk-video")] - pub fn new( - force_gpu: bool, - features: wgpu::Features, - limits: wgpu::Limits, - ) -> Result { - use compositor_render::{required_wgpu_features, set_required_wgpu_limits}; - - let vulkan_features = - features | required_wgpu_features() | wgpu::Features::TEXTURE_FORMAT_NV12; - - let limits = set_required_wgpu_limits(limits); - - match vk_video::VulkanCtx::new(vulkan_features, limits.clone()) { - Ok(ctx) => Ok(GraphicsContext { - device: ctx.wgpu_ctx.device.clone(), - queue: ctx.wgpu_ctx.queue.clone(), - vulkan_ctx: Some(ctx.into()), - }), - - Err(err) => { - info!("Cannot initialize vulkan video decoding context. Reason: {err}. 
Initializing without vulkan video support."); - - let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) - .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; - - Ok(GraphicsContext { - device, - queue, - vulkan_ctx: None, - }) - } - } - } - - #[cfg(not(feature = "vk-video"))] - pub fn new( - force_gpu: bool, - features: wgpu::Features, - limits: wgpu::Limits, - ) -> Result { - let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) - .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; - - Ok(GraphicsContext { device, queue }) - } -} - -impl std::fmt::Debug for GraphicsContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PreinitializedContext") - .field("device", &self.device) - .field("queue", &self.queue) - .finish() - } -} - #[derive(Debug)] pub struct Options { pub queue_options: QueueOptions, diff --git a/compositor_pipeline/src/pipeline/graphics_context.rs b/compositor_pipeline/src/pipeline/graphics_context.rs new file mode 100644 index 000000000..1724a469e --- /dev/null +++ b/compositor_pipeline/src/pipeline/graphics_context.rs @@ -0,0 +1,72 @@ +use crate::error::InitPipelineError; +use std::sync::Arc; + +pub struct GraphicsContext { + pub device: Arc, + pub queue: Arc, + + #[cfg(feature = "vk-video")] + pub vulkan_ctx: Option>, +} + +impl GraphicsContext { + #[cfg(feature = "vk-video")] + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + use compositor_render::{ + create_wgpu_ctx, error::InitRendererEngineError, required_wgpu_features, + set_required_wgpu_limits, + }; + use tracing::info; + + let vulkan_features = + features | required_wgpu_features() | wgpu::Features::TEXTURE_FORMAT_NV12; + + let limits = set_required_wgpu_limits(limits); + + match vk_video::VulkanCtx::new(vulkan_features, limits.clone()) { + Ok(ctx) => Ok(GraphicsContext { + device: ctx.wgpu_ctx.device.clone(), + queue: ctx.wgpu_ctx.queue.clone(), + vulkan_ctx: 
Some(ctx.into()), + }), + + Err(err) => { + info!("Cannot initialize vulkan video decoding context. Reason: {err}. Initializing without vulkan video support."); + + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { + device, + queue, + vulkan_ctx: None, + }) + } + } + } + + #[cfg(not(feature = "vk-video"))] + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { device, queue }) + } +} + +impl std::fmt::Debug for GraphicsContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PreinitializedContext") + .field("device", &self.device) + .field("queue", &self.queue) + .finish() + } +} From ffc70010cbd335db893fc208bb6fc3b748db81ba Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 16:43:24 +0200 Subject: [PATCH 09/13] Small review suggestions --- Cargo.toml | 2 +- compositor_pipeline/Cargo.toml | 2 +- .../src/pipeline/graphics_context.rs | 4 ++-- vk-video/build.rs | 18 +++++++++--------- vk-video/src/vulkan_decoder.rs | 10 +++++----- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c86786cf6..564977e5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,7 +102,7 @@ http-body-util = "0.1.2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] shared_memory = { workspace = true } -# platforms that support vulkan are: windows and all non-apple unixes. emscripten is something for the web, where vulkan is not available either +# platforms that support vulkan are: windows and all non-apple unixes. 
emscripten is a web-based platform, where vulkan is not available either [target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] compositor_api = { workspace = true, features = ["vk-video"] } diff --git a/compositor_pipeline/Cargo.toml b/compositor_pipeline/Cargo.toml index 4a15b344c..c7593a088 100644 --- a/compositor_pipeline/Cargo.toml +++ b/compositor_pipeline/Cargo.toml @@ -34,6 +34,6 @@ glyphon = { workspace = true } [target.x86_64-unknown-linux-gnu.dependencies] decklink = { path = "../decklink", optional = true } -# platforms that support vulkan are: windows and all non-apple unixes. emscripten is something for the web, where vulkan is not available either +# platforms that support vulkan are: windows and all non-apple unixes. emscripten is a web-based platform, where vulkan is not available either [target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] vk-video = { path = "../vk-video/", optional = true } diff --git a/compositor_pipeline/src/pipeline/graphics_context.rs b/compositor_pipeline/src/pipeline/graphics_context.rs index 1724a469e..fd2fae7bf 100644 --- a/compositor_pipeline/src/pipeline/graphics_context.rs +++ b/compositor_pipeline/src/pipeline/graphics_context.rs @@ -20,7 +20,7 @@ impl GraphicsContext { create_wgpu_ctx, error::InitRendererEngineError, required_wgpu_features, set_required_wgpu_limits, }; - use tracing::info; + use tracing::warn; let vulkan_features = features | required_wgpu_features() | wgpu::Features::TEXTURE_FORMAT_NV12; @@ -35,7 +35,7 @@ impl GraphicsContext { }), Err(err) => { - info!("Cannot initialize vulkan video decoding context. Reason: {err}. Initializing without vulkan video support."); + warn!("Cannot initialize vulkan video decoding context. Reason: {err}. 
Initializing without vulkan video support."); let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; diff --git a/vk-video/build.rs b/vk-video/build.rs index 6fc1495c2..299b2f9ff 100644 --- a/vk-video/build.rs +++ b/vk-video/build.rs @@ -1,13 +1,13 @@ fn main() { cfg_aliases::cfg_aliases! { - vulkan: { - any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) + vulkan: { + any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) ) - }, - } + ) + }, +} } diff --git a/vk-video/src/vulkan_decoder.rs b/vk-video/src/vulkan_decoder.rs index c2b10aa09..f2152e472 100644 --- a/vk-video/src/vulkan_decoder.rs +++ b/vk-video/src/vulkan_decoder.rs @@ -75,7 +75,7 @@ pub enum VulkanDecoderError { NoFreeSlotsInDpb, #[error("A picture which is not in the decoded pictures buffer was requested as a reference picture")] - NonExistantReferenceRequested, + NonExistentReferenceRequested, #[error("A vulkan decode operation failed with code {0:?}")] DecodeOperationFailed(vk::QueryResultStatusKHR), @@ -207,7 +207,7 @@ impl VulkanDecoder<'_> { .video_session_resources .as_mut() .map(|s| s.free_reference_picture(dpb_idx)), - None => return Err(VulkanDecoderError::NonExistantReferenceRequested), + None => return Err(VulkanDecoderError::NonExistentReferenceRequested), }; } } @@ -815,14 +815,14 @@ impl VulkanDecoder<'_> { { let i = *reference_id_to_dpb_slot_index .get(&ref_info.id) - .ok_or(VulkanDecoderError::NonExistantReferenceRequested)?; + .ok_or(VulkanDecoderError::NonExistentReferenceRequested)?; let reference = *reference_slots .get(i) - .ok_or(VulkanDecoderError::NonExistantReferenceRequested)?; + .ok_or(VulkanDecoderError::NonExistentReferenceRequested)?; if reference.slot_index < 0 || reference.p_picture_resource.is_null() { - return Err(VulkanDecoderError::NonExistantReferenceRequested); + return 
Err(VulkanDecoderError::NonExistentReferenceRequested); } let reference = reference.push_next(dpb_slot_info); From fefb847826ec9be70af1aad958ceb66486920bfd Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 17:05:49 +0200 Subject: [PATCH 10/13] Remove the possibility to configure a codec and a decoder for inputs. --- .../src/types/from_register_input.rs | 1 - compositor_pipeline/src/pipeline/decoder.rs | 2 -- .../src/pipeline/decoder/video.rs | 20 +++++++++---------- .../src/pipeline/input/mp4/mp4_file_reader.rs | 1 - .../src/pipeline/input/rtp/depayloader.rs | 5 +++-- compositor_pipeline/src/pipeline/types.rs | 2 +- integration_tests/examples/vulkan.rs | 3 +-- vk-video/build.rs | 18 ++++++++--------- 8 files changed, 23 insertions(+), 29 deletions(-) diff --git a/compositor_api/src/types/from_register_input.rs b/compositor_api/src/types/from_register_input.rs index 54b092f3f..fa2e0ab51 100644 --- a/compositor_api/src/types/from_register_input.rs +++ b/compositor_api/src/types/from_register_input.rs @@ -108,7 +108,6 @@ impl TryFrom for pipeline::RegisterInputOptions { Ok(input::rtp::InputVideoStream { options: match video { InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { - codec: pipeline::VideoCodec::H264, decoder: pipeline::VideoDecoder::FFmpegH264, }, }, diff --git a/compositor_pipeline/src/pipeline/decoder.rs b/compositor_pipeline/src/pipeline/decoder.rs index 8dd84cd09..6a6c3538f 100644 --- a/compositor_pipeline/src/pipeline/decoder.rs +++ b/compositor_pipeline/src/pipeline/decoder.rs @@ -1,6 +1,5 @@ use crate::{audio_mixer::InputSamples, queue::PipelineEvent}; -use super::types::VideoCodec; use super::types::VideoDecoder; use bytes::Bytes; @@ -19,7 +18,6 @@ pub(super) use video::start_video_decoder_thread; #[derive(Debug, Clone, PartialEq, Eq)] pub struct VideoDecoderOptions { pub decoder: VideoDecoder, - pub codec: VideoCodec, } #[derive(Debug, Clone, PartialEq, Eq)] diff --git 
a/compositor_pipeline/src/pipeline/decoder/video.rs b/compositor_pipeline/src/pipeline/decoder/video.rs index 5129ee06d..4dcc79669 100644 --- a/compositor_pipeline/src/pipeline/decoder/video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video.rs @@ -3,7 +3,7 @@ use crossbeam_channel::{Receiver, Sender}; use crate::{ error::InputInitError, - pipeline::{types::EncodedChunk, PipelineCtx, VideoCodec, VideoDecoder}, + pipeline::{types::EncodedChunk, PipelineCtx, VideoDecoder}, queue::PipelineEvent, }; @@ -20,8 +20,8 @@ pub fn start_video_decoder_thread( frame_sender: Sender>, input_id: InputId, ) -> Result<(), InputInitError> { - match (options.codec, options.decoder) { - (VideoCodec::H264, VideoDecoder::FFmpegH264) => ffmpeg_h264::start_ffmpeg_decoder_thread( + match options.decoder { + VideoDecoder::FFmpegH264 => ffmpeg_h264::start_ffmpeg_decoder_thread( pipeline_ctx, chunks_receiver, frame_sender, @@ -29,13 +29,11 @@ pub fn start_video_decoder_thread( ), #[cfg(feature = "vk-video")] - (VideoCodec::H264, VideoDecoder::VulkanVideo) => { - vulkan_video::start_vulkan_video_decoder_thread( - pipeline_ctx, - chunks_receiver, - frame_sender, - input_id, - ) - } + VideoDecoder::VulkanVideoH264 => vulkan_video::start_vulkan_video_decoder_thread( + pipeline_ctx, + chunks_receiver, + frame_sender, + input_id, + ), } } diff --git a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs index 3811ea005..205c76d6d 100644 --- a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs +++ b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs @@ -234,7 +234,6 @@ impl Mp4FileReader { }; let decoder_options = VideoDecoderOptions { - codec: VideoCodec::H264, decoder: VideoDecoder::FFmpegH264, }; diff --git a/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs b/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs index 4c922cbd3..7f5518e10 100644 --- 
a/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs +++ b/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs @@ -11,6 +11,7 @@ use crate::pipeline::{ decoder::{self, AacDecoderOptions}, rtp::{AUDIO_PAYLOAD_TYPE, VIDEO_PAYLOAD_TYPE}, types::{AudioCodec, EncodedChunk, EncodedChunkKind, VideoCodec}, + VideoDecoder, }; use self::aac::AacDepayloaderNewError; @@ -81,8 +82,8 @@ pub enum VideoDepayloader { impl VideoDepayloader { pub fn new(options: &decoder::VideoDecoderOptions) -> Self { - match options.codec { - VideoCodec::H264 => VideoDepayloader::H264 { + match options.decoder { + VideoDecoder::FFmpegH264 | VideoDecoder::VulkanVideoH264 => VideoDepayloader::H264 { depayloader: H264Packet::default(), buffer: vec![], rollover_state: RolloverState::default(), diff --git a/compositor_pipeline/src/pipeline/types.rs b/compositor_pipeline/src/pipeline/types.rs index 680097626..a2f26c613 100644 --- a/compositor_pipeline/src/pipeline/types.rs +++ b/compositor_pipeline/src/pipeline/types.rs @@ -59,7 +59,7 @@ pub struct RawDataSender { pub enum VideoDecoder { FFmpegH264, #[cfg(feature = "vk-video")] - VulkanVideo, + VulkanVideoH264, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/integration_tests/examples/vulkan.rs b/integration_tests/examples/vulkan.rs index 2fcc4b70a..7409ac83f 100644 --- a/integration_tests/examples/vulkan.rs +++ b/integration_tests/examples/vulkan.rs @@ -105,8 +105,7 @@ fn client_code() -> Result<()> { stream: RtpStream { video: Some(InputVideoStream { options: VideoDecoderOptions { - codec: VideoCodec::H264, - decoder: VideoDecoder::VulkanVideo, + decoder: VideoDecoder::VulkanVideoH264, }, }), audio: None, diff --git a/vk-video/build.rs b/vk-video/build.rs index 299b2f9ff..6fc1495c2 100644 --- a/vk-video/build.rs +++ b/vk-video/build.rs @@ -1,13 +1,13 @@ fn main() { cfg_aliases::cfg_aliases! 
{ - vulkan: { - any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + vulkan: { + any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) ) - ) - }, -} + }, + } } From e3aba56c979c126e42595b506444090f6a26fd5a Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 17:25:32 +0200 Subject: [PATCH 11/13] WIP for manual initialization example. --- .../manual_graphics_initialization.rs | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 integration_tests/examples/manual_graphics_initialization.rs diff --git a/integration_tests/examples/manual_graphics_initialization.rs b/integration_tests/examples/manual_graphics_initialization.rs new file mode 100644 index 000000000..d26ca05ee --- /dev/null +++ b/integration_tests/examples/manual_graphics_initialization.rs @@ -0,0 +1,51 @@ +use compositor_pipeline::{ + pipeline::{GraphicsContext, Options}, + Pipeline, +}; +use live_compositor::config::read_config; + +// This example illustrates how to initialize a GraphicsContext separately to get access to a wgpu +// instance, adapter, queue and device. 
+ +#[cfg(target_os = "linux")] +fn main() { + let graphics_context = + GraphicsContext::new(false, wgpu::Features::default(), wgpu::Limits::default()).unwrap(); + + let _device = graphics_context.device.clone(); + let _queue = graphics_context.queue.clone(); + + let _adapter = graphics_context + .vulkan_ctx + .as_ref() + .unwrap() + .wgpu_ctx + .adapter + .clone(); + + let _instance = graphics_context + .vulkan_ctx + .as_ref() + .unwrap() + .wgpu_ctx + .instance + .clone(); + + let config = read_config(); + + let _pipeline = Pipeline::new(Options { + wgpu_ctx: Some(graphics_context), + queue_options: config.queue_options, + stream_fallback_timeout: config.stream_fallback_timeout, + web_renderer: config.web_renderer, + force_gpu: config.force_gpu, + download_root: config.download_root, + output_sample_rate: config.output_sample_rate, + wgpu_features: config.required_wgpu_features, + load_system_fonts: Some(true), + }) + .unwrap(); +} + +#[cfg(target_os = "macos")] +fn main() {} From d56603a062c015ea7f1ae5a20231adf708404027 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Fri, 11 Oct 2024 10:40:58 +0200 Subject: [PATCH 12/13] Impl `Debug` for `VulkanCtx` --- compositor_pipeline/src/pipeline/graphics_context.rs | 10 +--------- vk-video/src/vulkan_decoder/vulkan_ctx.rs | 6 ++++++ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/compositor_pipeline/src/pipeline/graphics_context.rs b/compositor_pipeline/src/pipeline/graphics_context.rs index fd2fae7bf..206d5d55b 100644 --- a/compositor_pipeline/src/pipeline/graphics_context.rs +++ b/compositor_pipeline/src/pipeline/graphics_context.rs @@ -1,6 +1,7 @@ use crate::error::InitPipelineError; use std::sync::Arc; +#[derive(Debug)] pub struct GraphicsContext { pub device: Arc, pub queue: Arc, @@ -61,12 +62,3 @@ impl GraphicsContext { Ok(GraphicsContext { device, queue }) } } - -impl std::fmt::Debug for GraphicsContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 
f.debug_struct("PreinitializedContext") - .field("device", &self.device) - .field("queue", &self.queue) - .finish() - } -} diff --git a/vk-video/src/vulkan_decoder/vulkan_ctx.rs b/vk-video/src/vulkan_decoder/vulkan_ctx.rs index 03683237f..953b29ea5 100644 --- a/vk-video/src/vulkan_decoder/vulkan_ctx.rs +++ b/vk-video/src/vulkan_decoder/vulkan_ctx.rs @@ -373,6 +373,12 @@ impl VulkanCtx { } } +impl std::fmt::Debug for VulkanCtx { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("VulkanCtx").finish() + } +} + struct ChosenDevice<'a> { physical_device: vk::PhysicalDevice, queue_indices: QueueIndices<'a>, From e67305c034195901e34db8040b3b17f002236b62 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek <72213407+jerzywilczek@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:43:08 +0200 Subject: [PATCH 13/13] Reword error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Wojciech BarczyƄski <104033489+WojciechBarczynski@users.noreply.github.com> --- vk-video/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vk-video/src/lib.rs b/vk-video/src/lib.rs index 1b0fff581..5826803b8 100644 --- a/vk-video/src/lib.rs +++ b/vk-video/src/lib.rs @@ -17,10 +17,10 @@ pub struct Decoder<'a> { #[derive(Debug, thiserror::Error)] pub enum DecoderError { - #[error("Error originating in the decoder: {0}")] + #[error("Decoder error: {0}")] VulkanDecoderError(#[from] VulkanDecoderError), - #[error("Error originating in the h264 parser: {0}")] + #[error("H264 parser error: {0}")] ParserError(#[from] ParserError), }