diff --git a/Cargo.lock b/Cargo.lock index fad06cac6..c9e9d1f48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -305,6 +305,12 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "bitstream-io" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b81e1519b0d82120d2fd469d5bfb2919a9361c48b02d82d04befc1cdd2002452" + [[package]] name = "block" version = "0.1.6" @@ -389,6 +395,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chromium_sys" version = "0.1.0" @@ -532,6 +544,7 @@ dependencies = [ "socket2", "thiserror", "tracing", + "vk-video", "webrtc-util", "wgpu", ] @@ -801,6 +814,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "digest" version = "0.10.7" @@ -1073,6 +1097,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "four-cc" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "795cbfc56d419a7ce47ccbb7504dd9a5b7c484c083c356e797de08bd988d9629" + [[package]] name = "fs_extra" version = "1.3.0" @@ -1328,6 +1358,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "h264-reader" +version = "0.7.1-dev" +source = "git+https://github.com/membraneframework-labs/h264-reader.git?branch=@jerzywilczek/scaling-lists#7c982f1089558640021ff8a70a2fa253e3e881c7" +dependencies = [ + "bitstream-io", + "hex-slice", + "log", + "memchr", + "rfc6381-codec", +] + [[package]] name = "half" version = "2.2.1" @@ -1374,6 +1416,12 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +[[package]] +name = "hex-slice" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5491a308e0214554f07a81d8944abe45f552871c12e3c3c6e7e5d354039a6c4c" + [[package]] name = "hexf-parse" version = "0.2.1" @@ -1978,6 +2026,21 @@ dependencies = [ "thiserror", ] +[[package]] +name = "mp4ra-rust" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdbc3d3867085d66ac6270482e66f3dd2c5a18451a3dc9ad7269e94844a536b7" +dependencies = [ + "four-cc", +] + +[[package]] +name = "mpeg4-audio-const" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a1fe2275b68991faded2c80aa4a33dba398b77d276038b8f50701a22e55918" + [[package]] name = "naga" version = "22.1.0" @@ -1987,7 +2050,7 @@ dependencies = [ "arrayvec", "bit-set", "bitflags 2.6.0", - "cfg_aliases", + "cfg_aliases 0.1.1", "codespan-reporting", "hexf-parse", "indexmap 2.0.1", @@ -2653,6 +2716,16 @@ dependencies = [ "usvg", ] +[[package]] +name = "rfc6381-codec" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed54c20f5c3ec82eab6d998b313dc75ec5d5650d4f57675e61d72489040297fd" 
+dependencies = [ + "mp4ra-rust", + "mpeg4-audio-const", +] + [[package]] name = "rgb" version = "0.8.36" @@ -3886,6 +3959,32 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vk-mem" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cb12b79bcec57a3334d0284f1364c1846f378bb47df9779c6dbfcfc245c9404" +dependencies = [ + "ash", + "bitflags 2.6.0", + "cc", +] + +[[package]] +name = "vk-video" +version = "0.1.0" +dependencies = [ + "ash", + "cfg_aliases 0.2.1", + "derivative", + "h264-reader", + "thiserror", + "tracing", + "tracing-subscriber 0.3.18", + "vk-mem", + "wgpu", +] + [[package]] name = "want" version = "0.3.1" @@ -4022,7 +4121,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d1c4ba43f80542cf63a0a6ed3134629ae73e8ab51e4b765a67f3aa062eb433" dependencies = [ "arrayvec", - "cfg_aliases", + "cfg_aliases 0.1.1", "document-features", "js-sys", "log", @@ -4049,7 +4148,7 @@ dependencies = [ "arrayvec", "bit-vec", "bitflags 2.6.0", - "cfg_aliases", + "cfg_aliases 0.1.1", "document-features", "indexmap 2.0.1", "log", @@ -4077,7 +4176,7 @@ dependencies = [ "bit-set", "bitflags 2.6.0", "block", - "cfg_aliases", + "cfg_aliases 0.1.1", "core-graphics-types", "d3d12", "glow", diff --git a/Cargo.toml b/Cargo.toml index c9be7ba7f..564977e5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ members = [ "decklink", "compositor_api", "compositor_web", + "vk-video", ] resolver = "2" @@ -56,6 +57,7 @@ schemars = { git = "https://github.com/membraneframework-labs/schemars", rev = " "preserve_order", ] } shared_memory = "0.12.4" +vk-video = { path = "vk-video" } wgpu = { version = "22.1.0", default-features = false, features = [ "wgsl", "dx12", @@ -100,6 +102,10 @@ http-body-util = "0.1.2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] shared_memory = { workspace = true } +# platforms that support vulkan are: windows and all non-apple unixes. 
emscripten is a web-based platform, where vulkan is not available either +[target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] +compositor_api = { workspace = true, features = ["vk-video"] } + [[bin]] name = "process_helper" path = "src/bin/process_helper/main.rs" diff --git a/compositor_api/Cargo.toml b/compositor_api/Cargo.toml index 5a8911708..69af23089 100644 --- a/compositor_api/Cargo.toml +++ b/compositor_api/Cargo.toml @@ -7,6 +7,7 @@ license = "BUSL-1.1" [features] decklink = ["compositor_pipeline/decklink"] web_renderer = ["compositor_render/web_renderer"] +vk-video = ["compositor_pipeline/vk-video"] [dependencies] compositor_render = { workspace = true } diff --git a/compositor_api/src/types/from_register_input.rs b/compositor_api/src/types/from_register_input.rs index 7c493e8bc..fa2e0ab51 100644 --- a/compositor_api/src/types/from_register_input.rs +++ b/compositor_api/src/types/from_register_input.rs @@ -102,13 +102,18 @@ impl TryFrom for pipeline::RegisterInputOptions { } let rtp_stream = input::rtp::RtpStream { - video: video.as_ref().map(|video| input::rtp::InputVideoStream { - options: match video { - InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { - codec: pipeline::VideoCodec::H264, - }, - }, - }), + video: video + .as_ref() + .map(|video| { + Ok(input::rtp::InputVideoStream { + options: match video { + InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { + decoder: pipeline::VideoDecoder::FFmpegH264, + }, + }, + }) + }) + .transpose()?, audio: audio.map(TryFrom::try_from).transpose()?, }; diff --git a/compositor_pipeline/Cargo.toml b/compositor_pipeline/Cargo.toml index 08a259c3a..c7593a088 100644 --- a/compositor_pipeline/Cargo.toml +++ b/compositor_pipeline/Cargo.toml @@ -7,6 +7,7 @@ license = "BUSL-1.1" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] decklink = ["dep:decklink"] +vk-video = ["dep:vk-video"] [dependencies] compositor_render = { workspace = true } @@ -32,3 +33,7 @@ glyphon = { workspace = true } [target.x86_64-unknown-linux-gnu.dependencies] decklink = { path = "../decklink", optional = true } + +# platforms that support vulkan are: windows and all non-apple unixes. emscripten is a web-based platform, where vulkan is not available either +[target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] +vk-video = { path = "../vk-video/", optional = true } diff --git a/compositor_pipeline/src/error.rs b/compositor_pipeline/src/error.rs index 1516fd085..03dfcace6 100644 --- a/compositor_pipeline/src/error.rs +++ b/compositor_pipeline/src/error.rs @@ -9,6 +9,19 @@ use compositor_render::{ use crate::pipeline::{decoder::AacDecoderError, VideoCodec}; use fdk_aac_sys as fdk; +#[derive(Debug, thiserror::Error)] +pub enum InitPipelineError { + #[error(transparent)] + InitRendererEngine(#[from] InitRendererEngineError), + + #[error("Failed to create a download directory.")] + CreateDownloadDir(#[source] std::io::Error), + + #[cfg(feature = "vk-video")] + #[error(transparent)] + VulkanCtxError(#[from] vk_video::VulkanCtxError), +} + #[derive(Debug, thiserror::Error)] pub enum RegisterInputError { #[error("Failed to register input stream. 
Stream \"{0}\" is already registered.")] @@ -120,6 +133,13 @@ pub enum InputInitError { #[error("Couldn't read decoder init result.")] CannotReadInitResult, + + #[cfg(feature = "vk-video")] + #[error(transparent)] + VulkanDecoderError(#[from] vk_video::DecoderError), + + #[error("Pipeline couldn't detect a vulkan video compatible device when it was being initialized. Cannot create a vulkan video decoder")] + VulkanContextRequiredForVulkanDecoder, } pub enum ErrorType { diff --git a/compositor_pipeline/src/pipeline.rs b/compositor_pipeline/src/pipeline.rs index 34da59cb9..6e534d124 100644 --- a/compositor_pipeline/src/pipeline.rs +++ b/compositor_pipeline/src/pipeline.rs @@ -7,8 +7,7 @@ use std::thread; use std::time::Duration; use compositor_render::error::{ - ErrorStack, InitPipelineError, RegisterRendererError, RequestKeyframeError, - UnregisterRendererError, + ErrorStack, RegisterRendererError, RequestKeyframeError, UnregisterRendererError, }; use compositor_render::scene::Component; use compositor_render::web_renderer::WebRendererInitOptions; @@ -32,6 +31,7 @@ use types::RawDataSender; use crate::audio_mixer::AudioMixer; use crate::audio_mixer::MixingStrategy; use crate::audio_mixer::{AudioChannels, AudioMixingParams}; +use crate::error::InitPipelineError; use crate::error::{ RegisterInputError, RegisterOutputError, UnregisterInputError, UnregisterOutputError, }; @@ -48,6 +48,7 @@ use self::input::InputOptions; pub mod decoder; pub mod encoder; +mod graphics_context; pub mod input; pub mod output; mod pipeline_input; @@ -61,9 +62,12 @@ use self::pipeline_output::PipelineOutput; pub use self::types::{ AudioCodec, EncodedChunk, EncodedChunkKind, EncoderOutputEvent, RawDataReceiver, VideoCodec, + VideoDecoder, }; pub use pipeline_output::PipelineOutputEndCondition; +pub use graphics_context::GraphicsContext; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Port(pub u16); @@ -109,7 +113,7 @@ pub struct Pipeline { is_started: bool, } -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct Options { pub queue_options: QueueOptions, pub stream_fallback_timeout: Duration, @@ -118,28 +122,57 @@ pub struct Options { pub download_root: PathBuf, pub output_sample_rate: u32, pub wgpu_features: WgpuFeatures, - pub wgpu_ctx: Option<(Arc, Arc)>, pub load_system_fonts: Option, + pub wgpu_ctx: Option, } -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct PipelineCtx { pub output_sample_rate: u32, pub output_framerate: Framerate, pub download_dir: Arc, pub event_emitter: Arc, + #[cfg(feature = "vk-video")] + pub vulkan_ctx: Option>, +} + +impl std::fmt::Debug for PipelineCtx { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PipelineCtx") + .field("output_sample_rate", &self.output_sample_rate) + .field("output_framerate", &self.output_framerate) + .field("download_dir", &self.download_dir) + .field("event_emitter", &self.event_emitter) + .finish() + } } impl Pipeline { pub fn new(opts: Options) -> Result<(Self, Arc), InitPipelineError> { + let preinitialized_ctx = match opts.wgpu_ctx { + Some(ctx) => Some(ctx), + #[cfg(feature = "vk-video")] + None => Some(GraphicsContext::new( + opts.force_gpu, + opts.wgpu_features, + Default::default(), + )?), + #[cfg(not(feature = "vk-video"))] + None => None, + }; + + let wgpu_ctx = preinitialized_ctx + .as_ref() + .map(|ctx| (ctx.device.clone(), ctx.queue.clone())); + let (renderer, event_loop) = Renderer::new(RendererOptions { web_renderer: opts.web_renderer, framerate: opts.queue_options.output_framerate, 
stream_fallback_timeout: opts.stream_fallback_timeout, force_gpu: opts.force_gpu, wgpu_features: opts.wgpu_features, - wgpu_ctx: opts.wgpu_ctx, load_system_fonts: opts.load_system_fonts.unwrap_or(true), + wgpu_ctx, })?; let download_dir = opts @@ -160,6 +193,8 @@ impl Pipeline { output_framerate: opts.queue_options.output_framerate, download_dir: download_dir.into(), event_emitter, + #[cfg(feature = "vk-video")] + vulkan_ctx: preinitialized_ctx.and_then(|ctx| ctx.vulkan_ctx), }, }; diff --git a/compositor_pipeline/src/pipeline/decoder.rs b/compositor_pipeline/src/pipeline/decoder.rs index 87f26addc..6a6c3538f 100644 --- a/compositor_pipeline/src/pipeline/decoder.rs +++ b/compositor_pipeline/src/pipeline/decoder.rs @@ -1,6 +1,6 @@ use crate::{audio_mixer::InputSamples, queue::PipelineEvent}; -use super::types::VideoCodec; +use super::types::VideoDecoder; use bytes::Bytes; use compositor_render::Frame; @@ -17,7 +17,7 @@ pub(super) use video::start_video_decoder_thread; #[derive(Debug, Clone, PartialEq, Eq)] pub struct VideoDecoderOptions { - pub codec: VideoCodec, + pub decoder: VideoDecoder, } #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/compositor_pipeline/src/pipeline/decoder/video.rs b/compositor_pipeline/src/pipeline/decoder/video.rs index 52475dae8..4dcc79669 100644 --- a/compositor_pipeline/src/pipeline/decoder/video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video.rs @@ -3,23 +3,37 @@ use crossbeam_channel::{Receiver, Sender}; use crate::{ error::InputInitError, - pipeline::{types::EncodedChunk, VideoCodec}, + pipeline::{types::EncodedChunk, PipelineCtx, VideoDecoder}, queue::PipelineEvent, }; use super::VideoDecoderOptions; mod ffmpeg_h264; +#[cfg(feature = "vk-video")] +mod vulkan_video; pub fn start_video_decoder_thread( options: VideoDecoderOptions, + pipeline_ctx: &PipelineCtx, chunks_receiver: Receiver>, frame_sender: Sender>, input_id: InputId, ) -> Result<(), InputInitError> { - match options.codec { - VideoCodec::H264 => { - ffmpeg_h264::start_ffmpeg_decoder_thread(chunks_receiver, frame_sender, input_id) - } + match options.decoder { + VideoDecoder::FFmpegH264 => ffmpeg_h264::start_ffmpeg_decoder_thread( + pipeline_ctx, + chunks_receiver, + frame_sender, + input_id, + ), + + #[cfg(feature = "vk-video")] + VideoDecoder::VulkanVideoH264 => vulkan_video::start_vulkan_video_decoder_thread( + pipeline_ctx, + chunks_receiver, + frame_sender, + input_id, + ), } } diff --git a/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs b/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs index 04df9e654..8f6c2cdd1 100644 --- a/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs +++ b/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs @@ -2,7 +2,10 @@ use std::time::Duration; use crate::{ error::InputInitError, - pipeline::types::{EncodedChunk, EncodedChunkKind, VideoCodec}, + pipeline::{ + types::{EncodedChunk, EncodedChunkKind, VideoCodec}, + PipelineCtx, + }, queue::PipelineEvent, }; @@ -18,6 +21,7 @@ use ffmpeg_next::{ use tracing::{debug, error, span, trace, warn, Level}; pub fn start_ffmpeg_decoder_thread( + _pipeline_ctx: &PipelineCtx, chunks_receiver: Receiver>, frame_sender: Sender>, input_id: InputId, diff --git a/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs new file mode 100644 index 000000000..8e2363412 --- /dev/null +++ b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs @@ -0,0 +1,112 @@ +use std::sync::Arc; + +use 
compositor_render::{Frame, FrameData, InputId, Resolution}; +use crossbeam_channel::{Receiver, Sender}; +use tracing::{debug, error, span, trace, warn, Level}; +use vk_video::{Decoder, VulkanCtx}; + +use crate::{ + error::InputInitError, + pipeline::{EncodedChunk, EncodedChunkKind, PipelineCtx, VideoCodec}, + queue::PipelineEvent, +}; + +pub fn start_vulkan_video_decoder_thread( + pipeline_ctx: &PipelineCtx, + chunks_receiver: Receiver>, + frame_sender: Sender>, + input_id: InputId, +) -> Result<(), InputInitError> { + let Some(vulkan_ctx) = pipeline_ctx.vulkan_ctx.as_ref().map(|ctx| ctx.clone()) else { + return Err(InputInitError::VulkanContextRequiredForVulkanDecoder); + }; + + let (init_result_sender, init_result_receiver) = crossbeam_channel::bounded(0); + + std::thread::Builder::new() + .name(format!("h264 vulkan video decoder {}", input_id.0)) + .spawn(move || { + let _span = span!( + Level::INFO, + "h264 vulkan video decoder", + input_id = input_id.to_string() + ) + .entered(); + run_decoder_thread( + vulkan_ctx, + init_result_sender, + chunks_receiver, + frame_sender, + ) + }) + .unwrap(); + + init_result_receiver.recv().unwrap()?; + + Ok(()) +} + +fn run_decoder_thread( + vulkan_ctx: Arc, + init_result_sender: Sender>, + chunks_receiver: Receiver>, + frame_sender: Sender>, +) { + let mut decoder = match Decoder::new(vulkan_ctx) { + Ok(decoder) => { + init_result_sender.send(Ok(())).unwrap(); + decoder + } + Err(err) => { + init_result_sender.send(Err(err.into())).unwrap(); + return; + } + }; + + for chunk in chunks_receiver { + let chunk = match chunk { + PipelineEvent::Data(chunk) => chunk, + PipelineEvent::EOS => { + break; + } + }; + + if chunk.kind != EncodedChunkKind::Video(VideoCodec::H264) { + error!( + "H264 decoder received chunk of wrong kind: {:?}", + chunk.kind + ); + continue; + } + + let result = match decoder.decode_to_wgpu_textures(&chunk.data) { + Ok(res) => res, + Err(err) => { + warn!("Failed to decode frame: {err}"); + continue; + } + }; + + for frame in result { + let resolution = Resolution { + width: frame.width() as usize, + height: frame.height() as usize, + }; + + let frame = Frame { + data: FrameData::Nv12WgpuTexture(frame.into()), + pts: chunk.pts, + resolution, + }; + + trace!(pts=?frame.pts, "H264 decoder produced a frame."); + if frame_sender.send(PipelineEvent::Data(frame)).is_err() { + debug!("Failed to send frame from H264 decoder. Channel closed."); + return; + } + } + } + if frame_sender.send(PipelineEvent::EOS).is_err() { + debug!("Failed to send EOS from H264 decoder. 
Channel closed.") + } +} diff --git a/compositor_pipeline/src/pipeline/graphics_context.rs b/compositor_pipeline/src/pipeline/graphics_context.rs new file mode 100644 index 000000000..206d5d55b --- /dev/null +++ b/compositor_pipeline/src/pipeline/graphics_context.rs @@ -0,0 +1,64 @@ +use crate::error::InitPipelineError; +use std::sync::Arc; + +#[derive(Debug)] +pub struct GraphicsContext { + pub device: Arc, + pub queue: Arc, + + #[cfg(feature = "vk-video")] + pub vulkan_ctx: Option>, +} + +impl GraphicsContext { + #[cfg(feature = "vk-video")] + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + use compositor_render::{ + create_wgpu_ctx, error::InitRendererEngineError, required_wgpu_features, + set_required_wgpu_limits, + }; + use tracing::warn; + + let vulkan_features = + features | required_wgpu_features() | wgpu::Features::TEXTURE_FORMAT_NV12; + + let limits = set_required_wgpu_limits(limits); + + match vk_video::VulkanCtx::new(vulkan_features, limits.clone()) { + Ok(ctx) => Ok(GraphicsContext { + device: ctx.wgpu_ctx.device.clone(), + queue: ctx.wgpu_ctx.queue.clone(), + vulkan_ctx: Some(ctx.into()), + }), + + Err(err) => { + warn!("Cannot initialize vulkan video decoding context. Reason: {err}. Initializing without vulkan video support."); + + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { + device, + queue, + vulkan_ctx: None, + }) + } + } + } + + #[cfg(not(feature = "vk-video"))] + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { device, queue }) + } +} diff --git a/compositor_pipeline/src/pipeline/input.rs b/compositor_pipeline/src/pipeline/input.rs index e0342df4d..baba3159a 100644 --- a/compositor_pipeline/src/pipeline/input.rs +++ b/compositor_pipeline/src/pipeline/input.rs @@ -162,6 +162,7 @@ fn start_input_threads( let (sender, receiver) = bounded(10); start_video_decoder_thread( decoder_options, + pipeline_ctx, chunk_receiver, sender, input_id.clone(), diff --git a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs index 37ddfb837..205c76d6d 100644 --- a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs +++ b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs @@ -15,7 +15,7 @@ use crate::{ pipeline::{ decoder::{AacDecoderOptions, AudioDecoderOptions, VideoDecoderOptions}, types::{EncodedChunk, EncodedChunkKind}, - AudioCodec, VideoCodec, + AudioCodec, VideoCodec, VideoDecoder, }, queue::PipelineEvent, }; @@ -234,7 +234,7 @@ impl Mp4FileReader { }; let decoder_options = VideoDecoderOptions { - codec: VideoCodec::H264, + decoder: VideoDecoder::FFmpegH264, }; Some(TrackInfo { diff --git a/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs b/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs index 4c922cbd3..7f5518e10 100644 --- a/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs +++ b/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs @@ -11,6 +11,7 @@ use crate::pipeline::{ decoder::{self, AacDecoderOptions}, rtp::{AUDIO_PAYLOAD_TYPE, VIDEO_PAYLOAD_TYPE}, types::{AudioCodec, EncodedChunk, EncodedChunkKind, VideoCodec}, + VideoDecoder, }; use self::aac::AacDepayloaderNewError; @@ -81,8 +82,8 @@ pub enum 
VideoDepayloader { impl VideoDepayloader { pub fn new(options: &decoder::VideoDecoderOptions) -> Self { - match options.codec { - VideoCodec::H264 => VideoDepayloader::H264 { + match options.decoder { + VideoDecoder::FFmpegH264 | VideoDecoder::VulkanVideoH264 => VideoDepayloader::H264 { depayloader: H264Packet::default(), buffer: vec![], rollover_state: RolloverState::default(), diff --git a/compositor_pipeline/src/pipeline/types.rs b/compositor_pipeline/src/pipeline/types.rs index a30361073..a2f26c613 100644 --- a/compositor_pipeline/src/pipeline/types.rs +++ b/compositor_pipeline/src/pipeline/types.rs @@ -55,6 +55,13 @@ pub struct RawDataSender { pub audio: Option>>, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VideoDecoder { + FFmpegH264, + #[cfg(feature = "vk-video")] + VulkanVideoH264, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum VideoCodec { H264, diff --git a/compositor_render/src/error.rs b/compositor_render/src/error.rs index 5f6474355..49884c28e 100644 --- a/compositor_render/src/error.rs +++ b/compositor_render/src/error.rs @@ -13,15 +13,6 @@ use crate::{OutputId, RendererId}; pub use crate::registry::RegisterError; pub use crate::wgpu::WgpuError; -#[derive(Debug, thiserror::Error)] -pub enum InitPipelineError { - #[error(transparent)] - InitRendererEngine(#[from] InitRendererEngineError), - - #[error("Failed to create a download directory.")] - CreateDownloadDir(#[source] std::io::Error), -} - #[derive(Debug, thiserror::Error)] pub enum InitRendererEngineError { #[error("Failed to initialize a wgpu context.")] diff --git a/compositor_render/src/lib.rs b/compositor_render/src/lib.rs index 5c6d93694..5d598473f 100644 --- a/compositor_render/src/lib.rs +++ b/compositor_render/src/lib.rs @@ -19,8 +19,8 @@ pub use state::Renderer; pub use state::RendererOptions; pub use state::RendererSpec; -pub use wgpu::create_wgpu_ctx; pub use wgpu::WgpuFeatures; +pub use wgpu::{create_wgpu_ctx, required_wgpu_features, set_required_wgpu_limits}; pub mod image { pub use crate::transformations::image_renderer::{ImageSource, ImageSpec, ImageType}; diff --git a/compositor_render/src/wgpu.rs b/compositor_render/src/wgpu.rs index d3814eb13..ff9921a65 100644 --- a/compositor_render/src/wgpu.rs +++ b/compositor_render/src/wgpu.rs @@ -6,8 +6,8 @@ pub(crate) mod format; pub(crate) mod texture; pub(crate) mod utils; -pub use ctx::create_wgpu_ctx; pub(crate) use ctx::WgpuCtx; +pub use ctx::{create_wgpu_ctx, required_wgpu_features, set_required_wgpu_limits}; pub use wgpu::Features as WgpuFeatures; #[must_use] diff --git a/compositor_render/src/wgpu/ctx.rs b/compositor_render/src/wgpu/ctx.rs index 40207b134..4bf23706f 100644 --- a/compositor_render/src/wgpu/ctx.rs +++ b/compositor_render/src/wgpu/ctx.rs @@ -34,7 +34,7 @@ impl WgpuCtx { Self::new_from_device_queue(device, queue)? } None => { - let (device, queue) = create_wgpu_ctx(force_gpu, features)?; + let (device, queue) = create_wgpu_ctx(force_gpu, features, Default::default())?; Self::new_from_device_queue(device, queue)? 
} }; @@ -42,12 +42,7 @@ impl WgpuCtx { } fn check_wgpu_ctx(device: &wgpu::Device, features: wgpu::Features) { - let expected_features = match cfg!(target_arch = "wasm32") { - false => { - features | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS - } - true => features | wgpu::Features::PUSH_CONSTANTS, - }; + let expected_features = features | required_wgpu_features(); let missing_features = expected_features.difference(device.features()); if !missing_features.is_empty() { @@ -92,9 +87,24 @@ impl WgpuCtx { } } +pub fn required_wgpu_features() -> wgpu::Features { + match cfg!(target_arch = "wasm32") { + false => wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, + true => wgpu::Features::PUSH_CONSTANTS, + } +} + +pub fn set_required_wgpu_limits(limits: wgpu::Limits) -> wgpu::Limits { + wgpu::Limits { + max_push_constant_size: limits.max_push_constant_size.max(128), + ..limits + } +} + pub fn create_wgpu_ctx( force_gpu: bool, features: wgpu::Features, + limits: wgpu::Limits, ) -> Result<(Arc, Arc), CreateWgpuCtxError> { let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { backends: wgpu::Backends::all(), @@ -120,8 +130,7 @@ pub fn create_wgpu_ctx( error!("Selected adapter is CPU based. Aborting."); return Err(CreateWgpuCtxError::NoAdapter); } - let required_features = - features | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS; + let required_features = features | required_wgpu_features(); let missing_features = required_features.difference(adapter.features()); if !missing_features.is_empty() { @@ -133,10 +142,7 @@ pub fn create_wgpu_ctx( let (device, queue) = pollster::block_on(adapter.request_device( &wgpu::DeviceDescriptor { label: None, - required_limits: wgpu::Limits { - max_push_constant_size: 128, - ..Default::default() - }, + required_limits: set_required_wgpu_limits(limits), required_features, memory_hints: wgpu::MemoryHints::default(), }, diff --git a/compositor_render/src/wgpu/texture/nv12.rs b/compositor_render/src/wgpu/texture/nv12.rs index 442702742..85c3f6dca 100644 --- a/compositor_render/src/wgpu/texture/nv12.rs +++ b/compositor_render/src/wgpu/texture/nv12.rs @@ -31,7 +31,7 @@ impl<'a> NV12TextureView<'a> { let view_y = texture.create_view(&wgpu::TextureViewDescriptor { label: Some("y plane nv12 texture view"), dimension: Some(wgpu::TextureViewDimension::D2), - format: Some(wgpu::TextureFormat::NV12), + format: Some(wgpu::TextureFormat::R8Unorm), aspect: wgpu::TextureAspect::Plane0, ..Default::default() }); @@ -39,7 +39,7 @@ impl<'a> NV12TextureView<'a> { let view_uv = texture.create_view(&wgpu::TextureViewDescriptor { label: Some("uv plane nv12 texture view"), dimension: Some(wgpu::TextureViewDimension::D2), - format: Some(wgpu::TextureFormat::NV12), + format: Some(wgpu::TextureFormat::Rg8Unorm), aspect: wgpu::TextureAspect::Plane1, ..Default::default() }); diff --git a/integration_tests/examples/manual_graphics_initialization.rs b/integration_tests/examples/manual_graphics_initialization.rs new file mode 100644 index 000000000..d26ca05ee --- /dev/null +++ b/integration_tests/examples/manual_graphics_initialization.rs @@ -0,0 +1,51 @@ +use compositor_pipeline::{ + pipeline::{GraphicsContext, Options}, + Pipeline, +}; +use live_compositor::config::read_config; + +// This example illustrates how to initialize a GraphicsContext separately to get access to a wgpu +// instance, adapter, queue and device. 
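+// The context is then passed to the pipeline through `Options::wgpu_ctx`, so that the
+// pipeline and any wgpu work done outside of it share the same device and queue.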
+ +#[cfg(target_os = "linux")] +fn main() { + let graphics_context = + GraphicsContext::new(false, wgpu::Features::default(), wgpu::Limits::default()).unwrap(); + + let _device = graphics_context.device.clone(); + let _queue = graphics_context.queue.clone(); + + let _adapter = graphics_context + .vulkan_ctx + .as_ref() + .unwrap() + .wgpu_ctx + .adapter + .clone(); + + let _instance = graphics_context + .vulkan_ctx + .as_ref() + .unwrap() + .wgpu_ctx + .instance + .clone(); + + let config = read_config(); + + let _pipeline = Pipeline::new(Options { + wgpu_ctx: Some(graphics_context), + queue_options: config.queue_options, + stream_fallback_timeout: config.stream_fallback_timeout, + web_renderer: config.web_renderer, + force_gpu: config.force_gpu, + download_root: config.download_root, + output_sample_rate: config.output_sample_rate, + wgpu_features: config.required_wgpu_features, + load_system_fonts: Some(true), + }) + .unwrap(); +} + +#[cfg(target_os = "macos")] +fn main() {} diff --git a/integration_tests/examples/raw_channel_input.rs b/integration_tests/examples/raw_channel_input.rs index 2fec88abd..9a4272098 100644 --- a/integration_tests/examples/raw_channel_input.rs +++ b/integration_tests/examples/raw_channel_input.rs @@ -17,12 +17,12 @@ use compositor_pipeline::{ OutputOptions, OutputProtocolOptions, }, rtp::RequestedPort, - Options, Pipeline, PipelineOutputEndCondition, RegisterOutputOptions, VideoCodec, + GraphicsContext, Options, Pipeline, PipelineOutputEndCondition, RegisterOutputOptions, + VideoCodec, }, queue::{PipelineEvent, QueueInputOptions}, }; use compositor_render::{ - create_wgpu_ctx, error::ErrorStack, scene::{Component, InputStreamComponent}, Frame, FrameData, InputId, OutputId, Resolution, @@ -44,7 +44,8 @@ fn main() { level: "info,wgpu_hal=warn,wgpu_core=warn".to_string(), }); let config = read_config(); - let (wgpu_device, wgpu_queue) = create_wgpu_ctx(false, Default::default()).unwrap(); + let ctx = GraphicsContext::new(false, Default::default(), Default::default()).unwrap(); + let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { queue_options: config.queue_options, @@ -54,8 +55,8 @@ fn main() { download_root: config.download_root, output_sample_rate: config.output_sample_rate, wgpu_features: config.required_wgpu_features, - wgpu_ctx: Some((wgpu_device.clone(), wgpu_queue.clone())), load_system_fonts: Some(true), + wgpu_ctx: Some(ctx), }) .unwrap_or_else(|err| { panic!( diff --git a/integration_tests/examples/raw_channel_output.rs b/integration_tests/examples/raw_channel_output.rs index a22c655a8..8d18ca3b3 100644 --- a/integration_tests/examples/raw_channel_output.rs +++ b/integration_tests/examples/raw_channel_output.rs @@ -16,14 +16,13 @@ use compositor_pipeline::{ InputOptions, }, output::{RawAudioOptions, RawDataOutputOptions, RawVideoOptions}, - Options, PipelineOutputEndCondition, RawDataReceiver, RegisterInputOptions, - RegisterOutputOptions, + GraphicsContext, Options, PipelineOutputEndCondition, RawDataReceiver, + RegisterInputOptions, RegisterOutputOptions, }, queue::{PipelineEvent, QueueInputOptions}, Pipeline, }; use compositor_render::{ - create_wgpu_ctx, error::ErrorStack, scene::{Component, InputStreamComponent}, Frame, FrameData, InputId, OutputId, Resolution, @@ -58,7 +57,8 @@ fn main() { }); let mut config = read_config(); config.queue_options.ahead_of_time_processing = true; - let (wgpu_device, wgpu_queue) = 
create_wgpu_ctx(false, Default::default()).unwrap(); + let ctx = GraphicsContext::new(false, Default::default(), Default::default()).unwrap(); + let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { queue_options: config.queue_options, @@ -68,8 +68,8 @@ fn main() { download_root: config.download_root, output_sample_rate: config.output_sample_rate, wgpu_features: config.required_wgpu_features, - wgpu_ctx: Some((wgpu_device.clone(), wgpu_queue.clone())), load_system_fonts: Some(true), + wgpu_ctx: Some(ctx), }) .unwrap_or_else(|err| { panic!( diff --git a/integration_tests/examples/vulkan.rs b/integration_tests/examples/vulkan.rs new file mode 100644 index 000000000..7409ac83f --- /dev/null +++ b/integration_tests/examples/vulkan.rs @@ -0,0 +1,195 @@ +use anyhow::Result; +use compositor_api::types::Resolution; +use compositor_pipeline::{ + pipeline::{ + decoder::VideoDecoderOptions, + encoder::{ + ffmpeg_h264::{EncoderPreset, Options as H264Options}, + VideoEncoderOptions, + }, + input::{ + rtp::{InputVideoStream, RtpReceiverOptions, RtpStream}, + InputOptions, + }, + output::{ + rtp::{RtpConnectionOptions, RtpSenderOptions}, + OutputOptions, OutputProtocolOptions, + }, + rtp::{RequestedPort, TransportProtocol}, + Options, OutputVideoOptions, PipelineOutputEndCondition, Port, RegisterInputOptions, + RegisterOutputOptions, VideoCodec, VideoDecoder, + }, + queue::QueueInputOptions, + Pipeline, +}; +use compositor_render::{ + error::ErrorStack, + scene::{ + Component, ComponentId, HorizontalAlign, InputStreamComponent, RGBAColor, TilesComponent, + VerticalAlign, + }, + InputId, OutputId, +}; +use live_compositor::{ + config::{read_config, LoggerConfig, LoggerFormat}, + logger::{self, FfmpegLogLevel}, +}; +use signal_hook::{consts, iterator::Signals}; +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; + +use integration_tests::{ + examples::{download_all_assets, TestSample}, + ffmpeg::{start_ffmpeg_receive, start_ffmpeg_send}, +}; + +const VIDEO_RESOLUTION: Resolution = Resolution { + width: 1280, + height: 720, +}; + +const IP: &str = "127.0.0.1"; +const INPUT_PORT: u16 = 8002; +const OUTPUT_PORT: u16 = 8004; + +const VIDEOS: u16 = 6; + +fn main() { + ffmpeg_next::format::network::init(); + logger::init_logger(LoggerConfig { + ffmpeg_logger_level: FfmpegLogLevel::Info, + format: LoggerFormat::Compact, + level: "info,wgpu_hal=warn,wgpu_core=warn".to_string(), + }); + + download_all_assets().unwrap(); + + client_code().unwrap(); +} + +fn client_code() -> Result<()> { + start_ffmpeg_receive(Some(OUTPUT_PORT), None)?; + + let config = read_config(); + let (pipeline, event_loop) = Pipeline::new(Options { + queue_options: config.queue_options, + stream_fallback_timeout: config.stream_fallback_timeout, + web_renderer: config.web_renderer, + force_gpu: config.force_gpu, + download_root: config.download_root, + output_sample_rate: config.output_sample_rate, + wgpu_features: config.required_wgpu_features, + load_system_fonts: Some(true), + wgpu_ctx: None, + }) + .unwrap_or_else(|err| { + panic!( + "Failed to start compositor.\n{}", + ErrorStack::new(&err).into_string() + ) + }); + + let pipeline = Arc::new(Mutex::new(pipeline)); + + let mut children = Vec::new(); + + for i in 1..VIDEOS + 1 { + let input_id = InputId(format!("input_{i}").into()); + + let input_options = RegisterInputOptions { + input_options: InputOptions::Rtp(RtpReceiverOptions { + port: 
RequestedPort::Exact(INPUT_PORT + 2 + 2 * i), + transport_protocol: TransportProtocol::Udp, + stream: RtpStream { + video: Some(InputVideoStream { + options: VideoDecoderOptions { + decoder: VideoDecoder::VulkanVideoH264, + }, + }), + audio: None, + }, + }), + queue_options: QueueInputOptions { + offset: Some(Duration::ZERO), + required: false, + buffer_duration: None, + }, + }; + + Pipeline::register_input(&pipeline, input_id.clone(), input_options).unwrap(); + + children.push(Component::InputStream(InputStreamComponent { + id: None, + input_id, + })); + } + + let output_options = RegisterOutputOptions { + output_options: OutputOptions { + output_protocol: OutputProtocolOptions::Rtp(RtpSenderOptions { + video: Some(VideoCodec::H264), + audio: None, + connection_options: RtpConnectionOptions::Udp { + port: Port(OUTPUT_PORT), + ip: IP.into(), + }, + }), + video: Some(VideoEncoderOptions::H264(H264Options { + preset: EncoderPreset::Ultrafast, + resolution: VIDEO_RESOLUTION.into(), + raw_options: Vec::new(), + })), + audio: None, + }, + video: Some(OutputVideoOptions { + initial: Component::Tiles(TilesComponent { + id: Some(ComponentId("tiles".into())), + padding: 5.0, + background_color: RGBAColor(0x44, 0x44, 0x44, 0xff), + children, + width: None, + height: None, + margin: 0.0, + transition: None, + vertical_align: VerticalAlign::Center, + horizontal_align: HorizontalAlign::Center, + tile_aspect_ratio: (16, 9), + }), + + end_condition: PipelineOutputEndCondition::Never, + }), + audio: None, + }; + + pipeline + .lock() + .unwrap() + .register_output(OutputId("output_1".into()), output_options) + .unwrap(); + + Pipeline::start(&pipeline); + + for i in 1..VIDEOS + 1 { + start_ffmpeg_send( + IP, + Some(INPUT_PORT + 2 + 2 * i), + None, + TestSample::BigBuckBunny, + )?; + } + + let event_loop_fallback = || { + let mut signals = Signals::new([consts::SIGINT]).unwrap(); + signals.forever().next(); + }; + if let Err(err) = event_loop.run_with_fallback(&event_loop_fallback) { + panic!( + "Failed to start event loop.\n{}", + ErrorStack::new(&err).into_string() + ) + } + + Ok(()) +} diff --git a/integration_tests/src/examples.rs b/integration_tests/src/examples.rs index 92db7929a..9a3f08d4a 100644 --- a/integration_tests/src/examples.rs +++ b/integration_tests/src/examples.rs @@ -200,7 +200,7 @@ struct AssetData { path: PathBuf, } -fn download_all_assets() -> Result<()> { +pub fn download_all_assets() -> Result<()> { let assets = [AssetData { url: String::from("https://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4"), path: examples_root_dir().join("examples/assets/BigBuckBunny.mp4"), diff --git a/src/snapshot_tests/utils.rs b/src/snapshot_tests/utils.rs index 99379b35b..d79b8cb67 100644 --- a/src/snapshot_tests/utils.rs +++ b/src/snapshot_tests/utils.rs @@ -22,7 +22,7 @@ fn global_wgpu_ctx( ) -> (Arc, Arc) { static CTX: OnceLock<(Arc, Arc)> = OnceLock::new(); - CTX.get_or_init(|| create_wgpu_ctx(force_gpu, features).unwrap()) + CTX.get_or_init(|| create_wgpu_ctx(force_gpu, features, Default::default()).unwrap()) .clone() } diff --git a/src/state.rs b/src/state.rs index e3f735155..236f4bebc 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,8 +1,11 @@ use std::sync::{Arc, Mutex, MutexGuard}; use axum::response::IntoResponse; -use compositor_pipeline::pipeline::{self}; -use compositor_render::{error::InitPipelineError, EventLoop}; +use compositor_pipeline::{ + error::InitPipelineError, + pipeline::{self}, +}; +use compositor_render::EventLoop; use serde::Serialize; diff --git 
a/vk-video/.gitignore b/vk-video/.gitignore new file mode 100644 index 000000000..dde3e786e --- /dev/null +++ b/vk-video/.gitignore @@ -0,0 +1,5 @@ +/target +Cargo.lock +*.h264 +*.jpeg +*.mp4 diff --git a/vk-video/Cargo.toml b/vk-video/Cargo.toml new file mode 100644 index 000000000..becd44adf --- /dev/null +++ b/vk-video/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "vk-video" +version = "0.1.0" +edition = "2021" +authors = ["Software Mansion "] +readme = "README.md" +license = "MIT" +repository = "https://github.com/software-mansion/live-compositor" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +ash = "0.38.0" +derivative = "2.2.0" +h264-reader = { git = "https://github.com/membraneframework-labs/h264-reader.git", branch = "@jerzywilczek/scaling-lists" } +thiserror = "1.0.59" +tracing = "0.1.40" +vk-mem = "0.4.0" +wgpu = "22.1.0" + +[dev-dependencies] +tracing-subscriber = "0.3.18" + +[build-dependencies] +cfg_aliases = "0.2.1" diff --git a/vk-video/LICENSE b/vk-video/LICENSE new file mode 100644 index 000000000..f9b288684 --- /dev/null +++ b/vk-video/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 Software Mansion + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vk-video/build.rs b/vk-video/build.rs new file mode 100644 index 000000000..6fc1495c2 --- /dev/null +++ b/vk-video/build.rs @@ -0,0 +1,13 @@ +fn main() { + cfg_aliases::cfg_aliases! 
{ + vulkan: { + any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) + ) + }, + } +} diff --git a/vk-video/examples/basic.rs b/vk-video/examples/basic.rs new file mode 100644 index 000000000..388078fb3 --- /dev/null +++ b/vk-video/examples/basic.rs @@ -0,0 +1,47 @@ +#[cfg(vulkan)] +fn main() { + use std::io::Write; + + let subscriber = tracing_subscriber::FmtSubscriber::builder() + .with_max_level(tracing::Level::INFO) + .finish(); + + tracing::subscriber::set_global_default(subscriber).expect("Failed to initialize tracing"); + + let args = std::env::args().collect::>(); + if args.len() != 2 { + println!("usage: {} FILENAME", args[0]); + return; + } + + let h264_bytestream = std::fs::read(&args[1]).unwrap_or_else(|_| panic!("read {}", args[1])); + + let vulkan_ctx = std::sync::Arc::new( + vk_video::VulkanCtx::new( + wgpu::Features::empty(), + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + .unwrap(), + ); + let mut decoder = vk_video::Decoder::new(vulkan_ctx).unwrap(); + + let mut output_file = std::fs::File::create("output.nv12").unwrap(); + + for chunk in h264_bytestream.chunks(256) { + let frames = decoder.decode_to_bytes(chunk).unwrap(); + + for frame in frames { + output_file.write_all(&frame).unwrap(); + } + } +} + +#[cfg(not(vulkan))] +fn main() { + println!( + "This crate doesn't work on your operating system, because it does not support vulkan" + ); +} diff --git a/vk-video/examples/wgpu.rs b/vk-video/examples/wgpu.rs new file mode 100644 index 000000000..36f52be03 --- /dev/null +++ b/vk-video/examples/wgpu.rs @@ -0,0 +1,168 @@ +#[cfg(vulkan)] +fn main() { + use std::io::Write; + + let subscriber = tracing_subscriber::FmtSubscriber::builder() + .with_max_level(tracing::Level::INFO) + .finish(); + + tracing::subscriber::set_global_default(subscriber).expect("Failed to initialize tracing"); + + let args = std::env::args().collect::>(); + if args.len() != 2 { + println!("usage: {} FILENAME", args[0]); + return; + } + let h264_bytestream = std::fs::read(&args[1]).unwrap_or_else(|_| panic!("read {}", args[1])); + + let vulkan_ctx = std::sync::Arc::new( + vk_video::VulkanCtx::new( + wgpu::Features::empty(), + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + .unwrap(), + ); + let mut decoder = vk_video::Decoder::new(vulkan_ctx.clone()).unwrap(); + + let mut output_file = std::fs::File::create("output.nv12").unwrap(); + + for chunk in h264_bytestream.chunks(256) { + let frames = decoder.decode_to_wgpu_textures(chunk).unwrap(); + + let device = &vulkan_ctx.wgpu_ctx.device; + let queue = &vulkan_ctx.wgpu_ctx.queue; + for frame in frames { + let decoded_frame = download_wgpu_texture(device, queue, frame); + output_file.write_all(&decoded_frame).unwrap(); + } + } +} + +#[cfg(not(vulkan))] +fn main() { + println!( + "This crate doesn't work on your operating system, because it does not support vulkan" + ); +} + +#[cfg(vulkan)] +fn download_wgpu_texture( + device: &wgpu::Device, + queue: &wgpu::Queue, + frame: wgpu::Texture, +) -> Vec { + use std::io::Write; + + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + let y_plane_bytes_per_row = (frame.width() as u64 + 255) / 256 * 256; + let y_plane_size = y_plane_bytes_per_row * frame.height() as u64; + + let uv_plane_bytes_per_row = y_plane_bytes_per_row; + let uv_plane_size = uv_plane_bytes_per_row * frame.height() as u64 / 2; + + let buffer = 
device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: y_plane_size + uv_plane_size, + usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + aspect: wgpu::TextureAspect::Plane0, + origin: wgpu::Origin3d { x: 0, y: 0, z: 0 }, + texture: &frame, + mip_level: 0, + }, + wgpu::ImageCopyBuffer { + buffer: &buffer, + layout: wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: Some(y_plane_bytes_per_row as u32), + rows_per_image: None, + }, + }, + wgpu::Extent3d { + width: frame.width(), + height: frame.height(), + depth_or_array_layers: 1, + }, + ); + + encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + aspect: wgpu::TextureAspect::Plane1, + origin: wgpu::Origin3d { x: 0, y: 0, z: 0 }, + texture: &frame, + mip_level: 0, + }, + wgpu::ImageCopyBuffer { + buffer: &buffer, + layout: wgpu::ImageDataLayout { + offset: y_plane_size, + bytes_per_row: Some(uv_plane_bytes_per_row as u32), + rows_per_image: None, + }, + }, + wgpu::Extent3d { + width: frame.width() / 2, + height: frame.height() / 2, + depth_or_array_layers: 1, + }, + ); + + queue.submit(Some(encoder.finish())); + + let (y_tx, y_rx) = std::sync::mpsc::channel(); + let (uv_tx, uv_rx) = std::sync::mpsc::channel(); + let width = frame.width() as usize; + + wgpu::util::DownloadBuffer::read_buffer( + device, + queue, + &buffer.slice(..y_plane_size), + move |buf| { + let buf = buf.unwrap(); + let mut result = Vec::new(); + + for chunk in buf + .chunks(y_plane_bytes_per_row as usize) + .map(|chunk| &chunk[..width]) + { + result.write_all(chunk).unwrap(); + } + + y_tx.send(result).unwrap(); + }, + ); + + wgpu::util::DownloadBuffer::read_buffer( + device, + queue, + &buffer.slice(y_plane_size..), + move |buf| { + let buf = buf.unwrap(); + let mut result = Vec::new(); + + for chunk in buf + .chunks(uv_plane_bytes_per_row as usize) + .map(|chunk| &chunk[..width]) + { + result.write_all(chunk).unwrap(); + } + + uv_tx.send(result).unwrap(); + }, + ); + + device.poll(wgpu::Maintain::Wait); + + let mut result = Vec::new(); + result.append(&mut y_rx.recv().unwrap()); + result.append(&mut uv_rx.recv().unwrap()); + + result +} diff --git a/vk-video/src/lib.rs b/vk-video/src/lib.rs new file mode 100644 index 000000000..5826803b8 --- /dev/null +++ b/vk-video/src/lib.rs @@ -0,0 +1,69 @@ +#![cfg(not(target_os = "macos"))] +mod parser; +mod vulkan_decoder; + +use parser::Parser; +use vulkan_decoder::VulkanDecoder; + +pub use parser::ParserError; +pub use vulkan_decoder::{VulkanCtx, VulkanCtxError, VulkanDecoderError}; + +pub use vulkan_decoder::WgpuCtx; + +pub struct Decoder<'a> { + vulkan_decoder: VulkanDecoder<'a>, + parser: Parser, +} + +#[derive(Debug, thiserror::Error)] +pub enum DecoderError { + #[error("Decoder error: {0}")] + VulkanDecoderError(#[from] VulkanDecoderError), + + #[error("H264 parser error: {0}")] + ParserError(#[from] ParserError), +} + +impl<'a> Decoder<'a> { + pub fn new(vulkan_ctx: std::sync::Arc) -> Result { + let parser = Parser::default(); + let vulkan_decoder = VulkanDecoder::new(vulkan_ctx)?; + + Ok(Self { + parser, + vulkan_decoder, + }) + } +} + +impl Decoder<'_> { + /// The result is a [`Vec`] of [`Vec`]. Each [`Vec`] contains a single frame in the + /// NV12 format. 
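+    ///
+    /// A minimal usage sketch, assuming `vulkan_ctx` is an `Arc<VulkanCtx>` and `h264_bytes`
+    /// holds an Annex B H.264 bytestream (see `examples/basic.rs` for a full program):
+    ///
+    /// ```ignore
+    /// let mut decoder = vk_video::Decoder::new(vulkan_ctx)?;
+    /// for frame in decoder.decode_to_bytes(&h264_bytes)? {
+    ///     // `frame` is the raw NV12 data of one decoded picture
+    /// }
+    /// ```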
+    pub fn decode_to_bytes(
+        &mut self,
+        h264_bytestream: &[u8],
+    ) -> Result<Vec<Vec<u8>>, DecoderError> {
+        let instructions = self
+            .parser
+            .parse(h264_bytestream)
+            .into_iter()
+            .collect::<Result<Vec<_>, _>>()?;
+
+        Ok(self.vulkan_decoder.decode_to_bytes(&instructions)?)
+    }
+
+    // TODO: the below hasn't been verified.
+    /// The produced textures have the [`wgpu::TextureFormat::NV12`] format and can be used as a copy source or a texture binding.
+    pub fn decode_to_wgpu_textures(
+        &mut self,
+        h264_bytestream: &[u8],
+    ) -> Result<Vec<wgpu::Texture>, DecoderError> {
+        let instructions = self
+            .parser
+            .parse(h264_bytestream)
+            .into_iter()
+            .collect::<Result<Vec<_>, _>>()?;
+
+        Ok(self.vulkan_decoder.decode_to_wgpu_textures(&instructions)?)
+    }
+}
diff --git a/vk-video/src/parser.rs b/vk-video/src/parser.rs
new file mode 100644
index 000000000..622e86187
--- /dev/null
+++ b/vk-video/src/parser.rs
@@ -0,0 +1,307 @@
+use std::{
+    io::Read,
+    sync::{mpsc, Arc},
+};
+
+use h264_reader::{
+    annexb::AnnexBReader,
+    nal::{pps::PicParameterSet, slice::SliceHeader, sps::SeqParameterSet, Nal, RefNal},
+    push::{AccumulatedNalHandler, NalAccumulator, NalInterest},
+};
+use reference_manager::ReferenceContext;
+use tracing::trace;
+
+mod au_splitter;
+mod reference_manager;
+
+pub use reference_manager::{ReferenceId, ReferenceManagementError};
+
+#[derive(Clone, derivative::Derivative)]
+#[derivative(Debug)]
+#[allow(non_snake_case)]
+pub struct DecodeInformation {
+    pub(crate) reference_list: Option<Vec<ReferencePictureInfo>>,
+    #[derivative(Debug = "ignore")]
+    pub(crate) rbsp_bytes: Vec<u8>,
+    pub(crate) slice_indices: Vec<usize>,
+    #[derivative(Debug = "ignore")]
+    pub(crate) header: Arc<SliceHeader>,
+    pub(crate) sps_id: u8,
+    pub(crate) pps_id: u8,
+    pub(crate) picture_info: PictureInfo,
+}
+
+#[derive(Debug, Clone)]
+pub(crate) struct ReferencePictureInfo {
+    pub(crate) id: ReferenceId,
+    pub(crate) picture_info: PictureInfo,
+}
+
+#[derive(Debug, Clone, Copy)]
+#[allow(non_snake_case)]
+pub(crate) struct PictureInfo {
+    pub(crate) used_for_long_term_reference: bool,
+    pub(crate) non_existing: bool,
+    pub(crate) FrameNum: u16,
+    pub(crate) PicOrderCnt: [i32; 2],
+}
+
+#[derive(Debug, Clone)]
+pub enum DecoderInstruction {
+    Decode {
+        decode_info: DecodeInformation,
+    },
+
+    DecodeAndStoreAs {
+        decode_info: DecodeInformation,
+        reference_id: ReferenceId,
+    },
+
+    Idr {
+        decode_info: DecodeInformation,
+        reference_id: ReferenceId,
+    },
+
+    Drop {
+        reference_ids: Vec<ReferenceId>,
+    },
+
+    Sps(SeqParameterSet),
+
+    Pps(PicParameterSet),
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum ParserError {
+    #[error(transparent)]
+    ReferenceManagementError(#[from] ReferenceManagementError),
+
+    #[error("Bitstreams that allow gaps in frame_num are not supported")]
+    GapsInFrameNumNotSupported,
+
+    #[error("Streams containing fields instead of frames are not supported")]
+    FieldsNotSupported,
+
+    #[error("Error while parsing a NAL header: {0:?}")]
+    NalHeaderParseError(h264_reader::nal::NalHeaderError),
+
+    #[error("Error while parsing SPS: {0:?}")]
+    SpsParseError(h264_reader::nal::sps::SpsError),
+
+    #[error("Error while parsing PPS: {0:?}")]
+    PpsParseError(h264_reader::nal::pps::PpsError),
+
+    #[error("Error while parsing a slice: {0:?}")]
+    SliceParseError(h264_reader::nal::slice::SliceHeaderError),
+}
+
+struct NalReceiver {
+    parser_ctx: h264_reader::Context,
+    au_splitter: au_splitter::AUSplitter,
+    reference_ctx: ReferenceContext,
+    debug_channel: mpsc::Sender<NaluDebugInfo>,
+    decoder_channel: mpsc::Sender<Result<DecoderInstruction, ParserError>>,
+}
+
+impl AccumulatedNalHandler for NalReceiver {
+    fn nal(&mut self, nal: RefNal<'_>) -> NalInterest {
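+        // `AnnexBReader` calls this handler repeatedly as a NAL unit grows; returning
+        // `NalInterest::Buffer` keeps the accumulation going until the NAL is complete,
+        // and `NalInterest::Ignore` marks it as fully handled.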
+        if !nal.is_complete() {
+            return NalInterest::Buffer;
+        }
+
+        match self.handle_nal(nal) {
+            Ok((debug_nalu, instructions)) => {
+                self.debug_channel.send(debug_nalu).unwrap();
+                for instruction in instructions {
+                    self.decoder_channel.send(Ok(instruction)).unwrap();
+                }
+            }
+
+            Err(err) => {
+                self.decoder_channel.send(Err(err)).unwrap();
+            }
+        }
+
+        NalInterest::Ignore
+    }
+}
+
+impl NalReceiver {
+    fn handle_nal(
+        &mut self,
+        nal: RefNal<'_>,
+    ) -> Result<(NaluDebugInfo, Vec<DecoderInstruction>), ParserError> {
+        let nal_unit_type = nal
+            .header()
+            .map_err(ParserError::NalHeaderParseError)?
+            .nal_unit_type();
+
+        match nal_unit_type {
+            h264_reader::nal::UnitType::SeqParameterSet => {
+                let parsed = h264_reader::nal::sps::SeqParameterSet::from_bits(nal.rbsp_bits())
+                    .map_err(ParserError::SpsParseError)?;
+
+                // Perhaps this shouldn't be here, but this is the only place we process sps
+                // before sending them to the decoder. It also seems that this is the only thing we
+                // need to check about the sps.
+                if parsed.gaps_in_frame_num_value_allowed_flag {
+                    // TODO: what else to do here? sure we'll throw an error, but shouldn't we also
+                    // terminate the parser somehow?
+                    // perhaps this should be considered in other places we throw errors too
+                    Err(ParserError::GapsInFrameNumNotSupported)
+                } else {
+                    self.parser_ctx.put_seq_param_set(parsed.clone());
+                    Ok((
+                        NaluDebugInfo::Sps(parsed.clone()),
+                        vec![DecoderInstruction::Sps(parsed)],
+                    ))
+                }
+            }
+
+            h264_reader::nal::UnitType::PicParameterSet => {
+                let parsed = h264_reader::nal::pps::PicParameterSet::from_bits(
+                    &self.parser_ctx,
+                    nal.rbsp_bits(),
+                )
+                .map_err(ParserError::PpsParseError)?;
+
+                self.parser_ctx.put_pic_param_set(parsed.clone());
+
+                Ok((
+                    NaluDebugInfo::Pps(parsed.clone()),
+                    vec![DecoderInstruction::Pps(parsed)],
+                ))
+            }
+
+            h264_reader::nal::UnitType::SliceLayerWithoutPartitioningNonIdr
+            | h264_reader::nal::UnitType::SliceLayerWithoutPartitioningIdr => {
+                let (header, sps, pps) = h264_reader::nal::slice::SliceHeader::from_bits(
+                    &self.parser_ctx,
+                    &mut nal.rbsp_bits(),
+                    nal.header().unwrap(),
+                )
+                .map_err(ParserError::SliceParseError)?;
+
+                let header = Arc::new(header);
+
+                let debug_nalu = match nal_unit_type {
+                    h264_reader::nal::UnitType::SliceLayerWithoutPartitioningIdr => {
+                        NaluDebugInfo::SliceWithoutPartitioningHeaderIdr(header.clone())
+                    }
+                    h264_reader::nal::UnitType::SliceLayerWithoutPartitioningNonIdr => {
+                        NaluDebugInfo::SliceWithoutPartitioningHeaderNonIdr(header.clone())
+                    }
+                    _ => unreachable!(),
+                };
+
+                let mut rbsp_bytes = vec![0, 0, 0, 1];
+                nal.reader().read_to_end(&mut rbsp_bytes).unwrap();
+                let slice = Slice {
+                    nal_header: nal.header().unwrap(),
+                    header,
+                    pps_id: pps.pic_parameter_set_id,
+                    rbsp_bytes,
+                };
+
+                let Some(slices) = self.au_splitter.put_slice(slice) else {
+                    return Ok((debug_nalu, Vec::new()));
+                };
+
+                let instructions = self.reference_ctx.put_picture(slices, sps, pps)?;
+
+                Ok((debug_nalu, instructions))
+            }
+
+            h264_reader::nal::UnitType::Unspecified(_)
+            | h264_reader::nal::UnitType::SliceDataPartitionALayer
+            | h264_reader::nal::UnitType::SliceDataPartitionBLayer
+            | h264_reader::nal::UnitType::SliceDataPartitionCLayer
+            | h264_reader::nal::UnitType::SEI
+            | h264_reader::nal::UnitType::AccessUnitDelimiter
+            | h264_reader::nal::UnitType::EndOfSeq
+            | h264_reader::nal::UnitType::EndOfStream
+            | h264_reader::nal::UnitType::FillerData
+            | h264_reader::nal::UnitType::SeqParameterSetExtension
+            | h264_reader::nal::UnitType::PrefixNALUnit
+            | h264_reader::nal::UnitType::SubsetSeqParameterSet
+            | h264_reader::nal::UnitType::DepthParameterSet
+            | h264_reader::nal::UnitType::SliceLayerWithoutPartitioningAux
+            | h264_reader::nal::UnitType::SliceExtension
+            | h264_reader::nal::UnitType::SliceExtensionViewComponent
+            | h264_reader::nal::UnitType::Reserved(_) => Ok((
+                NaluDebugInfo::Other(format!("{:?}", nal.header().unwrap().nal_unit_type())),
+                Vec::new(),
+            )),
+        }
+    }
+}
+
+trait SpsExt {
+    fn max_frame_num(&self) -> i64;
+}
+
+impl SpsExt for SeqParameterSet {
+    fn max_frame_num(&self) -> i64 {
+        1 << self.log2_max_frame_num()
+    }
+}
+
+#[derive(Debug)]
+// This struct is only ever printed out in debug mode, but clippy detects that as the struct
+// not being used.
+#[allow(dead_code)]
+pub enum NaluDebugInfo {
+    Sps(SeqParameterSet),
+    Pps(PicParameterSet),
+    SliceWithoutPartitioningHeaderNonIdr(Arc<SliceHeader>),
+    SliceWithoutPartitioningHeaderIdr(Arc<SliceHeader>),
+    Other(String),
+}
+
+pub struct Slice {
+    pub nal_header: h264_reader::nal::NalHeader,
+    pub pps_id: h264_reader::nal::pps::PicParamSetId,
+    pub header: Arc<SliceHeader>,
+    pub rbsp_bytes: Vec<u8>,
+}
+
+pub struct Parser {
+    reader: AnnexBReader<NalAccumulator<NalReceiver>>,
+    debug_channel: mpsc::Receiver<NaluDebugInfo>,
+    decoder_channel: mpsc::Receiver<Result<DecoderInstruction, ParserError>>,
+}
+
+impl Default for Parser {
+    fn default() -> Self {
+        let (debug_tx, debug_rx) = mpsc::channel();
+        let (decoder_tx, decoder_rx) = mpsc::channel();
+
+        Parser {
+            reader: AnnexBReader::accumulate(NalReceiver {
+                reference_ctx: ReferenceContext::default(),
+                au_splitter: au_splitter::AUSplitter::default(),
+                debug_channel: debug_tx,
+                decoder_channel: decoder_tx,
+                parser_ctx: h264_reader::Context::new(),
+            }),
+            debug_channel: debug_rx,
+            decoder_channel: decoder_rx,
+        }
+    }
+}
+
+impl Parser {
+    pub fn parse(&mut self, bytes: &[u8]) -> Vec<Result<DecoderInstruction, ParserError>> {
+        self.reader.push(bytes);
+
+        let mut instructions = Vec::new();
+        while let Ok(instruction) = self.decoder_channel.try_recv() {
+            instructions.push(instruction);
+        }
+        while let Ok(nalu) = self.debug_channel.try_recv() {
+            trace!("parsed nalu: {nalu:#?}");
+        }
+
+        instructions
+    }
+}
diff --git a/vk-video/src/parser/au_splitter.rs b/vk-video/src/parser/au_splitter.rs
new file mode 100644
index 000000000..ad46ffabd
--- /dev/null
+++ b/vk-video/src/parser/au_splitter.rs
@@ -0,0 +1,139 @@
+use h264_reader::nal::slice::PicOrderCountLsb;
+
+use super::Slice;
+
+#[derive(Default)]
+pub(crate) struct AUSplitter {
+    buffered_nals: Vec<Slice>,
+}
+
+impl AUSplitter {
+    pub(crate) fn put_slice(&mut self, slice: Slice) -> Option<Vec<Slice>> {
+        if self.is_new_au(&slice) {
+            let au = std::mem::take(&mut self.buffered_nals);
+            self.buffered_nals.push(slice);
+            if !au.is_empty() {
+                Some(au)
+            } else {
+                None
+            }
+        } else {
+            self.buffered_nals.push(slice);
+            None
+        }
+    }
+
+    /// Returns `true` if `slice` is the first slice of a new Access Unit.
+    fn is_new_au(&self, slice: &Slice) -> bool {
+        let Some(last) = self.buffered_nals.last() else {
+            return true;
+        };
+
+        first_mb_in_slice_zero(slice)
+            || frame_num_differs(last, slice)
+            || pps_id_differs(last, slice)
+            || field_pic_flag_differs(last, slice)
+            || nal_ref_idc_differs_one_zero(last, slice)
+            || pic_order_cnt_zero_check(last, slice)
+            || idr_and_non_idr(last, slice)
+            || idrs_where_idr_pic_id_differs(last, slice)
+    }
+}
+
+// The code below is taken from Membrane's AU splitter in their h264 parser. The comments
+// contain the Elixir versions of the functions below them.
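+//
+// In spec terms, these predicates approximate the detection of the first VCL NAL unit of a
+// primary coded picture described in ITU-T H.264, section 7.4.1.2.4: a new access unit
+// starts whenever any of the conditions below differs between the previous slice and the
+// current one.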
+ +// defguardp first_mb_in_slice_zero(a) +// when a.first_mb_in_slice == 0 and +// a.nal_unit_type in [1, 2, 5] +// +fn first_mb_in_slice_zero(slice: &Slice) -> bool { + slice.header.first_mb_in_slice == 0 +} + +// defguardp frame_num_differs(a, b) when a.frame_num != b.frame_num +// +fn frame_num_differs(last: &Slice, curr: &Slice) -> bool { + last.header.frame_num != curr.header.frame_num +} + +// defguardp pic_parameter_set_id_differs(a, b) +// when a.pic_parameter_set_id != b.pic_parameter_set_id +// +fn pps_id_differs(last: &Slice, curr: &Slice) -> bool { + last.pps_id != curr.pps_id +} + +// defguardp field_pic_flag_differs(a, b) when a.field_pic_flag != b.field_pic_flag +// +// defguardp bottom_field_flag_differs(a, b) when a.bottom_field_flag != b.bottom_field_flag +// +fn field_pic_flag_differs(last: &Slice, curr: &Slice) -> bool { + last.header.field_pic != curr.header.field_pic +} + +// defguardp nal_ref_idc_differs_one_zero(a, b) +// when (a.nal_ref_idc == 0 or b.nal_ref_idc == 0) and +// a.nal_ref_idc != b.nal_ref_idc +// +fn nal_ref_idc_differs_one_zero(last: &Slice, curr: &Slice) -> bool { + (last.nal_header.nal_ref_idc() == 0 || curr.nal_header.nal_ref_idc() == 0) + && last.nal_header.nal_ref_idc() != curr.nal_header.nal_ref_idc() +} + +// defguardp pic_order_cnt_zero_check(a, b) +// when a.pic_order_cnt_type == 0 and b.pic_order_cnt_type == 0 and +// (a.pic_order_cnt_lsb != b.pic_order_cnt_lsb or +// a.delta_pic_order_cnt_bottom != b.delta_pic_order_cnt_bottom) +// +fn pic_order_cnt_zero_check(last: &Slice, curr: &Slice) -> bool { + let (last_pic_order_cnt_lsb, last_delta_pic_order_cnt_bottom) = + match last.header.pic_order_cnt_lsb { + Some(PicOrderCountLsb::Frame(pic_order_cnt_lsb)) => (pic_order_cnt_lsb, 0), + Some(PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + }) => (pic_order_cnt_lsb, delta_pic_order_cnt_bottom), + _ => return false, + }; + + let (curr_pic_order_cnt_lsb, curr_delta_pic_order_cnt_bottom) = + match curr.header.pic_order_cnt_lsb { + Some(PicOrderCountLsb::Frame(pic_order_cnt_lsb)) => (pic_order_cnt_lsb, 0), + Some(PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + }) => (pic_order_cnt_lsb, delta_pic_order_cnt_bottom), + _ => return false, + }; + + last_pic_order_cnt_lsb != curr_pic_order_cnt_lsb + || last_delta_pic_order_cnt_bottom != curr_delta_pic_order_cnt_bottom +} + +// defguardp pic_order_cnt_one_check_zero(a, b) +// when a.pic_order_cnt_type == 1 and b.pic_order_cnt_type == 1 and +// hd(a.delta_pic_order_cnt) != hd(b.delta_pic_order_cnt) +// TODO + +// defguardp pic_order_cnt_one_check_one(a, b) +// when a.pic_order_cnt_type == 1 and b.pic_order_cnt_type == 1 and +// hd(hd(a.delta_pic_order_cnt)) != hd(hd(b.delta_pic_order_cnt)) +// TODO + +// defguardp idr_and_non_idr(a, b) +// when (a.nal_unit_type == 5 or b.nal_unit_type == 5) and +// a.nal_unit_type != b.nal_unit_type +// +fn idr_and_non_idr(last: &Slice, curr: &Slice) -> bool { + (last.nal_header.nal_unit_type().id() == 5) ^ (curr.nal_header.nal_unit_type().id() == 5) +} + +// defguardp idrs_with_idr_pic_id_differ(a, b) +// when a.nal_unit_type == 5 and b.nal_unit_type == 5 and a.idr_pic_id != b.idr_pic_id +fn idrs_where_idr_pic_id_differs(last: &Slice, curr: &Slice) -> bool { + match (last.header.idr_pic_id, curr.header.idr_pic_id) { + (Some(last), Some(curr)) => last != curr, + _ => false, + } +} diff --git a/vk-video/src/parser/reference_manager.rs b/vk-video/src/parser/reference_manager.rs new file mode 100644 
index 000000000..424a74321
--- /dev/null
+++ b/vk-video/src/parser/reference_manager.rs
@@ -0,0 +1,468 @@
+use std::sync::Arc;
+
+use h264_reader::nal::{
+    pps::PicParameterSet,
+    slice::{DecRefPicMarking, NumRefIdxActive, RefPicListModifications, SliceHeader},
+    sps::SeqParameterSet,
+};
+
+use super::{
+    DecodeInformation, DecoderInstruction, ParserError, PictureInfo, ReferencePictureInfo, Slice,
+    SpsExt,
+};
+
+#[derive(Debug, thiserror::Error)]
+pub enum ReferenceManagementError {
+    #[error("B frames are not supported")]
+    BFramesNotSupported,
+
+    #[error("Long-term references are not supported")]
+    LongTermRefsNotSupported,
+
+    #[error("SI frames are not supported")]
+    SIFramesNotSupported,
+
+    #[error("SP frames are not supported")]
+    SPFramesNotSupported,
+
+    #[error("Adaptive memory control decoded reference picture marking process is not supported")]
+    AdaptiveMemCtlNotSupported,
+
+    #[error("Reference picture list modifications are not supported")]
+    RefPicListModificationsNotSupported,
+
+    #[error("PicOrderCntType {0} is not supported")]
+    PicOrderCntTypeNotSupported(u8),
+
+    #[error("pic_order_cnt_lsb is not present in a slice header, but is required for decoding")]
+    PicOrderCntLsbNotPresent,
+}
+
+#[derive(Debug, Default, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub struct ReferenceId(usize);
+
+#[derive(Debug, Default)]
+pub(crate) struct ReferenceContext {
+    pictures: ReferencePictures,
+    next_reference_id: ReferenceId,
+    _previous_frame_num: usize,
+    prev_pic_order_cnt_msb: i32,
+    prev_pic_order_cnt_lsb: i32,
+}
+
+impl ReferenceContext {
+    fn get_next_reference_id(&mut self) -> ReferenceId {
+        let result = self.next_reference_id;
+        self.next_reference_id = ReferenceId(result.0 + 1);
+        result
+    }
+
+    fn reset_state(&mut self) {
+        *self = Self {
+            pictures: ReferencePictures::default(),
+            next_reference_id: ReferenceId::default(),
+            _previous_frame_num: 0,
+            prev_pic_order_cnt_msb: 0,
+            prev_pic_order_cnt_lsb: 0,
+        };
+    }
+
+    fn add_short_term_reference(
+        &mut self,
+        header: Arc<SliceHeader>,
+        pic_order_cnt: [i32; 2],
+    ) -> ReferenceId {
+        let id = self.get_next_reference_id();
+        self.pictures.short_term.push(ShortTermReferencePicture {
+            header,
+            id,
+            pic_order_cnt,
+        });
+        id
+    }
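+
+    // `put_picture` below implements the sliding-window variant of reference picture marking
+    // (spec section 8.2.5.3): when the DPB is full, the short-term reference with the smallest
+    // FrameNumWrap is evicted. A self-contained sketch of just that selection rule (illustrative
+    // only; the real code below works on `ShortTermReferencePicture`):
+    //
+    // ```
+    // // references: &[(id, frame_num_wrap)]; returns the id to evict, if any
+    // fn pick_eviction_victim(references: &[(usize, i64)]) -> Option<usize> {
+    //     references.iter().min_by_key(|(_, wrap)| *wrap).map(|(id, _)| *id)
+    // }
+    // ```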
+
+    pub(crate) fn put_picture(
+        &mut self,
+        mut slices: Vec<Slice>,
+        sps: &SeqParameterSet,
+        pps: &PicParameterSet,
+    ) -> Result<Vec<DecoderInstruction>, ParserError> {
+        let header = slices.last().unwrap().header.clone();
+
+        // Maybe this belongs somewhere else, but there aren't many other sensible places to put
+        // it: this is where the slices of a picture are assembled into a single bitstream.
+        let mut rbsp_bytes = Vec::new();
+        let mut slice_indices = Vec::new();
+        for slice in &mut slices {
+            if slice.rbsp_bytes.is_empty() {
+                continue;
+            }
+            slice_indices.push(rbsp_bytes.len());
+            rbsp_bytes.append(&mut slice.rbsp_bytes);
+        }
+
+        match header.dec_ref_pic_marking {
+            Some(DecRefPicMarking::Idr {
+                long_term_reference_flag,
+                ..
+            }) => {
+                if long_term_reference_flag {
+                    Err(ReferenceManagementError::LongTermRefsNotSupported)?;
+                }
+
+                let decode_info = self.decode_information_for_frame(
+                    header.clone(),
+                    slice_indices,
+                    rbsp_bytes,
+                    sps,
+                    pps,
+                )?;
+
+                self.reset_state();
+
+                let reference_id =
+                    self.add_short_term_reference(header, decode_info.picture_info.PicOrderCnt);
+
+                Ok(vec![DecoderInstruction::Idr {
+                    decode_info,
+                    reference_id,
+                }])
+            }
+
+            Some(DecRefPicMarking::SlidingWindow) => {
+                let num_short_term = self.pictures.short_term.len();
+                let num_long_term = self.pictures.long_term.len();
+
+                let decode_info = self.decode_information_for_frame(
+                    header.clone(),
+                    slice_indices,
+                    rbsp_bytes,
+                    sps,
+                    pps,
+                )?;
+                let reference_id = self
+                    .add_short_term_reference(header.clone(), decode_info.picture_info.PicOrderCnt);
+
+                let mut decoder_instructions = vec![DecoderInstruction::DecodeAndStoreAs {
+                    decode_info,
+                    reference_id,
+                }];
+
+                if num_short_term + num_long_term == sps.max_num_ref_frames.max(1) as usize
+                    && !self.pictures.short_term.is_empty()
+                {
+                    let (idx, _) = self
+                        .pictures
+                        .short_term
+                        .iter()
+                        .enumerate()
+                        .min_by_key(|(_, reference)| {
+                            reference
+                                .decode_picture_numbers(header.frame_num as i64, sps)
+                                .unwrap()
+                                .FrameNumWrap
+                        })
+                        .unwrap();
+
+                    decoder_instructions.push(DecoderInstruction::Drop {
+                        reference_ids: vec![self.pictures.short_term.remove(idx).id],
+                    })
+                }
+
+                Ok(decoder_instructions)
+            }
+
+            Some(DecRefPicMarking::Adaptive(_)) => {
+                Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)?
+            }
+
+            // this picture is not a reference
+            None => Ok(vec![DecoderInstruction::Decode {
+                decode_info: self.decode_information_for_frame(
+                    header,
+                    slice_indices,
+                    rbsp_bytes,
+                    sps,
+                    pps,
+                )?,
+            }]),
+        }
+    }
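+
+    // To summarize the mapping above: an IDR becomes a single `Idr` instruction, a reference
+    // P frame becomes `DecodeAndStoreAs` (plus a `Drop` when the sliding window evicts the
+    // oldest short-term reference), and a non-reference frame becomes a plain `Decode`.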
+
+    fn decode_information_for_frame(
+        &mut self,
+        header: Arc<SliceHeader>,
+        slice_indices: Vec<usize>,
+        rbsp_bytes: Vec<u8>,
+        sps: &SeqParameterSet,
+        pps: &PicParameterSet,
+    ) -> Result<DecodeInformation, ParserError> {
+        let reference_list = match header.slice_type.family {
+            h264_reader::nal::slice::SliceFamily::P => {
+                let reference_list =
+                    self.initialize_reference_picture_list_for_frame(&header, sps, pps)?;
+
+                match &header.ref_pic_list_modification {
+                    Some(RefPicListModifications::P {
+                        ref_pic_list_modification_l0,
+                    }) => {
+                        if !ref_pic_list_modification_l0.is_empty() {
+                            Err(ReferenceManagementError::RefPicListModificationsNotSupported)?;
+                        }
+                    }
+
+                    None
+                    | Some(RefPicListModifications::I)
+                    | Some(RefPicListModifications::B { .. }) => unreachable!(),
+                }
+
+                Some(reference_list)
+            }
+            h264_reader::nal::slice::SliceFamily::I => None,
+            h264_reader::nal::slice::SliceFamily::B => {
+                return Err(ReferenceManagementError::BFramesNotSupported)?
+            }
+            h264_reader::nal::slice::SliceFamily::SP => {
+                return Err(ReferenceManagementError::SPFramesNotSupported)?
+            }
+            h264_reader::nal::slice::SliceFamily::SI => {
+                return Err(ReferenceManagementError::SIFramesNotSupported)?
+            }
+        };
+
+        let pic_order_cnt = match sps.pic_order_cnt {
+            h264_reader::nal::sps::PicOrderCntType::TypeZero {
+                log2_max_pic_order_cnt_lsb_minus4,
+            } => {
+                // This section is hard to read, but all of this code is transcribed from the
+                // h.264 spec, where it looks almost exactly like this.
+
+                let max_pic_order_cnt_lsb = 2_i32.pow(log2_max_pic_order_cnt_lsb_minus4 as u32 + 4);
+
+                let (prev_pic_order_cnt_msb, prev_pic_order_cnt_lsb) =
+                    if header.idr_pic_id.is_some() {
+                        (0, 0)
+                    } else {
+                        (self.prev_pic_order_cnt_msb, self.prev_pic_order_cnt_lsb)
+                    };
+
+                let (pic_order_cnt_lsb, delta_pic_order_cnt_bottom) = match header
+                    .pic_order_cnt_lsb
+                    .as_ref()
+                    .ok_or(ReferenceManagementError::PicOrderCntLsbNotPresent)?
+                {
+                    h264_reader::nal::slice::PicOrderCountLsb::Frame(pic_order_cnt_lsb) => {
+                        (*pic_order_cnt_lsb, 0)
+                    }
+                    h264_reader::nal::slice::PicOrderCountLsb::FieldsAbsolute {
+                        pic_order_cnt_lsb,
+                        delta_pic_order_cnt_bottom,
+                    } => (*pic_order_cnt_lsb, *delta_pic_order_cnt_bottom),
+                    h264_reader::nal::slice::PicOrderCountLsb::FieldsDelta(_) => {
+                        Err(ReferenceManagementError::PicOrderCntLsbNotPresent)?
+                    }
+                };
+
+                let pic_order_cnt_lsb = pic_order_cnt_lsb as i32;
+
+                let pic_order_cnt_msb = if pic_order_cnt_lsb < prev_pic_order_cnt_lsb
+                    && prev_pic_order_cnt_lsb - pic_order_cnt_lsb >= max_pic_order_cnt_lsb / 2
+                {
+                    prev_pic_order_cnt_msb + max_pic_order_cnt_lsb
+                } else if pic_order_cnt_lsb > prev_pic_order_cnt_lsb
+                    && pic_order_cnt_lsb - prev_pic_order_cnt_lsb > max_pic_order_cnt_lsb / 2
+                {
+                    prev_pic_order_cnt_msb - max_pic_order_cnt_lsb
+                } else {
+                    prev_pic_order_cnt_msb
+                };
+
+                let pic_order_cnt = if header.field_pic == h264_reader::nal::slice::FieldPic::Frame
+                {
+                    let top_field_order_cnt = pic_order_cnt_msb + pic_order_cnt_lsb;
+
+                    let bottom_field_order_cnt = top_field_order_cnt + delta_pic_order_cnt_bottom;
+
+                    top_field_order_cnt.min(bottom_field_order_cnt)
+                } else {
+                    pic_order_cnt_msb + pic_order_cnt_lsb
+                };
+
+                self.prev_pic_order_cnt_msb = pic_order_cnt_msb;
+                self.prev_pic_order_cnt_lsb = pic_order_cnt_lsb;
+
+                pic_order_cnt
+            }
+
+            h264_reader::nal::sps::PicOrderCntType::TypeOne { .. } => {
+                Err(ReferenceManagementError::PicOrderCntTypeNotSupported(1))?
+            }
+
+            h264_reader::nal::sps::PicOrderCntType::TypeTwo => match header.dec_ref_pic_marking {
+                None => 2 * header.frame_num as i32 - 1,
+                Some(DecRefPicMarking::Idr { .. }) | Some(DecRefPicMarking::SlidingWindow) => {
+                    2 * header.frame_num as i32
+                }
+                Some(DecRefPicMarking::Adaptive(..)) => {
+                    Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)?
+                }
+            },
+        };
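+
+        // A worked example of the msb wraparound above (spec section 8.2.1.1), with
+        // MaxPicOrderCntLsb = 16 (hypothetical values): after seeing lsb = 14 (msb = 0), a new
+        // slice with lsb = 2 satisfies `14 - 2 >= 16 / 2`, so the msb advances to 16 and the
+        // resulting POC is 16 + 2 = 18 rather than going backwards to 2. Symmetrically, an lsb
+        // jumping from 2 back up to 14 with `14 - 2 > 16 / 2` subtracts 16 from the msb,
+        // yielding a picture that is ordered before the wrap.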
+
+        let pic_order_cnt = [pic_order_cnt; 2];
+
+        Ok(DecodeInformation {
+            reference_list,
+            header: header.clone(),
+            slice_indices,
+            rbsp_bytes,
+            sps_id: sps.id().id(),
+            pps_id: pps.pic_parameter_set_id.id(),
+            picture_info: PictureInfo {
+                non_existing: false,
+                used_for_long_term_reference: false,
+                PicOrderCnt: pic_order_cnt,
+                FrameNum: header.frame_num,
+            },
+        })
+    }
+
+    fn initialize_short_term_reference_picture_list_for_frame(
+        &self,
+        header: &SliceHeader,
+        sps: &SeqParameterSet,
+    ) -> Result<Vec<ReferencePictureInfo>, ParserError> {
+        let mut short_term_reference_list = self
+            .pictures
+            .short_term
+            .iter()
+            .map(|reference| {
+                Ok((
+                    reference,
+                    reference.decode_picture_numbers(header.frame_num.into(), sps)?,
+                ))
+            })
+            .collect::<Result<Vec<_>, ParserError>>()?;
+
+        short_term_reference_list.sort_by_key(|(_, numbers)| -numbers.PicNum);
+
+        let short_term_reference_list = short_term_reference_list
+            .into_iter()
+            .map(|(reference, numbers)| ReferencePictureInfo {
+                id: reference.id,
+                picture_info: PictureInfo {
+                    FrameNum: numbers.FrameNum as u16,
+                    used_for_long_term_reference: false,
+                    non_existing: false,
+                    PicOrderCnt: reference.pic_order_cnt,
+                },
+            })
+            .collect::<Vec<_>>();
+
+        Ok(short_term_reference_list)
+    }
+
+    fn initialize_long_term_reference_picture_list_for_frame(
+        &self,
+    ) -> Result<Vec<ReferencePictureInfo>, ReferenceManagementError> {
+        if !self.pictures.long_term.is_empty() {
+            panic!("long-term references are not supported!");
+        }
+
+        Ok(Vec::new())
+    }
+
+    fn initialize_reference_picture_list_for_frame(
+        &self,
+        header: &SliceHeader,
+        sps: &SeqParameterSet,
+        pps: &PicParameterSet,
+    ) -> Result<Vec<ReferencePictureInfo>, ParserError> {
+        let num_ref_idx_l0_active = header
+            .num_ref_idx_active
+            .as_ref()
+            .map(|num| match num {
+                NumRefIdxActive::P {
+                    num_ref_idx_l0_active_minus1,
+                } => Ok(*num_ref_idx_l0_active_minus1),
+                NumRefIdxActive::B { .. } => Err(ReferenceManagementError::BFramesNotSupported),
+            })
+            .unwrap_or(Ok(pps.num_ref_idx_l0_default_active_minus1))?
+            + 1;
+
+        let short_term_reference_list =
+            self.initialize_short_term_reference_picture_list_for_frame(header, sps)?;
+
+        let long_term_reference_list =
+            self.initialize_long_term_reference_picture_list_for_frame()?;
+
+        let mut reference_list = short_term_reference_list
+            .into_iter()
+            .chain(long_term_reference_list)
+            .collect::<Vec<_>>();
+
+        reference_list.truncate(num_ref_idx_l0_active as usize);
+
+        Ok(reference_list)
+    }
+}
+
+#[derive(Debug)]
+struct ShortTermReferencePicture {
+    header: Arc<SliceHeader>,
+    id: ReferenceId,
+    pic_order_cnt: [i32; 2],
+}
+
+impl ShortTermReferencePicture {
+    #[allow(non_snake_case)]
+    fn decode_picture_numbers(
+        &self,
+        current_frame_num: i64,
+        sps: &SeqParameterSet,
+    ) -> Result<ShortTermReferencePictureNumbers, ParserError> {
+        if self.header.field_pic != h264_reader::nal::slice::FieldPic::Frame {
+            return Err(ParserError::FieldsNotSupported);
+        }
+
+        let MaxFrameNum = sps.max_frame_num();
+
+        let FrameNum = self.header.frame_num as i64;
+
+        let FrameNumWrap = if FrameNum > current_frame_num {
+            FrameNum - MaxFrameNum
+        } else {
+            FrameNum
+        };
+
+        // this assumes we're dealing with a short-term reference frame
+        let PicNum = FrameNumWrap;
+
+        Ok(ShortTermReferencePictureNumbers {
+            FrameNum,
+            FrameNumWrap,
+            PicNum,
+        })
+    }
+}
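+
+// A worked example of the FrameNumWrap computation above (spec section 8.2.4.1): with
+// MaxFrameNum = 16, a reference with FrameNum = 14 seen while decoding a current picture with
+// frame_num = 2 must have been transmitted before frame_num wrapped around, so it is treated as
+// 14 - 16 = -2, i.e. older than the current picture. Values are hypothetical, for illustration:
+//
+// ```
+// fn frame_num_wrap(frame_num: i64, current_frame_num: i64, max_frame_num: i64) -> i64 {
+//     if frame_num > current_frame_num {
+//         frame_num - max_frame_num
+//     } else {
+//         frame_num
+//     }
+// }
+//
+// assert_eq!(frame_num_wrap(14, 2, 16), -2);
+// assert_eq!(frame_num_wrap(1, 2, 16), 1);
+// ```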
+
+#[derive(Debug)]
+struct LongTermReferencePicture {
+    _header: Arc<SliceHeader>,
+    _id: ReferenceId,
+}
+
+#[allow(non_snake_case)]
+struct ShortTermReferencePictureNumbers {
+    FrameNum: i64,
+
+    FrameNumWrap: i64,
+
+    PicNum: i64,
+}
+
+#[derive(Debug, Default)]
+struct ReferencePictures {
+    long_term: Vec<LongTermReferencePicture>,
+    short_term: Vec<ShortTermReferencePicture>,
+}
diff --git a/vk-video/src/vulkan_decoder.rs b/vk-video/src/vulkan_decoder.rs
new file mode 100644
index 000000000..f2152e472
--- /dev/null
+++ b/vk-video/src/vulkan_decoder.rs
@@ -0,0 +1,993 @@
+use std::sync::Arc;
+
+use ash::vk;
+
+use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet};
+use session_resources::VideoSessionResources;
+use tracing::error;
+use wrappers::*;
+
+use crate::parser::{DecodeInformation, DecoderInstruction, ReferenceId};
+
+mod session_resources;
+mod vulkan_ctx;
+mod wrappers;
+
+pub use vulkan_ctx::*;
+
+pub struct VulkanDecoder<'a> {
+    vulkan_ctx: Arc<VulkanCtx>,
+    video_session_resources: Option<VideoSessionResources<'a>>,
+    command_buffers: CommandBuffers,
+    _command_pools: CommandPools,
+    sync_structures: SyncStructures,
+    reference_id_to_dpb_slot_index: std::collections::HashMap<ReferenceId, usize>,
+    decode_query_pool: Option<DecodeQueryPool>,
+}
+
+struct SyncStructures {
+    sem_decode_done: Semaphore,
+    fence_transfer_done: Fence,
+    fence_memory_barrier_completed: Fence,
+}
+
+struct CommandBuffers {
+    decode_buffer: CommandBuffer,
+    gpu_to_mem_transfer_buffer: CommandBuffer,
+    vulkan_to_wgpu_transfer_buffer: CommandBuffer,
+}
+
+/// This cannot outlive the image and semaphore it borrows, but it seems impossible to encode
+/// that in the lifetimes
+struct DecodeOutput {
+    image: vk::Image,
+    dimensions: vk::Extent2D,
+    current_layout: vk::ImageLayout,
+    layer: u32,
+    wait_semaphore: vk::Semaphore,
+    _input_buffer: Buffer,
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum VulkanDecoderError {
+    #[error("Vulkan error: {0}")]
+    VkError(#[from] vk::Result),
+
+    #[error("Cannot find enough memory of the right type on the device")]
+    NoMemory,
+
+    #[error("The decoder instruction is not supported: {0:?}")]
+    DecoderInstructionNotSupported(Box<DecoderInstruction>),
+
+    #[error("Setting the frame cropping flag in sps is not supported")]
+    FrameCroppingNotSupported,
+
+    #[error("Bitstreams that contain fields rather than frames are not supported")]
+    FieldsNotSupported,
+
+    #[error("Scaling lists are not supported")]
+    ScalingListsNotSupported,
+
+    #[error("A NALU requiring a session was received before a session was created (probably before receiving the first SPS)")]
+    NoSession,
+
+    #[error("A slot in the Decoded Pictures Buffer was requested, but all slots are taken")]
+    NoFreeSlotsInDpb,
+
+    #[error("A picture which is not in the decoded pictures buffer was requested as a reference picture")]
+    NonExistentReferenceRequested,
+
+    #[error("A vulkan decode operation failed with code {0:?}")]
+    DecodeOperationFailed(vk::QueryResultStatusKHR),
+
+    #[error(transparent)]
+    VulkanCtxError(#[from] VulkanCtxError),
+}
+
+impl<'a> VulkanDecoder<'a> {
+    pub fn new(vulkan_ctx: Arc<VulkanCtx>) -> Result<Self, VulkanDecoderError> {
+        let decode_pool = Arc::new(CommandPool::new(
+            vulkan_ctx.device.clone(),
+            vulkan_ctx.queues.h264_decode.idx,
+        )?);
+
+        let transfer_pool = Arc::new(CommandPool::new(
+            vulkan_ctx.device.clone(),
+            vulkan_ctx.queues.transfer.idx,
+        )?);
+
+        let decode_buffer = CommandBuffer::new_primary(decode_pool.clone())?;
+
+        let gpu_to_mem_transfer_buffer = CommandBuffer::new_primary(transfer_pool.clone())?;
+
+        let vulkan_to_wgpu_transfer_buffer = CommandBuffer::new_primary(transfer_pool.clone())?;
+
+        let command_pools = CommandPools {
+            _decode_pool: decode_pool,
+            _transfer_pool: transfer_pool,
+        };
+
+        let sync_structures = SyncStructures {
+            sem_decode_done: Semaphore::new(vulkan_ctx.device.clone())?,
+            fence_transfer_done: Fence::new(vulkan_ctx.device.clone(), false)?,
+            fence_memory_barrier_completed: Fence::new(vulkan_ctx.device.clone(), false)?,
+        };
+
+        let decode_query_pool = if vulkan_ctx
+            .queues
+            .h264_decode
+            .supports_result_status_queries()
+        {
+            Some(DecodeQueryPool::new(
+                vulkan_ctx.device.clone(),
+                H264ProfileInfo::decode_h264_yuv420().profile_info,
+            )?)
+        } else {
+            None
+        };
+
+        Ok(Self {
+            vulkan_ctx,
+            video_session_resources: None,
+            _command_pools: command_pools,
+            command_buffers: CommandBuffers {
+                decode_buffer,
+                gpu_to_mem_transfer_buffer,
+                vulkan_to_wgpu_transfer_buffer,
+            },
+            sync_structures,
+            decode_query_pool,
+            reference_id_to_dpb_slot_index: Default::default(),
+        })
+    }
+}
+
+impl VulkanDecoder<'_> {
+    pub fn decode_to_bytes(
+        &mut self,
+        decoder_instructions: &[DecoderInstruction],
+    ) -> Result<Vec<Vec<u8>>, VulkanDecoderError> {
+        let mut result = Vec::new();
+        for instruction in decoder_instructions {
+            if let Some(output) = self.decode(instruction)? {
+                result.push(self.download_output(output)?)
+            }
+        }
+
+        Ok(result)
+    }
+
+    pub fn decode_to_wgpu_textures(
+        &mut self,
+        decoder_instructions: &[DecoderInstruction],
+    ) -> Result<Vec<wgpu::Texture>, VulkanDecoderError> {
+        let mut result = Vec::new();
+        for instruction in decoder_instructions {
+            if let Some(output) = self.decode(instruction)? {
+                result.push(self.output_to_wgpu_texture(output)?)
+            }
+        }
+
+        Ok(result)
+    }
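+
+    // The two methods above are the public entry points. A minimal end-to-end sketch, assuming a
+    // `vulkan_ctx: Arc<VulkanCtx>` and some Annex-B input are already available (error handling
+    // elided, local names hypothetical):
+    //
+    // ```
+    // let mut parser = crate::parser::Parser::default();
+    // let mut decoder = VulkanDecoder::new(vulkan_ctx)?;
+    // let instructions = parser
+    //     .parse(&annex_b_bytes)
+    //     .into_iter()
+    //     .collect::<Result<Vec<_>, _>>()?;
+    // // each returned Vec<u8> is one decoded frame in NV12 layout
+    // let frames = decoder.decode_to_bytes(&instructions)?;
+    // ```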
+
+    fn decode(
+        &mut self,
+        instruction: &DecoderInstruction,
+    ) -> Result<Option<DecodeOutput>, VulkanDecoderError> {
+        match instruction {
+            DecoderInstruction::Decode { .. } => {
+                return Err(VulkanDecoderError::DecoderInstructionNotSupported(
+                    Box::new(instruction.clone()),
+                ))
+            }
+
+            DecoderInstruction::DecodeAndStoreAs {
+                decode_info,
+                reference_id,
+            } => {
+                return self
+                    .process_reference_p_frame(decode_info, *reference_id)
+                    .map(Option::Some)
+            }
+
+            DecoderInstruction::Idr {
+                decode_info,
+                reference_id,
+            } => {
+                return self
+                    .process_idr(decode_info, *reference_id)
+                    .map(Option::Some)
+            }
+
+            DecoderInstruction::Drop { reference_ids } => {
+                for reference_id in reference_ids {
+                    match self.reference_id_to_dpb_slot_index.remove(reference_id) {
+                        Some(dpb_idx) => self
+                            .video_session_resources
+                            .as_mut()
+                            .map(|s| s.free_reference_picture(dpb_idx)),
+                        None => return Err(VulkanDecoderError::NonExistentReferenceRequested),
+                    };
+                }
+            }
+
+            DecoderInstruction::Sps(sps) => self.process_sps(sps)?,
+
+            DecoderInstruction::Pps(pps) => self.process_pps(pps)?,
+        }
+
+        Ok(None)
+    }
+
+    fn process_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> {
+        match self.video_session_resources.as_mut() {
+            Some(session) => session.process_sps(
+                &self.vulkan_ctx,
+                &self.command_buffers.decode_buffer,
+                sps,
+                &self.sync_structures.fence_memory_barrier_completed,
+            )?,
+            None => {
+                self.video_session_resources = Some(VideoSessionResources::new_from_sps(
+                    &self.vulkan_ctx,
+                    &self.command_buffers.decode_buffer,
+                    sps,
+                    &self.sync_structures.fence_memory_barrier_completed,
+                )?)
+            }
+        }
+
+        Ok(())
+    }
+
+    fn process_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> {
+        self.video_session_resources
+            .as_mut()
+            .ok_or(VulkanDecoderError::NoSession)?
+            .process_pps(pps)?;
+
+        Ok(())
+    }
+
+    fn pad_size_to_alignment(size: u64, align: u64) -> u64 {
+        if size % align == 0 {
+            size
+        } else {
+            (size + align) / align * align
+        }
+    }
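+
+    // A quick sanity check of the rounding above (values are arbitrary): with align = 256, a
+    // size of 256 stays 256, while 257 is padded up to 512. The decoder uses this to round the
+    // bitstream buffer size up to the device's required alignment.
+    //
+    // ```
+    // assert_eq!(VulkanDecoder::pad_size_to_alignment(256, 256), 256);
+    // assert_eq!(VulkanDecoder::pad_size_to_alignment(257, 256), 512);
+    // ```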
+
+    fn process_idr(
+        &mut self,
+        decode_information: &DecodeInformation,
+        reference_id: ReferenceId,
+    ) -> Result<DecodeOutput, VulkanDecoderError> {
+        self.do_decode(decode_information, reference_id, true, true)
+    }
+
+    fn process_reference_p_frame(
+        &mut self,
+        decode_information: &DecodeInformation,
+        reference_id: ReferenceId,
+    ) -> Result<DecodeOutput, VulkanDecoderError> {
+        self.do_decode(decode_information, reference_id, false, true)
+    }
+
+    fn do_decode(
+        &mut self,
+        decode_information: &DecodeInformation,
+        reference_id: ReferenceId,
+        is_idr: bool,
+        is_reference: bool,
+    ) -> Result<DecodeOutput, VulkanDecoderError> {
+        // upload data to a buffer
+        let size = Self::pad_size_to_alignment(
+            decode_information.rbsp_bytes.len() as u64,
+            self.vulkan_ctx
+                .video_capabilities
+                .min_bitstream_buffer_offset_alignment,
+        );
+
+        let decode_buffer = Buffer::new_with_decode_data(
+            self.vulkan_ctx.allocator.clone(),
+            &decode_information.rbsp_bytes,
+            size,
+        )?;
+
+        // decode
+        let video_session_resources = self
+            .video_session_resources
+            .as_mut()
+            .ok_or(VulkanDecoderError::NoSession)?;
+
+        // IDR - remove all reference pictures
+        if is_idr {
+            video_session_resources
+                .decoding_images
+                .reset_all_allocations();
+
+            self.reference_id_to_dpb_slot_index = Default::default();
+        }
+
+        // begin video coding
+        self.command_buffers.decode_buffer.begin()?;
+
+        let memory_barrier = vk::MemoryBarrier2::default()
+            .src_stage_mask(vk::PipelineStageFlags2::VIDEO_DECODE_KHR)
+            .src_access_mask(vk::AccessFlags2::VIDEO_DECODE_WRITE_KHR)
+            .dst_stage_mask(vk::PipelineStageFlags2::VIDEO_DECODE_KHR)
+            .dst_access_mask(
+                vk::AccessFlags2::VIDEO_DECODE_READ_KHR | vk::AccessFlags2::VIDEO_DECODE_WRITE_KHR,
+            );
+
+        unsafe {
+            self.vulkan_ctx.device.cmd_pipeline_barrier2(
*self.command_buffers.decode_buffer, + &vk::DependencyInfo::default().memory_barriers(&[memory_barrier]), + ) + }; + + if let Some(pool) = self.decode_query_pool.as_ref() { + pool.reset(*self.command_buffers.decode_buffer); + } + + let reference_slots = video_session_resources + .decoding_images + .reference_slot_info(); + + let begin_info = vk::VideoBeginCodingInfoKHR::default() + .video_session(video_session_resources.video_session.session) + .video_session_parameters(video_session_resources.parameters_manager.parameters()) + .reference_slots(&reference_slots); + + unsafe { + self.vulkan_ctx + .device + .video_queue_ext + .cmd_begin_video_coding_khr(*self.command_buffers.decode_buffer, &begin_info) + }; + + // IDR - issue the reset command to the video session + if is_idr { + let control_info = vk::VideoCodingControlInfoKHR::default() + .flags(vk::VideoCodingControlFlagsKHR::RESET); + + unsafe { + self.vulkan_ctx + .device + .video_queue_ext + .cmd_control_video_coding_khr( + *self.command_buffers.decode_buffer, + &control_info, + ) + }; + } + + // allocate a new reference picture and fill out the forms to get it set up + let new_reference_slot_index = video_session_resources + .decoding_images + .allocate_reference_picture()?; + + let new_reference_slot_std_reference_info = decode_information.picture_info.into(); + let mut new_reference_slot_dpb_slot_info = vk::VideoDecodeH264DpbSlotInfoKHR::default() + .std_reference_info(&new_reference_slot_std_reference_info); + + let new_reference_slot_video_picture_resource_info = video_session_resources + .decoding_images + .video_resource_info(new_reference_slot_index) + .unwrap(); + + let setup_reference_slot = vk::VideoReferenceSlotInfoKHR::default() + .picture_resource(new_reference_slot_video_picture_resource_info) + .slot_index(new_reference_slot_index as i32) + .push_next(&mut new_reference_slot_dpb_slot_info); + + // prepare the reference list + let reference_slots = video_session_resources + .decoding_images + .reference_slot_info(); + + let references_std_ref_info = Self::prepare_references_std_ref_info(decode_information); + + let mut references_dpb_slot_info = + Self::prepare_references_dpb_slot_info(&references_std_ref_info); + + let pic_reference_slots = Self::prepare_reference_list_slot_info( + &self.reference_id_to_dpb_slot_index, + &reference_slots, + &mut references_dpb_slot_info, + decode_information, + )?; + + // prepare the decode target picture + let std_picture_info = vk::native::StdVideoDecodeH264PictureInfo { + flags: vk::native::StdVideoDecodeH264PictureInfoFlags { + _bitfield_align_1: [], + __bindgen_padding_0: [0; 3], + _bitfield_1: vk::native::StdVideoDecodeH264PictureInfoFlags::new_bitfield_1( + matches!( + decode_information.header.field_pic, + h264_reader::nal::slice::FieldPic::Field(..) 
+ ) + .into(), + is_idr.into(), + is_idr.into(), + 0, + is_reference.into(), + 0, + ), + }, + PicOrderCnt: decode_information.picture_info.PicOrderCnt, + seq_parameter_set_id: decode_information.sps_id, + pic_parameter_set_id: decode_information.pps_id, + frame_num: decode_information.header.frame_num, + idr_pic_id: decode_information + .header + .idr_pic_id + .map(|a| a as u16) + .unwrap_or(0), + reserved1: 0, + reserved2: 0, + }; + + let slice_offsets = decode_information + .slice_indices + .iter() + .map(|&x| x as u32) + .collect::>(); + + let mut decode_h264_picture_info = vk::VideoDecodeH264PictureInfoKHR::default() + .std_picture_info(&std_picture_info) + .slice_offsets(&slice_offsets); + + let dst_picture_resource_info = &video_session_resources + .decoding_images + .target_picture_resource_info(new_reference_slot_index) + .unwrap(); + + // these 3 veriables are for copying the result later + let (target_image, target_image_layout, target_layer) = video_session_resources + .decoding_images + .target_info(new_reference_slot_index); + + // fill out the final struct and issue the command + let decode_info = vk::VideoDecodeInfoKHR::default() + .src_buffer(*decode_buffer) + .src_buffer_offset(0) + .src_buffer_range(size) + .dst_picture_resource(*dst_picture_resource_info) + .setup_reference_slot(&setup_reference_slot) + .reference_slots(&pic_reference_slots) + .push_next(&mut decode_h264_picture_info); + + if let Some(pool) = self.decode_query_pool.as_ref() { + pool.begin_query(*self.command_buffers.decode_buffer); + } + + unsafe { + self.vulkan_ctx + .device + .video_decode_queue_ext + .cmd_decode_video_khr(*self.command_buffers.decode_buffer, &decode_info) + }; + + if let Some(pool) = self.decode_query_pool.as_ref() { + pool.end_query(*self.command_buffers.decode_buffer); + } + + unsafe { + self.vulkan_ctx + .device + .video_queue_ext + .cmd_end_video_coding_khr( + *self.command_buffers.decode_buffer, + &vk::VideoEndCodingInfoKHR::default(), + ) + }; + + self.command_buffers.decode_buffer.end()?; + + self.vulkan_ctx.queues.h264_decode.submit( + &self.command_buffers.decode_buffer, + &[], + &[( + *self.sync_structures.sem_decode_done, + vk::PipelineStageFlags2::VIDEO_DECODE_KHR, + )], + None, + )?; + + // after the decode save the new reference picture + self.reference_id_to_dpb_slot_index + .insert(reference_id, new_reference_slot_index); + + // TODO: those are not the real dimensions of the image. 
the real dimensions should be + // calculated from the sps + let dimensions = video_session_resources.video_session.max_coded_extent; + + Ok(DecodeOutput { + image: target_image, + wait_semaphore: *self.sync_structures.sem_decode_done, + layer: target_layer as u32, + current_layout: target_image_layout, + dimensions, + _input_buffer: decode_buffer, + }) + } + + fn output_to_wgpu_texture( + &self, + decode_output: DecodeOutput, + ) -> Result { + let copy_extent = vk::Extent3D { + width: decode_output.dimensions.width, + height: decode_output.dimensions.height, + depth: 1, + }; + + let queue_indices = [ + self.vulkan_ctx.queues.transfer.idx as u32, + self.vulkan_ctx.queues.wgpu.idx as u32, + ]; + + let create_info = vk::ImageCreateInfo::default() + .flags(vk::ImageCreateFlags::MUTABLE_FORMAT) + .image_type(vk::ImageType::TYPE_2D) + .format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .extent(copy_extent) + .mip_levels(1) + .array_layers(1) + .samples(vk::SampleCountFlags::TYPE_1) + .tiling(vk::ImageTiling::OPTIMAL) + .usage( + vk::ImageUsageFlags::SAMPLED + | vk::ImageUsageFlags::TRANSFER_DST + | vk::ImageUsageFlags::TRANSFER_SRC, + ) + .sharing_mode(vk::SharingMode::CONCURRENT) + .queue_family_indices(&queue_indices) + .initial_layout(vk::ImageLayout::UNDEFINED); + + let image = Arc::new(Image::new(self.vulkan_ctx.allocator.clone(), &create_info)?); + + self.command_buffers + .vulkan_to_wgpu_transfer_buffer + .begin()?; + + let memory_barrier_src = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::COPY) + .dst_access_mask(vk::AccessFlags2::TRANSFER_READ) + .old_layout(decode_output.current_layout) + .new_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(decode_output.image) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: decode_output.layer, + layer_count: 1, + }); + + let memory_barrier_dst = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::COPY) + .dst_access_mask(vk::AccessFlags2::TRANSFER_WRITE) + .old_layout(vk::ImageLayout::UNDEFINED) + .new_layout(vk::ImageLayout::TRANSFER_DST_OPTIMAL) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(**image) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: 1, + }); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.vulkan_to_wgpu_transfer_buffer, + &vk::DependencyInfo::default() + .image_memory_barriers(&[memory_barrier_src, memory_barrier_dst]), + ) + }; + + let copy_info = [ + vk::ImageCopy::default() + .src_subresource(vk::ImageSubresourceLayers { + base_array_layer: decode_output.layer, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_0, + }) + .src_offset(vk::Offset3D::default()) + .dst_subresource(vk::ImageSubresourceLayers { + base_array_layer: 0, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_0, + }) + .dst_offset(vk::Offset3D::default()) + .extent(copy_extent), + vk::ImageCopy::default() + 
.src_subresource(vk::ImageSubresourceLayers { + base_array_layer: decode_output.layer, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_1, + }) + .src_offset(vk::Offset3D::default()) + .dst_subresource(vk::ImageSubresourceLayers { + base_array_layer: 0, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_1, + }) + .dst_offset(vk::Offset3D::default()) + .extent(vk::Extent3D { + width: copy_extent.width / 2, + height: copy_extent.height / 2, + ..copy_extent + }), + ]; + + unsafe { + self.vulkan_ctx.device.cmd_copy_image( + *self.command_buffers.vulkan_to_wgpu_transfer_buffer, + decode_output.image, + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + **image, + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + ©_info, + ); + } + + let memory_barrier_src = memory_barrier_src + .src_stage_mask(vk::PipelineStageFlags2::COPY) + .src_access_mask(vk::AccessFlags2::TRANSFER_READ) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .new_layout(decode_output.current_layout); + + let memory_barrier_dst = memory_barrier_dst + .src_stage_mask(vk::PipelineStageFlags2::COPY) + .src_access_mask(vk::AccessFlags2::TRANSFER_WRITE) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::TRANSFER_DST_OPTIMAL) + .new_layout(vk::ImageLayout::GENERAL); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.vulkan_to_wgpu_transfer_buffer, + &vk::DependencyInfo::default() + .image_memory_barriers(&[memory_barrier_src, memory_barrier_dst]), + ) + }; + + self.command_buffers.vulkan_to_wgpu_transfer_buffer.end()?; + + self.vulkan_ctx.queues.transfer.submit( + &self.command_buffers.vulkan_to_wgpu_transfer_buffer, + &[( + decode_output.wait_semaphore, + vk::PipelineStageFlags2::TOP_OF_PIPE, + )], + &[], + Some(*self.sync_structures.fence_transfer_done), + )?; + + self.sync_structures + .fence_transfer_done + .wait_and_reset(u64::MAX)?; + + let result = self + .decode_query_pool + .as_ref() + .map(|pool| pool.get_result_blocking()); + + if let Some(result) = result { + let result = result?; + if result.as_raw() < 0 { + return Err(VulkanDecoderError::DecodeOperationFailed(result)); + } + } + + let hal_texture = unsafe { + wgpu::hal::vulkan::Device::texture_from_raw( + **image, + &wgpu::hal::TextureDescriptor { + label: Some("vulkan video output texture"), + usage: wgpu::hal::TextureUses::RESOURCE + | wgpu::hal::TextureUses::COPY_DST + | wgpu::hal::TextureUses::COPY_SRC, + memory_flags: wgpu::hal::MemoryFlags::empty(), + size: wgpu::Extent3d { + width: copy_extent.width, + height: copy_extent.height, + depth_or_array_layers: copy_extent.depth, + }, + dimension: wgpu::TextureDimension::D2, + sample_count: 1, + view_formats: Vec::new(), + format: wgpu::TextureFormat::NV12, + mip_level_count: 1, + }, + Some(Box::new(image.clone())), + ) + }; + + let wgpu_texture = unsafe { + self.vulkan_ctx + .wgpu_ctx + .device + .create_texture_from_hal::( + hal_texture, + &wgpu::TextureDescriptor { + label: Some("vulkan video output texture"), + usage: wgpu::TextureUsages::COPY_DST + | wgpu::TextureUsages::TEXTURE_BINDING + | wgpu::TextureUsages::COPY_SRC, + size: wgpu::Extent3d { + width: copy_extent.width, + height: copy_extent.height, + depth_or_array_layers: copy_extent.depth, + }, + dimension: wgpu::TextureDimension::D2, + sample_count: 1, + view_formats: &[], + format: wgpu::TextureFormat::NV12, + 
mip_level_count: 1,
+                    },
+                )
+        };
+
+        Ok(wgpu_texture)
+    }
+
+    fn download_output(&self, decode_output: DecodeOutput) -> Result<Vec<u8>, VulkanDecoderError> {
+        let mut dst_buffer = self.copy_image_to_buffer(
+            decode_output.image,
+            decode_output.dimensions,
+            decode_output.current_layout,
+            decode_output.layer,
+            &[(decode_output.wait_semaphore, vk::PipelineStageFlags2::COPY)],
+            &[],
+            Some(*self.sync_structures.fence_transfer_done),
+        )?;
+
+        self.sync_structures
+            .fence_transfer_done
+            .wait_and_reset(u64::MAX)?;
+
+        let output = unsafe {
+            dst_buffer.download_data_from_buffer(
+                decode_output.dimensions.width as usize
+                    * decode_output.dimensions.height as usize
+                    * 3
+                    / 2,
+            )?
+        };
+
+        Ok(output)
+    }
+
+    fn prepare_references_std_ref_info(
+        decode_information: &DecodeInformation,
+    ) -> Vec<vk::native::StdVideoDecodeH264ReferenceInfo> {
+        decode_information
+            .reference_list
+            .iter()
+            .flatten()
+            .map(|ref_info| ref_info.picture_info.into())
+            .collect::<Vec<_>>()
+    }
+
+    fn prepare_references_dpb_slot_info(
+        references_std_ref_info: &[vk::native::StdVideoDecodeH264ReferenceInfo],
+    ) -> Vec<vk::VideoDecodeH264DpbSlotInfoKHR> {
+        references_std_ref_info
+            .iter()
+            .map(|info| vk::VideoDecodeH264DpbSlotInfoKHR::default().std_reference_info(info))
+            .collect::<Vec<_>>()
+    }
+
+    fn prepare_reference_list_slot_info<'a>(
+        reference_id_to_dpb_slot_index: &std::collections::HashMap<ReferenceId, usize>,
+        reference_slots: &'a [vk::VideoReferenceSlotInfoKHR<'a>],
+        references_dpb_slot_info: &'a mut [vk::VideoDecodeH264DpbSlotInfoKHR<'a>],
+        decode_information: &'a DecodeInformation,
+    ) -> Result<Vec<vk::VideoReferenceSlotInfoKHR<'a>>, VulkanDecoderError> {
+        let mut pic_reference_slots = Vec::new();
+        for (ref_info, dpb_slot_info) in decode_information
+            .reference_list
+            .iter()
+            .flatten()
+            .zip(references_dpb_slot_info.iter_mut())
+        {
+            let i = *reference_id_to_dpb_slot_index
+                .get(&ref_info.id)
+                .ok_or(VulkanDecoderError::NonExistentReferenceRequested)?;
+
+            let reference = *reference_slots
+                .get(i)
+                .ok_or(VulkanDecoderError::NonExistentReferenceRequested)?;
+
+            if reference.slot_index < 0 || reference.p_picture_resource.is_null() {
+                return Err(VulkanDecoderError::NonExistentReferenceRequested);
+            }
+
+            let reference = reference.push_next(dpb_slot_info);
+
+            pic_reference_slots.push(reference);
+        }
+
+        Ok(pic_reference_slots)
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    fn copy_image_to_buffer(
+        &self,
+        image: vk::Image,
+        dimensions: vk::Extent2D,
+        current_image_layout: vk::ImageLayout,
+        layer: u32,
+        wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)],
+        signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)],
+        fence: Option<vk::Fence>,
+    ) -> Result<Buffer, VulkanDecoderError> {
+        self.command_buffers.gpu_to_mem_transfer_buffer.begin()?;
+
+        let memory_barrier = vk::ImageMemoryBarrier2::default()
+            .src_stage_mask(vk::PipelineStageFlags2::NONE)
+            .src_access_mask(vk::AccessFlags2::NONE)
+            .dst_stage_mask(vk::PipelineStageFlags2::COPY)
+            .dst_access_mask(vk::AccessFlags2::TRANSFER_READ)
+            .old_layout(current_image_layout)
+            .new_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL)
+            .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED)
+            .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED)
+            .image(image)
+            .subresource_range(vk::ImageSubresourceRange {
+                aspect_mask: vk::ImageAspectFlags::COLOR,
+                base_mip_level: 0,
+                level_count: 1,
+                base_array_layer: layer,
+                layer_count: 1,
+            });
+
+        unsafe {
+            self.vulkan_ctx.device.cmd_pipeline_barrier2(
+                *self.command_buffers.gpu_to_mem_transfer_buffer,
+                &vk::DependencyInfo::default().image_memory_barriers(&[memory_barrier]),
+            )
+        };
+
+        // TODO: in this section, we shouldn't be using `max_coded_extent` and
use the real frame + // resolution + let y_plane_size = dimensions.width as u64 * dimensions.height as u64; + + let dst_buffer = Buffer::new_transfer( + self.vulkan_ctx.allocator.clone(), + y_plane_size * 3 / 2, + TransferDirection::GpuToMem, + )?; + + let copy_info = [ + vk::BufferImageCopy::default() + .image_subresource(vk::ImageSubresourceLayers { + mip_level: 0, + layer_count: 1, + base_array_layer: layer, + aspect_mask: vk::ImageAspectFlags::PLANE_0, + }) + .image_offset(vk::Offset3D { x: 0, y: 0, z: 0 }) + .image_extent(vk::Extent3D { + width: dimensions.width, + height: dimensions.height, + depth: 1, + }) + .buffer_offset(0) + .buffer_row_length(0) + .buffer_image_height(0), + vk::BufferImageCopy::default() + .image_subresource(vk::ImageSubresourceLayers { + mip_level: 0, + layer_count: 1, + base_array_layer: layer, + aspect_mask: vk::ImageAspectFlags::PLANE_1, + }) + .image_offset(vk::Offset3D { x: 0, y: 0, z: 0 }) + .image_extent(vk::Extent3D { + width: dimensions.width / 2, + height: dimensions.height / 2, + depth: 1, + }) + .buffer_offset(y_plane_size) + .buffer_row_length(0) + .buffer_image_height(0), + ]; + + unsafe { + self.vulkan_ctx.device.cmd_copy_image_to_buffer( + *self.command_buffers.gpu_to_mem_transfer_buffer, + image, + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + *dst_buffer, + ©_info, + ) + }; + + let memory_barrier = memory_barrier + .src_stage_mask(vk::PipelineStageFlags2::COPY) + .src_access_mask(vk::AccessFlags2::TRANSFER_READ) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .new_layout(current_image_layout); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.gpu_to_mem_transfer_buffer, + &vk::DependencyInfo::default().image_memory_barriers(&[memory_barrier]), + ) + }; + + self.command_buffers.gpu_to_mem_transfer_buffer.end()?; + + self.vulkan_ctx.queues.transfer.submit( + &self.command_buffers.gpu_to_mem_transfer_buffer, + wait_semaphores, + signal_semaphores, + fence, + )?; + + Ok(dst_buffer) + } +} + +pub(crate) struct H264ProfileInfo<'a> { + profile_info: vk::VideoProfileInfoKHR<'a>, + h264_info_ptr: *mut vk::VideoDecodeH264ProfileInfoKHR<'a>, +} + +impl H264ProfileInfo<'_> { + fn decode_h264_yuv420() -> Self { + let h264_profile_info = Box::leak(Box::new( + vk::VideoDecodeH264ProfileInfoKHR::default() + .std_profile_idc( + vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_BASELINE, + ) + .picture_layout(vk::VideoDecodeH264PictureLayoutFlagsKHR::PROGRESSIVE), + )); + + let h264_info_ptr = h264_profile_info as *mut _; + let profile_info = vk::VideoProfileInfoKHR::default() + .video_codec_operation(vk::VideoCodecOperationFlagsKHR::DECODE_H264) + .chroma_subsampling(vk::VideoChromaSubsamplingFlagsKHR::TYPE_420) + .luma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .chroma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .push_next(h264_profile_info); + + Self { + profile_info, + h264_info_ptr, + } + } +} + +impl<'a> Drop for H264ProfileInfo<'a> { + fn drop(&mut self) { + unsafe { + let _ = Box::from_raw(self.h264_info_ptr); + } + } +} diff --git a/vk-video/src/vulkan_decoder/session_resources.rs b/vk-video/src/vulkan_decoder/session_resources.rs new file mode 100644 index 000000000..e59c71e09 --- /dev/null +++ b/vk-video/src/vulkan_decoder/session_resources.rs @@ -0,0 +1,170 @@ +use ash::vk; +use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; +use images::DecodingImages; +use 
parameters::VideoSessionParametersManager;
+
+use super::{
+    CommandBuffer, Fence, H264ProfileInfo, SeqParameterSetExt, VideoSession, VulkanCtx,
+    VulkanDecoderError,
+};
+
+mod images;
+mod parameters;
+
+pub(super) struct VideoSessionResources<'a> {
+    pub(crate) video_session: VideoSession,
+    pub(crate) parameters_manager: VideoSessionParametersManager,
+    pub(crate) decoding_images: DecodingImages<'a>,
+}
+
+impl VideoSessionResources<'_> {
+    pub(crate) fn new_from_sps(
+        vulkan_ctx: &VulkanCtx,
+        decode_buffer: &CommandBuffer,
+        sps: &SeqParameterSet,
+        fence_memory_barrier_completed: &Fence,
+    ) -> Result<Self, VulkanDecoderError> {
+        let profile = H264ProfileInfo::decode_h264_yuv420();
+
+        let width = sps.width()?;
+        let height = sps.height()?;
+
+        let max_coded_extent = vk::Extent2D { width, height };
+        // +1 for the currently decoded frame
+        let max_dpb_slots = sps.max_num_ref_frames + 1;
+        let max_active_references = sps.max_num_ref_frames;
+
+        let video_session = VideoSession::new(
+            vulkan_ctx,
+            &profile.profile_info,
+            max_coded_extent,
+            max_dpb_slots,
+            max_active_references,
+            &vulkan_ctx.video_capabilities.std_header_version,
+        )?;
+
+        let mut parameters_manager =
+            VideoSessionParametersManager::new(vulkan_ctx, video_session.session)?;
+
+        parameters_manager.put_sps(sps)?;
+
+        let decoding_images = Self::new_decoding_images(
+            vulkan_ctx,
+            max_coded_extent,
+            max_dpb_slots,
+            decode_buffer,
+            fence_memory_barrier_completed,
+        )?;
+
+        Ok(VideoSessionResources {
+            video_session,
+            parameters_manager,
+            decoding_images,
+        })
+    }
+
+    pub(crate) fn process_sps(
+        &mut self,
+        vulkan_ctx: &VulkanCtx,
+        decode_buffer: &CommandBuffer,
+        sps: &SeqParameterSet,
+        fence_memory_barrier_completed: &Fence,
+    ) -> Result<(), VulkanDecoderError> {
+        let profile = H264ProfileInfo::decode_h264_yuv420();
+
+        let width = sps.width()?;
+        let height = sps.height()?;
+
+        let max_coded_extent = vk::Extent2D { width, height };
+        // +1 for the currently decoded frame
+        let max_dpb_slots = sps.max_num_ref_frames + 1;
+        let max_active_references = sps.max_num_ref_frames;
+
+        if self.video_session.max_coded_extent.width >= width
+            && self.video_session.max_coded_extent.height >= height
+            && self.video_session.max_dpb_slots >= max_dpb_slots
+        {
+            // the current session can handle the new stream parameters; no need to recreate it
+            self.parameters_manager.put_sps(sps)?;
+            return Ok(());
+        }
+
+        self.video_session = VideoSession::new(
+            vulkan_ctx,
+            &profile.profile_info,
+            max_coded_extent,
+            max_dpb_slots,
+            max_active_references,
+            &vulkan_ctx.video_capabilities.std_header_version,
+        )?;
+
+        self.parameters_manager
+            .change_session(self.video_session.session)?;
+        self.parameters_manager.put_sps(sps)?;
+
+        self.decoding_images = Self::new_decoding_images(
+            vulkan_ctx,
+            max_coded_extent,
+            max_dpb_slots,
+            decode_buffer,
+            fence_memory_barrier_completed,
+        )?;
+
+        Ok(())
+    }
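+
+    // To make the reuse condition above concrete: a session created for 1920x1080 with 4 DPB
+    // slots can absorb an SPS for 1280x720 with the same reference count without being
+    // recreated, while an SPS for 3840x2160 forces a new video session and fresh decoding
+    // images. (Resolutions are arbitrary examples; the tuples are (width, height, dpb_slots).)
+    //
+    // ```
+    // fn session_sufficient(max: (u32, u32, u32), wanted: (u32, u32, u32)) -> bool {
+    //     max.0 >= wanted.0 && max.1 >= wanted.1 && max.2 >= wanted.2
+    // }
+    // assert!(session_sufficient((1920, 1080, 4), (1280, 720, 4)));
+    // assert!(!session_sufficient((1920, 1080, 4), (3840, 2160, 4)));
+    // ```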
+
+    pub(crate) fn process_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> {
+        self.parameters_manager.put_pps(pps)
+    }
+
+    fn new_decoding_images<'a>(
+        vulkan_ctx: &VulkanCtx,
+        max_coded_extent: vk::Extent2D,
+        max_dpb_slots: u32,
+        decode_buffer: &CommandBuffer,
+        fence_memory_barrier_completed: &Fence,
+    ) -> Result<DecodingImages<'a>, VulkanDecoderError> {
+        let profile = H264ProfileInfo::decode_h264_yuv420();
+
+        // FIXME: usually, an sps arrives either at the start of the stream (when all spses are
+        // sent at the beginning of the stream) or right before an IDR. It is however possible
+        // for an sps nal to arrive in between P-frames. This would cause us to lose the
+        // reference pictures we need to decode the stream until we receive a new IDR. It's
+        // unclear whether this is an issue worth fixing; streams like this don't seem to appear
+        // in practice.
+        let (decoding_images, memory_barrier) = DecodingImages::new(
+            vulkan_ctx,
+            profile,
+            &vulkan_ctx.h264_dpb_format_properties,
+            &vulkan_ctx.h264_dst_format_properties,
+            max_coded_extent,
+            max_dpb_slots,
+        )?;
+
+        decode_buffer.begin()?;
+
+        unsafe {
+            vulkan_ctx.device.cmd_pipeline_barrier2(
+                **decode_buffer,
+                &vk::DependencyInfo::default().image_memory_barriers(&memory_barrier),
+            );
+        }
+
+        decode_buffer.end()?;
+
+        vulkan_ctx.queues.h264_decode.submit(
+            decode_buffer,
+            &[],
+            &[],
+            Some(**fence_memory_barrier_completed),
+        )?;
+
+        // TODO: this shouldn't be a fence
+        fence_memory_barrier_completed.wait_and_reset(u64::MAX)?;
+
+        Ok(decoding_images)
+    }
+
+    pub(crate) fn free_reference_picture(&mut self, i: usize) -> Result<(), VulkanDecoderError> {
+        self.decoding_images.free_reference_picture(i)
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/session_resources/images.rs b/vk-video/src/vulkan_decoder/session_resources/images.rs
new file mode 100644
index 000000000..c62afbd50
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/session_resources/images.rs
@@ -0,0 +1,291 @@
+use std::sync::Arc;
+
+use ash::vk;
+
+use crate::{
+    vulkan_decoder::{H264ProfileInfo, Image, ImageView},
+    VulkanCtx, VulkanDecoderError,
+};
+
+pub(crate) struct DecodingImages<'a> {
+    pub(crate) dpb_image: DecodingImageBundle<'a>,
+    pub(crate) dpb_slot_active: Vec<bool>,
+    pub(crate) dst_image: Option<DecodingImageBundle<'a>>,
+}
+
+pub(crate) struct DecodingImageBundle<'a> {
+    pub(crate) image: Arc<Image>,
+    pub(crate) _image_view: ImageView,
+    pub(crate) video_resource_info: Vec<vk::VideoPictureResourceInfoKHR<'a>>,
+}
+
+impl<'a> DecodingImageBundle<'a> {
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn new(
+        vulkan_ctx: &VulkanCtx,
+        format: &vk::VideoFormatPropertiesKHR<'a>,
+        dimensions: vk::Extent2D,
+        image_usage: vk::ImageUsageFlags,
+        profile_info: &H264ProfileInfo,
+        array_layer_count: u32,
+        queue_indices: Option<&[u32]>,
+        layout: vk::ImageLayout,
+    ) -> Result<(Self, vk::ImageMemoryBarrier2<'a>), VulkanDecoderError> {
+        let mut profile_list_info = vk::VideoProfileListInfoKHR::default()
+            .profiles(std::slice::from_ref(&profile_info.profile_info));
+
+        let mut image_create_info = vk::ImageCreateInfo::default()
+            .flags(format.image_create_flags)
+            .image_type(format.image_type)
+            .format(format.format)
+            .extent(vk::Extent3D {
+                width: dimensions.width,
+                height: dimensions.height,
+                depth: 1,
+            })
+            .mip_levels(1)
+            .array_layers(array_layer_count)
+            .samples(vk::SampleCountFlags::TYPE_1)
+            .tiling(format.image_tiling)
+            .usage(image_usage)
+            .initial_layout(vk::ImageLayout::UNDEFINED)
+            .push_next(&mut profile_list_info);
+
+        match queue_indices {
+            Some(indices) => {
+                image_create_info = image_create_info
+                    .sharing_mode(vk::SharingMode::CONCURRENT)
+                    .queue_family_indices(indices);
+            }
+            None => {
+                image_create_info = image_create_info.sharing_mode(vk::SharingMode::EXCLUSIVE);
+            }
+        }
+
+        let image = Arc::new(Image::new(
+            vulkan_ctx.allocator.clone(),
+            &image_create_info,
+        )?);
+
+        let subresource_range = vk::ImageSubresourceRange {
+            aspect_mask: vk::ImageAspectFlags::COLOR,
+            base_mip_level: 0,
+            level_count: 1,
+            base_array_layer: 0,
+            layer_count: vk::REMAINING_ARRAY_LAYERS,
+        };
+
+        let image_view_create_info = vk::ImageViewCreateInfo::default()
+            .flags(vk::ImageViewCreateFlags::empty())
+            .image(**image)
+            .view_type(if array_layer_count ==
1 { + vk::ImageViewType::TYPE_2D + } else { + vk::ImageViewType::TYPE_2D_ARRAY + }) + .format(format.format) + .components(vk::ComponentMapping::default()) + .subresource_range(subresource_range); + + let image_view = ImageView::new( + vulkan_ctx.device.clone(), + image.clone(), + &image_view_create_info, + )?; + + let video_resource_info = (0..array_layer_count) + .map(|i| { + vk::VideoPictureResourceInfoKHR::default() + .coded_offset(vk::Offset2D { x: 0, y: 0 }) + .coded_extent(dimensions) + .base_array_layer(i) + .image_view_binding(image_view.view) + }) + .collect(); + + let image_memory_barrier = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::UNDEFINED) + .new_layout(layout) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(**image) + .subresource_range(subresource_range); + + Ok(( + Self { + image, + _image_view: image_view, + video_resource_info, + }, + image_memory_barrier, + )) + } + + fn extent(&self) -> vk::Extent3D { + self.image.extent + } +} + +impl<'a> DecodingImages<'a> { + pub(crate) fn target_picture_resource_info( + &'a self, + new_reference_slot_index: usize, + ) -> Option> { + match &self.dst_image { + Some(image) => Some(image.video_resource_info[0]), + None => self.video_resource_info(new_reference_slot_index).copied(), + } + } + + pub(crate) fn target_info( + &self, + new_reference_slot_index: usize, + ) -> (vk::Image, vk::ImageLayout, usize) { + match &self.dst_image { + Some(image) => (**image.image, vk::ImageLayout::VIDEO_DECODE_DST_KHR, 0), + None => ( + **self.dpb_image.image, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + new_reference_slot_index, + ), + } + } + + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + profile: H264ProfileInfo, + dpb_format: &vk::VideoFormatPropertiesKHR<'a>, + dst_format: &Option>, + dimensions: vk::Extent2D, + max_dpb_slots: u32, + ) -> Result<(Self, Vec>), VulkanDecoderError> { + let dpb_image_usage = if dst_format.is_some() { + dpb_format.image_usage_flags & vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + } else { + dpb_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + | vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC) + }; + + let queue_indices = [ + vulkan_ctx.queues.transfer.idx as u32, + vulkan_ctx.queues.h264_decode.idx as u32, + ]; + + let (dpb_image, dpb_memory_barrier) = DecodingImageBundle::new( + vulkan_ctx, + dpb_format, + dimensions, + dpb_image_usage, + &profile, + max_dpb_slots, + if dst_format.is_some() { + None + } else { + Some(&queue_indices) + }, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + )?; + + let output = dst_format + .map(|dst_format| { + let dst_image_usage = dst_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC); + DecodingImageBundle::new( + vulkan_ctx, + &dst_format, + dimensions, + dst_image_usage, + &profile, + 1, + Some(&queue_indices), + vk::ImageLayout::VIDEO_DECODE_DST_KHR, + ) + }) + .transpose()?; + + let (dst_image, dst_memory_barrier) = match output { + Some((output_images, output_memory_barrier)) => { + (Some(output_images), Some(output_memory_barrier)) + } + None => (None, None), + }; + + let barriers = [dpb_memory_barrier] + .into_iter() + .chain(dst_memory_barrier) + .collect::>(); + + Ok(( + Self { + dpb_image, + 
dpb_slot_active: vec![false; max_dpb_slots as usize],
+                dst_image,
+            },
+            barriers,
+        ))
+    }
+
+    #[allow(dead_code)]
+    pub(crate) fn dpb_extent(&self) -> vk::Extent3D {
+        self.dpb_image.extent()
+    }
+
+    #[allow(dead_code)]
+    pub(crate) fn dst_extent(&self) -> Option<vk::Extent3D> {
+        self.dst_image.as_ref().map(|i| i.extent())
+    }
+
+    pub(crate) fn reference_slot_info(&self) -> Vec<vk::VideoReferenceSlotInfoKHR> {
+        self.dpb_image
+            .video_resource_info
+            .iter()
+            .enumerate()
+            .map(|(i, info)| {
+                vk::VideoReferenceSlotInfoKHR::default()
+                    .picture_resource(info)
+                    .slot_index(if self.dpb_slot_active[i] {
+                        i as i32
+                    } else {
+                        -1
+                    })
+            })
+            .collect()
+    }
+
+    pub(crate) fn allocate_reference_picture(&mut self) -> Result<usize, VulkanDecoderError> {
+        let i = self
+            .dpb_slot_active
+            .iter()
+            .enumerate()
+            .find(|(_, &v)| !v)
+            .map(|(i, _)| i)
+            .ok_or(VulkanDecoderError::NoFreeSlotsInDpb)?;
+
+        self.dpb_slot_active[i] = true;
+
+        Ok(i)
+    }
+
+    pub(crate) fn video_resource_info(&self, i: usize) -> Option<&vk::VideoPictureResourceInfoKHR> {
+        self.dpb_image.video_resource_info.get(i)
+    }
+
+    pub(crate) fn free_reference_picture(&mut self, i: usize) -> Result<(), VulkanDecoderError> {
+        self.dpb_slot_active[i] = false;
+
+        Ok(())
+    }
+
+    pub(crate) fn reset_all_allocations(&mut self) {
+        self.dpb_slot_active
+            .iter_mut()
+            .for_each(|slot| *slot = false);
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/session_resources/parameters.rs b/vk-video/src/vulkan_decoder/session_resources/parameters.rs
new file mode 100644
index 000000000..e5f366f67
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/session_resources/parameters.rs
@@ -0,0 +1,116 @@
+use std::{collections::HashMap, sync::Arc};
+
+use ash::vk;
+use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet};
+
+use crate::{
+    vulkan_decoder::{
+        Device, VideoSessionParameters, VkPictureParameterSet, VkSequenceParameterSet,
+    },
+    VulkanCtx, VulkanDecoderError,
+};
+
+/// Since `VideoSessionParameters` can only add sps and pps values (inserting an sps or pps with
+/// an existing id is prohibited), this is an abstraction which provides the capability to
+/// replace an existing sps or pps.
+pub(crate) struct VideoSessionParametersManager {
+    pub(crate) parameters: VideoSessionParameters,
+    sps: HashMap<u8, VkSequenceParameterSet>,
+    pps: HashMap<(u8, u8), VkPictureParameterSet>,
+    device: Arc<Device>,
+    session: vk::VideoSessionKHR,
+}
+
+impl VideoSessionParametersManager {
+    pub(crate) fn new(
+        vulkan_ctx: &VulkanCtx,
+        session: vk::VideoSessionKHR,
+    ) -> Result<Self, VulkanDecoderError> {
+        Ok(Self {
+            parameters: VideoSessionParameters::new(
+                vulkan_ctx.device.clone(),
+                session,
+                &[],
+                &[],
+                None,
+            )?,
+            sps: HashMap::new(),
+            pps: HashMap::new(),
+            device: vulkan_ctx.device.clone(),
+            session,
+        })
+    }
+
+    pub(crate) fn parameters(&self) -> vk::VideoSessionParametersKHR {
+        self.parameters.parameters
+    }
+
+    pub(crate) fn change_session(
+        &mut self,
+        session: vk::VideoSessionKHR,
+    ) -> Result<(), VulkanDecoderError> {
+        if self.session == session {
+            return Ok(());
+        }
+        self.session = session;
+
+        let sps = self.sps.values().map(|sps| sps.sps).collect::<Vec<_>>();
+        let pps = self.pps.values().map(|pps| pps.pps).collect::<Vec<_>>();
+
+        self.parameters =
+            VideoSessionParameters::new(self.device.clone(), session, &sps, &pps, None)?;
+
+        Ok(())
+    }
+
+    // It is probably not optimal to insert the sps and pps separately. This could be optimized,
+    // so that the insertion happens lazily, when the parameters are bound to a session.
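+    //
+    // The `Occupied`/`Vacant` split in `put_sps` and `put_pps` below encodes the replace
+    // semantics: because Vulkan forbids re-adding an existing id, replacing an entry means
+    // building a brand-new `VideoSessionParameters` seeded with the updated value, while a new
+    // id can simply be `add`ed to the existing object.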
+    pub(crate) fn put_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> {
+        let key = sps.seq_parameter_set_id.id();
+        match self.sps.entry(key) {
+            std::collections::hash_map::Entry::Occupied(mut e) => {
+                e.insert(sps.try_into()?);
+
+                self.parameters = VideoSessionParameters::new(
+                    self.device.clone(),
+                    self.session,
+                    &[self.sps[&key].sps],
+                    &[],
+                    Some(&self.parameters),
+                )?
+            }
+            std::collections::hash_map::Entry::Vacant(e) => {
+                e.insert(sps.try_into()?);
+
+                self.parameters.add(&[self.sps[&key].sps], &[])?;
+            }
+        }
+
+        Ok(())
+    }
+
+    pub(crate) fn put_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> {
+        let key = (pps.seq_parameter_set_id.id(), pps.pic_parameter_set_id.id());
+        match self.pps.entry(key) {
+            std::collections::hash_map::Entry::Occupied(mut e) => {
+                e.insert(pps.try_into()?);
+
+                self.parameters = VideoSessionParameters::new(
+                    self.device.clone(),
+                    self.session,
+                    &[],
+                    &[self.pps[&key].pps],
+                    Some(&self.parameters),
+                )?;
+            }
+
+            std::collections::hash_map::Entry::Vacant(e) => {
+                e.insert(pps.try_into()?);
+
+                self.parameters.add(&[], &[self.pps[&key].pps])?;
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/vulkan_ctx.rs b/vk-video/src/vulkan_decoder/vulkan_ctx.rs
new file mode 100644
index 000000000..953b29ea5
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/vulkan_ctx.rs
@@ -0,0 +1,700 @@
+use std::{
+    ffi::{c_void, CStr},
+    sync::Arc,
+};
+
+use ash::{vk, Entry};
+use tracing::{error, info};
+
+use super::{
+    Allocator, CommandBuffer, CommandPool, DebugMessenger, Device, H264ProfileInfo, Instance,
+    VulkanDecoderError,
+};
+
+const REQUIRED_EXTENSIONS: &[&CStr] = &[
+    vk::KHR_VIDEO_QUEUE_NAME,
+    vk::KHR_VIDEO_DECODE_QUEUE_NAME,
+    vk::KHR_VIDEO_DECODE_H264_NAME,
+];
+
+#[derive(thiserror::Error, Debug)]
+pub enum VulkanCtxError {
+    #[error("Error loading vulkan: {0}")]
+    LoadingError(#[from] ash::LoadingError),
+
+    #[error("Vulkan error: {0}")]
+    VkError(#[from] vk::Result),
+
+    #[error("wgpu instance error: {0}")]
+    WgpuInstanceError(#[from] wgpu::hal::InstanceError),
+
+    #[error("wgpu device error: {0}")]
+    WgpuDeviceError(#[from] wgpu::hal::DeviceError),
+
+    #[error("wgpu request device error: {0}")]
+    WgpuRequestDeviceError(#[from] wgpu::RequestDeviceError),
+
+    #[error("cannot create a wgpu adapter")]
+    WgpuAdapterNotCreated,
+
+    #[error("Cannot find a suitable physical device")]
+    NoDevice,
+
+    #[error("String conversion error: {0}")]
+    StringConversionError(#[from] std::ffi::FromBytesUntilNulError),
+}
+
+pub struct VulkanCtx {
+    _entry: Arc<Entry>,
+    _instance: Arc<Instance>,
+    _physical_device: vk::PhysicalDevice,
+    pub(crate) device: Arc<Device>,
+    pub(crate) allocator: Arc<Allocator>,
+    pub(crate) queues: Queues,
+    _debug_messenger: Option<DebugMessenger>,
+    pub(crate) video_capabilities: vk::VideoCapabilitiesKHR<'static>,
+    pub(crate) h264_dpb_format_properties: vk::VideoFormatPropertiesKHR<'static>,
+    pub(crate) h264_dst_format_properties: Option<vk::VideoFormatPropertiesKHR<'static>>,
+    pub wgpu_ctx: WgpuCtx,
+}
+
+pub struct WgpuCtx {
+    pub instance: Arc<wgpu::Instance>,
+    pub adapter: Arc<wgpu::Adapter>,
+    pub device: Arc<wgpu::Device>,
+    pub queue: Arc<wgpu::Queue>,
+}
+
+pub(crate) struct CommandPools {
+    pub(crate) _decode_pool: Arc<CommandPool>,
+    pub(crate) _transfer_pool: Arc<CommandPool>,
+}
+
+pub(crate) struct Queue {
+    pub(crate) queue: std::sync::Mutex<vk::Queue>,
+    pub(crate) idx: usize,
+    _video_properties: vk::QueueFamilyVideoPropertiesKHR<'static>,
+    pub(crate) query_result_status_properties:
+        vk::QueueFamilyQueryResultStatusPropertiesKHR<'static>,
+    device: Arc<Device>,
+}
+
+impl Queue {
+    pub(crate) fn supports_result_status_queries(&self) -> bool {
+        self.query_result_status_properties
+            .query_result_status_support
+            == vk::TRUE
+    }
+
+    pub(crate) fn submit(
+        &self,
+        buffer: &CommandBuffer,
+        wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)],
+        signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)],
+        fence: Option<vk::Fence>,
+    ) -> Result<(), VulkanDecoderError> {
+        fn to_sem_submit_info(
+            submits: &[(vk::Semaphore, vk::PipelineStageFlags2)],
+        ) -> Vec<vk::SemaphoreSubmitInfo> {
+            submits
+                .iter()
+                .map(|&(sem, stage)| {
+                    vk::SemaphoreSubmitInfo::default()
+                        .semaphore(sem)
+                        .stage_mask(stage)
+                })
+                .collect::<Vec<_>>()
+        }
+
+        let wait_semaphores = to_sem_submit_info(wait_semaphores);
+        let signal_semaphores = to_sem_submit_info(signal_semaphores);
+
+        let buffer_submit_info =
+            [vk::CommandBufferSubmitInfo::default().command_buffer(buffer.buffer)];
+
+        let submit_info = [vk::SubmitInfo2::default()
+            .wait_semaphore_infos(&wait_semaphores)
+            .signal_semaphore_infos(&signal_semaphores)
+            .command_buffer_infos(&buffer_submit_info)];
+
+        unsafe {
+            self.device.queue_submit2(
+                *self.queue.lock().unwrap(),
+                &submit_info,
+                fence.unwrap_or(vk::Fence::null()),
+            )?
+        };
+
+        Ok(())
+    }
+}
+
+pub(crate) struct Queues {
+    pub(crate) transfer: Queue,
+    pub(crate) h264_decode: Queue,
+    pub(crate) wgpu: Queue,
+}
+
+impl VulkanCtx {
+    pub fn new(
+        wgpu_features: wgpu::Features,
+        wgpu_limits: wgpu::Limits,
+    ) -> Result<Self, VulkanCtxError> {
+        let entry = Arc::new(unsafe { Entry::load()? });
+
+        let instance_extension_properties =
+            unsafe { entry.enumerate_instance_extension_properties(None)? };
+        info!(
+            "instance_extension_properties amount: {}",
+            instance_extension_properties.len()
+        );
+
+        let api_version = vk::make_api_version(0, 1, 3, 0);
+        let app_info = vk::ApplicationInfo {
+            api_version,
+            ..Default::default()
+        };
+
+        let layers = if cfg!(debug_assertions) {
+            vec![c"VK_LAYER_KHRONOS_validation".as_ptr()]
+        } else {
+            Vec::new()
+        };
+
+        let extensions = if cfg!(debug_assertions) {
+            vec![vk::EXT_DEBUG_UTILS_NAME]
+        } else {
+            Vec::new()
+        };
+
+        let wgpu_extensions = wgpu::hal::vulkan::Instance::desired_extensions(
+            &entry,
+            api_version,
+            wgpu::InstanceFlags::empty(),
+        )?;
+
+        let extensions = extensions
+            .into_iter()
+            .chain(wgpu_extensions)
+            .collect::<Vec<_>>();
+
+        let extension_ptrs = extensions.iter().map(|e| e.as_ptr()).collect::<Vec<_>>();
+
+        let create_info = vk::InstanceCreateInfo::default()
+            .application_info(&app_info)
+            .enabled_layer_names(&layers)
+            .enabled_extension_names(&extension_ptrs);
+
+        let instance = unsafe { entry.create_instance(&create_info, None) }?;
+        let video_queue_instance_ext = ash::khr::video_queue::Instance::new(&entry, &instance);
+        let debug_utils_instance_ext = ash::ext::debug_utils::Instance::new(&entry, &instance);
+
+        let instance = Arc::new(Instance {
+            instance,
+            _entry: entry.clone(),
+            video_queue_instance_ext,
+            debug_utils_instance_ext,
+        });
+
+        let debug_messenger = if cfg!(debug_assertions) {
+            Some(DebugMessenger::new(instance.clone())?)
+        } else {
+            None
+        };
+
+        let wgpu_instance = unsafe {
+            wgpu::hal::vulkan::Instance::from_raw(
+                (*entry).clone(),
+                instance.instance.clone(),
+                api_version,
+                0,
+                None,
+                extensions,
+                wgpu::InstanceFlags::empty(),
+                false,
+                None,
+            )?
+        };
+
+        let physical_devices = unsafe { instance.enumerate_physical_devices()? };
+
+        let ChosenDevice {
+            physical_device,
+            queue_indices,
+            h264_dpb_format_properties,
+            h264_dst_format_properties,
+            video_capabilities,
+        } = find_device(&physical_devices, &instance, REQUIRED_EXTENSIONS)?;
+
+        let wgpu_adapter = wgpu_instance
+            .expose_adapter(physical_device)
+            .ok_or(VulkanCtxError::WgpuAdapterNotCreated)?;
+
+        let wgpu_features = wgpu_features | wgpu::Features::TEXTURE_FORMAT_NV12;
+
+        // TODO: we can only get the required extensions after exposing the adapter; the creation
+        // of the adapter and verification of whether the device supports all extensions should
+        // happen while picking the device.
+        let wgpu_extensions = wgpu_adapter
+            .adapter
+            .required_device_extensions(wgpu_features);
+
+        let required_extensions = REQUIRED_EXTENSIONS
+            .iter()
+            .copied()
+            .chain(wgpu_extensions)
+            .collect::<Vec<_>>();
+
+        let required_extensions_as_ptrs = required_extensions
+            .iter()
+            .map(|e| e.as_ptr())
+            .collect::<Vec<_>>();
+
+        let queue_create_infos = queue_indices.queue_create_infos();
+
+        let mut wgpu_physical_device_features = wgpu_adapter
+            .adapter
+            .physical_device_features(&required_extensions, wgpu_features);
+
+        let mut vk_synch_2_feature =
+            vk::PhysicalDeviceSynchronization2Features::default().synchronization2(true);
+
+        let device_create_info = vk::DeviceCreateInfo::default()
+            .queue_create_infos(&queue_create_infos)
+            .enabled_extension_names(&required_extensions_as_ptrs);
+
+        let device_create_info = wgpu_physical_device_features
+            .add_to_device_create(device_create_info)
+            .push_next(&mut vk_synch_2_feature);
+
+        let device = unsafe { instance.create_device(physical_device, &device_create_info, None)? };
+        let video_queue_ext = ash::khr::video_queue::Device::new(&instance, &device);
+        let video_decode_queue_ext = ash::khr::video_decode_queue::Device::new(&instance, &device);
+
+        let device = Arc::new(Device {
+            device,
+            video_queue_ext,
+            video_decode_queue_ext,
+            _instance: instance.clone(),
+        });
+
+        let h264_decode_queue =
+            unsafe { device.get_device_queue(queue_indices.h264_decode.idx as u32, 0) };
+        let transfer_queue =
+            unsafe { device.get_device_queue(queue_indices.transfer.idx as u32, 0) };
+        let wgpu_queue = unsafe {
+            device.get_device_queue(queue_indices.graphics_transfer_compute.idx as u32, 0)
+        };
+
+        let queues = Queues {
+            transfer: Queue {
+                queue: transfer_queue.into(),
+                idx: queue_indices.transfer.idx,
+                _video_properties: queue_indices.transfer.video_properties,
+                query_result_status_properties: queue_indices
+                    .transfer
+                    .query_result_status_properties,
+                device: device.clone(),
+            },
+            h264_decode: Queue {
+                queue: h264_decode_queue.into(),
+                idx: queue_indices.h264_decode.idx,
+                _video_properties: queue_indices.h264_decode.video_properties,
+                query_result_status_properties: queue_indices
+                    .h264_decode
+                    .query_result_status_properties,
+                device: device.clone(),
+            },
+            wgpu: Queue {
+                queue: wgpu_queue.into(),
+                idx: queue_indices.graphics_transfer_compute.idx,
+                _video_properties: queue_indices.graphics_transfer_compute.video_properties,
+                query_result_status_properties: queue_indices
+                    .graphics_transfer_compute
+                    .query_result_status_properties,
+                device: device.clone(),
+            },
+        };
+
+        let wgpu_device = unsafe {
+            wgpu_adapter.adapter.device_from_raw(
+                device.device.clone(),
+                false,
+                &required_extensions,
+                wgpu_features,
+                &wgpu::MemoryHints::default(),
+                queue_indices.graphics_transfer_compute.idx as u32,
+                0,
+            )?
+        };
+
+        let allocator = Arc::new(Allocator::new(
+            instance.clone(),
+            physical_device,
+            device.clone(),
+        )?);
+
+        let wgpu_instance =
+            unsafe { wgpu::Instance::from_hal::<wgpu::hal::api::Vulkan>(wgpu_instance) };
+        let wgpu_adapter = unsafe { wgpu_instance.create_adapter_from_hal(wgpu_adapter) };
+        let (wgpu_device, wgpu_queue) = unsafe {
+            wgpu_adapter.create_device_from_hal(
+                wgpu_device,
+                &wgpu::DeviceDescriptor {
+                    label: Some("wgpu device created by the vulkan video decoder"),
+                    memory_hints: wgpu::MemoryHints::default(),
+                    required_limits: wgpu_limits,
+                    required_features: wgpu_features,
+                },
+                None,
+            )?
+        };
+
+        let wgpu_ctx = WgpuCtx {
+            instance: Arc::new(wgpu_instance),
+            adapter: Arc::new(wgpu_adapter),
+            device: Arc::new(wgpu_device),
+            queue: Arc::new(wgpu_queue),
+        };
+
+        Ok(Self {
+            _entry: entry,
+            _instance: instance,
+            _physical_device: physical_device,
+            device,
+            allocator,
+            queues,
+            _debug_messenger: debug_messenger,
+            video_capabilities,
+            h264_dpb_format_properties,
+            h264_dst_format_properties,
+            wgpu_ctx,
+        })
+    }
+}
+
+impl std::fmt::Debug for VulkanCtx {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("VulkanCtx").finish()
+    }
+}
+
+struct ChosenDevice<'a> {
+    physical_device: vk::PhysicalDevice,
+    queue_indices: QueueIndices<'a>,
+    h264_dpb_format_properties: vk::VideoFormatPropertiesKHR<'a>,
+    h264_dst_format_properties: Option<vk::VideoFormatPropertiesKHR<'a>>,
+    video_capabilities: vk::VideoCapabilitiesKHR<'a>,
+}
+
+fn find_device<'a>(
+    devices: &[vk::PhysicalDevice],
+    instance: &Instance,
+    required_extension_names: &[&CStr],
+) -> Result<ChosenDevice<'a>, VulkanCtxError> {
+    for &device in devices {
+        let properties = unsafe { instance.get_physical_device_properties(device) };
+
+        let mut vk_13_features = vk::PhysicalDeviceVulkan13Features::default();
+        let mut features = vk::PhysicalDeviceFeatures2::default().push_next(&mut vk_13_features);
+
+        unsafe { instance.get_physical_device_features2(device, &mut features) };
+        let extensions = unsafe { instance.enumerate_device_extension_properties(device)? };
+
+        if vk_13_features.synchronization2 == 0 {
+            error!(
+                "device {:?} does not support the required synchronization2 feature",
+                properties.device_name_as_c_str()?
+            );
+        }
+
+        if !required_extension_names.iter().all(|&extension_name| {
+            extensions.iter().any(|ext| {
+                let Ok(name) = ext.extension_name_as_c_str() else {
+                    return false;
+                };
+
+                if name != extension_name {
+                    return false;
+                };
+
+                true
+            })
+        }) {
+            error!(
+                "device {:?} does not support the required extensions",
+                properties.device_name_as_c_str()?
+            );
+            continue;
+        }
+
+        let queues_len =
+            unsafe { instance.get_physical_device_queue_family_properties2_len(device) };
+        let mut queues = vec![vk::QueueFamilyProperties2::default(); queues_len];
+        let mut video_properties = vec![vk::QueueFamilyVideoPropertiesKHR::default(); queues_len];
+        let mut query_result_status_properties =
+            vec![vk::QueueFamilyQueryResultStatusPropertiesKHR::default(); queues_len];
+
+        for ((queue, video_properties), query_result_properties) in queues
+            .iter_mut()
+            .zip(video_properties.iter_mut())
+            .zip(query_result_status_properties.iter_mut())
+        {
+            *queue = queue
+                .push_next(video_properties)
+                .push_next(query_result_properties);
+        }
+
+        unsafe { instance.get_physical_device_queue_family_properties2(device, &mut queues) };
+
+        let profile_info = H264ProfileInfo::decode_h264_yuv420();
+
+        let mut h264_caps = vk::VideoDecodeH264CapabilitiesKHR::default();
+        let mut decode_caps = vk::VideoDecodeCapabilitiesKHR {
+            // `vk::VideoDecodeCapabilitiesKHR` does not expose a `push_next` builder,
+            // so the h264 capabilities struct has to be chained through `p_next` manually.
+            p_next: (&mut h264_caps as *mut _) as *mut c_void,
+            ..Default::default()
+        };
+
+        let mut caps = vk::VideoCapabilitiesKHR::default().push_next(&mut decode_caps);
+
+        unsafe {
+            (instance
+                .video_queue_instance_ext
+                .fp()
+                .get_physical_device_video_capabilities_khr)(
+                device,
+                &profile_info.profile_info,
+                &mut caps,
+            )
+            .result()?
+        };
+
+        let video_capabilities = vk::VideoCapabilitiesKHR::default()
+            .flags(caps.flags)
+            .min_bitstream_buffer_size_alignment(caps.min_bitstream_buffer_size_alignment)
+            .min_bitstream_buffer_offset_alignment(caps.min_bitstream_buffer_offset_alignment)
+            .picture_access_granularity(caps.picture_access_granularity)
+            .min_coded_extent(caps.min_coded_extent)
+            .max_coded_extent(caps.max_coded_extent)
+            .max_dpb_slots(caps.max_dpb_slots)
+            .max_active_reference_pictures(caps.max_active_reference_pictures)
+            .std_header_version(caps.std_header_version);
+        info!("caps: {caps:#?}");
+
+        let flags = decode_caps.flags;
+
+        let h264_dpb_format_properties =
+            if flags.contains(vk::VideoDecodeCapabilityFlagsKHR::DPB_AND_OUTPUT_COINCIDE) {
+                query_video_format_properties(
+                    device,
+                    &instance.video_queue_instance_ext,
+                    &profile_info,
+                    vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR
+                        | vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR
+                        | vk::ImageUsageFlags::TRANSFER_SRC,
+                )?
+            } else {
+                query_video_format_properties(
+                    device,
+                    &instance.video_queue_instance_ext,
+                    &profile_info,
+                    vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR,
+                )?
+            };
+
+        let h264_dst_format_properties =
+            if flags.contains(vk::VideoDecodeCapabilityFlagsKHR::DPB_AND_OUTPUT_COINCIDE) {
+                None
+            } else {
+                Some(query_video_format_properties(
+                    device,
+                    &instance.video_queue_instance_ext,
+                    &profile_info,
+                    vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR | vk::ImageUsageFlags::TRANSFER_SRC,
+                )?)
+            };
+
+        let h264_dpb_format_properties =
+            if flags.contains(vk::VideoDecodeCapabilityFlagsKHR::DPB_AND_OUTPUT_COINCIDE) {
+                match h264_dpb_format_properties
+                    .into_iter()
+                    .find(|f| f.format == vk::Format::G8_B8R8_2PLANE_420_UNORM)
+                {
+                    Some(f) => f,
+                    None => continue,
+                }
+            } else {
+                h264_dpb_format_properties[0]
+            };
+
+        let h264_dst_format_properties = match h264_dst_format_properties {
+            Some(format_properties) => match format_properties
+                .into_iter()
+                .find(|f| f.format == vk::Format::G8_B8R8_2PLANE_420_UNORM)
+            {
+                Some(f) => Some(f),
+                None => continue,
+            },
+            None => None,
+        };
+
+        let video_queues = queues
+            .iter()
+            .enumerate()
+            .filter(|(_, q)| {
+                q.queue_family_properties
+                    .queue_flags
+                    .contains(vk::QueueFlags::VIDEO_DECODE_KHR)
+            })
+            .map(|(i, _)| i)
+            .collect::<Vec<_>>(); // TODO: have to split the queues
+
+        let Some(transfer_queue_idx) = queues
+            .iter()
+            .enumerate()
+            .find(|(_, q)| {
+                q.queue_family_properties
+                    .queue_flags
+                    .contains(vk::QueueFlags::TRANSFER)
+                    && !q
+                        .queue_family_properties
+                        .queue_flags
+                        .intersects(vk::QueueFlags::GRAPHICS)
+            })
+            .map(|(i, _)| i)
+        else {
+            continue;
+        };
+
+        let Some(graphics_transfer_compute_queue_idx) = queues
+            .iter()
+            .enumerate()
+            .find(|(_, q)| {
+                q.queue_family_properties.queue_flags.contains(
+                    vk::QueueFlags::GRAPHICS | vk::QueueFlags::TRANSFER | vk::QueueFlags::COMPUTE,
+                )
+            })
+            .map(|(i, _)| i)
+        else {
+            continue;
+        };
+
+        let Some(decode_queue_idx) = video_queues.into_iter().find(|&i| {
+            video_properties[i]
+                .video_codec_operations
+                .contains(vk::VideoCodecOperationFlagsKHR::DECODE_H264)
+        }) else {
+            continue;
+        };
+
+        info!("decode_caps: {decode_caps:#?}");
+        info!("h264_caps: {h264_caps:#?}");
+        info!("dpb_format_properties: {h264_dpb_format_properties:#?}");
+        info!("dst_format_properties: {h264_dst_format_properties:#?}");
+
+        return Ok(ChosenDevice {
+            physical_device: device,
+            queue_indices: QueueIndices {
+                transfer: QueueIndex {
+                    idx: transfer_queue_idx,
+                    video_properties: video_properties[transfer_queue_idx],
+                    query_result_status_properties: query_result_status_properties
+                        [transfer_queue_idx],
+                },
+                h264_decode: QueueIndex {
+                    idx: decode_queue_idx,
+                    video_properties: video_properties[decode_queue_idx],
+                    query_result_status_properties: query_result_status_properties
+                        [decode_queue_idx],
+                },
+                graphics_transfer_compute: QueueIndex {
+                    idx: graphics_transfer_compute_queue_idx,
+                    video_properties: video_properties[graphics_transfer_compute_queue_idx],
+                    query_result_status_properties: query_result_status_properties
+                        [graphics_transfer_compute_queue_idx],
+                },
+            },
+            h264_dpb_format_properties,
+            h264_dst_format_properties,
+            video_capabilities,
+        });
+    }
+
+    Err(VulkanCtxError::NoDevice)
+}
+
+fn query_video_format_properties<'a>(
+    device: vk::PhysicalDevice,
+    video_queue_instance_ext: &ash::khr::video_queue::Instance,
+    profile_info: &H264ProfileInfo,
+    image_usage: vk::ImageUsageFlags,
+) -> Result<Vec<vk::VideoFormatPropertiesKHR<'a>>, VulkanCtxError> {
+    let mut profile_list_info = vk::VideoProfileListInfoKHR::default()
+        .profiles(std::slice::from_ref(&profile_info.profile_info));
+
+    let format_info = vk::PhysicalDeviceVideoFormatInfoKHR::default()
+        .image_usage(image_usage)
+        .push_next(&mut profile_list_info);
+
+    let mut format_info_length = 0;
+
+    unsafe {
+        (video_queue_instance_ext
+            .fp()
+            .get_physical_device_video_format_properties_khr)(
+            device,
+            &format_info,
+            &mut format_info_length,
+            std::ptr::null_mut(),
+        )
+        .result()?;
+    }
+
+    let mut format_properties =
+        vec![vk::VideoFormatPropertiesKHR::default(); format_info_length as usize];
+
+    unsafe {
+        (video_queue_instance_ext
+            .fp()
+            .get_physical_device_video_format_properties_khr)(
+            device,
+            &format_info,
+            &mut format_info_length,
+            format_properties.as_mut_ptr(),
+        )
+        .result()?;
+    }
+
+    Ok(format_properties)
+}
+
+struct QueueIndex<'a> {
+    idx: usize,
+    video_properties: vk::QueueFamilyVideoPropertiesKHR<'a>,
+    query_result_status_properties: vk::QueueFamilyQueryResultStatusPropertiesKHR<'a>,
+}
+
+pub(crate) struct QueueIndices<'a> {
+    transfer: QueueIndex<'a>,
+    h264_decode: QueueIndex<'a>,
+    graphics_transfer_compute: QueueIndex<'a>,
+}
+
+impl QueueIndices<'_> {
+    fn queue_create_infos(&self) -> Vec<vk::DeviceQueueCreateInfo> {
+        [
+            self.h264_decode.idx,
+            self.transfer.idx,
+            self.graphics_transfer_compute.idx,
+        ]
+        .into_iter()
+        .collect::<std::collections::HashSet<_>>()
+        .into_iter()
+        .map(|i| {
+            vk::DeviceQueueCreateInfo::default()
+                .queue_family_index(i as u32)
+                .queue_priorities(&[1.0])
+        })
+        .collect::<Vec<_>>()
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/wrappers.rs b/vk-video/src/vulkan_decoder/wrappers.rs
new file mode 100644
index 000000000..5067ae512
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/wrappers.rs
@@ -0,0 +1,61 @@
+use std::sync::Arc;
+
+use ash::Entry;
+
+mod command;
+mod debug;
+mod mem;
+mod parameter_sets;
+mod sync;
+mod video;
+mod vk_extensions;
+
+pub(crate) use command::*;
+pub(crate) use debug::*;
+pub(crate) use mem::*;
+pub(crate) use parameter_sets::*;
+pub(crate) use sync::*;
+pub(crate) use video::*;
+pub(crate) use vk_extensions::*;
+
+pub(crate) struct Instance {
+    pub(crate) instance: ash::Instance,
+    pub(crate) _entry: Arc<Entry>,
+    pub(crate) video_queue_instance_ext: ash::khr::video_queue::Instance,
+    pub(crate) debug_utils_instance_ext: ash::ext::debug_utils::Instance,
+}
+
+impl Drop for Instance {
+    fn drop(&mut self) {
+        unsafe { self.destroy_instance(None) };
+    }
+}
+
+impl std::ops::Deref for Instance {
+    type Target = ash::Instance;
+
+    fn deref(&self) -> &Self::Target {
+        &self.instance
+    }
+}
+
+pub(crate) struct Device {
+    pub(crate) device: ash::Device,
+    pub(crate) video_queue_ext: ash::khr::video_queue::Device,
+    pub(crate) video_decode_queue_ext: ash::khr::video_decode_queue::Device,
+    pub(crate) _instance: Arc<Instance>,
+}
+
+impl std::ops::Deref for Device {
+    type Target = ash::Device;
+
+    fn deref(&self) -> &Self::Target {
+        &self.device
+    }
+}
+
+impl Drop for Device {
+    fn drop(&mut self) {
+        unsafe { self.destroy_device(None) };
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/wrappers/command.rs b/vk-video/src/vulkan_decoder/wrappers/command.rs
new file mode 100644
index 000000000..8add7a1fa
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/wrappers/command.rs
@@ -0,0 +1,93 @@
+use std::sync::Arc;
+
+use ash::vk;
+
+use crate::vulkan_decoder::{VulkanCtxError, VulkanDecoderError};
+
+use super::Device;
+
+pub(crate) struct CommandPool {
+    pub(crate) command_pool: vk::CommandPool,
+    device: Arc<Device>,
+}
+
+impl CommandPool {
+    pub(crate) fn new(
+        device: Arc<Device>,
+        queue_family_index: usize,
+    ) -> Result<Self, VulkanCtxError> {
+        let create_info = vk::CommandPoolCreateInfo::default()
+            .flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER)
+            .queue_family_index(queue_family_index as u32);
+
+        let command_pool = unsafe { device.create_command_pool(&create_info, None)? };
+
+        Ok(Self {
+            device,
+            command_pool,
+        })
+    }
+}
+
+impl Drop for CommandPool {
+    fn drop(&mut self) {
+        unsafe {
+            self.device.destroy_command_pool(self.command_pool, None);
+        }
+    }
+}
+
+impl std::ops::Deref for CommandPool {
+    type Target = vk::CommandPool;
+
+    fn deref(&self) -> &Self::Target {
+        &self.command_pool
+    }
+}
+
+pub(crate) struct CommandBuffer {
+    pool: Arc<CommandPool>,
+    pub(crate) buffer: vk::CommandBuffer,
+}
+
+impl CommandBuffer {
+    pub(crate) fn new_primary(pool: Arc<CommandPool>) -> Result<Self, VulkanDecoderError> {
+        let allocate_info = vk::CommandBufferAllocateInfo::default()
+            .command_pool(**pool)
+            .level(vk::CommandBufferLevel::PRIMARY)
+            .command_buffer_count(1);
+
+        let buffer = unsafe { pool.device.allocate_command_buffers(&allocate_info)?[0] };
+
+        Ok(Self { pool, buffer })
+    }
+
+    pub(crate) fn begin(&self) -> Result<(), VulkanDecoderError> {
+        unsafe {
+            self.device().begin_command_buffer(
+                self.buffer,
+                &vk::CommandBufferBeginInfo::default()
+                    .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT),
+            )?
+        };
+        Ok(())
+    }
+
+    pub(crate) fn end(&self) -> Result<(), VulkanDecoderError> {
+        unsafe { self.device().end_command_buffer(self.buffer)? };
+
+        Ok(())
+    }
+
+    fn device(&self) -> &Device {
+        &self.pool.device
+    }
+}
+
+impl std::ops::Deref for CommandBuffer {
+    type Target = vk::CommandBuffer;
+
+    fn deref(&self) -> &Self::Target {
+        &self.buffer
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/wrappers/debug.rs b/vk-video/src/vulkan_decoder/wrappers/debug.rs
new file mode 100644
index 000000000..339fd599a
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/wrappers/debug.rs
@@ -0,0 +1,185 @@
+use std::{ffi::c_void, sync::Arc};
+
+use ash::vk::{self, QueryType};
+use tracing::{error, info, trace, warn};
+
+use crate::vulkan_decoder::{VulkanCtxError, VulkanDecoderError};
+
+use super::{Device, Instance};
+
+pub(crate) struct DebugMessenger {
+    messenger: vk::DebugUtilsMessengerEXT,
+    instance: Arc<Instance>,
+}
+
+impl DebugMessenger {
+    pub(crate) fn new(instance: Arc<Instance>) -> Result<Self, VulkanCtxError> {
+        let debug_messenger_create_info = vk::DebugUtilsMessengerCreateInfoEXT::default()
+            .message_severity(
+                vk::DebugUtilsMessageSeverityFlagsEXT::ERROR
+                    | vk::DebugUtilsMessageSeverityFlagsEXT::WARNING
+                    | vk::DebugUtilsMessageSeverityFlagsEXT::INFO
+                    | vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE,
+            )
+            .message_type(
+                vk::DebugUtilsMessageTypeFlagsEXT::GENERAL
+                    | vk::DebugUtilsMessageTypeFlagsEXT::VALIDATION
+                    | vk::DebugUtilsMessageTypeFlagsEXT::PERFORMANCE,
+            )
+            .pfn_user_callback(Some(debug_messenger_callback));
+
+        let messenger = unsafe {
+            instance
+                .debug_utils_instance_ext
+                .create_debug_utils_messenger(&debug_messenger_create_info, None)?
+        };
+
+        Ok(Self {
+            instance,
+            messenger,
+        })
+    }
+}
+
+impl Drop for DebugMessenger {
+    fn drop(&mut self) {
+        unsafe {
+            self.instance
+                .debug_utils_instance_ext
+                .destroy_debug_utils_messenger(self.messenger, None)
+        };
+    }
+}
+
+unsafe extern "system" fn debug_messenger_callback(
+    message_severity: vk::DebugUtilsMessageSeverityFlagsEXT,
+    message_types: vk::DebugUtilsMessageTypeFlagsEXT,
+    p_callback_data: *const vk::DebugUtilsMessengerCallbackDataEXT<'_>,
+    _p_user_data: *mut c_void,
+) -> vk::Bool32 {
+    let callback_data = unsafe { *p_callback_data };
+    let message_id = callback_data
+        .message_id_name_as_c_str()
+        .unwrap_or(c"")
+        .to_str()
+        .unwrap();
+    let message = callback_data
+        .message_as_c_str()
+        .unwrap_or(c"")
+        .to_str()
+        .unwrap();
+    let t = format!("{:?}", message_types);
+    match message_severity {
+        vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE => {
+            trace!("[{t}][{message_id}] {message}");
+        }
+
+        vk::DebugUtilsMessageSeverityFlagsEXT::INFO => {
+            info!("[{t}][{message_id}] {message}");
+        }
+
+        vk::DebugUtilsMessageSeverityFlagsEXT::WARNING => {
+            warn!("[{t}][{message_id}] {message}");
+        }
+
+        vk::DebugUtilsMessageSeverityFlagsEXT::ERROR => {
+            error!("[{t}][{message_id}] {message}");
+        }
+        _ => {}
+    }
+
+    vk::FALSE
+}
+
+pub(crate) struct DecodeQueryPool {
+    pool: QueryPool,
+}
+
+impl DecodeQueryPool {
+    pub(crate) fn new(
+        device: Arc<Device>,
+        profile: vk::VideoProfileInfoKHR,
+    ) -> Result<Self, VulkanCtxError> {
+        let pool = QueryPool::new(device, QueryType::RESULT_STATUS_ONLY_KHR, 1, Some(profile))?;
+        Ok(Self { pool })
+    }
+
+    pub(crate) fn reset(&self, buffer: vk::CommandBuffer) {
+        unsafe {
+            self.pool
+                .device
+                .cmd_reset_query_pool(buffer, self.pool.pool, 0, 1)
+        };
+    }
+
+    // if we want to switch to inline queries we can use this, but we need to check how many
+    // implementations support them
+    pub(crate) fn _inline_query(&self) -> vk::VideoInlineQueryInfoKHR {
+        vk::VideoInlineQueryInfoKHR::default()
+            .query_pool(self.pool.pool)
+            .first_query(0)
+            .query_count(1)
+    }
+
+    pub(crate) fn begin_query(&self, buffer: vk::CommandBuffer) {
+        unsafe {
+            self.pool.device.cmd_begin_query(
+                buffer,
+                self.pool.pool,
+                0,
+                vk::QueryControlFlags::empty(),
+            )
+        }
+    }
+
+    pub(crate) fn end_query(&self, buffer: vk::CommandBuffer) {
+        unsafe { self.pool.device.cmd_end_query(buffer, self.pool.pool, 0) }
+    }
+
+    pub(crate) fn get_result_blocking(
+        &self,
+    ) -> Result<vk::QueryResultStatusKHR, VulkanDecoderError> {
+        let mut result = vk::QueryResultStatusKHR::NOT_READY;
+        unsafe {
+            self.pool.device.get_query_pool_results(
+                self.pool.pool,
+                0,
+                std::slice::from_mut(&mut result),
+                vk::QueryResultFlags::WAIT | vk::QueryResultFlags::WITH_STATUS_KHR,
+            )?
+        };
+
+        Ok(result)
+    }
+}
+
+pub(crate) struct QueryPool {
+    pool: vk::QueryPool,
+    device: Arc<Device>,
+}
+
+impl QueryPool {
+    pub(crate) fn new(
+        device: Arc<Device>,
+        ty: vk::QueryType,
+        count: u32,
+        mut p_next: Option<vk::VideoProfileInfoKHR>,
+    ) -> Result<Self, VulkanCtxError> {
+        let mut create_info = vk::QueryPoolCreateInfo::default()
+            .query_type(ty)
+            .query_count(count);
+
+        if let Some(p_next) = p_next.as_mut() {
+            create_info = create_info.push_next(p_next)
+        }
+        let pool = unsafe { device.create_query_pool(&create_info, None)? };
+
+        Ok(Self { pool, device })
+    }
+}
+
+impl Drop for QueryPool {
+    fn drop(&mut self) {
+        unsafe { self.device.destroy_query_pool(self.pool, None) };
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/wrappers/mem.rs b/vk-video/src/vulkan_decoder/wrappers/mem.rs
new file mode 100644
index 000000000..5876702ec
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/wrappers/mem.rs
@@ -0,0 +1,290 @@
+use std::sync::Arc;
+
+use ash::vk;
+use vk_mem::Alloc;
+
+use crate::vulkan_decoder::{H264ProfileInfo, VulkanCtxError, VulkanDecoderError};
+
+use super::{Device, Instance};
+
+pub(crate) struct Allocator {
+    allocator: vk_mem::Allocator,
+    _instance: Arc<Instance>,
+    _device: Arc<Device>,
+}
+
+impl Allocator {
+    pub(crate) fn new(
+        instance: Arc<Instance>,
+        physical_device: vk::PhysicalDevice,
+        device: Arc<Device>,
+    ) -> Result<Self, VulkanCtxError> {
+        let mut allocator_create_info =
+            vk_mem::AllocatorCreateInfo::new(&instance, &device, physical_device);
+        allocator_create_info.vulkan_api_version = vk::API_VERSION_1_3;
+
+        let allocator = unsafe { vk_mem::Allocator::new(allocator_create_info)? };
+
+        Ok(Self {
+            allocator,
+            _device: device,
+            _instance: instance,
+        })
+    }
+}
+
+impl std::ops::Deref for Allocator {
+    type Target = vk_mem::Allocator;
+
+    fn deref(&self) -> &Self::Target {
+        &self.allocator
+    }
+}
+
+pub(crate) struct MemoryAllocation {
+    pub(crate) allocation: vk_mem::Allocation,
+    allocator: Arc<Allocator>,
+}
+
+impl MemoryAllocation {
+    pub(crate) fn new(
+        allocator: Arc<Allocator>,
+        memory_requirements: &vk::MemoryRequirements,
+        alloc_info: &vk_mem::AllocationCreateInfo,
+    ) -> Result<Self, VulkanDecoderError> {
+        let allocation = unsafe { allocator.allocate_memory(memory_requirements, alloc_info)? };
+
+        Ok(Self {
+            allocation,
+            allocator,
+        })
+    }
+
+    pub(crate) fn allocation_info(&self) -> vk_mem::AllocationInfo {
+        self.allocator.get_allocation_info(&self.allocation)
+    }
+}
+
+impl std::ops::Deref for MemoryAllocation {
+    type Target = vk_mem::Allocation;
+
+    fn deref(&self) -> &Self::Target {
+        &self.allocation
+    }
+}
+
+impl Drop for MemoryAllocation {
+    fn drop(&mut self) {
+        unsafe { self.allocator.free_memory(&mut self.allocation) };
+    }
+}
+
+pub(crate) struct Buffer {
+    pub(crate) buffer: vk::Buffer,
+    pub(crate) allocation: vk_mem::Allocation,
+    allocator: Arc<Allocator>,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum TransferDirection {
+    GpuToMem,
+}
+
+impl Buffer {
+    pub(crate) fn new_decode(
+        allocator: Arc<Allocator>,
+        size: u64,
+        profile: &H264ProfileInfo,
+    ) -> Result<Self, VulkanDecoderError> {
+        let mut profile_list_info = vk::VideoProfileListInfoKHR::default()
+            .profiles(std::slice::from_ref(&profile.profile_info));
+
+        let buffer_create_info = vk::BufferCreateInfo::default()
+            .size(size)
+            .usage(vk::BufferUsageFlags::VIDEO_DECODE_SRC_KHR)
+            .sharing_mode(vk::SharingMode::EXCLUSIVE)
+            .push_next(&mut profile_list_info);
+
+        let allocation_create_info = vk_mem::AllocationCreateInfo {
+            usage: vk_mem::MemoryUsage::Auto,
+            required_flags: vk::MemoryPropertyFlags::HOST_COHERENT,
+            flags: vk_mem::AllocationCreateFlags::HOST_ACCESS_SEQUENTIAL_WRITE,
+            ..Default::default()
+        };
+
+        Self::new(allocator, buffer_create_info, allocation_create_info)
+    }
+
+    pub(crate) fn new_transfer(
+        allocator: Arc<Allocator>,
+        size: u64,
+        direction: TransferDirection,
+    ) -> Result<Self, VulkanDecoderError> {
+        let usage = match direction {
+            TransferDirection::GpuToMem => vk::BufferUsageFlags::TRANSFER_DST,
+        };
+
+        let allocation_flags = match direction {
+            TransferDirection::GpuToMem => vk_mem::AllocationCreateFlags::HOST_ACCESS_RANDOM,
+        };
+
+        let buffer_create_info = vk::BufferCreateInfo::default()
+            .size(size)
+            .usage(usage)
+            .sharing_mode(vk::SharingMode::EXCLUSIVE);
+
+        let allocation_create_info = vk_mem::AllocationCreateInfo {
+            usage: vk_mem::MemoryUsage::Auto,
+            required_flags: vk::MemoryPropertyFlags::HOST_COHERENT,
+            flags: allocation_flags,
+            ..Default::default()
+        };
+
+        Self::new(allocator, buffer_create_info, allocation_create_info)
+    }
+
+    fn new(
+        allocator: Arc<Allocator>,
+        create_info: vk::BufferCreateInfo,
+        allocation_create_info: vk_mem::AllocationCreateInfo,
+    ) -> Result<Self, VulkanDecoderError> {
+        let (buffer, allocation) =
+            unsafe { allocator.create_buffer(&create_info, &allocation_create_info)? };
+
+        Ok(Self {
+            buffer,
+            allocation,
+            allocator,
+        })
+    }
+
+    /// ## Safety
+    /// the buffer has to be mappable and readable
+    pub(crate) unsafe fn download_data_from_buffer(
+        &mut self,
+        size: usize,
+    ) -> Result<Vec<u8>, VulkanDecoderError> {
+        let mut output = Vec::new();
+        unsafe {
+            let memory = self.allocator.map_memory(&mut self.allocation)?;
+            let memory_slice = std::slice::from_raw_parts_mut(memory, size);
+            output.extend_from_slice(memory_slice);
+            self.allocator.unmap_memory(&mut self.allocation);
+        }
+
+        Ok(output)
+    }
+
+    pub(crate) fn new_with_decode_data(
+        allocator: Arc<Allocator>,
+        data: &[u8],
+        buffer_size: u64,
+    ) -> Result<Self, VulkanDecoderError> {
+        let mut decode_buffer = Buffer::new_decode(
+            allocator.clone(),
+            buffer_size,
+            &H264ProfileInfo::decode_h264_yuv420(),
+        )?;
+
+        unsafe {
+            let mem = allocator.map_memory(&mut decode_buffer.allocation)?;
+            let slice = std::slice::from_raw_parts_mut(mem.cast(), data.len());
+            slice.copy_from_slice(data);
+            allocator.unmap_memory(&mut decode_buffer.allocation);
+        }
+
+        Ok(decode_buffer)
+    }
+}
+
+impl Drop for Buffer {
+    fn drop(&mut self) {
+        unsafe {
+            self.allocator
+                .destroy_buffer(self.buffer, &mut self.allocation)
+        }
+    }
+}
+
+impl std::ops::Deref for Buffer {
+    type Target = vk::Buffer;
+
+    fn deref(&self) -> &Self::Target {
+        &self.buffer
+    }
+}
+
+pub(crate) struct Image {
+    pub(crate) image: vk::Image,
+    allocation: vk_mem::Allocation,
+    allocator: Arc<Allocator>,
+    pub(crate) extent: vk::Extent3D,
+}
+
+impl Image {
+    pub(crate) fn new(
+        allocator: Arc<Allocator>,
+        image_create_info: &vk::ImageCreateInfo,
+    ) -> Result<Self, VulkanDecoderError> {
+        let extent = image_create_info.extent;
+        let alloc_info = vk_mem::AllocationCreateInfo {
+            usage: vk_mem::MemoryUsage::Auto,
+            ..Default::default()
+        };
+
+        let (image, allocation) =
+            unsafe { allocator.create_image(image_create_info, &alloc_info)? };
+
+        Ok(Image {
+            image,
+            allocation,
+            allocator,
+            extent,
+        })
+    }
+}
+
+impl std::ops::Deref for Image {
+    type Target = vk::Image;
+
+    fn deref(&self) -> &Self::Target {
+        &self.image
+    }
+}
+
+impl Drop for Image {
+    fn drop(&mut self) {
+        unsafe {
+            self.allocator
+                .destroy_image(self.image, &mut self.allocation)
+        };
+    }
+}
+
+pub(crate) struct ImageView {
+    pub(crate) view: vk::ImageView,
+    pub(crate) _image: Arc<Image>,
+    pub(crate) device: Arc<Device>,
+}
+
+impl ImageView {
+    pub(crate) fn new(
+        device: Arc<Device>,
+        image: Arc<Image>,
+        create_info: &vk::ImageViewCreateInfo,
+    ) -> Result<Self, VulkanDecoderError> {
+        let view = unsafe { device.create_image_view(create_info, None)? };
+
+        Ok(ImageView {
+            view,
+            _image: image,
+            device: device.clone(),
+        })
+    }
+}
+
+impl Drop for ImageView {
+    fn drop(&mut self) {
+        unsafe { self.device.destroy_image_view(self.view, None) };
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/wrappers/parameter_sets.rs b/vk-video/src/vulkan_decoder/wrappers/parameter_sets.rs
new file mode 100644
index 000000000..2229662e3
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/wrappers/parameter_sets.rs
@@ -0,0 +1,289 @@
+use ash::vk;
+use h264_reader::nal::sps::SeqParameterSet;
+
+use crate::VulkanDecoderError;
+
+const MACROBLOCK_SIZE: u32 = 16;
+
+pub(crate) trait SeqParameterSetExt {
+    fn width(&self) -> Result<u32, VulkanDecoderError>;
+    fn height(&self) -> Result<u32, VulkanDecoderError>;
+}
+
+impl SeqParameterSetExt for SeqParameterSet {
+    fn width(&self) -> Result<u32, VulkanDecoderError> {
+        match self.frame_cropping {
+            None => Ok((self.pic_width_in_mbs_minus1 + 1) * MACROBLOCK_SIZE),
+            Some(_) => Err(VulkanDecoderError::FrameCroppingNotSupported),
+        }
+    }
+
+    fn height(&self) -> Result<u32, VulkanDecoderError> {
+        match self.frame_mbs_flags {
+            h264_reader::nal::sps::FrameMbsFlags::Frames => {
+                Ok((self.pic_height_in_map_units_minus1 + 1) * MACROBLOCK_SIZE)
+            }
+            h264_reader::nal::sps::FrameMbsFlags::Fields { .. } => {
+                Err(VulkanDecoderError::FieldsNotSupported)
+            }
+        }
+    }
+}
+
+pub(crate) struct VkSequenceParameterSet {
+    pub(crate) sps: vk::native::StdVideoH264SequenceParameterSet,
+    // in the future, heap-allocated VUI and HRD parameters can be put here to have everything
+    // together
+}
+
+impl TryFrom<&'_ SeqParameterSet> for VkSequenceParameterSet {
+    type Error = VulkanDecoderError;
+
+    #[allow(non_snake_case)]
+    fn try_from(sps: &SeqParameterSet) -> Result<Self, Self::Error> {
+        let flags = vk::native::StdVideoH264SpsFlags {
+            _bitfield_1: vk::native::StdVideoH264SpsFlags::new_bitfield_1(
+                sps.constraint_flags.flag0().into(),
+                sps.constraint_flags.flag1().into(),
+                sps.constraint_flags.flag2().into(),
+                sps.constraint_flags.flag3().into(),
+                sps.constraint_flags.flag4().into(),
+                sps.constraint_flags.flag5().into(),
+                sps.direct_8x8_inference_flag.into(),
+                match sps.frame_mbs_flags {
+                    h264_reader::nal::sps::FrameMbsFlags::Frames => 0,
+                    h264_reader::nal::sps::FrameMbsFlags::Fields {
+                        mb_adaptive_frame_field_flag,
+                    } => mb_adaptive_frame_field_flag.into(),
+                },
+                matches!(
+                    sps.frame_mbs_flags,
+                    h264_reader::nal::sps::FrameMbsFlags::Frames
+                )
+                .into(),
+                match sps.pic_order_cnt {
+                    h264_reader::nal::sps::PicOrderCntType::TypeOne {
+                        delta_pic_order_always_zero_flag,
+                        ..
+                    } => delta_pic_order_always_zero_flag.into(),
+                    // The spec doesn't say what to do if this flag is not present...
+                    h264_reader::nal::sps::PicOrderCntType::TypeZero { .. }
+                    | h264_reader::nal::sps::PicOrderCntType::TypeTwo => 0,
+                },
+                sps.chroma_info.separate_colour_plane_flag.into(),
+                sps.gaps_in_frame_num_value_allowed_flag.into(),
+                sps.chroma_info.qpprime_y_zero_transform_bypass_flag.into(),
+                sps.frame_cropping.is_some().into(),
+                sps.chroma_info.scaling_matrix.is_some().into(),
+                0,
+            ),
+            _bitfield_align_1: [],
+            __bindgen_padding_0: 0,
+        };
+
+        let profile_idc: u8 = sps.profile_idc.into();
+
+        let pic_order_cnt_type = match sps.pic_order_cnt {
+            h264_reader::nal::sps::PicOrderCntType::TypeZero { .. } => 0,
+            h264_reader::nal::sps::PicOrderCntType::TypeOne { ..
+            } => 1,
+            h264_reader::nal::sps::PicOrderCntType::TypeTwo => 2,
+        };
+
+        let (
+            offset_for_non_ref_pic,
+            offset_for_top_to_bottom_field,
+            num_ref_frames_in_pic_order_cnt_cycle,
+        ) = match &sps.pic_order_cnt {
+            h264_reader::nal::sps::PicOrderCntType::TypeOne {
+                offset_for_non_ref_pic,
+                offset_for_top_to_bottom_field,
+                offsets_for_ref_frame,
+                ..
+            } => (
+                *offset_for_non_ref_pic,
+                *offset_for_top_to_bottom_field,
+                offsets_for_ref_frame.len() as u8,
+            ),
+            h264_reader::nal::sps::PicOrderCntType::TypeZero { .. } => (0, 0, 0),
+            h264_reader::nal::sps::PicOrderCntType::TypeTwo => (0, 0, 0),
+        };
+
+        let log2_max_pic_order_cnt_lsb_minus4 = match &sps.pic_order_cnt {
+            h264_reader::nal::sps::PicOrderCntType::TypeZero {
+                log2_max_pic_order_cnt_lsb_minus4,
+            } => *log2_max_pic_order_cnt_lsb_minus4,
+            h264_reader::nal::sps::PicOrderCntType::TypeOne { .. }
+            | h264_reader::nal::sps::PicOrderCntType::TypeTwo => 0,
+        };
+
+        let (
+            frame_crop_left_offset,
+            frame_crop_right_offset,
+            frame_crop_top_offset,
+            frame_crop_bottom_offset,
+        ) = match sps.frame_cropping {
+            Some(h264_reader::nal::sps::FrameCropping {
+                left_offset,
+                right_offset,
+                top_offset,
+                bottom_offset,
+            }) => (left_offset, right_offset, top_offset, bottom_offset),
+            None => (0, 0, 0, 0),
+        };
+
+        let pOffsetForRefFrame = match &sps.pic_order_cnt {
+            h264_reader::nal::sps::PicOrderCntType::TypeOne {
+                offsets_for_ref_frame,
+                ..
+            } => offsets_for_ref_frame.as_ptr(),
+            h264_reader::nal::sps::PicOrderCntType::TypeZero { .. }
+            | h264_reader::nal::sps::PicOrderCntType::TypeTwo => std::ptr::null(),
+        };
+
+        let pScalingLists = match sps.chroma_info.scaling_matrix {
+            Some(_) => return Err(VulkanDecoderError::ScalingListsNotSupported),
+            None => std::ptr::null(),
+        };
+
+        // TODO: this is not necessary to reconstruct samples. I don't know why the decoder would
+        // need this. Maybe we can do this in the future.
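+        // (If VUI were wired up, it would carry timing, aspect-ratio and colorimetry
+        // hints; sample reconstruction works without it, so a null pointer suffices.)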
+        let pSequenceParameterSetVui = std::ptr::null();
+
+        Ok(Self {
+            sps: vk::native::StdVideoH264SequenceParameterSet {
+                flags,
+                profile_idc: profile_idc as u32,
+                level_idc: h264_level_idc_to_vk(sps.level_idc),
+                chroma_format_idc: sps.chroma_info.chroma_format.to_chroma_format_idc(),
+                seq_parameter_set_id: sps.seq_parameter_set_id.id(),
+                bit_depth_luma_minus8: sps.chroma_info.bit_depth_luma_minus8,
+                bit_depth_chroma_minus8: sps.chroma_info.bit_depth_chroma_minus8,
+                log2_max_frame_num_minus4: sps.log2_max_frame_num_minus4,
+                pic_order_cnt_type,
+                offset_for_non_ref_pic,
+                offset_for_top_to_bottom_field,
+                num_ref_frames_in_pic_order_cnt_cycle,
+                log2_max_pic_order_cnt_lsb_minus4,
+                max_num_ref_frames: sps.max_num_ref_frames as u8,
+                reserved1: 0,
+                pic_width_in_mbs_minus1: sps.pic_width_in_mbs_minus1,
+                pic_height_in_map_units_minus1: sps.pic_height_in_map_units_minus1,
+                frame_crop_left_offset,
+                frame_crop_right_offset,
+                frame_crop_top_offset,
+                frame_crop_bottom_offset,
+                reserved2: 0,
+                pOffsetForRefFrame,
+                pScalingLists,
+                pSequenceParameterSetVui,
+            },
+        })
+    }
+}
+
+trait ChromaFormatExt {
+    fn to_chroma_format_idc(&self) -> u32;
+}
+
+impl ChromaFormatExt for h264_reader::nal::sps::ChromaFormat {
+    fn to_chroma_format_idc(&self) -> u32 {
+        match self {
+            h264_reader::nal::sps::ChromaFormat::Monochrome => 0,
+            h264_reader::nal::sps::ChromaFormat::YUV420 => 1,
+            h264_reader::nal::sps::ChromaFormat::YUV422 => 2,
+            h264_reader::nal::sps::ChromaFormat::YUV444 => 3,
+            h264_reader::nal::sps::ChromaFormat::Invalid(v) => *v,
+        }
+    }
+}
+
+fn h264_level_idc_to_vk(level_idc: u8) -> u32 {
+    match level_idc {
+        10 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_0,
+        11 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_1,
+        12 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_2,
+        13 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_3,
+        20 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_0,
+        21 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_1,
+        22 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_2,
+        30 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_0,
+        31 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_1,
+        32 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_2,
+        40 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_0,
+        41 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_1,
+        42 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_2,
+        50 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_0,
+        51 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_1,
+        52 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_2,
+        60 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_0,
+        61 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_1,
+        62 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_2,
+        _ => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_INVALID,
+    }
+}
+
+pub(crate) struct VkPictureParameterSet {
+    pub(crate) pps: vk::native::StdVideoH264PictureParameterSet,
+}
+
+impl TryFrom<&'_ h264_reader::nal::pps::PicParameterSet> for VkPictureParameterSet {
+    type Error = VulkanDecoderError;
+
+    #[allow(non_snake_case)]
+    fn try_from(pps: &h264_reader::nal::pps::PicParameterSet) -> Result<Self, Self::Error> {
+        let flags = vk::native::StdVideoH264PpsFlags {
+            _bitfield_align_1: [],
+            __bindgen_padding_0: [0; 3],
+            _bitfield_1: vk::native::StdVideoH264PpsFlags::new_bitfield_1(
+                pps.extension
+                    .as_ref()
+                    .map(|ext| ext.transform_8x8_mode_flag.into())
+                    .unwrap_or(0),
+                pps.redundant_pic_cnt_present_flag.into(),
+                pps.constrained_intra_pred_flag.into(),
+                pps.deblocking_filter_control_present_flag.into(),
+                pps.weighted_pred_flag.into(),
+                pps.bottom_field_pic_order_in_frame_present_flag.into(),
+                pps.entropy_coding_mode_flag.into(),
+                pps.extension
+                    .as_ref()
+                    .map(|ext| ext.pic_scaling_matrix.is_some().into())
+                    .unwrap_or(0),
+            ),
+        };
+
+        let chroma_qp_index_offset = pps.chroma_qp_index_offset as i8;
+
+        let second_chroma_qp_index_offset = pps
+            .extension
+            .as_ref()
+            .map(|ext| ext.second_chroma_qp_index_offset as i8)
+            .unwrap_or(chroma_qp_index_offset);
+
+        let pScalingLists = match pps.extension {
+            Some(h264_reader::nal::pps::PicParameterSetExtra {
+                pic_scaling_matrix: Some(_),
+                ..
+            }) => return Err(VulkanDecoderError::ScalingListsNotSupported),
+            _ => std::ptr::null(),
+        };
+
+        Ok(Self {
+            pps: vk::native::StdVideoH264PictureParameterSet {
+                flags,
+                seq_parameter_set_id: pps.seq_parameter_set_id.id(),
+                pic_parameter_set_id: pps.pic_parameter_set_id.id(),
+                num_ref_idx_l0_default_active_minus1: pps.num_ref_idx_l0_default_active_minus1
+                    as u8,
+                num_ref_idx_l1_default_active_minus1: pps.num_ref_idx_l1_default_active_minus1
+                    as u8,
+                weighted_bipred_idc: pps.weighted_bipred_idc.into(),
+                pic_init_qp_minus26: pps.pic_init_qp_minus26 as i8,
+                pic_init_qs_minus26: pps.pic_init_qs_minus26 as i8,
+                chroma_qp_index_offset,
+                second_chroma_qp_index_offset,
+                pScalingLists,
+            },
+        })
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/wrappers/sync.rs b/vk-video/src/vulkan_decoder/wrappers/sync.rs
new file mode 100644
index 000000000..b0a3061e2
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/wrappers/sync.rs
@@ -0,0 +1,85 @@
+use std::sync::Arc;
+
+use ash::vk;
+
+use crate::vulkan_decoder::VulkanDecoderError;
+
+use super::Device;
+
+pub(crate) struct Fence {
+    pub(crate) fence: vk::Fence,
+    device: Arc<Device>,
+}
+
+impl Fence {
+    pub(crate) fn new(device: Arc<Device>, signaled: bool) -> Result<Self, VulkanDecoderError> {
+        let flags = if signaled {
+            vk::FenceCreateFlags::SIGNALED
+        } else {
+            vk::FenceCreateFlags::empty()
+        };
+        let create_info = vk::FenceCreateInfo::default().flags(flags);
+        let fence = unsafe { device.create_fence(&create_info, None)? };
+
+        Ok(Self { device, fence })
+    }
+
+    pub(crate) fn wait(&self, timeout: u64) -> Result<(), VulkanDecoderError> {
+        unsafe { self.device.wait_for_fences(&[self.fence], true, timeout)? };
+        Ok(())
+    }
+
+    pub(crate) fn reset(&self) -> Result<(), VulkanDecoderError> {
+        unsafe { self.device.reset_fences(&[self.fence])? };
+        Ok(())
+    }
+
+    pub(crate) fn wait_and_reset(&self, timeout: u64) -> Result<(), VulkanDecoderError> {
+        self.wait(timeout)?;
+        self.reset()?;
+
+        Ok(())
+    }
+}
+
+impl Drop for Fence {
+    fn drop(&mut self) {
+        unsafe { self.device.destroy_fence(self.fence, None) };
+    }
+}
+
+impl std::ops::Deref for Fence {
+    type Target = vk::Fence;
+
+    fn deref(&self) -> &Self::Target {
+        &self.fence
+    }
+}
+
+pub(crate) struct Semaphore {
+    pub(crate) semaphore: vk::Semaphore,
+    device: Arc<Device>,
+}
+
+impl Semaphore {
+    pub(crate) fn new(device: Arc<Device>) -> Result<Self, VulkanDecoderError> {
+        let create_info = vk::SemaphoreCreateInfo::default();
+        let semaphore = unsafe { device.create_semaphore(&create_info, None)? };
+
+        Ok(Self { device, semaphore })
+    }
+}
+
+impl Drop for Semaphore {
+    fn drop(&mut self) {
+        unsafe { self.device.destroy_semaphore(self.semaphore, None) };
+    }
+}
+
+impl std::ops::Deref for Semaphore {
+    type Target = vk::Semaphore;
+
+    fn deref(&self) -> &Self::Target {
+        &self.semaphore
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/wrappers/video.rs b/vk-video/src/vulkan_decoder/wrappers/video.rs
new file mode 100644
index 000000000..e548f6ea8
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/wrappers/video.rs
@@ -0,0 +1,206 @@
+use std::sync::Arc;
+
+use ash::vk;
+
+use crate::{vulkan_decoder::VulkanDecoderError, VulkanCtx};
+
+use super::{Device, MemoryAllocation, VideoQueueExt};
+
+pub(crate) struct VideoSessionParameters {
+    pub(crate) parameters: vk::VideoSessionParametersKHR,
+    update_sequence_count: u32,
+    device: Arc<Device>,
+}
+
+impl VideoSessionParameters {
+    pub(crate) fn new(
+        device: Arc<Device>,
+        session: vk::VideoSessionKHR,
+        initial_sps: &[vk::native::StdVideoH264SequenceParameterSet],
+        initial_pps: &[vk::native::StdVideoH264PictureParameterSet],
+        template: Option<&Self>,
+    ) -> Result<Self, VulkanDecoderError> {
+        let parameters_add_info = vk::VideoDecodeH264SessionParametersAddInfoKHR::default()
+            .std_sp_ss(initial_sps)
+            .std_pp_ss(initial_pps);
+
+        let mut h264_create_info = vk::VideoDecodeH264SessionParametersCreateInfoKHR::default()
+            .max_std_sps_count(32)
+            .max_std_pps_count(32)
+            .parameters_add_info(&parameters_add_info);
+
+        let create_info = vk::VideoSessionParametersCreateInfoKHR::default()
+            .flags(vk::VideoSessionParametersCreateFlagsKHR::empty())
+            .video_session_parameters_template(
+                template
+                    .map(|t| t.parameters)
+                    .unwrap_or_else(vk::VideoSessionParametersKHR::null),
+            )
+            .video_session(session)
+            .push_next(&mut h264_create_info);
+
+        let parameters = unsafe {
+            device
+                .video_queue_ext
+                .create_video_session_parameters_khr(&create_info, None)?
+        };
+
+        Ok(Self {
+            parameters,
+            update_sequence_count: 0,
+            device: device.clone(),
+        })
+    }
+
+    pub(crate) fn add(
+        &mut self,
+        sps: &[vk::native::StdVideoH264SequenceParameterSet],
+        pps: &[vk::native::StdVideoH264PictureParameterSet],
+    ) -> Result<(), VulkanDecoderError> {
+        let mut parameters_add_info = vk::VideoDecodeH264SessionParametersAddInfoKHR::default()
+            .std_sp_ss(sps)
+            .std_pp_ss(pps);
+
+        self.update_sequence_count += 1;
+        let update_info = vk::VideoSessionParametersUpdateInfoKHR::default()
+            .update_sequence_count(self.update_sequence_count)
+            .push_next(&mut parameters_add_info);
+
+        unsafe {
+            self.device
+                .video_queue_ext
+                .update_video_session_parameters_khr(self.parameters, &update_info)?
+        };
+
+        Ok(())
+    }
+}
+
+impl Drop for VideoSessionParameters {
+    fn drop(&mut self) {
+        unsafe {
+            self.device
+                .video_queue_ext
+                .destroy_video_session_parameters_khr(self.parameters, None)
+        }
+    }
+}
+
+pub(crate) struct VideoSession {
+    pub(crate) session: vk::VideoSessionKHR,
+    pub(crate) device: Arc<Device>,
+    pub(crate) _allocations: Vec<MemoryAllocation>,
+    pub(crate) max_coded_extent: vk::Extent2D,
+    pub(crate) max_dpb_slots: u32,
+}
+
+impl VideoSession {
+    pub(crate) fn new(
+        vulkan_ctx: &VulkanCtx,
+        profile_info: &vk::VideoProfileInfoKHR,
+        max_coded_extent: vk::Extent2D,
+        max_dpb_slots: u32,
+        max_active_references: u32,
+        std_header_version: &vk::ExtensionProperties,
+    ) -> Result<Self, VulkanDecoderError> {
+        // TODO: this probably works, but this format needs to be detected and set
+        // based on what the GPU supports
+        let format = vk::Format::G8_B8R8_2PLANE_420_UNORM;
+
+        let session_create_info = vk::VideoSessionCreateInfoKHR::default()
+            .queue_family_index(vulkan_ctx.queues.h264_decode.idx as u32)
+            .video_profile(profile_info)
+            .picture_format(format)
+            .max_coded_extent(max_coded_extent)
+            .reference_picture_format(format)
+            .max_dpb_slots(max_dpb_slots)
+            .max_active_reference_pictures(max_active_references)
+            .std_header_version(std_header_version);
+
+        let video_session = unsafe {
+            vulkan_ctx
+                .device
+                .video_queue_ext
+                .create_video_session_khr(&session_create_info, None)?
+        };
+
+        let memory_requirements = unsafe {
+            vulkan_ctx
+                .device
+                .video_queue_ext
+                .get_video_session_memory_requirements_khr(video_session)?
+        };
+
+        let allocations = memory_requirements
+            .iter()
+            .map(|req| {
+                MemoryAllocation::new(
+                    vulkan_ctx.allocator.clone(),
+                    &req.memory_requirements,
+                    &vk_mem::AllocationCreateInfo {
+                        usage: vk_mem::MemoryUsage::Unknown,
+                        ..Default::default()
+                    },
+                )
+            })
+            .collect::<Result<Vec<_>, _>>()?;
+
+        let memory_bind_infos = memory_requirements
+            .into_iter()
+            .zip(allocations.iter())
+            .map(|(req, allocation)| {
+                let allocation_info = allocation.allocation_info();
+                vk::BindVideoSessionMemoryInfoKHR::default()
+                    .memory_bind_index(req.memory_bind_index)
+                    .memory(allocation_info.device_memory)
+                    .memory_offset(allocation_info.offset)
+                    .memory_size(allocation_info.size)
+            })
+            .collect::<Vec<_>>();
+
+        unsafe {
+            vulkan_ctx
+                .device
+                .video_queue_ext
+                .bind_video_session_memory_khr(video_session, &memory_bind_infos)?
+        };
+
+        Ok(VideoSession {
+            session: video_session,
+            _allocations: allocations,
+            device: vulkan_ctx.device.clone(),
+            max_coded_extent,
+            max_dpb_slots,
+        })
+    }
+}
+
+impl Drop for VideoSession {
+    fn drop(&mut self) {
+        unsafe {
+            self.device
+                .video_queue_ext
+                .destroy_video_session_khr(self.session, None)
+        };
+    }
+}
+
+impl From<crate::parser::PictureInfo> for vk::native::StdVideoDecodeH264ReferenceInfo {
+    fn from(picture_info: crate::parser::PictureInfo) -> Self {
+        vk::native::StdVideoDecodeH264ReferenceInfo {
+            flags: vk::native::StdVideoDecodeH264ReferenceInfoFlags {
+                __bindgen_padding_0: [0; 3],
+                _bitfield_align_1: [],
+                _bitfield_1: vk::native::StdVideoDecodeH264ReferenceInfoFlags::new_bitfield_1(
+                    0,
+                    0,
+                    picture_info.used_for_long_term_reference.into(),
+                    picture_info.non_existing.into(),
+                ),
+            },
+            FrameNum: picture_info.FrameNum,
+            PicOrderCnt: picture_info.PicOrderCnt,
+            reserved: 0,
+        }
+    }
+}
diff --git a/vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs b/vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs
new file mode 100644
index 000000000..8384443e3
--- /dev/null
+++ b/vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs
@@ -0,0 +1,228 @@
+use ash::{prelude::VkResult, vk, RawPtr};
+
+pub(crate) trait VideoQueueExt {
+    unsafe fn cmd_begin_video_coding_khr(
+        &self,
+        command_buffer: vk::CommandBuffer,
+        begin_info: &vk::VideoBeginCodingInfoKHR,
+    );
+
+    unsafe fn cmd_end_video_coding_khr(
+        &self,
+        command_buffer: vk::CommandBuffer,
+        end_info: &vk::VideoEndCodingInfoKHR,
+    );
+
+    unsafe fn cmd_control_video_coding_khr(
+        &self,
+        command_buffer: vk::CommandBuffer,
+        control_info: &vk::VideoCodingControlInfoKHR,
+    );
+
+    unsafe fn get_video_session_memory_requirements_khr(
+        &self,
+        video_session: vk::VideoSessionKHR,
+    ) -> VkResult<Vec<vk::VideoSessionMemoryRequirementsKHR>>;
+
+    unsafe fn create_video_session_khr(
+        &self,
+        create_info: &vk::VideoSessionCreateInfoKHR,
+        allocation_callbacks: Option<&vk::AllocationCallbacks>,
+    ) -> VkResult<vk::VideoSessionKHR>;
+
+    unsafe fn bind_video_session_memory_khr(
+        &self,
+        video_session: vk::VideoSessionKHR,
+        memory_bind_infos: &[vk::BindVideoSessionMemoryInfoKHR],
+    ) -> VkResult<()>;
+
+    unsafe fn destroy_video_session_khr(
+        &self,
+        video_session: vk::VideoSessionKHR,
+        allocation_callbacks: Option<&vk::AllocationCallbacks>,
+    );
+
+    unsafe fn create_video_session_parameters_khr(
+        &self,
+        create_info: &vk::VideoSessionParametersCreateInfoKHR,
+        allocation_callbacks: Option<&vk::AllocationCallbacks>,
+    ) -> VkResult<vk::VideoSessionParametersKHR>;
+
+    unsafe fn destroy_video_session_parameters_khr(
+        &self,
+        parameters: vk::VideoSessionParametersKHR,
+        allocation_callbacks: Option<&vk::AllocationCallbacks>,
+    );
+
+    unsafe fn update_video_session_parameters_khr(
+        &self,
+        parameters: vk::VideoSessionParametersKHR,
+        update_info: &vk::VideoSessionParametersUpdateInfoKHR,
+    ) -> VkResult<()>;
+}
+
+impl VideoQueueExt for ash::khr::video_queue::Device {
+    unsafe fn cmd_begin_video_coding_khr(
+        &self,
+        command_buffer: vk::CommandBuffer,
+        begin_info: &vk::VideoBeginCodingInfoKHR,
+    ) {
+        unsafe { (self.fp().cmd_begin_video_coding_khr)(command_buffer, begin_info) }
+    }
+
+    unsafe fn cmd_end_video_coding_khr(
+        &self,
+        command_buffer: vk::CommandBuffer,
+        end_info: &vk::VideoEndCodingInfoKHR,
+    ) {
+        unsafe { (self.fp().cmd_end_video_coding_khr)(command_buffer, end_info) }
+    }
+
+    unsafe fn cmd_control_video_coding_khr(
+        &self,
+        command_buffer: vk::CommandBuffer,
+        control_info: &vk::VideoCodingControlInfoKHR,
+    ) {
+        unsafe { (self.fp().cmd_control_video_coding_khr)(command_buffer, control_info) }
+    }
+
+    unsafe fn get_video_session_memory_requirements_khr(
+        &self,
+        video_session: vk::VideoSessionKHR,
+    ) -> VkResult<Vec<vk::VideoSessionMemoryRequirementsKHR>> {
+        let mut memory_requirements_len = 0;
+        unsafe {
+            (self.fp().get_video_session_memory_requirements_khr)(
+                self.device(),
+                video_session,
+                &mut memory_requirements_len,
+                std::ptr::null_mut(),
+            )
+            .result()?;
+        }
+
+        let mut memory_requirements = vec![
+            vk::VideoSessionMemoryRequirementsKHR::default();
+            memory_requirements_len as usize
+        ];
+
+        unsafe {
+            (self.fp().get_video_session_memory_requirements_khr)(
+                self.device(),
+                video_session,
+                &mut memory_requirements_len,
+                memory_requirements.as_mut_ptr(),
+            )
+            .result_with_success(memory_requirements)
+        }
+    }
+
+    unsafe fn create_video_session_khr(
+        &self,
+        create_info: &vk::VideoSessionCreateInfoKHR,
+        allocation_callbacks: Option<&vk::AllocationCallbacks>,
+    ) -> VkResult<vk::VideoSessionKHR> {
+        let mut video_session = vk::VideoSessionKHR::default();
+
+        unsafe {
+            (self.fp().create_video_session_khr)(
+                self.device(),
+                create_info,
+                allocation_callbacks.as_raw_ptr(),
+                &mut video_session,
+            )
+            .result_with_success(video_session)
+        }
+    }
+
+    unsafe fn bind_video_session_memory_khr(
+        &self,
+        video_session: vk::VideoSessionKHR,
+        memory_bind_infos: &[vk::BindVideoSessionMemoryInfoKHR],
+    ) -> VkResult<()> {
+        unsafe {
+            (self.fp().bind_video_session_memory_khr)(
+                self.device(),
+                video_session,
+                memory_bind_infos.len() as u32,
+                memory_bind_infos.as_ptr(),
+            )
+            .result()
+        }
+    }
+
+    unsafe fn destroy_video_session_khr(
+        &self,
+        video_session: vk::VideoSessionKHR,
+        allocation_callbacks: Option<&vk::AllocationCallbacks>,
+    ) {
+        unsafe {
+            (self.fp().destroy_video_session_khr)(
+                self.device(),
+                video_session,
+                allocation_callbacks.as_raw_ptr(),
+            )
+        }
+    }
+
+    unsafe fn create_video_session_parameters_khr(
+        &self,
+        create_info: &vk::VideoSessionParametersCreateInfoKHR,
+        allocation_callbacks: Option<&vk::AllocationCallbacks>,
+    ) -> VkResult<vk::VideoSessionParametersKHR> {
+        let mut parameters = vk::VideoSessionParametersKHR::default();
+
+        unsafe {
+            (self.fp().create_video_session_parameters_khr)(
+                self.device(),
+                create_info,
+                allocation_callbacks.as_raw_ptr(),
+                &mut parameters,
+            )
+            .result_with_success(parameters)
+        }
+    }
+
+    unsafe fn destroy_video_session_parameters_khr(
+        &self,
+        parameters: vk::VideoSessionParametersKHR,
+        allocation_callbacks: Option<&vk::AllocationCallbacks>,
+    ) {
+        unsafe {
+            (self.fp().destroy_video_session_parameters_khr)(
+                self.device(),
+                parameters,
+                allocation_callbacks.as_raw_ptr(),
+            )
+        }
+    }
+
+    unsafe fn update_video_session_parameters_khr(
+        &self,
+        parameters: vk::VideoSessionParametersKHR,
+        update_info: &vk::VideoSessionParametersUpdateInfoKHR,
+    ) -> VkResult<()> {
+        unsafe {
+            (self.fp().update_video_session_parameters_khr)(self.device(), parameters, update_info)
+                .result()
+        }
+    }
+}
+
+pub(crate) trait VideoDecodeQueueExt {
+    unsafe fn cmd_decode_video_khr(
+        &self,
+        command_buffer: vk::CommandBuffer,
+        decode_info: &vk::VideoDecodeInfoKHR,
+    );
+}
+
+impl VideoDecodeQueueExt for ash::khr::video_decode_queue::Device {
+    unsafe fn cmd_decode_video_khr(
+        &self,
+        command_buffer: vk::CommandBuffer,
+        decode_info: &vk::VideoDecodeInfoKHR,
+    ) {
+        unsafe { (self.fp().cmd_decode_video_khr)(command_buffer, decode_info) }
+    }
+}
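
The raw `get_*_khr` wrappers above follow Vulkan's usual two-call convention: the first call reports only an element count, and the second fills a caller-allocated buffer. A minimal, self-contained sketch of that pattern (the `query` closure is a hypothetical stand-in for an entry point such as `vkGetVideoSessionMemoryRequirementsKHR`; it is not part of this patch):

    fn two_call<T: Default + Clone>(mut query: impl FnMut(&mut u32, *mut T)) -> Vec<T> {
        let mut len: u32 = 0;
        // First call: null output pointer, so only the count is written.
        query(&mut len, std::ptr::null_mut());
        let mut items = vec![T::default(); len as usize];
        // Second call: the caller-allocated buffer is filled with `len` items.
        query(&mut len, items.as_mut_ptr());
        items
    }

    fn main() {
        // Toy "driver" that reports three u32 items.
        let data = [1u32, 2, 3];
        let items: Vec<u32> = two_call(|len, out| {
            if out.is_null() {
                *len = data.len() as u32;
            } else {
                for (i, v) in data.iter().enumerate() {
                    // Safety: the caller allocated room for `len` items.
                    unsafe { *out.add(i) = *v };
                }
            }
        });
        assert_eq!(items, vec![1, 2, 3]);
    }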