From 79ff607b7c700c5f752b4abc3694a76e830fa56f Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Thu, 19 Sep 2024 17:30:02 +0200 Subject: [PATCH 01/13] First working version -- API needs to be improved --- Cargo.lock | 92 ++ Cargo.toml | 2 + .../src/types/from_register_input.rs | 5 + compositor_api/src/types/register_input.rs | 3 + compositor_pipeline/Cargo.toml | 1 + compositor_pipeline/src/error.rs | 16 + compositor_pipeline/src/pipeline.rs | 77 +- compositor_pipeline/src/pipeline/decoder.rs | 2 + .../src/pipeline/decoder/video.rs | 17 +- .../pipeline/decoder/video/vulkan_video.rs | 108 ++ compositor_pipeline/src/pipeline/input.rs | 1 + .../src/pipeline/input/mp4/mp4_file_reader.rs | 3 +- compositor_pipeline/src/pipeline/types.rs | 6 + compositor_render/src/error.rs | 9 - compositor_render/src/wgpu/ctx.rs | 17 +- compositor_render/src/wgpu/texture/nv12.rs | 4 +- .../examples/raw_channel_input.rs | 16 +- .../examples/raw_channel_output.rs | 17 +- integration_tests/examples/vulkan.rs | 109 ++ src/state.rs | 7 +- vk-video/.gitignore | 5 + vk-video/Cargo.toml | 23 + vk-video/LICENSE | 21 + vk-video/examples/basic.rs | 39 + vk-video/examples/wgpu.rs | 157 ++ vk-video/src/lib.rs | 68 + vk-video/src/parser.rs | 761 +++++++++ vk-video/src/parser/au_splitter.rs | 136 ++ vk-video/src/vulkan_decoder.rs | 1406 +++++++++++++++++ vk-video/src/vulkan_decoder/parameter_sets.rs | 262 +++ vk-video/src/vulkan_decoder/vulkan_ctx.rs | 644 ++++++++ vk-video/src/vulkan_decoder/wrappers.rs | 59 + .../src/vulkan_decoder/wrappers/command.rs | 132 ++ vk-video/src/vulkan_decoder/wrappers/debug.rs | 185 +++ vk-video/src/vulkan_decoder/wrappers/mem.rs | 249 +++ vk-video/src/vulkan_decoder/wrappers/sync.rs | 85 + vk-video/src/vulkan_decoder/wrappers/video.rs | 298 ++++ .../vulkan_decoder/wrappers/vk_extensions.rs | 228 +++ 38 files changed, 5230 insertions(+), 40 deletions(-) create mode 100644 compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs create mode 100644 
integration_tests/examples/vulkan.rs create mode 100644 vk-video/.gitignore create mode 100644 vk-video/Cargo.toml create mode 100644 vk-video/LICENSE create mode 100644 vk-video/examples/basic.rs create mode 100644 vk-video/examples/wgpu.rs create mode 100644 vk-video/src/lib.rs create mode 100644 vk-video/src/parser.rs create mode 100644 vk-video/src/parser/au_splitter.rs create mode 100644 vk-video/src/vulkan_decoder.rs create mode 100644 vk-video/src/vulkan_decoder/parameter_sets.rs create mode 100644 vk-video/src/vulkan_decoder/vulkan_ctx.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/command.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/debug.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/mem.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/sync.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/video.rs create mode 100644 vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs diff --git a/Cargo.lock b/Cargo.lock index fad06cac6..2cfcc0e00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -305,6 +305,12 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "bitstream-io" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b81e1519b0d82120d2fd469d5bfb2919a9361c48b02d82d04befc1cdd2002452" + [[package]] name = "block" version = "0.1.6" @@ -532,6 +538,7 @@ dependencies = [ "socket2", "thiserror", "tracing", + "vk-video", "webrtc-util", "wgpu", ] @@ -801,6 +808,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = 
"digest" version = "0.10.7" @@ -1073,6 +1091,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "four-cc" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "795cbfc56d419a7ce47ccbb7504dd9a5b7c484c083c356e797de08bd988d9629" + [[package]] name = "fs_extra" version = "1.3.0" @@ -1328,6 +1352,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "h264-reader" +version = "0.7.1-dev" +source = "git+https://github.com/membraneframework-labs/h264-reader.git?branch=@jerzywilczek/scaling-lists#7c982f1089558640021ff8a70a2fa253e3e881c7" +dependencies = [ + "bitstream-io", + "hex-slice", + "log", + "memchr", + "rfc6381-codec", +] + [[package]] name = "half" version = "2.2.1" @@ -1374,6 +1410,12 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +[[package]] +name = "hex-slice" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5491a308e0214554f07a81d8944abe45f552871c12e3c3c6e7e5d354039a6c4c" + [[package]] name = "hexf-parse" version = "0.2.1" @@ -1978,6 +2020,21 @@ dependencies = [ "thiserror", ] +[[package]] +name = "mp4ra-rust" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdbc3d3867085d66ac6270482e66f3dd2c5a18451a3dc9ad7269e94844a536b7" +dependencies = [ + "four-cc", +] + +[[package]] +name = "mpeg4-audio-const" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a1fe2275b68991faded2c80aa4a33dba398b77d276038b8f50701a22e55918" + [[package]] name = "naga" version = "22.1.0" @@ -2653,6 +2710,16 @@ dependencies = [ "usvg", ] +[[package]] +name = "rfc6381-codec" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed54c20f5c3ec82eab6d998b313dc75ec5d5650d4f57675e61d72489040297fd" +dependencies = [ + "mp4ra-rust", + 
"mpeg4-audio-const", +] + [[package]] name = "rgb" version = "0.8.36" @@ -3886,6 +3953,31 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vk-mem" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cb12b79bcec57a3334d0284f1364c1846f378bb47df9779c6dbfcfc245c9404" +dependencies = [ + "ash", + "bitflags 2.6.0", + "cc", +] + +[[package]] +name = "vk-video" +version = "0.1.0" +dependencies = [ + "ash", + "derivative", + "h264-reader", + "thiserror", + "tracing", + "tracing-subscriber 0.3.18", + "vk-mem", + "wgpu", +] + [[package]] name = "want" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index c9be7ba7f..d6710df22 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ members = [ "decklink", "compositor_api", "compositor_web", + "vk-video", ] resolver = "2" @@ -56,6 +57,7 @@ schemars = { git = "https://github.com/membraneframework-labs/schemars", rev = " "preserve_order", ] } shared_memory = "0.12.4" +vk-video = { path = "vk-video" } wgpu = { version = "22.1.0", default-features = false, features = [ "wgsl", "dx12", diff --git a/compositor_api/src/types/from_register_input.rs b/compositor_api/src/types/from_register_input.rs index 7c493e8bc..c3daf2995 100644 --- a/compositor_api/src/types/from_register_input.rs +++ b/compositor_api/src/types/from_register_input.rs @@ -106,6 +106,11 @@ impl TryFrom for pipeline::RegisterInputOptions { options: match video { InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { codec: pipeline::VideoCodec::H264, + decoder: pipeline::VideoDecoder::FFmpegH264, + }, + InputRtpVideoOptions::VulkanVideo => decoder::VideoDecoderOptions { + decoder: pipeline::VideoDecoder::VulkanVideo, + codec: pipeline::VideoCodec::H264, }, }, }), diff --git a/compositor_api/src/types/register_input.rs b/compositor_api/src/types/register_input.rs index 
e66f14fc4..635386e61 100644 --- a/compositor_api/src/types/register_input.rs +++ b/compositor_api/src/types/register_input.rs @@ -126,4 +126,7 @@ pub enum InputRtpAudioOptions { pub enum InputRtpVideoOptions { #[serde(rename = "ffmpeg_h264")] FfmepgH264, + + #[serde(rename = "vulkan_video")] + VulkanVideo, } diff --git a/compositor_pipeline/Cargo.toml b/compositor_pipeline/Cargo.toml index 08a259c3a..b7f6c193c 100644 --- a/compositor_pipeline/Cargo.toml +++ b/compositor_pipeline/Cargo.toml @@ -27,6 +27,7 @@ reqwest = { workspace = true } tracing = { workspace = true } fdk-aac-sys = "0.5.0" rubato = "0.15.0" +vk-video = { workspace = true } wgpu = { workspace = true } glyphon = { workspace = true } diff --git a/compositor_pipeline/src/error.rs b/compositor_pipeline/src/error.rs index 1516fd085..591f4fa96 100644 --- a/compositor_pipeline/src/error.rs +++ b/compositor_pipeline/src/error.rs @@ -9,6 +9,18 @@ use compositor_render::{ use crate::pipeline::{decoder::AacDecoderError, VideoCodec}; use fdk_aac_sys as fdk; +#[derive(Debug, thiserror::Error)] +pub enum InitPipelineError { + #[error(transparent)] + InitRendererEngine(#[from] InitRendererEngineError), + + #[error("Failed to create a download directory.")] + CreateDownloadDir(#[source] std::io::Error), + + #[error(transparent)] + VulkanCtxError(#[from] vk_video::VulkanCtxError), +} + #[derive(Debug, thiserror::Error)] pub enum RegisterInputError { #[error("Failed to register input stream. 
Stream \"{0}\" is already registered.")] @@ -120,6 +132,10 @@ pub enum InputInitError { #[error("Couldn't read decoder init result.")] CannotReadInitResult, + + #[cfg(target_os = "linux")] + #[error(transparent)] + VulkanDecoderError(#[from] vk_video::DecoderError), } pub enum ErrorType { diff --git a/compositor_pipeline/src/pipeline.rs b/compositor_pipeline/src/pipeline.rs index 34da59cb9..da570e7de 100644 --- a/compositor_pipeline/src/pipeline.rs +++ b/compositor_pipeline/src/pipeline.rs @@ -7,8 +7,7 @@ use std::thread; use std::time::Duration; use compositor_render::error::{ - ErrorStack, InitPipelineError, RegisterRendererError, RequestKeyframeError, - UnregisterRendererError, + ErrorStack, RegisterRendererError, RequestKeyframeError, UnregisterRendererError, }; use compositor_render::scene::Component; use compositor_render::web_renderer::WebRendererInitOptions; @@ -32,6 +31,7 @@ use types::RawDataSender; use crate::audio_mixer::AudioMixer; use crate::audio_mixer::MixingStrategy; use crate::audio_mixer::{AudioChannels, AudioMixingParams}; +use crate::error::InitPipelineError; use crate::error::{ RegisterInputError, RegisterOutputError, UnregisterInputError, UnregisterOutputError, }; @@ -61,6 +61,7 @@ use self::pipeline_output::PipelineOutput; pub use self::types::{ AudioCodec, EncodedChunk, EncodedChunkKind, EncoderOutputEvent, RawDataReceiver, VideoCodec, + VideoDecoder, }; pub use pipeline_output::PipelineOutputEndCondition; @@ -109,7 +110,36 @@ pub struct Pipeline { is_started: bool, } -#[derive(Debug, Clone)] +pub struct PreinitializedContext { + pub device: Arc, + pub queue: Arc, + + #[cfg(target_os = "linux")] + pub vulkan_ctx: Arc, +} + +impl PreinitializedContext { + #[cfg(target_os = "linux")] + pub fn new(features: wgpu::Features, limits: wgpu::Limits) -> Result { + let vulkan_ctx = Arc::new(vk_video::VulkanCtx::new(features, limits)?); + Ok(PreinitializedContext { + device: vulkan_ctx.wgpu_ctx.device.clone(), + queue: 
vulkan_ctx.wgpu_ctx.queue.clone(), + vulkan_ctx, + }) + } +} + +impl std::fmt::Debug for PreinitializedContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PreinitializedContext") + .field("device", &self.device) + .field("queue", &self.queue) + .finish() + } +} + +#[derive(Debug)] pub struct Options { pub queue_options: QueueOptions, pub stream_fallback_timeout: Duration, @@ -118,28 +148,59 @@ pub struct Options { pub download_root: PathBuf, pub output_sample_rate: u32, pub wgpu_features: WgpuFeatures, - pub wgpu_ctx: Option<(Arc, Arc)>, pub load_system_fonts: Option, + pub wgpu_ctx: Option, } -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct PipelineCtx { pub output_sample_rate: u32, pub output_framerate: Framerate, pub download_dir: Arc, pub event_emitter: Arc, + #[cfg(target_os = "linux")] + pub vulkan_ctx: Arc, +} + +impl std::fmt::Debug for PipelineCtx { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PipelineCtx") + .field("output_sample_rate", &self.output_sample_rate) + .field("output_framerate", &self.output_framerate) + .field("download_dir", &self.download_dir) + .field("event_emitter", &self.event_emitter) + .finish() + } } impl Pipeline { pub fn new(opts: Options) -> Result<(Self, Arc), InitPipelineError> { + let preinitialized_ctx = match opts.wgpu_ctx { + Some(ctx) => Some(ctx), + None => { + if cfg!(target_os = "linux") { + Some(PreinitializedContext::new(opts.wgpu_features | wgpu::Features::PUSH_CONSTANTS | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + })?) 
+ } else { + None + } + } + }; + + let wgpu_ctx = preinitialized_ctx + .as_ref() + .map(|ctx| (ctx.device.clone(), ctx.queue.clone())); + let (renderer, event_loop) = Renderer::new(RendererOptions { web_renderer: opts.web_renderer, framerate: opts.queue_options.output_framerate, stream_fallback_timeout: opts.stream_fallback_timeout, force_gpu: opts.force_gpu, wgpu_features: opts.wgpu_features, - wgpu_ctx: opts.wgpu_ctx, load_system_fonts: opts.load_system_fonts.unwrap_or(true), + wgpu_ctx, })?; let download_dir = opts @@ -160,6 +221,10 @@ impl Pipeline { output_framerate: opts.queue_options.output_framerate, download_dir: download_dir.into(), event_emitter, + #[cfg(target_os = "linux")] + vulkan_ctx: preinitialized_ctx + .map(|ctx| ctx.vulkan_ctx) + .expect("This should not fail on linux"), }, }; diff --git a/compositor_pipeline/src/pipeline/decoder.rs b/compositor_pipeline/src/pipeline/decoder.rs index 87f26addc..8dd84cd09 100644 --- a/compositor_pipeline/src/pipeline/decoder.rs +++ b/compositor_pipeline/src/pipeline/decoder.rs @@ -1,6 +1,7 @@ use crate::{audio_mixer::InputSamples, queue::PipelineEvent}; use super::types::VideoCodec; +use super::types::VideoDecoder; use bytes::Bytes; use compositor_render::Frame; @@ -17,6 +18,7 @@ pub(super) use video::start_video_decoder_thread; #[derive(Debug, Clone, PartialEq, Eq)] pub struct VideoDecoderOptions { + pub decoder: VideoDecoder, pub codec: VideoCodec, } diff --git a/compositor_pipeline/src/pipeline/decoder/video.rs b/compositor_pipeline/src/pipeline/decoder/video.rs index 52475dae8..2a817461c 100644 --- a/compositor_pipeline/src/pipeline/decoder/video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video.rs @@ -3,23 +3,34 @@ use crossbeam_channel::{Receiver, Sender}; use crate::{ error::InputInitError, - pipeline::{types::EncodedChunk, VideoCodec}, + pipeline::{types::EncodedChunk, PipelineCtx, VideoCodec, VideoDecoder}, queue::PipelineEvent, }; use super::VideoDecoderOptions; mod ffmpeg_h264; +mod vulkan_video; 
pub fn start_video_decoder_thread( options: VideoDecoderOptions, + pipeline_ctx: &PipelineCtx, chunks_receiver: Receiver>, frame_sender: Sender>, input_id: InputId, ) -> Result<(), InputInitError> { - match options.codec { - VideoCodec::H264 => { + match (options.codec, options.decoder) { + (VideoCodec::H264, VideoDecoder::FFmpegH264) => { ffmpeg_h264::start_ffmpeg_decoder_thread(chunks_receiver, frame_sender, input_id) } + + (VideoCodec::H264, VideoDecoder::VulkanVideo) => { + vulkan_video::start_vulkan_video_decoder_thread( + pipeline_ctx.vulkan_ctx.clone(), + chunks_receiver, + frame_sender, + input_id, + ) + } } } diff --git a/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs new file mode 100644 index 000000000..241cdf929 --- /dev/null +++ b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs @@ -0,0 +1,108 @@ +use std::sync::Arc; + +use compositor_render::{Frame, FrameData, InputId, Resolution}; +use crossbeam_channel::{Receiver, Sender}; +use tracing::{debug, error, span, trace, warn, Level}; +use vk_video::{Decoder, VulkanCtx}; + +use crate::{ + error::InputInitError, + pipeline::{EncodedChunk, EncodedChunkKind, VideoCodec}, + queue::PipelineEvent, +}; + +pub fn start_vulkan_video_decoder_thread( + vulkan_ctx: Arc, + chunks_receiver: Receiver>, + frame_sender: Sender>, + input_id: InputId, +) -> Result<(), InputInitError> { + let (init_result_sender, init_result_receiver) = crossbeam_channel::bounded(0); + + std::thread::Builder::new() + .name(format!("h264 vulkan video decoder {}", input_id.0)) + .spawn(move || { + let _span = span!( + Level::INFO, + "h264 vulkan video decoder", + input_id = input_id.to_string() + ) + .entered(); + run_decoder_thread( + vulkan_ctx, + init_result_sender, + chunks_receiver, + frame_sender, + ) + }) + .unwrap(); + + init_result_receiver.recv().unwrap()?; + + Ok(()) +} + +fn run_decoder_thread( + vulkan_ctx: Arc, + init_result_sender: 
Sender>, + chunks_receiver: Receiver>, + frame_sender: Sender>, +) { + let mut decoder = match Decoder::new(vulkan_ctx) { + Ok(decoder) => { + init_result_sender.send(Ok(())).unwrap(); + decoder + } + Err(err) => { + init_result_sender.send(Err(err.into())).unwrap(); + return; + } + }; + + for chunk in chunks_receiver { + let chunk = match chunk { + PipelineEvent::Data(chunk) => chunk, + PipelineEvent::EOS => { + break; + } + }; + + if chunk.kind != EncodedChunkKind::Video(VideoCodec::H264) { + error!( + "H264 decoder received chunk of wrong kind: {:?}", + chunk.kind + ); + continue; + } + + let result = match decoder.decode_to_wgpu_textures(&chunk.data) { + Ok(res) => res, + Err(err) => { + warn!("Failed to decode frame: {err}"); + continue; + } + }; + + for frame in result { + let resolution = Resolution { + width: frame.width() as usize, + height: frame.height() as usize, + }; + + let frame = Frame { + data: FrameData::Nv12WgpuTexture(frame.into()), + pts: chunk.pts, + resolution, + }; + + trace!(pts=?frame.pts, "H264 decoder produced a frame."); + if frame_sender.send(PipelineEvent::Data(frame)).is_err() { + debug!("Failed to send frame from H264 decoder. Channel closed."); + return; + } + } + } + if frame_sender.send(PipelineEvent::EOS).is_err() { + debug!("Failed to send EOS from H264 decoder. 
Channel closed.") + } +} diff --git a/compositor_pipeline/src/pipeline/input.rs b/compositor_pipeline/src/pipeline/input.rs index e0342df4d..baba3159a 100644 --- a/compositor_pipeline/src/pipeline/input.rs +++ b/compositor_pipeline/src/pipeline/input.rs @@ -162,6 +162,7 @@ fn start_input_threads( let (sender, receiver) = bounded(10); start_video_decoder_thread( decoder_options, + pipeline_ctx, chunk_receiver, sender, input_id.clone(), diff --git a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs index 37ddfb837..3811ea005 100644 --- a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs +++ b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs @@ -15,7 +15,7 @@ use crate::{ pipeline::{ decoder::{AacDecoderOptions, AudioDecoderOptions, VideoDecoderOptions}, types::{EncodedChunk, EncodedChunkKind}, - AudioCodec, VideoCodec, + AudioCodec, VideoCodec, VideoDecoder, }, queue::PipelineEvent, }; @@ -235,6 +235,7 @@ impl Mp4FileReader { let decoder_options = VideoDecoderOptions { codec: VideoCodec::H264, + decoder: VideoDecoder::FFmpegH264, }; Some(TrackInfo { diff --git a/compositor_pipeline/src/pipeline/types.rs b/compositor_pipeline/src/pipeline/types.rs index a30361073..6f028d402 100644 --- a/compositor_pipeline/src/pipeline/types.rs +++ b/compositor_pipeline/src/pipeline/types.rs @@ -55,6 +55,12 @@ pub struct RawDataSender { pub audio: Option>>, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VideoDecoder { + FFmpegH264, + VulkanVideo, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum VideoCodec { H264, diff --git a/compositor_render/src/error.rs b/compositor_render/src/error.rs index 5f6474355..49884c28e 100644 --- a/compositor_render/src/error.rs +++ b/compositor_render/src/error.rs @@ -13,15 +13,6 @@ use crate::{OutputId, RendererId}; pub use crate::registry::RegisterError; pub use crate::wgpu::WgpuError; -#[derive(Debug, thiserror::Error)] -pub enum 
InitPipelineError { - #[error(transparent)] - InitRendererEngine(#[from] InitRendererEngineError), - - #[error("Failed to create a download directory.")] - CreateDownloadDir(#[source] std::io::Error), -} - #[derive(Debug, thiserror::Error)] pub enum InitRendererEngineError { #[error("Failed to initialize a wgpu context.")] diff --git a/compositor_render/src/wgpu/ctx.rs b/compositor_render/src/wgpu/ctx.rs index 40207b134..e6ee5e984 100644 --- a/compositor_render/src/wgpu/ctx.rs +++ b/compositor_render/src/wgpu/ctx.rs @@ -42,12 +42,7 @@ impl WgpuCtx { } fn check_wgpu_ctx(device: &wgpu::Device, features: wgpu::Features) { - let expected_features = match cfg!(target_arch = "wasm32") { - false => { - features | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS - } - true => features | wgpu::Features::PUSH_CONSTANTS, - }; + let expected_features = features | required_wgpu_features(); let missing_features = expected_features.difference(device.features()); if !missing_features.is_empty() { @@ -92,6 +87,13 @@ impl WgpuCtx { } } +pub fn required_wgpu_features() -> wgpu::Features { + match cfg!(target_arch = "wasm32") { + false => wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, + true => wgpu::Features::PUSH_CONSTANTS, + } +} + pub fn create_wgpu_ctx( force_gpu: bool, features: wgpu::Features, @@ -120,8 +122,7 @@ pub fn create_wgpu_ctx( error!("Selected adapter is CPU based. 
Aborting."); return Err(CreateWgpuCtxError::NoAdapter); } - let required_features = - features | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS; + let required_features = features | required_wgpu_features(); let missing_features = required_features.difference(adapter.features()); if !missing_features.is_empty() { diff --git a/compositor_render/src/wgpu/texture/nv12.rs b/compositor_render/src/wgpu/texture/nv12.rs index 442702742..85c3f6dca 100644 --- a/compositor_render/src/wgpu/texture/nv12.rs +++ b/compositor_render/src/wgpu/texture/nv12.rs @@ -31,7 +31,7 @@ impl<'a> NV12TextureView<'a> { let view_y = texture.create_view(&wgpu::TextureViewDescriptor { label: Some("y plane nv12 texture view"), dimension: Some(wgpu::TextureViewDimension::D2), - format: Some(wgpu::TextureFormat::NV12), + format: Some(wgpu::TextureFormat::R8Unorm), aspect: wgpu::TextureAspect::Plane0, ..Default::default() }); @@ -39,7 +39,7 @@ impl<'a> NV12TextureView<'a> { let view_uv = texture.create_view(&wgpu::TextureViewDescriptor { label: Some("uv plane nv12 texture view"), dimension: Some(wgpu::TextureViewDimension::D2), - format: Some(wgpu::TextureFormat::NV12), + format: Some(wgpu::TextureFormat::Rg8Unorm), aspect: wgpu::TextureAspect::Plane1, ..Default::default() }); diff --git a/integration_tests/examples/raw_channel_input.rs b/integration_tests/examples/raw_channel_input.rs index 2fec88abd..5bb660079 100644 --- a/integration_tests/examples/raw_channel_input.rs +++ b/integration_tests/examples/raw_channel_input.rs @@ -17,12 +17,12 @@ use compositor_pipeline::{ OutputOptions, OutputProtocolOptions, }, rtp::RequestedPort, - Options, Pipeline, PipelineOutputEndCondition, RegisterOutputOptions, VideoCodec, + Options, Pipeline, PipelineOutputEndCondition, PreinitializedContext, + RegisterOutputOptions, VideoCodec, }, queue::{PipelineEvent, QueueInputOptions}, }; use compositor_render::{ - create_wgpu_ctx, error::ErrorStack, scene::{Component, InputStreamComponent}, Frame, 
FrameData, InputId, OutputId, Resolution, @@ -44,7 +44,15 @@ fn main() { level: "info,wgpu_hal=warn,wgpu_core=warn".to_string(), }); let config = read_config(); - let (wgpu_device, wgpu_queue) = create_wgpu_ctx(false, Default::default()).unwrap(); + let ctx = PreinitializedContext::new( + wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + .unwrap(); + let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { queue_options: config.queue_options, @@ -54,8 +62,8 @@ fn main() { download_root: config.download_root, output_sample_rate: config.output_sample_rate, wgpu_features: config.required_wgpu_features, - wgpu_ctx: Some((wgpu_device.clone(), wgpu_queue.clone())), load_system_fonts: Some(true), + wgpu_ctx: Some(ctx), }) .unwrap_or_else(|err| { panic!( diff --git a/integration_tests/examples/raw_channel_output.rs b/integration_tests/examples/raw_channel_output.rs index a22c655a8..1fa196f5f 100644 --- a/integration_tests/examples/raw_channel_output.rs +++ b/integration_tests/examples/raw_channel_output.rs @@ -16,14 +16,13 @@ use compositor_pipeline::{ InputOptions, }, output::{RawAudioOptions, RawDataOutputOptions, RawVideoOptions}, - Options, PipelineOutputEndCondition, RawDataReceiver, RegisterInputOptions, - RegisterOutputOptions, + Options, PipelineOutputEndCondition, PreinitializedContext, RawDataReceiver, + RegisterInputOptions, RegisterOutputOptions, }, queue::{PipelineEvent, QueueInputOptions}, Pipeline, }; use compositor_render::{ - create_wgpu_ctx, error::ErrorStack, scene::{Component, InputStreamComponent}, Frame, FrameData, InputId, OutputId, Resolution, @@ -58,7 +57,15 @@ fn main() { }); let mut config = read_config(); config.queue_options.ahead_of_time_processing = true; - let (wgpu_device, wgpu_queue) = create_wgpu_ctx(false, 
Default::default()).unwrap(); + let ctx = PreinitializedContext::new( + wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + .unwrap(); + let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { queue_options: config.queue_options, @@ -68,8 +75,8 @@ fn main() { download_root: config.download_root, output_sample_rate: config.output_sample_rate, wgpu_features: config.required_wgpu_features, - wgpu_ctx: Some((wgpu_device.clone(), wgpu_queue.clone())), load_system_fonts: Some(true), + wgpu_ctx: Some(ctx), }) .unwrap_or_else(|err| { panic!( diff --git a/integration_tests/examples/vulkan.rs b/integration_tests/examples/vulkan.rs new file mode 100644 index 000000000..675985901 --- /dev/null +++ b/integration_tests/examples/vulkan.rs @@ -0,0 +1,109 @@ +use anyhow::Result; +use compositor_api::types::Resolution; +use serde_json::json; +use std::time::Duration; + +use integration_tests::{ + examples::{self, run_example, TestSample}, + ffmpeg::{start_ffmpeg_receive, start_ffmpeg_send}, +}; + +const VIDEO_RESOLUTION: Resolution = Resolution { + width: 1280, + height: 720, +}; + +const IP: &str = "127.0.0.1"; +const INPUT_PORT: u16 = 8002; +const OUTPUT_PORT: u16 = 8004; + +const VIDEOS: u16 = 6; + +fn main() { + run_example(client_code); +} + +fn client_code() -> Result<()> { + start_ffmpeg_receive(Some(OUTPUT_PORT), None)?; + + let mut children = Vec::new(); + + for i in 1..VIDEOS + 1 { + let input_name = format!("input_{i}"); + + examples::post( + &format!("input/{input_name}/register"), + &json!({ + "type": "rtp_stream", + "port": INPUT_PORT + 2 + 2 * i, + "video": { + "decoder": "vulkan_video" + } + }), + )?; + + children.push(json!({ + "type": "input_stream", + "input_id": input_name, + })); + } + + let scene = json!({ + "type": "tiles", + "id": 
"tile", + "padding": 5, + "background_color_rgba": "#444444FF", + "children": children, + "transition": { + "duration_ms": 700, + "easing_function": { + "function_name": "cubic_bezier", + "points": [0.35, 0.22, 0.1, 0.8] + } + }, + }); + + let shader_source = include_str!("./silly.wgsl"); + examples::post( + "shader/shader_example_1/register", + &json!({ + "source": shader_source, + }), + )?; + + examples::post( + "output/output_1/register", + &json!({ + "type": "rtp_stream", + "port": OUTPUT_PORT, + "ip": IP, + "video": { + "resolution": { + "width": VIDEO_RESOLUTION.width, + "height": VIDEO_RESOLUTION.height, + }, + "encoder": { + "type": "ffmpeg_h264", + "preset": "ultrafast" + }, + "initial": { + "root": scene + } + } + }), + )?; + + std::thread::sleep(Duration::from_millis(500)); + + examples::post("start", &json!({}))?; + + for i in 1..VIDEOS + 1 { + start_ffmpeg_send( + IP, + Some(INPUT_PORT + 2 + 2 * i), + None, + TestSample::BigBuckBunny, + )?; + } + Ok(()) +} diff --git a/src/state.rs b/src/state.rs index e3f735155..236f4bebc 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,8 +1,11 @@ use std::sync::{Arc, Mutex, MutexGuard}; use axum::response::IntoResponse; -use compositor_pipeline::pipeline::{self}; -use compositor_render::{error::InitPipelineError, EventLoop}; +use compositor_pipeline::{ + error::InitPipelineError, + pipeline::{self}, +}; +use compositor_render::EventLoop; use serde::Serialize; diff --git a/vk-video/.gitignore b/vk-video/.gitignore new file mode 100644 index 000000000..dde3e786e --- /dev/null +++ b/vk-video/.gitignore @@ -0,0 +1,5 @@ +/target +Cargo.lock +*.h264 +*.jpeg +*.mp4 diff --git a/vk-video/Cargo.toml b/vk-video/Cargo.toml new file mode 100644 index 000000000..31b60e970 --- /dev/null +++ b/vk-video/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "vk-video" +version = "0.1.0" +edition = "2021" +authors = ["Software Mansion "] +readme = "README.md" +license = "MIT" +repository = 
"https://github.com/software-mansion/live-compositor" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +ash = "0.38.0" +derivative = "2.2.0" +h264-reader = { git = "https://github.com/membraneframework-labs/h264-reader.git", branch = "@jerzywilczek/scaling-lists" } +thiserror = "1.0.59" +tracing = "0.1.40" +vk-mem = "0.4.0" +wgpu = "22.1.0" + +[dev-dependencies] +tracing-subscriber = "0.3.18" + diff --git a/vk-video/LICENSE b/vk-video/LICENSE new file mode 100644 index 000000000..f9b288684 --- /dev/null +++ b/vk-video/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 Software Mansion + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/vk-video/examples/basic.rs b/vk-video/examples/basic.rs new file mode 100644 index 000000000..d75452d81 --- /dev/null +++ b/vk-video/examples/basic.rs @@ -0,0 +1,39 @@ +use std::io::Write; + +fn main() { + let subscriber = tracing_subscriber::FmtSubscriber::builder() + .with_max_level(tracing::Level::INFO) + .finish(); + + tracing::subscriber::set_global_default(subscriber).expect("Failed to initialize tracing"); + + let args = std::env::args().collect::>(); + if args.len() != 2 { + println!("usage: {} FILENAME", args[0]); + return; + } + + let h264_bytestream = std::fs::read(&args[1]).unwrap_or_else(|_| panic!("read {}", args[1])); + + let vulkan_ctx = std::sync::Arc::new( + vk_video::VulkanCtx::new( + wgpu::Features::empty(), + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + .unwrap(), + ); + let mut decoder = vk_video::Decoder::new(vulkan_ctx).unwrap(); + + let mut output_file = std::fs::File::create("output.nv12").unwrap(); + + for chunk in h264_bytestream.chunks(256) { + let frames = decoder.decode_to_bytes(chunk).unwrap(); + + for frame in frames { + output_file.write_all(&frame).unwrap(); + } + } +} diff --git a/vk-video/examples/wgpu.rs b/vk-video/examples/wgpu.rs new file mode 100644 index 000000000..a5a8f03bd --- /dev/null +++ b/vk-video/examples/wgpu.rs @@ -0,0 +1,157 @@ +use std::io::Write; + +fn main() { + let subscriber = tracing_subscriber::FmtSubscriber::builder() + .with_max_level(tracing::Level::INFO) + .finish(); + + tracing::subscriber::set_global_default(subscriber).expect("Failed to initialize tracing"); + + let args = std::env::args().collect::>(); + if args.len() != 2 { + println!("usage: {} FILENAME", args[0]); + return; + } + let h264_bytestream = std::fs::read(&args[1]).unwrap_or_else(|_| panic!("read {}", args[1])); + + let vulkan_ctx = std::sync::Arc::new( + vk_video::VulkanCtx::new( + wgpu::Features::empty(), + wgpu::Limits { + max_push_constant_size: 128, + ..Default::default() + }, + ) + 
.unwrap(), + ); + let mut decoder = vk_video::Decoder::new(vulkan_ctx.clone()).unwrap(); + + let mut output_file = std::fs::File::create("output.nv12").unwrap(); + + for chunk in h264_bytestream.chunks(256) { + let frames = decoder.decode_to_wgpu_textures(chunk).unwrap(); + + let device = &vulkan_ctx.wgpu_ctx.device; + let queue = &vulkan_ctx.wgpu_ctx.queue; + for frame in frames { + let decoded_frame = download_wgpu_texture(device, queue, frame); + output_file.write_all(&decoded_frame).unwrap(); + } + } +} + +fn download_wgpu_texture( + device: &wgpu::Device, + queue: &wgpu::Queue, + frame: wgpu::Texture, +) -> Vec { + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + let y_plane_bytes_per_row = (frame.width() as u64 + 255) / 256 * 256; + let y_plane_size = y_plane_bytes_per_row * frame.height() as u64; + + let uv_plane_bytes_per_row = y_plane_bytes_per_row; + let uv_plane_size = uv_plane_bytes_per_row * frame.height() as u64 / 2; + + let buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: y_plane_size + uv_plane_size, + usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + aspect: wgpu::TextureAspect::Plane0, + origin: wgpu::Origin3d { x: 0, y: 0, z: 0 }, + texture: &frame, + mip_level: 0, + }, + wgpu::ImageCopyBuffer { + buffer: &buffer, + layout: wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: Some(y_plane_bytes_per_row as u32), + rows_per_image: None, + }, + }, + wgpu::Extent3d { + width: frame.width(), + height: frame.height(), + depth_or_array_layers: 1, + }, + ); + + encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + aspect: wgpu::TextureAspect::Plane1, + origin: wgpu::Origin3d { x: 0, y: 0, z: 0 }, + texture: &frame, + mip_level: 0, + }, + wgpu::ImageCopyBuffer { + buffer: &buffer, + layout: wgpu::ImageDataLayout { + offset: y_plane_size, + bytes_per_row: 
Some(uv_plane_bytes_per_row as u32), + rows_per_image: None, + }, + }, + wgpu::Extent3d { + width: frame.width() / 2, + height: frame.height() / 2, + depth_or_array_layers: 1, + }, + ); + + queue.submit(Some(encoder.finish())); + + let (y_tx, y_rx) = std::sync::mpsc::channel(); + let (uv_tx, uv_rx) = std::sync::mpsc::channel(); + let width = frame.width() as usize; + + wgpu::util::DownloadBuffer::read_buffer( + device, + queue, + &buffer.slice(..y_plane_size), + move |buf| { + let buf = buf.unwrap(); + let mut result = Vec::new(); + + for chunk in buf + .chunks(y_plane_bytes_per_row as usize) + .map(|chunk| &chunk[..width]) + { + result.write_all(chunk).unwrap(); + } + + y_tx.send(result).unwrap(); + }, + ); + + wgpu::util::DownloadBuffer::read_buffer( + device, + queue, + &buffer.slice(y_plane_size..), + move |buf| { + let buf = buf.unwrap(); + let mut result = Vec::new(); + + for chunk in buf + .chunks(uv_plane_bytes_per_row as usize) + .map(|chunk| &chunk[..width]) + { + result.write_all(chunk).unwrap(); + } + + uv_tx.send(result).unwrap(); + }, + ); + + device.poll(wgpu::Maintain::Wait); + + let mut result = Vec::new(); + result.append(&mut y_rx.recv().unwrap()); + result.append(&mut uv_rx.recv().unwrap()); + + result +} diff --git a/vk-video/src/lib.rs b/vk-video/src/lib.rs new file mode 100644 index 000000000..e47c7deda --- /dev/null +++ b/vk-video/src/lib.rs @@ -0,0 +1,68 @@ +mod parser; +mod vulkan_decoder; + +use parser::Parser; +use vulkan_decoder::VulkanDecoder; + +pub use parser::ParserError; +pub use vulkan_decoder::{VulkanCtx, VulkanCtxError, VulkanDecoderError}; + +pub use vulkan_decoder::WgpuCtx; + +pub struct Decoder<'a> { + vulkan_decoder: VulkanDecoder<'a>, + parser: Parser, +} + +#[derive(Debug, thiserror::Error)] +pub enum DecoderError { + #[error("Error originating in the decoder: {0}")] + VulkanDecoderError(#[from] VulkanDecoderError), + + #[error("Error originating in the h264 parser: {0}")] + ParserError(#[from] ParserError), +} + +impl<'a> 
Decoder<'a> { + pub fn new(vulkan_ctx: std::sync::Arc) -> Result { + let parser = Parser::default(); + let vulkan_decoder = VulkanDecoder::new(vulkan_ctx)?; + + Ok(Self { + parser, + vulkan_decoder, + }) + } +} + +impl Decoder<'_> { + /// The result is a [`Vec`] of [`Vec`]. Each [`Vec`] contains a single frame in the + /// NV12 format. + pub fn decode_to_bytes( + &mut self, + h264_bytestream: &[u8], + ) -> Result>, DecoderError> { + let instructions = self + .parser + .parse(h264_bytestream) + .into_iter() + .collect::, _>>()?; + + Ok(self.vulkan_decoder.decode_to_bytes(&instructions)?) + } + + // TODO: the below hasn't been verified. + /// The produced textures have the [`wgpu::TextureFormat::NV12`] format and can be used as a copy source or a texture binding. + pub fn decode_to_wgpu_textures( + &mut self, + h264_bytestream: &[u8], + ) -> Result, DecoderError> { + let instructions = self + .parser + .parse(h264_bytestream) + .into_iter() + .collect::, _>>()?; + + Ok(self.vulkan_decoder.decode_to_wgpu_textures(&instructions)?) 
+ } +} diff --git a/vk-video/src/parser.rs b/vk-video/src/parser.rs new file mode 100644 index 000000000..0b11a640e --- /dev/null +++ b/vk-video/src/parser.rs @@ -0,0 +1,761 @@ +use std::{ + io::Read, + sync::{mpsc, Arc}, +}; + +use h264_reader::{ + annexb::AnnexBReader, + nal::{ + pps::PicParameterSet, + slice::{DecRefPicMarking, NumRefIdxActive, RefPicListModifications, SliceHeader}, + sps::SeqParameterSet, + Nal, RefNal, + }, + push::{AccumulatedNalHandler, NalAccumulator, NalInterest}, +}; +use tracing::trace; + +mod au_splitter; + +#[derive(Debug, thiserror::Error)] +pub enum ReferenceManagementError { + #[error("B frames are not supported")] + BFramesNotSupported, + + #[error("Long-term references are not supported")] + LongTermRefsNotSupported, + + #[error("SI frames are not supported")] + SIFramesNotSupported, + + #[error("SP frames are not supported")] + SPFramesNotSupported, + + #[error("Adaptive memory control decoded reference picture marking process is not supported")] + AdaptiveMemCtlNotSupported, + + #[error("Reference picture list modifications are not supported")] + RefPicListModificationsNotSupported, + + #[error("PicOrderCntType {0} is not supperted")] + PicOrderCntTypeNotSupported(u8), + + #[error("pic_order_cnt_lsb is not present in a slice header, but is required for decoding")] + PicOrderCntLsbNotPresent, +} + +#[derive(Debug, Default, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct ReferenceId(usize); + +#[derive(Clone, derivative::Derivative)] +#[derivative(Debug)] +#[allow(non_snake_case)] +pub struct DecodeInformation { + pub(crate) reference_list: Option>, + #[derivative(Debug = "ignore")] + pub(crate) rbsp_bytes: Vec, + pub(crate) slice_indices: Vec, + #[derivative(Debug = "ignore")] + pub(crate) header: Arc, + pub(crate) sps_id: u8, + pub(crate) pps_id: u8, + pub(crate) picture_info: PictureInfo, +} + +#[derive(Debug, Clone)] +pub(crate) struct ReferencePictureInfo { + pub(crate) id: ReferenceId, + pub(crate) 
picture_info: PictureInfo, +} + +#[derive(Debug, Clone, Copy)] +#[allow(non_snake_case)] +pub(crate) struct PictureInfo { + pub(crate) used_for_long_term_reference: bool, + pub(crate) non_existing: bool, + pub(crate) FrameNum: u16, + pub(crate) PicOrderCnt: [i32; 2], +} + +#[derive(Debug, Clone)] +pub enum DecoderInstruction { + Decode { + decode_info: DecodeInformation, + }, + + DecodeAndStoreAs { + decode_info: DecodeInformation, + reference_id: ReferenceId, + }, + + Idr { + decode_info: DecodeInformation, + reference_id: ReferenceId, + }, + + Drop { + reference_ids: Vec, + }, + + Sps(SeqParameterSet), + + Pps(PicParameterSet), +} + +#[derive(Debug, Default)] +struct ReferenceContext { + pictures: ReferencePictures, + next_reference_id: ReferenceId, + _previous_frame_num: usize, + prev_pic_order_cnt_msb: i32, + prev_pic_order_cnt_lsb: i32, +} + +impl ReferenceContext { + fn get_next_reference_id(&mut self) -> ReferenceId { + let result = self.next_reference_id; + self.next_reference_id = ReferenceId(result.0 + 1); + result + } + + fn reset_state(&mut self) { + *self = Self { + pictures: ReferencePictures::default(), + next_reference_id: ReferenceId::default(), + _previous_frame_num: 0, + prev_pic_order_cnt_msb: 0, + prev_pic_order_cnt_lsb: 0, + }; + } + + fn add_short_term_reference( + &mut self, + header: Arc, + pic_order_cnt: [i32; 2], + ) -> ReferenceId { + let id = self.get_next_reference_id(); + self.pictures.short_term.push(ShortTermReferencePicture { + header, + id, + pic_order_cnt, + }); + id + } + + fn put_picture( + &mut self, + mut slices: Vec, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result, ParserError> { + let header = slices.last().unwrap().header.clone(); + let mut rbsp_bytes = Vec::new(); + let mut slice_indices = Vec::new(); + for slice in &mut slices { + if slice.rbsp_bytes.is_empty() { + continue; + } + slice_indices.push(rbsp_bytes.len()); + rbsp_bytes.append(&mut slice.rbsp_bytes); + } + + match header.dec_ref_pic_marking { + 
Some(DecRefPicMarking::Idr { + long_term_reference_flag, + .. + }) => { + if long_term_reference_flag { + Err(ReferenceManagementError::LongTermRefsNotSupported)?; + } + + let decode_info = self.decode_information_for_frame( + header.clone(), + slice_indices, + rbsp_bytes, + sps, + pps, + )?; + + self.reset_state(); + + let reference_id = + self.add_short_term_reference(header, decode_info.picture_info.PicOrderCnt); + + Ok(vec![DecoderInstruction::Idr { + decode_info, + reference_id, + }]) + } + + Some(DecRefPicMarking::SlidingWindow) => { + let num_short_term = self.pictures.short_term.len(); + let num_long_term = self.pictures.long_term.len(); + + let decode_info = self.decode_information_for_frame( + header.clone(), + slice_indices, + rbsp_bytes, + sps, + pps, + )?; + let reference_id = self + .add_short_term_reference(header.clone(), decode_info.picture_info.PicOrderCnt); + + let mut decoder_instructions = vec![DecoderInstruction::DecodeAndStoreAs { + decode_info, + reference_id, + }]; + + if num_short_term + num_long_term == sps.max_num_ref_frames.max(1) as usize + && !self.pictures.short_term.is_empty() + { + let (idx, _) = self + .pictures + .short_term + .iter() + .enumerate() + .min_by_key(|(_, reference)| { + reference + .decode_picture_numbers(header.frame_num as i64, sps) + .unwrap() + .FrameNumWrap + }) + .unwrap(); + + decoder_instructions.push(DecoderInstruction::Drop { + reference_ids: vec![self.pictures.short_term.remove(idx).id], + }) + } + + Ok(decoder_instructions) + } + + Some(DecRefPicMarking::Adaptive(_)) => { + Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
+ } + + // this picture is not a reference + None => Ok(vec![DecoderInstruction::Decode { + decode_info: self.decode_information_for_frame( + header, + slice_indices, + rbsp_bytes, + sps, + pps, + )?, + }]), + } + } + + fn decode_information_for_frame( + &mut self, + header: Arc, + slice_indices: Vec, + rbsp_bytes: Vec, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result { + let reference_list = match header.slice_type.family { + h264_reader::nal::slice::SliceFamily::P => { + let reference_list = + self.initialize_reference_picture_list_for_frame(&header, sps, pps)?; + + match &header.ref_pic_list_modification { + Some(RefPicListModifications::P { + ref_pic_list_modification_l0, + }) => { + if !ref_pic_list_modification_l0.is_empty() { + Err(ReferenceManagementError::RefPicListModificationsNotSupported)?; + } + } + + None + | Some(RefPicListModifications::I) + | Some(RefPicListModifications::B { .. }) => unreachable!(), + } + + Some(reference_list) + } + h264_reader::nal::slice::SliceFamily::I => None, + h264_reader::nal::slice::SliceFamily::B => { + return Err(ReferenceManagementError::BFramesNotSupported)? + } + h264_reader::nal::slice::SliceFamily::SP => { + return Err(ReferenceManagementError::SPFramesNotSupported)? + } + h264_reader::nal::slice::SliceFamily::SI => { + return Err(ReferenceManagementError::SIFramesNotSupported)? 
+ } + }; + + let pic_order_cnt = match sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeZero { + log2_max_pic_order_cnt_lsb_minus4, + } => { + // this section is very hard to read, but all of this code is just copied from the + // h.264 spec, where it looks almost exactly like this + + let max_pic_order_cnt_lsb = 2_i32.pow(log2_max_pic_order_cnt_lsb_minus4 as u32 + 4); + + let (prev_pic_order_cnt_msb, prev_pic_order_cnt_lsb) = + if header.idr_pic_id.is_some() { + (0, 0) + } else { + (self.prev_pic_order_cnt_msb, self.prev_pic_order_cnt_lsb) + }; + + let (pic_order_cnt_lsb, delta_pic_order_cnt_bottom) = match header + .pic_order_cnt_lsb + .as_ref() + .ok_or(ReferenceManagementError::PicOrderCntLsbNotPresent)? + { + h264_reader::nal::slice::PicOrderCountLsb::Frame(pic_order_cnt_lsb) => { + (*pic_order_cnt_lsb, 0) + } + h264_reader::nal::slice::PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + } => (*pic_order_cnt_lsb, *delta_pic_order_cnt_bottom), + h264_reader::nal::slice::PicOrderCountLsb::FieldsDelta(_) => { + Err(ReferenceManagementError::PicOrderCntLsbNotPresent)? 
+ } + }; + + let pic_order_cnt_lsb = pic_order_cnt_lsb as i32; + + let pic_order_cnt_msb = if pic_order_cnt_lsb < prev_pic_order_cnt_lsb + && prev_pic_order_cnt_lsb - pic_order_cnt_lsb >= max_pic_order_cnt_lsb / 2 + { + prev_pic_order_cnt_msb + max_pic_order_cnt_lsb + } else if pic_order_cnt_lsb > prev_pic_order_cnt_lsb + && pic_order_cnt_lsb - prev_pic_order_cnt_lsb > max_pic_order_cnt_lsb / 2 + { + prev_pic_order_cnt_msb - max_pic_order_cnt_lsb + } else { + prev_pic_order_cnt_msb + }; + + let pic_order_cnt = if header.field_pic == h264_reader::nal::slice::FieldPic::Frame + { + let top_field_order_cnt = pic_order_cnt_msb + pic_order_cnt_lsb; + + let bottom_field_order_cnt = top_field_order_cnt + delta_pic_order_cnt_bottom; + + top_field_order_cnt.min(bottom_field_order_cnt) + } else { + pic_order_cnt_msb + pic_order_cnt_lsb + }; + + self.prev_pic_order_cnt_msb = pic_order_cnt_msb; + self.prev_pic_order_cnt_lsb = pic_order_cnt_lsb; + + pic_order_cnt + } + + h264_reader::nal::sps::PicOrderCntType::TypeOne { .. } => { + Err(ReferenceManagementError::PicOrderCntTypeNotSupported(1))? + } + + h264_reader::nal::sps::PicOrderCntType::TypeTwo => match header.dec_ref_pic_marking { + None => 2 * header.frame_num as i32 - 1, + Some(DecRefPicMarking::Idr { .. }) | Some(DecRefPicMarking::SlidingWindow) => { + 2 * header.frame_num as i32 + } + Some(DecRefPicMarking::Adaptive(..)) => { + Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
+ } + }, + }; + + let pic_order_cnt = [pic_order_cnt; 2]; + + Ok(DecodeInformation { + reference_list, + header: header.clone(), + slice_indices, + rbsp_bytes, + sps_id: sps.id().id(), + pps_id: pps.pic_parameter_set_id.id(), + picture_info: PictureInfo { + non_existing: false, + used_for_long_term_reference: false, + PicOrderCnt: pic_order_cnt, + FrameNum: header.frame_num, + }, + }) + } + + fn initialize_short_term_reference_picture_list_for_frame( + &self, + header: &SliceHeader, + sps: &SeqParameterSet, + ) -> Result, ParserError> { + let mut short_term_reference_list = self + .pictures + .short_term + .iter() + .map(|reference| { + Ok(( + reference, + reference.decode_picture_numbers(header.frame_num.into(), sps)?, + )) + }) + .collect::, ParserError>>()?; + + short_term_reference_list.sort_by_key(|(_, numbers)| -numbers.PicNum); + + let short_term_reference_list = short_term_reference_list + .into_iter() + .map(|(reference, numbers)| ReferencePictureInfo { + id: reference.id, + picture_info: PictureInfo { + FrameNum: numbers.FrameNum as u16, + used_for_long_term_reference: false, + non_existing: false, + PicOrderCnt: reference.pic_order_cnt, + }, + }) + .collect::>(); + + Ok(short_term_reference_list) + } + + fn initialize_long_term_reference_picture_list_for_frame( + &self, + ) -> Result, ReferenceManagementError> { + if !self.pictures.long_term.is_empty() { + panic!("long-term references are not supported!"); + } + + Ok(Vec::new()) + } + + fn initialize_reference_picture_list_for_frame( + &self, + header: &SliceHeader, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result, ParserError> { + let num_ref_idx_l0_active = header + .num_ref_idx_active + .as_ref() + .map(|num| match num { + NumRefIdxActive::P { + num_ref_idx_l0_active_minus1, + } => Ok(*num_ref_idx_l0_active_minus1), + NumRefIdxActive::B { .. } => Err(ReferenceManagementError::BFramesNotSupported), + }) + .unwrap_or(Ok(pps.num_ref_idx_l0_default_active_minus1))? 
+ + 1; + + let short_term_reference_list = + self.initialize_short_term_reference_picture_list_for_frame(header, sps)?; + + let long_term_reference_list = + self.initialize_long_term_reference_picture_list_for_frame()?; + + let mut reference_list = short_term_reference_list + .into_iter() + .chain(long_term_reference_list) + .collect::>(); + + reference_list.truncate(num_ref_idx_l0_active as usize); + + Ok(reference_list) + } +} + +#[derive(Debug)] +struct ShortTermReferencePicture { + header: Arc, + id: ReferenceId, + pic_order_cnt: [i32; 2], +} + +impl ShortTermReferencePicture { + #[allow(non_snake_case)] + fn decode_picture_numbers( + &self, + current_frame_num: i64, + sps: &SeqParameterSet, + ) -> Result { + if self.header.field_pic != h264_reader::nal::slice::FieldPic::Frame { + return Err(ParserError::FieldsNotSupported); + } + + let MaxFrameNum = sps.max_frame_num(); + + let FrameNum = self.header.frame_num as i64; + + let FrameNumWrap = if FrameNum > current_frame_num { + FrameNum - MaxFrameNum + } else { + FrameNum + }; + + // this assumes we're dealing with a short-term reference frame + let PicNum = FrameNumWrap; + + Ok(ShortTermReferencePictureNumbers { + FrameNum, + FrameNumWrap, + PicNum, + }) + } +} + +#[derive(Debug)] +struct LongTermReferencePicture { + _header: Arc, + _id: ReferenceId, +} + +#[allow(non_snake_case)] +struct ShortTermReferencePictureNumbers { + FrameNum: i64, + + FrameNumWrap: i64, + + PicNum: i64, +} + +#[derive(Debug, Default)] +struct ReferencePictures { + long_term: Vec, + short_term: Vec, +} + +#[derive(Debug, thiserror::Error)] +pub enum ParserError { + #[error(transparent)] + ReferenceManagementError(#[from] ReferenceManagementError), + + #[error("Bitstreams that allow gaps in frame_num are not supported")] + GapsInFrameNumNotSupported, + + #[error("Streams containing fields instead of frames are not supported")] + FieldsNotSupported, + + #[error("Error while parsing a NAL header: {0:?}")] + 
NalHeaderParseError(h264_reader::nal::NalHeaderError), + + #[error("Error while parsing SPS: {0:?}")] + SpsParseError(h264_reader::nal::sps::SpsError), + + #[error("Error while parsing PPS: {0:?}")] + PpsParseError(h264_reader::nal::pps::PpsError), + + #[error("Error while parsing a slice: {0:?}")] + SliceParseError(h264_reader::nal::slice::SliceHeaderError), +} + +struct NalReceiver { + parser_ctx: h264_reader::Context, + au_splitter: au_splitter::AUSplitter, + reference_ctx: ReferenceContext, + debug_channel: mpsc::Sender, + decoder_channel: mpsc::Sender>, +} + +impl AccumulatedNalHandler for NalReceiver { + fn nal(&mut self, nal: RefNal<'_>) -> NalInterest { + if !nal.is_complete() { + return NalInterest::Buffer; + } + + match self.handle_nal(nal) { + Ok((debug_nalu, instructions)) => { + self.debug_channel.send(debug_nalu).unwrap(); + for instruction in instructions { + self.decoder_channel.send(Ok(instruction)).unwrap(); + } + } + + Err(err) => { + self.decoder_channel.send(Err(err)).unwrap(); + } + } + + NalInterest::Ignore + } +} + +impl NalReceiver { + fn handle_nal( + &mut self, + nal: RefNal<'_>, + ) -> Result<(NaluDebugInfo, Vec), ParserError> { + let nal_unit_type = nal + .header() + .map_err(ParserError::NalHeaderParseError)? + .nal_unit_type(); + + match nal_unit_type { + h264_reader::nal::UnitType::SeqParameterSet => { + let parsed = h264_reader::nal::sps::SeqParameterSet::from_bits(nal.rbsp_bits()) + .map_err(ParserError::SpsParseError)?; + + // Perhaps this shouldn't be here, but this is the only place we process sps + // before sending them to the decoder. It also seems that this is the only thing we + // need to check about the sps. + if parsed.gaps_in_frame_num_value_allowed_flag { + // TODO: what else to do here? sure we'll throw an error, but shouldn't we also + // terminate the parser somehow? 
+ // perhaps this should be considered in other places we throw errors too + Err(ParserError::GapsInFrameNumNotSupported) + } else { + self.parser_ctx.put_seq_param_set(parsed.clone()); + Ok(( + NaluDebugInfo::Sps(parsed.clone()), + vec![DecoderInstruction::Sps(parsed)], + )) + } + } + + h264_reader::nal::UnitType::PicParameterSet => { + let parsed = h264_reader::nal::pps::PicParameterSet::from_bits( + &self.parser_ctx, + nal.rbsp_bits(), + ) + .map_err(ParserError::PpsParseError)?; + + self.parser_ctx.put_pic_param_set(parsed.clone()); + + Ok(( + NaluDebugInfo::Pps(parsed.clone()), + vec![DecoderInstruction::Pps(parsed)], + )) + } + + h264_reader::nal::UnitType::SliceLayerWithoutPartitioningNonIdr + | h264_reader::nal::UnitType::SliceLayerWithoutPartitioningIdr => { + let (header, sps, pps) = h264_reader::nal::slice::SliceHeader::from_bits( + &self.parser_ctx, + &mut nal.rbsp_bits(), + nal.header().unwrap(), + ) + .map_err(ParserError::SliceParseError)?; + + let header = Arc::new(header); + + let debug_nalu = match nal_unit_type { + h264_reader::nal::UnitType::SliceLayerWithoutPartitioningIdr => { + NaluDebugInfo::SliceWithoutPartitioningHeaderIdr(header.clone()) + } + h264_reader::nal::UnitType::SliceLayerWithoutPartitioningNonIdr => { + NaluDebugInfo::SliceWithoutPartitioningHeaderNonIdr(header.clone()) + } + _ => unreachable!(), + }; + + let mut rbsp_bytes = vec![0, 0, 0, 1]; + nal.reader().read_to_end(&mut rbsp_bytes).unwrap(); + let slice = Slice { + nal_header: nal.header().unwrap(), + header, + pps_id: pps.pic_parameter_set_id, + rbsp_bytes, + }; + + let Some(slices) = self.au_splitter.put_slice(slice) else { + return Ok((debug_nalu, Vec::new())); + }; + + let instructions = self.reference_ctx.put_picture(slices, sps, pps)?; + + Ok((debug_nalu, instructions)) + } + + h264_reader::nal::UnitType::Unspecified(_) + | h264_reader::nal::UnitType::SliceDataPartitionALayer + | h264_reader::nal::UnitType::SliceDataPartitionBLayer + | 
h264_reader::nal::UnitType::SliceDataPartitionCLayer + | h264_reader::nal::UnitType::SEI + | h264_reader::nal::UnitType::AccessUnitDelimiter + | h264_reader::nal::UnitType::EndOfSeq + | h264_reader::nal::UnitType::EndOfStream + | h264_reader::nal::UnitType::FillerData + | h264_reader::nal::UnitType::SeqParameterSetExtension + | h264_reader::nal::UnitType::PrefixNALUnit + | h264_reader::nal::UnitType::SubsetSeqParameterSet + | h264_reader::nal::UnitType::DepthParameterSet + | h264_reader::nal::UnitType::SliceLayerWithoutPartitioningAux + | h264_reader::nal::UnitType::SliceExtension + | h264_reader::nal::UnitType::SliceExtensionViewComponent + | h264_reader::nal::UnitType::Reserved(_) => Ok(( + NaluDebugInfo::Other(format!("{:?}", nal.header().unwrap().nal_unit_type())), + Vec::new(), + )), + } + } +} + +trait SpsExt { + fn max_frame_num(&self) -> i64; +} + +impl SpsExt for SeqParameterSet { + fn max_frame_num(&self) -> i64 { + 1 << self.log2_max_frame_num() + } +} + +#[derive(Debug)] +// this struct is only ever printed out in debug mode, but clippy detects this as it not being +// used. 
+#[allow(dead_code)] +pub enum NaluDebugInfo { + Sps(SeqParameterSet), + Pps(PicParameterSet), + SliceWithoutPartitioningHeaderNonIdr(Arc), + SliceWithoutPartitioningHeaderIdr(Arc), + Other(String), +} + +pub struct Slice { + pub nal_header: h264_reader::nal::NalHeader, + pub pps_id: h264_reader::nal::pps::PicParamSetId, + pub header: Arc, + pub rbsp_bytes: Vec, +} + +pub struct Parser { + reader: AnnexBReader>, + debug_channel: mpsc::Receiver, + decoder_channel: mpsc::Receiver>, +} + +impl Default for Parser { + fn default() -> Self { + let (debug_tx, debug_rx) = mpsc::channel(); + let (decoder_tx, decoder_rx) = mpsc::channel(); + + Parser { + reader: AnnexBReader::accumulate(NalReceiver { + reference_ctx: ReferenceContext::default(), + au_splitter: au_splitter::AUSplitter::default(), + debug_channel: debug_tx, + decoder_channel: decoder_tx, + parser_ctx: h264_reader::Context::new(), + }), + debug_channel: debug_rx, + decoder_channel: decoder_rx, + } + } +} + +impl Parser { + pub fn parse(&mut self, bytes: &[u8]) -> Vec> { + self.reader.push(bytes); + + let mut instructions = Vec::new(); + while let Ok(instruction) = self.decoder_channel.try_recv() { + instructions.push(instruction); + } + while let Ok(nalu) = self.debug_channel.try_recv() { + trace!("parsed nalu: {nalu:#?}"); + } + + instructions + } +} diff --git a/vk-video/src/parser/au_splitter.rs b/vk-video/src/parser/au_splitter.rs new file mode 100644 index 000000000..29a84d964 --- /dev/null +++ b/vk-video/src/parser/au_splitter.rs @@ -0,0 +1,136 @@ +use h264_reader::nal::slice::PicOrderCountLsb; + +use super::Slice; + +#[derive(Default)] +pub(crate) struct AUSplitter { + buffered_nals: Vec, +} + +impl AUSplitter { + pub(crate) fn put_slice(&mut self, slice: Slice) -> Option> { + if self.is_new_au(&slice) { + let au = std::mem::take(&mut self.buffered_nals); + self.buffered_nals.push(slice); + if !au.is_empty() { + Some(au) + } else { + None + } + } else { + self.buffered_nals.push(slice); + None + } + } + 
+ /// returns `true` if `slice` is a first slice in an Access Unit + fn is_new_au(&self, slice: &Slice) -> bool { + let Some(last) = self.buffered_nals.last() else { + return true; + }; + + first_mb_in_slice_zero(slice) + || frame_num_differs(last, slice) + || pps_id_differs(last, slice) + || field_pic_flag_differs(last, slice) + || nal_ref_idc_differs_one_zero(last, slice) + || pic_order_cnt_zero_check(last, slice) + || idr_and_non_idr(last, slice) + || idrs_where_idr_pic_id_differs(last, slice) + } +} + +// defguardp first_mb_in_slice_zero(a) +// when a.first_mb_in_slice == 0 and +// a.nal_unit_type in [1, 2, 5] +// +fn first_mb_in_slice_zero(slice: &Slice) -> bool { + slice.header.first_mb_in_slice == 0 +} + +// defguardp frame_num_differs(a, b) when a.frame_num != b.frame_num +// +fn frame_num_differs(last: &Slice, curr: &Slice) -> bool { + last.header.frame_num != curr.header.frame_num +} + +// defguardp pic_parameter_set_id_differs(a, b) +// when a.pic_parameter_set_id != b.pic_parameter_set_id +// +fn pps_id_differs(last: &Slice, curr: &Slice) -> bool { + last.pps_id != curr.pps_id +} + +// defguardp field_pic_flag_differs(a, b) when a.field_pic_flag != b.field_pic_flag +// +// defguardp bottom_field_flag_differs(a, b) when a.bottom_field_flag != b.bottom_field_flag +// +fn field_pic_flag_differs(last: &Slice, curr: &Slice) -> bool { + last.header.field_pic != curr.header.field_pic +} + +// defguardp nal_ref_idc_differs_one_zero(a, b) +// when (a.nal_ref_idc == 0 or b.nal_ref_idc == 0) and +// a.nal_ref_idc != b.nal_ref_idc +// +fn nal_ref_idc_differs_one_zero(last: &Slice, curr: &Slice) -> bool { + (last.nal_header.nal_ref_idc() == 0 || curr.nal_header.nal_ref_idc() == 0) + && last.nal_header.nal_ref_idc() != curr.nal_header.nal_ref_idc() +} + +// defguardp pic_order_cnt_zero_check(a, b) +// when a.pic_order_cnt_type == 0 and b.pic_order_cnt_type == 0 and +// (a.pic_order_cnt_lsb != b.pic_order_cnt_lsb or +// a.delta_pic_order_cnt_bottom != 
b.delta_pic_order_cnt_bottom) +// +fn pic_order_cnt_zero_check(last: &Slice, curr: &Slice) -> bool { + let (last_pic_order_cnt_lsb, last_delta_pic_order_cnt_bottom) = + match last.header.pic_order_cnt_lsb { + Some(PicOrderCountLsb::Frame(pic_order_cnt_lsb)) => (pic_order_cnt_lsb, 0), + Some(PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + }) => (pic_order_cnt_lsb, delta_pic_order_cnt_bottom), + _ => return false, + }; + + let (curr_pic_order_cnt_lsb, curr_delta_pic_order_cnt_bottom) = + match curr.header.pic_order_cnt_lsb { + Some(PicOrderCountLsb::Frame(pic_order_cnt_lsb)) => (pic_order_cnt_lsb, 0), + Some(PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + }) => (pic_order_cnt_lsb, delta_pic_order_cnt_bottom), + _ => return false, + }; + + last_pic_order_cnt_lsb != curr_pic_order_cnt_lsb + || last_delta_pic_order_cnt_bottom != curr_delta_pic_order_cnt_bottom +} + +// defguardp pic_order_cnt_one_check_zero(a, b) +// when a.pic_order_cnt_type == 1 and b.pic_order_cnt_type == 1 and +// hd(a.delta_pic_order_cnt) != hd(b.delta_pic_order_cnt) +// TODO + +// defguardp pic_order_cnt_one_check_one(a, b) +// when a.pic_order_cnt_type == 1 and b.pic_order_cnt_type == 1 and +// hd(hd(a.delta_pic_order_cnt)) != hd(hd(b.delta_pic_order_cnt)) +// TODO + +// defguardp idr_and_non_idr(a, b) +// when (a.nal_unit_type == 5 or b.nal_unit_type == 5) and +// a.nal_unit_type != b.nal_unit_type +// +fn idr_and_non_idr(last: &Slice, curr: &Slice) -> bool { + (last.nal_header.nal_unit_type().id() == 5) ^ (curr.nal_header.nal_unit_type().id() == 5) +} + +// defguardp idrs_with_idr_pic_id_differ(a, b) +// when a.nal_unit_type == 5 and b.nal_unit_type == 5 and a.idr_pic_id != b.idr_pic_id +fn idrs_where_idr_pic_id_differs(last: &Slice, curr: &Slice) -> bool { + match (last.header.idr_pic_id, curr.header.idr_pic_id) { + (Some(last), Some(curr)) => last != curr, + _ => false, + } +} diff --git 
a/vk-video/src/vulkan_decoder.rs b/vk-video/src/vulkan_decoder.rs new file mode 100644 index 000000000..9b78b74c4 --- /dev/null +++ b/vk-video/src/vulkan_decoder.rs @@ -0,0 +1,1406 @@ +use std::sync::Arc; + +use ash::vk; + +use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; +use tracing::error; +use wrappers::*; + +use crate::parser::{DecodeInformation, DecoderInstruction, ReferenceId}; + +mod parameter_sets; +mod vulkan_ctx; +mod wrappers; + +pub use vulkan_ctx::*; + +const MACROBLOCK_SIZE: u32 = 16; + +pub struct VulkanDecoder<'a> { + vulkan_ctx: Arc, + video_session_resources: Option>, + command_buffers: CommandBuffers, + _command_pools: CommandPools, + sync_structures: SyncStructures, + reference_id_to_dpb_slot_index: std::collections::HashMap, + decode_query_pool: Option, +} + +struct SyncStructures { + sem_decode_done: Semaphore, + fence_transfer_done: Fence, + fence_memory_barrier_completed: Fence, +} + +struct CommandBuffers { + decode_buffer: CommandBuffer, + gpu_to_mem_transfer_buffer: CommandBuffer, + vulkan_to_wgpu_transfer_buffer: CommandBuffer, +} + +struct VideoSessionResources<'a> { + video_session: VideoSession, + parameters_manager: VideoSessionParametersManager, + decoding_images: DecodingImages<'a>, +} + +/// this cannot outlive the image and semaphore it borrows, but it seems impossible to encode that +/// in the lifetimes +struct DecodeOutput { + image: vk::Image, + dimensions: vk::Extent2D, + current_layout: vk::ImageLayout, + layer: u32, + wait_semaphore: vk::Semaphore, + _input_buffer: Buffer, +} + +#[derive(Debug, thiserror::Error)] +pub enum VulkanDecoderError { + #[error("Vulkan error: {0}")] + VkError(#[from] vk::Result), + + #[error("Cannot find enough memory of the right type on the deivce")] + NoMemory, + + #[error("The decoder instruction is not supported: {0:?}")] + DecoderInstructionNotSupported(Box), + + #[error("Setting the frame cropping flag in sps is not supported")] + FrameCroppingNotSupported, + + 
#[error("Bitstreams that contain fields rather than frames are not supported")] + FieldsNotSupported, + + #[error("Scaling lists are not supported")] + ScalingListsNotSupported, + + #[error("A NALU requiring a session received before a session was created (probably before receiving first SPS)")] + NoSession, + + #[error("A slot in the Decoded Pictures Buffer was requested, but all slots are taken")] + NoFreeSlotsInDpb, + + #[error("A picture which is not in the decoded pictures buffer was requested as a reference picture")] + NonExistantReferenceRequested, + + #[error("A vulkan decode operation failed with code {0:?}")] + DecodeOperationFailed(vk::QueryResultStatusKHR), + + #[error(transparent)] + VulkanCtxError(#[from] VulkanCtxError), +} + +impl<'a> VulkanDecoder<'a> { + pub fn new(vulkan_ctx: Arc) -> Result { + let decode_pool = Arc::new(CommandPool::new( + vulkan_ctx.device.clone(), + vulkan_ctx.queues.h264_decode.idx, + )?); + + let transfer_pool = Arc::new(CommandPool::new( + vulkan_ctx.device.clone(), + vulkan_ctx.queues.transfer.idx, + )?); + + let decode_buffer = CommandBuffer::new_primary(decode_pool.clone())?; + + let gpu_to_mem_transfer_buffer = CommandBuffer::new_primary(transfer_pool.clone())?; + + let vulkan_to_wgpu_transfer_buffer = CommandBuffer::new_primary(transfer_pool.clone())?; + + let command_pools = CommandPools { + _decode_pool: decode_pool, + _transfer_pool: transfer_pool, + }; + + let sync_structures = SyncStructures { + sem_decode_done: Semaphore::new(vulkan_ctx.device.clone())?, + fence_transfer_done: Fence::new(vulkan_ctx.device.clone(), false)?, + fence_memory_barrier_completed: Fence::new(vulkan_ctx.device.clone(), false)?, + }; + + let decode_query_pool = if vulkan_ctx + .queues + .h264_decode + .supports_result_status_queries() + { + Some(DecodeQueryPool::new( + vulkan_ctx.device.clone(), + H264ProfileInfo::decode_h264_yuv420().profile_info, + )?) 
+ } else { + None + }; + + Ok(Self { + vulkan_ctx, + video_session_resources: None, + _command_pools: command_pools, + command_buffers: CommandBuffers { + decode_buffer, + gpu_to_mem_transfer_buffer, + vulkan_to_wgpu_transfer_buffer, + }, + sync_structures, + decode_query_pool, + reference_id_to_dpb_slot_index: Default::default(), + }) + } +} + +impl VulkanDecoder<'_> { + pub fn decode_to_bytes( + &mut self, + decoder_instructions: &[DecoderInstruction], + ) -> Result>, VulkanDecoderError> { + let mut result = Vec::new(); + for instruction in decoder_instructions { + if let Some(output) = self.decode(instruction)? { + result.push(self.download_output(output)?) + } + } + + Ok(result) + } + + pub fn decode_to_wgpu_textures( + &mut self, + decoder_instructions: &[DecoderInstruction], + ) -> Result, VulkanDecoderError> { + let mut result = Vec::new(); + for instruction in decoder_instructions { + if let Some(output) = self.decode(instruction)? { + result.push(self.output_to_wgpu_texture(output)?) + } + } + + Ok(result) + } + + fn decode( + &mut self, + instruction: &DecoderInstruction, + ) -> Result, VulkanDecoderError> { + match instruction { + DecoderInstruction::Decode { .. 
} => { + return Err(VulkanDecoderError::DecoderInstructionNotSupported( + Box::new(instruction.clone()), + )) + } + + DecoderInstruction::DecodeAndStoreAs { + decode_info, + reference_id, + } => { + return self + .process_reference_p_frame(decode_info, *reference_id) + .map(Option::Some) + } + + DecoderInstruction::Idr { + decode_info, + reference_id, + } => { + return self + .process_idr(decode_info, *reference_id) + .map(Option::Some) + } + + DecoderInstruction::Drop { reference_ids } => { + for reference_id in reference_ids { + match self.reference_id_to_dpb_slot_index.remove(reference_id) { + Some(dpb_idx) => self + .video_session_resources + .as_mut() + .map(|s| s.decoding_images.free_reference_picture(dpb_idx)), + None => return Err(VulkanDecoderError::NonExistantReferenceRequested), + }; + } + } + + DecoderInstruction::Sps(sps) => self.process_sps(sps)?, + + DecoderInstruction::Pps(pps) => self.process_pps(pps)?, + } + + Ok(None) + } + + fn process_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { + let profile = H264ProfileInfo::decode_h264_yuv420(); + + let width = match sps.frame_cropping { + None => (sps.pic_width_in_mbs_minus1 + 1) * MACROBLOCK_SIZE, + Some(_) => return Err(VulkanDecoderError::FrameCroppingNotSupported), + }; + + let height = match sps.frame_mbs_flags { + h264_reader::nal::sps::FrameMbsFlags::Frames => { + (sps.pic_height_in_map_units_minus1 + 1) * MACROBLOCK_SIZE + } + h264_reader::nal::sps::FrameMbsFlags::Fields { .. } => { + return Err(VulkanDecoderError::FieldsNotSupported) + } + }; + + let max_coded_extent = vk::Extent2D { width, height }; + // +1 for current frame + let max_dpb_slots = sps.max_num_ref_frames + 1; + let max_active_references = sps.max_num_ref_frames; + + if let Some(VideoSessionResources { + video_session, + parameters_manager: parameters, + .. 
+ }) = &mut self.video_session_resources + { + if video_session.max_coded_extent.width >= width + && video_session.max_coded_extent.height >= height + && video_session.max_dpb_slots >= max_dpb_slots + { + // no need to change the session + parameters.put_sps(sps)?; + return Ok(()); + } + } + + let video_session = VideoSession::new( + &self.vulkan_ctx, + &profile.profile_info, + max_coded_extent, + max_dpb_slots, + max_active_references, + &self.vulkan_ctx.video_capabilities.std_header_version, + )?; + + let parameters = self + .video_session_resources + .take() + .map(|r| r.parameters_manager); + + let mut parameters = match parameters { + Some(mut parameters) => { + parameters.change_session(video_session.session)?; + parameters + } + None => VideoSessionParametersManager::new(&self.vulkan_ctx, video_session.session)?, + }; + + parameters.put_sps(sps)?; + + // FIXME: usually, sps arrives either at the start of the stream (when all spses are sent + // at the begginning of the stream) or right before an IDR. It is however possible for an + // sps nal to arrive in between P-frames. This would cause us to loose the reference + // pictures we need to decode the stream until we receive a new IDR. Don't know if this is + // an issue worth fixing, I don't think I ever saw a stream like this. 
+ let (decoding_images, memory_barrier) = DecodingImages::new( + &self.vulkan_ctx, + profile, + &self.vulkan_ctx.h264_dpb_format_properties, + &self.vulkan_ctx.h264_dst_format_properties, + max_coded_extent, + max_dpb_slots, + )?; + + self.command_buffers.decode_buffer.begin()?; + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.decode_buffer, + &vk::DependencyInfo::default().image_memory_barriers(&memory_barrier), + ); + } + + self.command_buffers.decode_buffer.end()?; + + self.command_buffers.decode_buffer.submit( + *self.vulkan_ctx.queues.h264_decode.queue.lock().unwrap(), + &[], + &[], + Some(*self.sync_structures.fence_memory_barrier_completed), + )?; + + // TODO: this shouldn't be a fence + self.sync_structures + .fence_memory_barrier_completed + .wait_and_reset(u64::MAX)?; + + self.video_session_resources = Some(VideoSessionResources { + video_session, + parameters_manager: parameters, + decoding_images, + }); + + Ok(()) + } + + fn process_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { + self.video_session_resources + .as_mut() + .map(|r| &mut r.parameters_manager) + .ok_or(VulkanDecoderError::NoSession)? 
+ .put_pps(pps)?; + + Ok(()) + } + + fn pad_size_to_alignment(size: u64, align: u64) -> u64 { + if size % align == 0 { + size + } else { + (size + align) / align * align + } + } + + fn process_idr( + &mut self, + decode_information: &DecodeInformation, + reference_id: ReferenceId, + ) -> Result { + self.do_decode(decode_information, reference_id, true, true) + } + + fn process_reference_p_frame( + &mut self, + decode_information: &DecodeInformation, + reference_id: ReferenceId, + ) -> Result { + self.do_decode(decode_information, reference_id, false, true) + } + + fn do_decode( + &mut self, + decode_information: &DecodeInformation, + reference_id: ReferenceId, + is_idr: bool, + is_reference: bool, + ) -> Result { + // upload data to a buffer + let size = Self::pad_size_to_alignment( + decode_information.rbsp_bytes.len() as u64, + self.vulkan_ctx + .video_capabilities + .min_bitstream_buffer_offset_alignment, + ); + + let decode_buffer = + self.upload_decode_data_to_buffer(&decode_information.rbsp_bytes, size)?; + + // decode + let video_session_resources = self + .video_session_resources + .as_mut() + .ok_or(VulkanDecoderError::NoSession)?; + + // IDR - remove all reference picures + if is_idr { + video_session_resources + .decoding_images + .reset_all_allocations(); + + self.reference_id_to_dpb_slot_index = Default::default(); + } + + // begin video coding + self.command_buffers.decode_buffer.begin()?; + + let memory_barrier = vk::MemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::VIDEO_DECODE_KHR) + .src_access_mask(vk::AccessFlags2::VIDEO_DECODE_WRITE_KHR) + .dst_stage_mask(vk::PipelineStageFlags2::VIDEO_DECODE_KHR) + .dst_access_mask( + vk::AccessFlags2::VIDEO_DECODE_READ_KHR | vk::AccessFlags2::VIDEO_DECODE_WRITE_KHR, + ); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.decode_buffer, + &vk::DependencyInfo::default().memory_barriers(&[memory_barrier]), + ) + }; + + if let Some(pool) = 
self.decode_query_pool.as_ref() { + pool.reset(*self.command_buffers.decode_buffer); + } + + let reference_slots = video_session_resources + .decoding_images + .reference_slot_info(); + + let begin_info = vk::VideoBeginCodingInfoKHR::default() + .video_session(video_session_resources.video_session.session) + .video_session_parameters(video_session_resources.parameters_manager.parameters()) + .reference_slots(&reference_slots); + + unsafe { + self.vulkan_ctx + .device + .video_queue_ext + .cmd_begin_video_coding_khr(*self.command_buffers.decode_buffer, &begin_info) + }; + + // IDR - issue the reset command to the video session + if is_idr { + let control_info = vk::VideoCodingControlInfoKHR::default() + .flags(vk::VideoCodingControlFlagsKHR::RESET); + + unsafe { + self.vulkan_ctx + .device + .video_queue_ext + .cmd_control_video_coding_khr( + *self.command_buffers.decode_buffer, + &control_info, + ) + }; + } + + // allocate a new reference picture and fill out the forms to get it set up + let new_reference_slot_index = video_session_resources + .decoding_images + .allocate_reference_picture()?; + + let new_reference_slot_std_reference_info = decode_information.picture_info.into(); + let mut new_reference_slot_dpb_slot_info = vk::VideoDecodeH264DpbSlotInfoKHR::default() + .std_reference_info(&new_reference_slot_std_reference_info); + + let new_reference_slot_video_picture_resource_info = video_session_resources + .decoding_images + .video_resource_info(new_reference_slot_index) + .unwrap(); + + let setup_reference_slot = vk::VideoReferenceSlotInfoKHR::default() + .picture_resource(new_reference_slot_video_picture_resource_info) + .slot_index(new_reference_slot_index as i32) + .push_next(&mut new_reference_slot_dpb_slot_info); + + // prepare the reference list + let reference_slots = video_session_resources + .decoding_images + .reference_slot_info(); + + let references_std_ref_info = Self::prepare_references_std_ref_info(decode_information); + + let mut 
references_dpb_slot_info = + Self::prepare_references_dpb_slot_info(&references_std_ref_info); + + let pic_reference_slots = Self::prepare_reference_list_slot_info( + &self.reference_id_to_dpb_slot_index, + &reference_slots, + &mut references_dpb_slot_info, + decode_information, + )?; + + // prepare the decode target picture + let std_picture_info = vk::native::StdVideoDecodeH264PictureInfo { + flags: vk::native::StdVideoDecodeH264PictureInfoFlags { + _bitfield_align_1: [], + __bindgen_padding_0: [0; 3], + _bitfield_1: vk::native::StdVideoDecodeH264PictureInfoFlags::new_bitfield_1( + matches!( + decode_information.header.field_pic, + h264_reader::nal::slice::FieldPic::Field(..) + ) + .into(), + is_idr.into(), + is_idr.into(), + 0, + is_reference.into(), + 0, + ), + }, + PicOrderCnt: decode_information.picture_info.PicOrderCnt, + seq_parameter_set_id: decode_information.sps_id, + pic_parameter_set_id: decode_information.pps_id, + frame_num: decode_information.header.frame_num, + idr_pic_id: decode_information + .header + .idr_pic_id + .map(|a| a as u16) + .unwrap_or(0), + reserved1: 0, + reserved2: 0, + }; + + let slice_offsets = decode_information + .slice_indices + .iter() + .map(|&x| x as u32) + .collect::>(); + + let mut decode_h264_picture_info = vk::VideoDecodeH264PictureInfoKHR::default() + .std_picture_info(&std_picture_info) + .slice_offsets(&slice_offsets); + + let dst_picture_resource_info = match &video_session_resources.decoding_images.dst_image { + Some(image) => image.video_resource_info[0], + None => *new_reference_slot_video_picture_resource_info, + }; + + // these 3 veriables are for copying the result later + let (dst_image, dst_image_layout, dst_layer) = + match &video_session_resources.decoding_images.dst_image { + Some(image) => (**image.image, vk::ImageLayout::VIDEO_DECODE_DST_KHR, 0), + None => ( + **video_session_resources.decoding_images.dpb_image.image, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + new_reference_slot_index, + ), + }; + + // 
fill out the final struct and issue the command + let decode_info = vk::VideoDecodeInfoKHR::default() + .src_buffer(*decode_buffer) + .src_buffer_offset(0) + .src_buffer_range(size) + .dst_picture_resource(dst_picture_resource_info) + .setup_reference_slot(&setup_reference_slot) + .reference_slots(&pic_reference_slots) + .push_next(&mut decode_h264_picture_info); + + if let Some(pool) = self.decode_query_pool.as_ref() { + pool.begin_query(*self.command_buffers.decode_buffer); + } + + unsafe { + self.vulkan_ctx + .device + .video_decode_queue_ext + .cmd_decode_video_khr(*self.command_buffers.decode_buffer, &decode_info) + }; + + if let Some(pool) = self.decode_query_pool.as_ref() { + pool.end_query(*self.command_buffers.decode_buffer); + } + + unsafe { + self.vulkan_ctx + .device + .video_queue_ext + .cmd_end_video_coding_khr( + *self.command_buffers.decode_buffer, + &vk::VideoEndCodingInfoKHR::default(), + ) + }; + + self.command_buffers.decode_buffer.end()?; + + self.command_buffers.decode_buffer.submit( + *self.vulkan_ctx.queues.h264_decode.queue.lock().unwrap(), + &[], + &[( + *self.sync_structures.sem_decode_done, + vk::PipelineStageFlags2::VIDEO_DECODE_KHR, + )], + None, + )?; + + // after the decode save the new reference picture + self.reference_id_to_dpb_slot_index + .insert(reference_id, new_reference_slot_index); + + // TODO: those are not the real dimensions of the image. 
the real dimensions should be + // calculated from the sps + let dimensions = video_session_resources.video_session.max_coded_extent; + + Ok(DecodeOutput { + image: dst_image, + wait_semaphore: *self.sync_structures.sem_decode_done, + layer: dst_layer as u32, + current_layout: dst_image_layout, + dimensions, + _input_buffer: decode_buffer, + }) + } + + fn output_to_wgpu_texture( + &self, + decode_output: DecodeOutput, + ) -> Result { + let copy_extent = vk::Extent3D { + width: decode_output.dimensions.width, + height: decode_output.dimensions.height, + depth: 1, + }; + + let queue_indices = [ + self.vulkan_ctx.queues.transfer.idx as u32, + self.vulkan_ctx.queues.wgpu.idx as u32, + ]; + + let create_info = vk::ImageCreateInfo::default() + .flags(vk::ImageCreateFlags::MUTABLE_FORMAT) + .image_type(vk::ImageType::TYPE_2D) + .format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .extent(copy_extent) + .mip_levels(1) + .array_layers(1) + .samples(vk::SampleCountFlags::TYPE_1) + .tiling(vk::ImageTiling::OPTIMAL) + .usage( + vk::ImageUsageFlags::SAMPLED + | vk::ImageUsageFlags::TRANSFER_DST + | vk::ImageUsageFlags::TRANSFER_SRC, + ) + .sharing_mode(vk::SharingMode::CONCURRENT) + .queue_family_indices(&queue_indices) + .initial_layout(vk::ImageLayout::UNDEFINED); + + let image = Arc::new(Image::new(self.vulkan_ctx.allocator.clone(), &create_info)?); + + self.command_buffers + .vulkan_to_wgpu_transfer_buffer + .begin()?; + + let memory_barrier_src = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::COPY) + .dst_access_mask(vk::AccessFlags2::TRANSFER_READ) + .old_layout(decode_output.current_layout) + .new_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(decode_output.image) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: 
vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: decode_output.layer, + layer_count: 1, + }); + + let memory_barrier_dst = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::COPY) + .dst_access_mask(vk::AccessFlags2::TRANSFER_WRITE) + .old_layout(vk::ImageLayout::UNDEFINED) + .new_layout(vk::ImageLayout::TRANSFER_DST_OPTIMAL) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(**image) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: 1, + }); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.vulkan_to_wgpu_transfer_buffer, + &vk::DependencyInfo::default() + .image_memory_barriers(&[memory_barrier_src, memory_barrier_dst]), + ) + }; + + let copy_info = [ + vk::ImageCopy::default() + .src_subresource(vk::ImageSubresourceLayers { + base_array_layer: decode_output.layer, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_0, + }) + .src_offset(vk::Offset3D::default()) + .dst_subresource(vk::ImageSubresourceLayers { + base_array_layer: 0, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_0, + }) + .dst_offset(vk::Offset3D::default()) + .extent(copy_extent), + vk::ImageCopy::default() + .src_subresource(vk::ImageSubresourceLayers { + base_array_layer: decode_output.layer, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_1, + }) + .src_offset(vk::Offset3D::default()) + .dst_subresource(vk::ImageSubresourceLayers { + base_array_layer: 0, + mip_level: 0, + layer_count: 1, + aspect_mask: vk::ImageAspectFlags::PLANE_1, + }) + .dst_offset(vk::Offset3D::default()) + .extent(vk::Extent3D { + width: copy_extent.width / 2, + height: 
copy_extent.height / 2, + ..copy_extent + }), + ]; + + unsafe { + self.vulkan_ctx.device.cmd_copy_image( + *self.command_buffers.vulkan_to_wgpu_transfer_buffer, + decode_output.image, + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + **image, + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + ©_info, + ); + } + + let memory_barrier_src = memory_barrier_src + .src_stage_mask(vk::PipelineStageFlags2::COPY) + .src_access_mask(vk::AccessFlags2::TRANSFER_READ) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .new_layout(decode_output.current_layout); + + let memory_barrier_dst = memory_barrier_dst + .src_stage_mask(vk::PipelineStageFlags2::COPY) + .src_access_mask(vk::AccessFlags2::TRANSFER_WRITE) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::TRANSFER_DST_OPTIMAL) + .new_layout(vk::ImageLayout::GENERAL); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.vulkan_to_wgpu_transfer_buffer, + &vk::DependencyInfo::default() + .image_memory_barriers(&[memory_barrier_src, memory_barrier_dst]), + ) + }; + + self.command_buffers.vulkan_to_wgpu_transfer_buffer.end()?; + + self.command_buffers.vulkan_to_wgpu_transfer_buffer.submit( + *self.vulkan_ctx.queues.transfer.queue.lock().unwrap(), + &[( + decode_output.wait_semaphore, + vk::PipelineStageFlags2::TOP_OF_PIPE, + )], + &[], + Some(*self.sync_structures.fence_transfer_done), + )?; + + self.sync_structures + .fence_transfer_done + .wait_and_reset(u64::MAX)?; + + let result = self + .decode_query_pool + .as_ref() + .map(|pool| pool.get_result_blocking()); + + if let Some(result) = result { + let result = result?; + if result.as_raw() < 0 { + return Err(VulkanDecoderError::DecodeOperationFailed(result)); + } + } + + let hal_texture = unsafe { + wgpu::hal::vulkan::Device::texture_from_raw( + **image, + &wgpu::hal::TextureDescriptor { + label: 
Some("vulkan video output texture"), + usage: wgpu::hal::TextureUses::RESOURCE + | wgpu::hal::TextureUses::COPY_DST + | wgpu::hal::TextureUses::COPY_SRC, + memory_flags: wgpu::hal::MemoryFlags::empty(), + size: wgpu::Extent3d { + width: copy_extent.width, + height: copy_extent.height, + depth_or_array_layers: copy_extent.depth, + }, + dimension: wgpu::TextureDimension::D2, + sample_count: 1, + view_formats: Vec::new(), + format: wgpu::TextureFormat::NV12, + mip_level_count: 1, + }, + Some(Box::new(image.clone())), + ) + }; + + let wgpu_texture = unsafe { + self.vulkan_ctx + .wgpu_ctx + .device + .create_texture_from_hal::( + hal_texture, + &wgpu::TextureDescriptor { + label: Some("vulkan video output texture"), + usage: wgpu::TextureUsages::COPY_DST + | wgpu::TextureUsages::TEXTURE_BINDING + | wgpu::TextureUsages::COPY_SRC, + size: wgpu::Extent3d { + width: copy_extent.width, + height: copy_extent.height, + depth_or_array_layers: copy_extent.depth, + }, + dimension: wgpu::TextureDimension::D2, + sample_count: 1, + view_formats: &[], + format: wgpu::TextureFormat::NV12, + mip_level_count: 1, + }, + ) + }; + + Ok(wgpu_texture) + } + + fn download_output(&self, decode_output: DecodeOutput) -> Result, VulkanDecoderError> { + let mut dst_buffer = self.copy_image_to_buffer( + decode_output.image, + decode_output.dimensions, + decode_output.current_layout, + decode_output.layer, + &[(decode_output.wait_semaphore, vk::PipelineStageFlags2::COPY)], + &[], + Some(*self.sync_structures.fence_transfer_done), + )?; + + self.sync_structures + .fence_transfer_done + .wait_and_reset(u64::MAX)?; + + let output = unsafe { + self.download_data_from_buffer( + &mut dst_buffer, + decode_output.dimensions.width as usize + * decode_output.dimensions.height as usize + * 3 + / 2, + )? 
+ }; + + Ok(output) + } + + fn prepare_references_std_ref_info( + decode_information: &DecodeInformation, + ) -> Vec { + decode_information + .reference_list + .iter() + .flatten() + .map(|ref_info| ref_info.picture_info.into()) + .collect::>() + } + + fn prepare_references_dpb_slot_info( + references_std_ref_info: &[vk::native::StdVideoDecodeH264ReferenceInfo], + ) -> Vec { + references_std_ref_info + .iter() + .map(|info| vk::VideoDecodeH264DpbSlotInfoKHR::default().std_reference_info(info)) + .collect::>() + } + + fn prepare_reference_list_slot_info<'a>( + reference_id_to_dpb_slot_index: &std::collections::HashMap, + reference_slots: &'a [vk::VideoReferenceSlotInfoKHR<'a>], + references_dpb_slot_info: &'a mut [vk::VideoDecodeH264DpbSlotInfoKHR<'a>], + decode_information: &'a DecodeInformation, + ) -> Result>, VulkanDecoderError> { + let mut pic_reference_slots = Vec::new(); + for (ref_info, dpb_slot_info) in decode_information + .reference_list + .iter() + .flatten() + .zip(references_dpb_slot_info.iter_mut()) + { + let i = *reference_id_to_dpb_slot_index + .get(&ref_info.id) + .ok_or(VulkanDecoderError::NonExistantReferenceRequested)?; + + let reference = *reference_slots + .get(i) + .ok_or(VulkanDecoderError::NonExistantReferenceRequested)?; + + if reference.slot_index < 0 || reference.p_picture_resource.is_null() { + return Err(VulkanDecoderError::NonExistantReferenceRequested); + } + + let reference = reference.push_next(dpb_slot_info); + + pic_reference_slots.push(reference); + } + + Ok(pic_reference_slots) + } + + /// ## Safety + /// the buffer has to be mappable and readable + unsafe fn download_data_from_buffer( + &self, + buffer: &mut Buffer, + size: usize, + ) -> Result, VulkanDecoderError> { + let mut output = Vec::new(); + unsafe { + let memory = self + .vulkan_ctx + .allocator + .map_memory(&mut buffer.allocation)?; + let memory_slice = std::slice::from_raw_parts_mut(memory, size); + output.extend_from_slice(memory_slice); + self.vulkan_ctx + 
.allocator + .unmap_memory(&mut buffer.allocation); + } + + Ok(output) + } + + fn upload_decode_data_to_buffer( + &self, + data: &[u8], + buffer_size: u64, + ) -> Result { + let mut decode_buffer = Buffer::new_decode( + self.vulkan_ctx.allocator.clone(), + buffer_size, + &H264ProfileInfo::decode_h264_yuv420(), + )?; + + unsafe { + let mem = self + .vulkan_ctx + .allocator + .map_memory(&mut decode_buffer.allocation)?; + let slice = std::slice::from_raw_parts_mut(mem.cast(), data.len()); + slice.copy_from_slice(data); + self.vulkan_ctx + .allocator + .unmap_memory(&mut decode_buffer.allocation); + } + + Ok(decode_buffer) + } + + #[allow(clippy::too_many_arguments)] + fn copy_image_to_buffer( + &self, + image: vk::Image, + dimensions: vk::Extent2D, + current_image_layout: vk::ImageLayout, + layer: u32, + wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], + signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], + fence: Option, + ) -> Result { + self.command_buffers.gpu_to_mem_transfer_buffer.begin()?; + + let memory_barrier = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::COPY) + .dst_access_mask(vk::AccessFlags2::TRANSFER_READ) + .old_layout(current_image_layout) + .new_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(image) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: layer, + layer_count: 1, + }); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.gpu_to_mem_transfer_buffer, + &vk::DependencyInfo::default().image_memory_barriers(&[memory_barrier]), + ) + }; + + // TODO: in this section, we shouldn't be using `max_coded_extent` and use the real frame + // resolution + let 
y_plane_size = dimensions.width as u64 * dimensions.height as u64; + + let dst_buffer = Buffer::new_transfer( + self.vulkan_ctx.allocator.clone(), + y_plane_size * 3 / 2, + TransferDirection::GpuToMem, + )?; + + let copy_info = [ + vk::BufferImageCopy::default() + .image_subresource(vk::ImageSubresourceLayers { + mip_level: 0, + layer_count: 1, + base_array_layer: layer, + aspect_mask: vk::ImageAspectFlags::PLANE_0, + }) + .image_offset(vk::Offset3D { x: 0, y: 0, z: 0 }) + .image_extent(vk::Extent3D { + width: dimensions.width, + height: dimensions.height, + depth: 1, + }) + .buffer_offset(0) + .buffer_row_length(0) + .buffer_image_height(0), + vk::BufferImageCopy::default() + .image_subresource(vk::ImageSubresourceLayers { + mip_level: 0, + layer_count: 1, + base_array_layer: layer, + aspect_mask: vk::ImageAspectFlags::PLANE_1, + }) + .image_offset(vk::Offset3D { x: 0, y: 0, z: 0 }) + .image_extent(vk::Extent3D { + width: dimensions.width / 2, + height: dimensions.height / 2, + depth: 1, + }) + .buffer_offset(y_plane_size) + .buffer_row_length(0) + .buffer_image_height(0), + ]; + + unsafe { + self.vulkan_ctx.device.cmd_copy_image_to_buffer( + *self.command_buffers.gpu_to_mem_transfer_buffer, + image, + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + *dst_buffer, + ©_info, + ) + }; + + let memory_barrier = memory_barrier + .src_stage_mask(vk::PipelineStageFlags2::COPY) + .src_access_mask(vk::AccessFlags2::TRANSFER_READ) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::TRANSFER_SRC_OPTIMAL) + .new_layout(current_image_layout); + + unsafe { + self.vulkan_ctx.device.cmd_pipeline_barrier2( + *self.command_buffers.gpu_to_mem_transfer_buffer, + &vk::DependencyInfo::default().image_memory_barriers(&[memory_barrier]), + ) + }; + + self.command_buffers.gpu_to_mem_transfer_buffer.end()?; + + self.command_buffers.gpu_to_mem_transfer_buffer.submit( + *self.vulkan_ctx.queues.transfer.queue.lock().unwrap(), + 
wait_semaphores, + signal_semaphores, + fence, + )?; + + Ok(dst_buffer) + } +} + +impl From for vk::native::StdVideoDecodeH264ReferenceInfo { + fn from(picture_info: crate::parser::PictureInfo) -> Self { + vk::native::StdVideoDecodeH264ReferenceInfo { + flags: vk::native::StdVideoDecodeH264ReferenceInfoFlags { + __bindgen_padding_0: [0; 3], + _bitfield_align_1: [], + _bitfield_1: vk::native::StdVideoDecodeH264ReferenceInfoFlags::new_bitfield_1( + 0, + 0, + picture_info.used_for_long_term_reference.into(), + picture_info.non_existing.into(), + ), + }, + FrameNum: picture_info.FrameNum, + PicOrderCnt: picture_info.PicOrderCnt, + reserved: 0, + } + } +} + +pub(crate) struct DecodingImages<'a> { + pub(crate) dpb_image: DecodingImageBundle<'a>, + pub(crate) dpb_slot_active: Vec, + pub(crate) dst_image: Option>, +} + +pub(crate) struct DecodingImageBundle<'a> { + pub(crate) image: Arc, + pub(crate) _image_view: ImageView, + pub(crate) video_resource_info: Vec>, +} + +impl<'a> DecodingImageBundle<'a> { + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + format: &vk::VideoFormatPropertiesKHR<'a>, + dimensions: vk::Extent2D, + image_usage: vk::ImageUsageFlags, + profile_info: &H264ProfileInfo, + array_layer_count: u32, + queue_indices: Option<&[u32]>, + layout: vk::ImageLayout, + ) -> Result<(Self, vk::ImageMemoryBarrier2<'a>), VulkanDecoderError> { + let mut profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(&profile_info.profile_info)); + + let mut image_create_info = vk::ImageCreateInfo::default() + .flags(format.image_create_flags) + .image_type(format.image_type) + .format(format.format) + .extent(vk::Extent3D { + width: dimensions.width, + height: dimensions.height, + depth: 1, + }) + .mip_levels(1) + .array_layers(array_layer_count) + .samples(vk::SampleCountFlags::TYPE_1) + .tiling(format.image_tiling) + .usage(image_usage) + .initial_layout(vk::ImageLayout::UNDEFINED) + .push_next(&mut 
profile_list_info); + + match queue_indices { + Some(indices) => { + image_create_info = image_create_info + .sharing_mode(vk::SharingMode::CONCURRENT) + .queue_family_indices(indices); + } + None => { + image_create_info = image_create_info.sharing_mode(vk::SharingMode::EXCLUSIVE); + } + } + + let image = Arc::new(Image::new( + vulkan_ctx.allocator.clone(), + &image_create_info, + )?); + + let subresource_range = vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: vk::REMAINING_ARRAY_LAYERS, + }; + + let image_view_create_info = vk::ImageViewCreateInfo::default() + .flags(vk::ImageViewCreateFlags::empty()) + .image(**image) + .view_type(if array_layer_count == 1 { + vk::ImageViewType::TYPE_2D + } else { + vk::ImageViewType::TYPE_2D_ARRAY + }) + .format(format.format) + .components(vk::ComponentMapping::default()) + .subresource_range(subresource_range); + + let image_view = ImageView::new( + vulkan_ctx.device.clone(), + image.clone(), + &image_view_create_info, + )?; + + let video_resource_info = (0..array_layer_count) + .map(|i| { + vk::VideoPictureResourceInfoKHR::default() + .coded_offset(vk::Offset2D { x: 0, y: 0 }) + .coded_extent(dimensions) + .base_array_layer(i) + .image_view_binding(image_view.view) + }) + .collect(); + + let image_memory_barrier = vk::ImageMemoryBarrier2::default() + .src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::UNDEFINED) + .new_layout(layout) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(**image) + .subresource_range(subresource_range); + + Ok(( + Self { + image, + _image_view: image_view, + video_resource_info, + }, + image_memory_barrier, + )) + } +} + +impl<'a> DecodingImages<'a> { + pub(crate) fn new( + 
vulkan_ctx: &VulkanCtx, + profile: H264ProfileInfo, + dpb_format: &vk::VideoFormatPropertiesKHR<'a>, + dst_format: &Option>, + dimensions: vk::Extent2D, + max_dpb_slots: u32, + ) -> Result<(Self, Vec>), VulkanDecoderError> { + let dpb_image_usage = if dst_format.is_some() { + dpb_format.image_usage_flags & vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + } else { + dpb_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + | vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC) + }; + + let queue_indices = [ + vulkan_ctx.queues.transfer.idx as u32, + vulkan_ctx.queues.h264_decode.idx as u32, + ]; + + let (dpb_image, dpb_memory_barrier) = DecodingImageBundle::new( + vulkan_ctx, + dpb_format, + dimensions, + dpb_image_usage, + &profile, + max_dpb_slots, + if dst_format.is_some() { + None + } else { + Some(&queue_indices) + }, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + )?; + + let output = dst_format + .map(|dst_format| { + let dst_image_usage = dst_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC); + DecodingImageBundle::new( + vulkan_ctx, + &dst_format, + dimensions, + dst_image_usage, + &profile, + 1, + Some(&queue_indices), + vk::ImageLayout::VIDEO_DECODE_DST_KHR, + ) + }) + .transpose()?; + + let (dst_image, dst_memory_barrier) = match output { + Some((output_images, output_memory_barrier)) => { + (Some(output_images), Some(output_memory_barrier)) + } + None => (None, None), + }; + + let barriers = [dpb_memory_barrier] + .into_iter() + .chain(dst_memory_barrier) + .collect::>(); + + Ok(( + Self { + dpb_image, + dpb_slot_active: vec![false; max_dpb_slots as usize], + dst_image, + }, + barriers, + )) + } + + fn reference_slot_info(&self) -> Vec { + self.dpb_image + .video_resource_info + .iter() + .enumerate() + .map(|(i, info)| { + vk::VideoReferenceSlotInfoKHR::default() + .picture_resource(info) + .slot_index(if self.dpb_slot_active[i] { + i as i32 + } else { + 
-1 + }) + }) + .collect() + } + + fn allocate_reference_picture(&mut self) -> Result { + let i = self + .dpb_slot_active + .iter() + .enumerate() + .find(|(_, &v)| !v) + .map(|(i, _)| i) + .ok_or(VulkanDecoderError::NoFreeSlotsInDpb)?; + + self.dpb_slot_active[i] = true; + + Ok(i) + } + + fn video_resource_info(&self, i: usize) -> Option<&vk::VideoPictureResourceInfoKHR> { + self.dpb_image.video_resource_info.get(i) + } + + fn free_reference_picture(&mut self, i: usize) -> Result<(), VulkanDecoderError> { + self.dpb_slot_active[i] = false; + + Ok(()) + } + + fn reset_all_allocations(&mut self) { + self.dpb_slot_active + .iter_mut() + .for_each(|slot| *slot = false); + } +} + +pub(crate) struct H264ProfileInfo<'a> { + profile_info: vk::VideoProfileInfoKHR<'a>, + h264_info_ptr: *mut vk::VideoDecodeH264ProfileInfoKHR<'a>, +} + +impl H264ProfileInfo<'_> { + fn decode_h264_yuv420() -> Self { + let h264_profile_info = Box::leak(Box::new( + vk::VideoDecodeH264ProfileInfoKHR::default() + .std_profile_idc( + vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_BASELINE, + ) + .picture_layout(vk::VideoDecodeH264PictureLayoutFlagsKHR::PROGRESSIVE), + )); + + let h264_info_ptr = h264_profile_info as *mut _; + let profile_info = vk::VideoProfileInfoKHR::default() + .video_codec_operation(vk::VideoCodecOperationFlagsKHR::DECODE_H264) + .chroma_subsampling(vk::VideoChromaSubsamplingFlagsKHR::TYPE_420) + .luma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .chroma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .push_next(h264_profile_info); + + Self { + profile_info, + h264_info_ptr, + } + } +} + +impl<'a> Drop for H264ProfileInfo<'a> { + fn drop(&mut self) { + unsafe { + let _ = Box::from_raw(self.h264_info_ptr); + } + } +} diff --git a/vk-video/src/vulkan_decoder/parameter_sets.rs b/vk-video/src/vulkan_decoder/parameter_sets.rs new file mode 100644 index 000000000..ebf4267c5 --- /dev/null +++ b/vk-video/src/vulkan_decoder/parameter_sets.rs @@ -0,0 
+1,262 @@ +use ash::vk; +use h264_reader::nal::sps::SeqParameterSet; + +use super::VulkanDecoderError; + +pub(crate) struct VkSequenceParameterSet { + pub(crate) sps: vk::native::StdVideoH264SequenceParameterSet, + // in the future, heap-allocated VUI and HRD parameters can be put here to have everything + // together +} + +impl TryFrom<&'_ SeqParameterSet> for VkSequenceParameterSet { + type Error = VulkanDecoderError; + + #[allow(non_snake_case)] + fn try_from(sps: &SeqParameterSet) -> Result { + let flags = vk::native::StdVideoH264SpsFlags { + _bitfield_1: vk::native::StdVideoH264SpsFlags::new_bitfield_1( + sps.constraint_flags.flag0().into(), + sps.constraint_flags.flag1().into(), + sps.constraint_flags.flag2().into(), + sps.constraint_flags.flag3().into(), + sps.constraint_flags.flag4().into(), + sps.constraint_flags.flag5().into(), + sps.direct_8x8_inference_flag.into(), + match sps.frame_mbs_flags { + h264_reader::nal::sps::FrameMbsFlags::Frames => 0, + h264_reader::nal::sps::FrameMbsFlags::Fields { + mb_adaptive_frame_field_flag, + } => mb_adaptive_frame_field_flag.into(), + }, + matches!( + sps.frame_mbs_flags, + h264_reader::nal::sps::FrameMbsFlags::Frames + ) + .into(), + match sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeOne { + delta_pic_order_always_zero_flag, + .. + } => delta_pic_order_always_zero_flag.into(), + // The spec doesn't say what to do if this flag is not present... + h264_reader::nal::sps::PicOrderCntType::TypeZero { .. 
} + | h264_reader::nal::sps::PicOrderCntType::TypeTwo => 0, + }, + sps.chroma_info.separate_colour_plane_flag.into(), + sps.gaps_in_frame_num_value_allowed_flag.into(), + sps.chroma_info.qpprime_y_zero_transform_bypass_flag.into(), + sps.frame_cropping.is_some().into(), + sps.chroma_info.scaling_matrix.is_some().into(), + 0, + ), + _bitfield_align_1: [], + __bindgen_padding_0: 0, + }; + + let profile_idc: u8 = sps.profile_idc.into(); + + let pic_order_cnt_type = match sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeZero { .. } => 0, + h264_reader::nal::sps::PicOrderCntType::TypeOne { .. } => 1, + h264_reader::nal::sps::PicOrderCntType::TypeTwo => 2, + }; + + let ( + offset_for_non_ref_pic, + offset_for_top_to_bottom_field, + num_ref_frames_in_pic_order_cnt_cycle, + ) = match &sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeOne { + offset_for_non_ref_pic, + offset_for_top_to_bottom_field, + offsets_for_ref_frame, + .. + } => ( + *offset_for_non_ref_pic, + *offset_for_top_to_bottom_field, + offsets_for_ref_frame.len() as u8, + ), + h264_reader::nal::sps::PicOrderCntType::TypeZero { .. } => (0, 0, 0), + h264_reader::nal::sps::PicOrderCntType::TypeTwo => (0, 0, 0), + }; + + let log2_max_pic_order_cnt_lsb_minus4 = match &sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeZero { + log2_max_pic_order_cnt_lsb_minus4, + } => *log2_max_pic_order_cnt_lsb_minus4, + h264_reader::nal::sps::PicOrderCntType::TypeOne { .. 
} + | h264_reader::nal::sps::PicOrderCntType::TypeTwo => 0, + }; + + let ( + frame_crop_left_offset, + frame_crop_right_offset, + frame_crop_top_offset, + frame_crop_bottom_offset, + ) = match sps.frame_cropping { + Some(h264_reader::nal::sps::FrameCropping { + left_offset, + right_offset, + top_offset, + bottom_offset, + }) => (left_offset, right_offset, top_offset, bottom_offset), + None => (0, 0, 0, 0), + }; + + let pOffsetForRefFrame = match &sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeOne { + offsets_for_ref_frame, + .. + } => offsets_for_ref_frame.as_ptr(), + h264_reader::nal::sps::PicOrderCntType::TypeZero { .. } + | h264_reader::nal::sps::PicOrderCntType::TypeTwo => std::ptr::null(), + }; + + let pScalingLists = match sps.chroma_info.scaling_matrix { + Some(_) => return Err(VulkanDecoderError::ScalingListsNotSupported), + None => std::ptr::null(), + }; + + // TODO: this is not necessary to reconstruct samples. I don't know why the decoder would + // need this. Maybe we can do this in the future. 
+ let pSequenceParameterSetVui = std::ptr::null(); + + Ok(Self { + sps: vk::native::StdVideoH264SequenceParameterSet { + flags, + profile_idc: profile_idc as u32, + level_idc: h264_level_idc_to_vk(sps.level_idc), + chroma_format_idc: sps.chroma_info.chroma_format.to_chroma_format_idc(), + seq_parameter_set_id: sps.seq_parameter_set_id.id(), + bit_depth_luma_minus8: sps.chroma_info.bit_depth_luma_minus8, + bit_depth_chroma_minus8: sps.chroma_info.bit_depth_chroma_minus8, + log2_max_frame_num_minus4: sps.log2_max_frame_num_minus4, + pic_order_cnt_type, + offset_for_non_ref_pic, + offset_for_top_to_bottom_field, + num_ref_frames_in_pic_order_cnt_cycle, + log2_max_pic_order_cnt_lsb_minus4, + max_num_ref_frames: sps.max_num_ref_frames as u8, + reserved1: 0, + pic_width_in_mbs_minus1: sps.pic_width_in_mbs_minus1, + pic_height_in_map_units_minus1: sps.pic_height_in_map_units_minus1, + frame_crop_left_offset, + frame_crop_right_offset, + frame_crop_top_offset, + frame_crop_bottom_offset, + reserved2: 0, + pOffsetForRefFrame, + pScalingLists, + pSequenceParameterSetVui, + }, + }) + } +} + +trait ChromaFormatExt { + fn to_chroma_format_idc(&self) -> u32; +} + +impl ChromaFormatExt for h264_reader::nal::sps::ChromaFormat { + fn to_chroma_format_idc(&self) -> u32 { + match self { + h264_reader::nal::sps::ChromaFormat::Monochrome => 0, + h264_reader::nal::sps::ChromaFormat::YUV420 => 1, + h264_reader::nal::sps::ChromaFormat::YUV422 => 2, + h264_reader::nal::sps::ChromaFormat::YUV444 => 3, + h264_reader::nal::sps::ChromaFormat::Invalid(v) => *v, + } + } +} + +fn h264_level_idc_to_vk(level_idc: u8) -> u32 { + match level_idc { + 10 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_0, + 11 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_1, + 12 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_2, + 13 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_3, + 20 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_0, + 21 
=> vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_1, + 22 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_2, + 30 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_0, + 31 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_1, + 32 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_2, + 40 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_0, + 41 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_1, + 42 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_2, + 50 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_0, + 51 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_1, + 52 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_2, + 60 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_0, + 61 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_1, + 62 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_2, + _ => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_INVALID, + } +} + +pub(super) struct VkPictureParameterSet { + pub(super) pps: vk::native::StdVideoH264PictureParameterSet, +} + +impl TryFrom<&'_ h264_reader::nal::pps::PicParameterSet> for VkPictureParameterSet { + type Error = VulkanDecoderError; + + #[allow(non_snake_case)] + fn try_from(pps: &h264_reader::nal::pps::PicParameterSet) -> Result { + let flags = vk::native::StdVideoH264PpsFlags { + _bitfield_align_1: [], + __bindgen_padding_0: [0; 3], + _bitfield_1: vk::native::StdVideoH264PpsFlags::new_bitfield_1( + pps.extension + .as_ref() + .map(|ext| ext.transform_8x8_mode_flag.into()) + .unwrap_or(0), + pps.redundant_pic_cnt_present_flag.into(), + pps.constrained_intra_pred_flag.into(), + pps.deblocking_filter_control_present_flag.into(), + pps.weighted_pred_flag.into(), + pps.bottom_field_pic_order_in_frame_present_flag.into(), + pps.entropy_coding_mode_flag.into(), + pps.extension + .as_ref() + .map(|ext| 
ext.pic_scaling_matrix.is_some().into()) + .unwrap_or(0), + ), + }; + + let chroma_qp_index_offset = pps.chroma_qp_index_offset as i8; + + let second_chroma_qp_index_offset = pps + .extension + .as_ref() + .map(|ext| ext.second_chroma_qp_index_offset as i8) + .unwrap_or(chroma_qp_index_offset); + + let pScalingLists = match pps.extension { + Some(h264_reader::nal::pps::PicParameterSetExtra { + pic_scaling_matrix: Some(_), + .. + }) => return Err(VulkanDecoderError::ScalingListsNotSupported), + _ => std::ptr::null(), + }; + + Ok(Self { + pps: vk::native::StdVideoH264PictureParameterSet { + flags, + seq_parameter_set_id: pps.seq_parameter_set_id.id(), + pic_parameter_set_id: pps.pic_parameter_set_id.id(), + num_ref_idx_l0_default_active_minus1: pps.num_ref_idx_l0_default_active_minus1 + as u8, + num_ref_idx_l1_default_active_minus1: pps.num_ref_idx_l1_default_active_minus1 + as u8, + weighted_bipred_idc: pps.weighted_bipred_idc.into(), + pic_init_qp_minus26: pps.pic_init_qp_minus26 as i8, + pic_init_qs_minus26: pps.pic_init_qs_minus26 as i8, + chroma_qp_index_offset, + second_chroma_qp_index_offset, + pScalingLists, + }, + }) + } +} diff --git a/vk-video/src/vulkan_decoder/vulkan_ctx.rs b/vk-video/src/vulkan_decoder/vulkan_ctx.rs new file mode 100644 index 000000000..a6481f721 --- /dev/null +++ b/vk-video/src/vulkan_decoder/vulkan_ctx.rs @@ -0,0 +1,644 @@ +use std::{ + ffi::{c_void, CStr}, + sync::Arc, +}; + +use ash::{vk, Entry}; +use tracing::{error, info}; + +use super::{Allocator, CommandPool, DebugMessenger, Device, H264ProfileInfo, Instance}; + +const REQUIRED_EXTENSIONS: &[&CStr] = &[ + vk::KHR_VIDEO_QUEUE_NAME, + vk::KHR_VIDEO_DECODE_QUEUE_NAME, + vk::KHR_VIDEO_DECODE_H264_NAME, +]; + +#[derive(thiserror::Error, Debug)] +pub enum VulkanCtxError { + #[error("Error loading vulkan: {0}")] + LoadingError(#[from] ash::LoadingError), + + #[error("Vulkan error: {0}")] + VkError(#[from] vk::Result), + + #[error("wgpu instance error: {0}")] + WgpuInstanceError(#[from] 
wgpu::hal::InstanceError), + + #[error("wgpu device error: {0}")] + WgpuDeviceError(#[from] wgpu::hal::DeviceError), + + #[error("wgpu request device error: {0}")] + WgpuRequestDeviceError(#[from] wgpu::RequestDeviceError), + + #[error("cannot create a wgpu adapter")] + WgpuAdapterNotCreated, + + #[error("Cannot find a suitable physical device")] + NoDevice, + + #[error("String conversion error: {0}")] + StringConversionError(#[from] std::ffi::FromBytesUntilNulError), +} + +pub struct VulkanCtx { + _entry: Arc, + _instance: Arc, + _physical_device: vk::PhysicalDevice, + pub(crate) device: Arc, + pub(crate) allocator: Arc, + pub(crate) queues: Queues, + _debug_messenger: Option, + pub(crate) video_capabilities: vk::VideoCapabilitiesKHR<'static>, + pub(crate) h264_dpb_format_properties: vk::VideoFormatPropertiesKHR<'static>, + pub(crate) h264_dst_format_properties: Option>, + pub wgpu_ctx: WgpuCtx, +} + +pub struct WgpuCtx { + pub instance: Arc, + pub adapter: Arc, + pub device: Arc, + pub queue: Arc, +} + +pub(crate) struct CommandPools { + pub(crate) _decode_pool: Arc, + pub(crate) _transfer_pool: Arc, +} + +pub(crate) struct Queue { + pub(crate) queue: std::sync::Mutex, + pub(crate) idx: usize, + _video_properties: vk::QueueFamilyVideoPropertiesKHR<'static>, + pub(crate) query_result_status_properties: + vk::QueueFamilyQueryResultStatusPropertiesKHR<'static>, +} + +impl Queue { + pub(crate) fn supports_result_status_queries(&self) -> bool { + self.query_result_status_properties + .query_result_status_support + == vk::TRUE + } +} + +pub(crate) struct Queues { + pub(crate) transfer: Queue, + pub(crate) h264_decode: Queue, + pub(crate) wgpu: Queue, +} + +impl VulkanCtx { + pub fn new( + wgpu_features: wgpu::Features, + wgpu_limits: wgpu::Limits, + ) -> Result { + let entry = Arc::new(unsafe { Entry::load()? }); + + let instance_extension_properties = + unsafe { entry.enumerate_instance_extension_properties(None)? 
}; + info!( + "instance_extension_properties amount: {}", + instance_extension_properties.len() + ); + + let api_version = vk::make_api_version(0, 1, 3, 0); + let app_info = vk::ApplicationInfo { + api_version, + ..Default::default() + }; + + let layers = if cfg!(debug_assertions) { + vec![c"VK_LAYER_KHRONOS_validation".as_ptr()] + } else { + Vec::new() + }; + + let extensions = if cfg!(debug_assertions) { + vec![vk::EXT_DEBUG_UTILS_NAME] + } else { + Vec::new() + }; + + let wgpu_extensions = wgpu::hal::vulkan::Instance::desired_extensions( + &entry, + api_version, + wgpu::InstanceFlags::empty(), + )?; + + let extensions = extensions + .into_iter() + .chain(wgpu_extensions) + .collect::>(); + + let extension_ptrs = extensions.iter().map(|e| e.as_ptr()).collect::>(); + + let create_info = vk::InstanceCreateInfo::default() + .application_info(&app_info) + .enabled_layer_names(&layers) + .enabled_extension_names(&extension_ptrs); + + let instance = unsafe { entry.create_instance(&create_info, None) }?; + let video_queue_instance_ext = ash::khr::video_queue::Instance::new(&entry, &instance); + let debug_utils_instance_ext = ash::ext::debug_utils::Instance::new(&entry, &instance); + + let instance = Arc::new(Instance { + instance, + _entry: entry.clone(), + video_queue_instance_ext, + debug_utils_instance_ext, + }); + + let debug_messenger = if cfg!(debug_assertions) { + Some(DebugMessenger::new(instance.clone())?) + } else { + None + }; + + let wgpu_instance = unsafe { + wgpu::hal::vulkan::Instance::from_raw( + (*entry).clone(), + instance.instance.clone(), + api_version, + 0, + None, + extensions, + wgpu::InstanceFlags::empty(), + false, + None, + )? + }; + + let physical_devices = unsafe { instance.enumerate_physical_devices()? 
}; + + let ChosenDevice { + physical_device, + queue_indices, + h264_dpb_format_properties, + h264_dst_format_properties, + video_capabilities, + } = find_device(&physical_devices, &instance, REQUIRED_EXTENSIONS)?; + + let wgpu_adapter = wgpu_instance + .expose_adapter(physical_device) + .ok_or(VulkanCtxError::WgpuAdapterNotCreated)?; + + let wgpu_features = wgpu_features | wgpu::Features::TEXTURE_FORMAT_NV12; + + // TODO: we can only get the required extensions after exposing the adapter; the creation + // of the adapter and verification of whether the device supports all extensions should + // happen while picking the device. + let wgpu_extensions = wgpu_adapter + .adapter + .required_device_extensions(wgpu_features); + + let required_extensions = REQUIRED_EXTENSIONS + .iter() + .copied() + .chain(wgpu_extensions) + .collect::>(); + + let required_extensions_as_ptrs = required_extensions + .iter() + .map(|e| e.as_ptr()) + .collect::>(); + + let queue_create_infos = queue_indices.queue_create_infos(); + + let mut wgpu_physical_device_features = wgpu_adapter + .adapter + .physical_device_features(&required_extensions, wgpu_features); + + let mut vk_synch_2_feature = + vk::PhysicalDeviceSynchronization2Features::default().synchronization2(true); + + let device_create_info = vk::DeviceCreateInfo::default() + .queue_create_infos(&queue_create_infos) + .enabled_extension_names(&required_extensions_as_ptrs); + + let device_create_info = wgpu_physical_device_features + .add_to_device_create(device_create_info) + .push_next(&mut vk_synch_2_feature); + + let device = unsafe { instance.create_device(physical_device, &device_create_info, None)? 
}; + let h264_decode_queue = + unsafe { device.get_device_queue(queue_indices.h264_decode.idx as u32, 0) }; + let transfer_queue = + unsafe { device.get_device_queue(queue_indices.transfer.idx as u32, 0) }; + let wgpu_queue = unsafe { + device.get_device_queue(queue_indices.graphics_transfer_compute.idx as u32, 0) + }; + let queues = Queues { + transfer: Queue { + queue: transfer_queue.into(), + idx: queue_indices.transfer.idx, + _video_properties: queue_indices.transfer.video_properties, + query_result_status_properties: queue_indices + .transfer + .query_result_status_properties, + }, + h264_decode: Queue { + queue: h264_decode_queue.into(), + idx: queue_indices.h264_decode.idx, + _video_properties: queue_indices.h264_decode.video_properties, + query_result_status_properties: queue_indices + .h264_decode + .query_result_status_properties, + }, + wgpu: Queue { + queue: wgpu_queue.into(), + idx: queue_indices.graphics_transfer_compute.idx, + _video_properties: queue_indices.graphics_transfer_compute.video_properties, + query_result_status_properties: queue_indices + .graphics_transfer_compute + .query_result_status_properties, + }, + }; + + let video_queue_ext = ash::khr::video_queue::Device::new(&instance, &device); + let video_decode_queue_ext = ash::khr::video_decode_queue::Device::new(&instance, &device); + + let device = Arc::new(Device { + device, + video_queue_ext, + video_decode_queue_ext, + _instance: instance.clone(), + }); + + let wgpu_device = unsafe { + wgpu_adapter.adapter.device_from_raw( + device.device.clone(), + false, + &required_extensions, + wgpu_features, + &wgpu::MemoryHints::default(), + queue_indices.graphics_transfer_compute.idx as u32, + 0, + )? 
+ }; + + let allocator = Arc::new(Allocator::new( + instance.clone(), + physical_device, + device.clone(), + )?); + + let wgpu_instance = + unsafe { wgpu::Instance::from_hal::(wgpu_instance) }; + let wgpu_adapter = unsafe { wgpu_instance.create_adapter_from_hal(wgpu_adapter) }; + let (wgpu_device, wgpu_queue) = unsafe { + wgpu_adapter.create_device_from_hal( + wgpu_device, + &wgpu::DeviceDescriptor { + label: Some("wgpu device created by the vulkan video decoder"), + memory_hints: wgpu::MemoryHints::default(), + required_limits: wgpu_limits, + required_features: wgpu_features, + }, + None, + )? + }; + + let wgpu_ctx = WgpuCtx { + instance: Arc::new(wgpu_instance), + adapter: Arc::new(wgpu_adapter), + device: Arc::new(wgpu_device), + queue: Arc::new(wgpu_queue), + }; + + Ok(Self { + _entry: entry, + _instance: instance, + _physical_device: physical_device, + device, + allocator, + queues, + _debug_messenger: debug_messenger, + video_capabilities, + h264_dpb_format_properties, + h264_dst_format_properties, + wgpu_ctx, + }) + } +} + +struct ChosenDevice<'a> { + physical_device: vk::PhysicalDevice, + queue_indices: QueueIndices<'a>, + h264_dpb_format_properties: vk::VideoFormatPropertiesKHR<'a>, + h264_dst_format_properties: Option>, + video_capabilities: vk::VideoCapabilitiesKHR<'a>, +} + +fn find_device<'a>( + devices: &[vk::PhysicalDevice], + instance: &Instance, + required_extension_names: &[&CStr], +) -> Result, VulkanCtxError> { + for &device in devices { + let properties = unsafe { instance.get_physical_device_properties(device) }; + + let mut vk_13_features = vk::PhysicalDeviceVulkan13Features::default(); + let mut features = vk::PhysicalDeviceFeatures2::default().push_next(&mut vk_13_features); + + unsafe { instance.get_physical_device_features2(device, &mut features) }; + let extensions = unsafe { instance.enumerate_device_extension_properties(device)? 
}; + + if vk_13_features.synchronization2 == 0 { + error!( + "device {:?} does not support the required synchronization2 feature", + properties.device_name_as_c_str()? + ); + } + + if !required_extension_names.iter().all(|&extension_name| { + extensions.iter().any(|ext| { + let Ok(name) = ext.extension_name_as_c_str() else { + return false; + }; + + if name != extension_name { + return false; + }; + + true + }) + }) { + error!( + "device {:?} does not support the required extensions", + properties.device_name_as_c_str()? + ); + continue; + } + + let queues_len = + unsafe { instance.get_physical_device_queue_family_properties2_len(device) }; + let mut queues = vec![vk::QueueFamilyProperties2::default(); queues_len]; + let mut video_properties = vec![vk::QueueFamilyVideoPropertiesKHR::default(); queues_len]; + let mut query_result_status_properties = + vec![vk::QueueFamilyQueryResultStatusPropertiesKHR::default(); queues_len]; + + for ((queue, video_properties), query_result_properties) in queues + .iter_mut() + .zip(video_properties.iter_mut()) + .zip(query_result_status_properties.iter_mut()) + { + *queue = queue + .push_next(video_properties) + .push_next(query_result_properties); + } + + unsafe { instance.get_physical_device_queue_family_properties2(device, &mut queues) }; + + let profile_info = H264ProfileInfo::decode_h264_yuv420(); + + let mut h264_caps = vk::VideoDecodeH264CapabilitiesKHR::default(); + let mut decode_caps = vk::VideoDecodeCapabilitiesKHR { + p_next: (&mut h264_caps as *mut _) as *mut c_void, // why does this not have `.push_next()`? wtf + ..Default::default() + }; + + let mut caps = vk::VideoCapabilitiesKHR::default().push_next(&mut decode_caps); + + unsafe { + (instance + .video_queue_instance_ext + .fp() + .get_physical_device_video_capabilities_khr)( + device, + &profile_info.profile_info, + &mut caps, + ) + .result()? 
+ }; + + let video_capabilities = vk::VideoCapabilitiesKHR::default() + .flags(caps.flags) + .min_bitstream_buffer_size_alignment(caps.min_bitstream_buffer_size_alignment) + .min_bitstream_buffer_offset_alignment(caps.min_bitstream_buffer_offset_alignment) + .picture_access_granularity(caps.picture_access_granularity) + .min_coded_extent(caps.min_coded_extent) + .max_coded_extent(caps.max_coded_extent) + .max_dpb_slots(caps.max_dpb_slots) + .max_active_reference_pictures(caps.max_active_reference_pictures) + .std_header_version(caps.std_header_version); + info!("caps: {caps:#?}"); + + let flags = decode_caps.flags; + + let h264_dpb_format_properties = + if flags.contains(vk::VideoDecodeCapabilityFlagsKHR::DPB_AND_OUTPUT_COINCIDE) { + query_video_format_properties( + device, + &instance.video_queue_instance_ext, + &profile_info, + vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + | vk::ImageUsageFlags::TRANSFER_SRC, + )? + } else { + query_video_format_properties( + device, + &instance.video_queue_instance_ext, + &profile_info, + vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR, + )? + }; + + let h264_dst_format_properties = + if flags.contains(vk::VideoDecodeCapabilityFlagsKHR::DPB_AND_OUTPUT_COINCIDE) { + None + } else { + Some(query_video_format_properties( + device, + &instance.video_queue_instance_ext, + &profile_info, + vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR | vk::ImageUsageFlags::TRANSFER_SRC, + )?) 
+ }; + + let h264_dpb_format_properties = + if flags.contains(vk::VideoDecodeCapabilityFlagsKHR::DPB_AND_OUTPUT_COINCIDE) { + match h264_dpb_format_properties + .into_iter() + .find(|f| f.format == vk::Format::G8_B8R8_2PLANE_420_UNORM) + { + Some(f) => f, + None => continue, + } + } else { + h264_dpb_format_properties[0] + }; + + let h264_dst_format_properties = match h264_dst_format_properties { + Some(format_properties) => match format_properties + .into_iter() + .find(|f| f.format == vk::Format::G8_B8R8_2PLANE_420_UNORM) + { + Some(f) => Some(f), + None => continue, + }, + None => None, + }; + + let video_queues = queues + .iter() + .enumerate() + .filter(|(_, q)| { + q.queue_family_properties + .queue_flags + .contains(vk::QueueFlags::VIDEO_DECODE_KHR) + }) + .map(|(i, _)| i) + .collect::>(); // TODO: have to split the queues + + let Some(transfer_queue_idx) = queues + .iter() + .enumerate() + .find(|(_, q)| { + q.queue_family_properties + .queue_flags + .contains(vk::QueueFlags::TRANSFER) + && !q + .queue_family_properties + .queue_flags + .intersects(vk::QueueFlags::GRAPHICS) + }) + .map(|(i, _)| i) + else { + continue; + }; + + let Some(graphics_transfer_compute_queue_idx) = queues + .iter() + .enumerate() + .find(|(_, q)| { + q.queue_family_properties.queue_flags.contains( + vk::QueueFlags::GRAPHICS | vk::QueueFlags::TRANSFER | vk::QueueFlags::COMPUTE, + ) + }) + .map(|(i, _)| i) + else { + continue; + }; + + let Some(decode_queue_idx) = video_queues.into_iter().find(|&i| { + video_properties[i] + .video_codec_operations + .contains(vk::VideoCodecOperationFlagsKHR::DECODE_H264) + }) else { + continue; + }; + + info!("deocde_caps: {decode_caps:#?}"); + info!("h264_caps: {h264_caps:#?}"); + info!("dpb_format_properties: {h264_dpb_format_properties:#?}"); + info!("dst_format_properties: {h264_dst_format_properties:#?}"); + + return Ok(ChosenDevice { + physical_device: device, + queue_indices: QueueIndices { + transfer: QueueIndex { + idx: transfer_queue_idx, + 
video_properties: video_properties[transfer_queue_idx], + query_result_status_properties: query_result_status_properties + [transfer_queue_idx], + }, + h264_decode: QueueIndex { + idx: decode_queue_idx, + video_properties: video_properties[decode_queue_idx], + query_result_status_properties: query_result_status_properties + [decode_queue_idx], + }, + graphics_transfer_compute: QueueIndex { + idx: graphics_transfer_compute_queue_idx, + video_properties: video_properties[graphics_transfer_compute_queue_idx], + query_result_status_properties: query_result_status_properties + [graphics_transfer_compute_queue_idx], + }, + }, + h264_dpb_format_properties, + h264_dst_format_properties, + video_capabilities, + }); + } + + Err(VulkanCtxError::NoDevice) +} + +fn query_video_format_properties<'a>( + device: vk::PhysicalDevice, + video_queue_instance_ext: &ash::khr::video_queue::Instance, + profile_info: &H264ProfileInfo, + image_usage: vk::ImageUsageFlags, +) -> Result>, VulkanCtxError> { + let mut profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(&profile_info.profile_info)); + + let format_info = vk::PhysicalDeviceVideoFormatInfoKHR::default() + .image_usage(image_usage) + .push_next(&mut profile_list_info); + + let mut format_info_length = 0; + + unsafe { + (video_queue_instance_ext + .fp() + .get_physical_device_video_format_properties_khr)( + device, + &format_info, + &mut format_info_length, + std::ptr::null_mut(), + ) + .result()?; + } + + let mut format_properties = + vec![vk::VideoFormatPropertiesKHR::default(); format_info_length as usize]; + + unsafe { + (video_queue_instance_ext + .fp() + .get_physical_device_video_format_properties_khr)( + device, + &format_info, + &mut format_info_length, + format_properties.as_mut_ptr(), + ) + .result()?; + } + + Ok(format_properties) +} + +struct QueueIndex<'a> { + idx: usize, + video_properties: vk::QueueFamilyVideoPropertiesKHR<'a>, + query_result_status_properties: 
vk::QueueFamilyQueryResultStatusPropertiesKHR<'a>, +} + +pub(crate) struct QueueIndices<'a> { + transfer: QueueIndex<'a>, + h264_decode: QueueIndex<'a>, + graphics_transfer_compute: QueueIndex<'a>, +} + +impl QueueIndices<'_> { + fn queue_create_infos(&self) -> Vec { + [ + self.h264_decode.idx, + self.transfer.idx, + self.graphics_transfer_compute.idx, + ] + .into_iter() + .collect::>() + .into_iter() + .map(|i| { + vk::DeviceQueueCreateInfo::default() + .queue_family_index(i as u32) + .queue_priorities(&[1.0]) + }) + .collect::>() + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers.rs b/vk-video/src/vulkan_decoder/wrappers.rs new file mode 100644 index 000000000..2d02da14c --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers.rs @@ -0,0 +1,59 @@ +use std::sync::Arc; + +use ash::Entry; + +mod command; +mod debug; +mod mem; +mod sync; +mod video; +mod vk_extensions; + +pub(crate) use command::*; +pub(crate) use debug::*; +pub(crate) use mem::*; +pub(crate) use sync::*; +pub(crate) use video::*; +pub(crate) use vk_extensions::*; + +pub(crate) struct Instance { + pub(crate) instance: ash::Instance, + pub(crate) _entry: Arc, + pub(crate) video_queue_instance_ext: ash::khr::video_queue::Instance, + pub(crate) debug_utils_instance_ext: ash::ext::debug_utils::Instance, +} + +impl Drop for Instance { + fn drop(&mut self) { + unsafe { self.destroy_instance(None) }; + } +} + +impl std::ops::Deref for Instance { + type Target = ash::Instance; + + fn deref(&self) -> &Self::Target { + &self.instance + } +} + +pub(crate) struct Device { + pub(crate) device: ash::Device, + pub(crate) video_queue_ext: ash::khr::video_queue::Device, + pub(crate) video_decode_queue_ext: ash::khr::video_decode_queue::Device, + pub(crate) _instance: Arc, +} + +impl std::ops::Deref for Device { + type Target = ash::Device; + + fn deref(&self) -> &Self::Target { + &self.device + } +} + +impl Drop for Device { + fn drop(&mut self) { + unsafe { self.destroy_device(None) }; + } +} diff --git 
// --- vk-video/src/vulkan_decoder/wrappers/command.rs ---

use std::sync::Arc;

use ash::vk;

use crate::vulkan_decoder::{VulkanCtxError, VulkanDecoderError};

use super::Device;

/// RAII wrapper around a [`vk::CommandPool`]; the pool is destroyed on drop,
/// so it must outlive every command buffer allocated from it.
pub(crate) struct CommandPool {
    pub(crate) command_pool: vk::CommandPool,
    device: Arc<Device>,
}

impl CommandPool {
    /// Creates a pool for `queue_family_index` with `RESET_COMMAND_BUFFER`, so
    /// individual buffers can be reset and reused.
    pub(crate) fn new(
        device: Arc<Device>,
        queue_family_index: usize,
    ) -> Result<Self, VulkanCtxError> {
        let create_info = vk::CommandPoolCreateInfo::default()
            .flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER)
            .queue_family_index(queue_family_index as u32);

        let command_pool = unsafe { device.create_command_pool(&create_info, None)? };

        Ok(Self {
            device,
            command_pool,
        })
    }
}

impl Drop for CommandPool {
    fn drop(&mut self) {
        unsafe {
            self.device.destroy_command_pool(self.command_pool, None);
        }
    }
}

impl std::ops::Deref for CommandPool {
    type Target = vk::CommandPool;

    fn deref(&self) -> &Self::Target {
        &self.command_pool
    }
}

/// A primary command buffer allocated from a [`CommandPool`]. Holds the pool
/// via `Arc`, so the buffer is freed together with the pool.
pub(crate) struct CommandBuffer {
    pool: Arc<CommandPool>,
    pub(crate) buffer: vk::CommandBuffer,
}

impl CommandBuffer {
    /// Allocates a single primary-level command buffer from `pool`.
    pub(crate) fn new_primary(pool: Arc<CommandPool>) -> Result<Self, VulkanDecoderError> {
        let allocate_info = vk::CommandBufferAllocateInfo::default()
            .command_pool(**pool)
            .level(vk::CommandBufferLevel::PRIMARY)
            .command_buffer_count(1);

        let buffer = unsafe { pool.device.allocate_command_buffers(&allocate_info)?[0] };

        Ok(Self { pool, buffer })
    }

    /// Submits this buffer to `queue` via `vkQueueSubmit2`, waiting on and
    /// signalling the given `(semaphore, stage)` pairs. `fence` (if any) is
    /// signalled when execution completes.
    pub(crate) fn submit(
        &self,
        queue: vk::Queue,
        wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)],
        signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)],
        fence: Option<vk::Fence>,
    ) -> Result<(), VulkanDecoderError> {
        // Converts `(semaphore, stage)` pairs into the submit-info structs
        // expected by synchronization2.
        fn to_sem_submit_info(
            submits: &[(vk::Semaphore, vk::PipelineStageFlags2)],
        ) -> Vec<vk::SemaphoreSubmitInfo> {
            submits
                .iter()
                .map(|&(sem, stage)| {
                    vk::SemaphoreSubmitInfo::default()
                        .semaphore(sem)
                        .stage_mask(stage)
                })
                .collect::<Vec<_>>()
        }

        let wait_semaphores = to_sem_submit_info(wait_semaphores);
        let signal_semaphores = to_sem_submit_info(signal_semaphores);

        let buffer_submit_info =
            [vk::CommandBufferSubmitInfo::default().command_buffer(self.buffer)];

        let submit_info = [vk::SubmitInfo2::default()
            .wait_semaphore_infos(&wait_semaphores)
            .signal_semaphore_infos(&signal_semaphores)
            .command_buffer_infos(&buffer_submit_info)];

        unsafe {
            self.device()
                .queue_submit2(queue, &submit_info, fence.unwrap_or(vk::Fence::null()))?
        };

        Ok(())
    }

    /// Begins recording with the `ONE_TIME_SUBMIT` usage flag.
    pub(crate) fn begin(&self) -> Result<(), VulkanDecoderError> {
        unsafe {
            self.device().begin_command_buffer(
                self.buffer,
                &vk::CommandBufferBeginInfo::default()
                    .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT),
            )?
        };
        Ok(())
    }

    /// Ends recording of this command buffer.
    pub(crate) fn end(&self) -> Result<(), VulkanDecoderError> {
        unsafe { self.device().end_command_buffer(self.buffer)? };

        Ok(())
    }

    fn device(&self) -> &Device {
        &self.pool.device
    }
}

impl std::ops::Deref for CommandBuffer {
    type Target = vk::CommandBuffer;

    fn deref(&self) -> &Self::Target {
        &self.buffer
    }
}

// --- vk-video/src/vulkan_decoder/wrappers/debug.rs ---

use std::{ffi::c_void, sync::Arc};

use ash::vk::{self, QueryType};
use tracing::{error, info, trace, warn};

use crate::vulkan_decoder::{VulkanCtxError, VulkanDecoderError};

use super::{Device, Instance};

/// RAII wrapper over a `VK_EXT_debug_utils` messenger; forwards validation
/// messages to `tracing` and destroys the messenger on drop.
pub(crate) struct DebugMessenger {
    messenger: vk::DebugUtilsMessengerEXT,
    instance: Arc<Instance>,
}

impl DebugMessenger {
    /// Registers a messenger listening to all severities and message types.
    pub(crate) fn new(instance: Arc<Instance>) -> Result<Self, VulkanCtxError> {
        let debug_messenger_create_info = vk::DebugUtilsMessengerCreateInfoEXT::default()
            .message_severity(
                vk::DebugUtilsMessageSeverityFlagsEXT::ERROR
                    | vk::DebugUtilsMessageSeverityFlagsEXT::WARNING
                    | vk::DebugUtilsMessageSeverityFlagsEXT::INFO
                    | vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE,
            )
            .message_type(
                vk::DebugUtilsMessageTypeFlagsEXT::GENERAL
                    | vk::DebugUtilsMessageTypeFlagsEXT::VALIDATION
                    | vk::DebugUtilsMessageTypeFlagsEXT::PERFORMANCE,
            )
            .pfn_user_callback(Some(debug_messenger_callback));

        let messenger = unsafe {
            instance
                .debug_utils_instance_ext
                .create_debug_utils_messenger(&debug_messenger_create_info, None)?
+ }; + + Ok(Self { + instance, + messenger, + }) + } +} + +impl Drop for DebugMessenger { + fn drop(&mut self) { + unsafe { + self.instance + .debug_utils_instance_ext + .destroy_debug_utils_messenger(self.messenger, None) + }; + } +} + +unsafe extern "system" fn debug_messenger_callback( + message_severity: vk::DebugUtilsMessageSeverityFlagsEXT, + message_types: vk::DebugUtilsMessageTypeFlagsEXT, + p_callback_data: *const vk::DebugUtilsMessengerCallbackDataEXT<'_>, + _p_user_data: *mut c_void, +) -> vk::Bool32 { + let callback_data = unsafe { *p_callback_data }; + let message_id = callback_data + .message_id_name_as_c_str() + .unwrap_or(c"") + .to_str() + .unwrap(); + let message = callback_data + .message_as_c_str() + .unwrap_or(c"") + .to_str() + .unwrap(); + let t = format!("{:?}", message_types); + match message_severity { + vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE => { + trace!("[{t}][{message_id}] {message}"); + } + + vk::DebugUtilsMessageSeverityFlagsEXT::INFO => { + info!("[{t}][{message_id}] {message}"); + } + + vk::DebugUtilsMessageSeverityFlagsEXT::WARNING => { + warn!("[{t}][{message_id}] {message}"); + } + + vk::DebugUtilsMessageSeverityFlagsEXT::ERROR => { + error!("[{t}][{message_id}] {message}"); + } + _ => {} + } + + vk::FALSE +} + +pub(crate) struct DecodeQueryPool { + pool: QueryPool, +} + +impl DecodeQueryPool { + pub(crate) fn new( + device: Arc, + profile: vk::VideoProfileInfoKHR, + ) -> Result { + let pool = QueryPool::new(device, QueryType::RESULT_STATUS_ONLY_KHR, 1, Some(profile))?; + Ok(Self { pool }) + } + + pub(crate) fn reset(&self, buffer: vk::CommandBuffer) { + unsafe { + self.pool + .device + .cmd_reset_query_pool(buffer, self.pool.pool, 0, 1) + }; + } + + // if we want to switch to inline queries we can use this, but we need to check how many + // implementations support them + pub(crate) fn _inline_query(&self) -> vk::VideoInlineQueryInfoKHR { + vk::VideoInlineQueryInfoKHR::default() + .query_pool(self.pool.pool) + 
.first_query(0) + .query_count(1) + } + + pub(crate) fn begin_query(&self, buffer: vk::CommandBuffer) { + unsafe { + self.pool.device.cmd_begin_query( + buffer, + self.pool.pool, + 0, + vk::QueryControlFlags::empty(), + ) + } + } + + pub(crate) fn end_query(&self, buffer: vk::CommandBuffer) { + unsafe { self.pool.device.cmd_end_query(buffer, self.pool.pool, 0) } + } + + pub(crate) fn get_result_blocking( + &self, + ) -> Result { + let mut result = vk::QueryResultStatusKHR::NOT_READY; + unsafe { + self.pool.device.get_query_pool_results( + self.pool.pool, + 0, + std::slice::from_mut(&mut result), + vk::QueryResultFlags::WAIT | vk::QueryResultFlags::WITH_STATUS_KHR, + )? + }; + + Ok(result) + } +} + +pub(crate) struct QueryPool { + pool: vk::QueryPool, + device: Arc, +} + +impl QueryPool { + pub(crate) fn new( + device: Arc, + ty: vk::QueryType, + count: u32, + mut p_next: Option, + ) -> Result { + let mut create_info = vk::QueryPoolCreateInfo::default() + .query_type(ty) + .query_count(count); + + if let Some(p_next) = p_next.as_mut() { + create_info = create_info.push_next(p_next) + } + let pool = unsafe { device.create_query_pool(&create_info, None)? 
}; + + Ok(Self { pool, device }) + } +} + +impl Drop for QueryPool { + fn drop(&mut self) { + unsafe { self.device.destroy_query_pool(self.pool, None) }; + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers/mem.rs b/vk-video/src/vulkan_decoder/wrappers/mem.rs new file mode 100644 index 000000000..37f8e83bf --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers/mem.rs @@ -0,0 +1,249 @@ +use std::sync::Arc; + +use ash::vk; +use vk_mem::Alloc; + +use crate::vulkan_decoder::{H264ProfileInfo, VulkanCtxError, VulkanDecoderError}; + +use super::{Device, Instance}; + +pub(crate) struct Allocator { + allocator: vk_mem::Allocator, + _instance: Arc, + _device: Arc, +} + +impl Allocator { + pub(crate) fn new( + instance: Arc, + physical_device: vk::PhysicalDevice, + device: Arc, + ) -> Result { + let mut allocator_create_info = + vk_mem::AllocatorCreateInfo::new(&instance, &device, physical_device); + allocator_create_info.vulkan_api_version = vk::API_VERSION_1_3; + + let allocator = unsafe { vk_mem::Allocator::new(allocator_create_info)? }; + + Ok(Self { + allocator, + _device: device, + _instance: instance, + }) + } +} + +impl std::ops::Deref for Allocator { + type Target = vk_mem::Allocator; + + fn deref(&self) -> &Self::Target { + &self.allocator + } +} + +pub(crate) struct MemoryAllocation { + pub(crate) allocation: vk_mem::Allocation, + allocator: Arc, +} + +impl MemoryAllocation { + pub(crate) fn new( + allocator: Arc, + memory_requirements: &vk::MemoryRequirements, + alloc_info: &vk_mem::AllocationCreateInfo, + ) -> Result { + let allocation = unsafe { allocator.allocate_memory(memory_requirements, alloc_info)? 
}; + + Ok(Self { + allocation, + allocator, + }) + } + + pub(crate) fn allocation_info(&self) -> vk_mem::AllocationInfo { + self.allocator.get_allocation_info(&self.allocation) + } +} + +impl std::ops::Deref for MemoryAllocation { + type Target = vk_mem::Allocation; + + fn deref(&self) -> &Self::Target { + &self.allocation + } +} + +impl Drop for MemoryAllocation { + fn drop(&mut self) { + unsafe { self.allocator.free_memory(&mut self.allocation) }; + } +} + +pub(crate) struct Buffer { + pub(crate) buffer: vk::Buffer, + pub(crate) allocation: vk_mem::Allocation, + allocator: Arc, +} + +#[derive(Debug, Clone, Copy)] +pub(crate) enum TransferDirection { + GpuToMem, +} + +impl Buffer { + pub(crate) fn new_decode( + allocator: Arc, + size: u64, + profile: &H264ProfileInfo, + ) -> Result { + let mut profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(&profile.profile_info)); + + let buffer_create_info = vk::BufferCreateInfo::default() + .size(size) + .usage(vk::BufferUsageFlags::VIDEO_DECODE_SRC_KHR) + .sharing_mode(vk::SharingMode::EXCLUSIVE) + .push_next(&mut profile_list_info); + + let allocation_create_info = vk_mem::AllocationCreateInfo { + usage: vk_mem::MemoryUsage::Auto, + required_flags: vk::MemoryPropertyFlags::HOST_COHERENT, + flags: vk_mem::AllocationCreateFlags::HOST_ACCESS_SEQUENTIAL_WRITE, + ..Default::default() + }; + + Self::new(allocator, buffer_create_info, allocation_create_info) + } + + pub(crate) fn new_transfer( + allocator: Arc, + size: u64, + direction: TransferDirection, + ) -> Result { + let usage = match direction { + TransferDirection::GpuToMem => vk::BufferUsageFlags::TRANSFER_DST, + }; + + let allocation_flags = match direction { + TransferDirection::GpuToMem => vk_mem::AllocationCreateFlags::HOST_ACCESS_RANDOM, + }; + + let buffer_create_info = vk::BufferCreateInfo::default() + .size(size) + .usage(usage) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + let allocation_create_info = 
vk_mem::AllocationCreateInfo { + usage: vk_mem::MemoryUsage::Auto, + required_flags: vk::MemoryPropertyFlags::HOST_COHERENT, + flags: allocation_flags, + ..Default::default() + }; + + Self::new(allocator, buffer_create_info, allocation_create_info) + } + + fn new( + allocator: Arc, + create_info: vk::BufferCreateInfo, + allocation_create_info: vk_mem::AllocationCreateInfo, + ) -> Result { + let (buffer, allocation) = + unsafe { allocator.create_buffer(&create_info, &allocation_create_info)? }; + + Ok(Self { + buffer, + allocation, + allocator, + }) + } +} + +impl Drop for Buffer { + fn drop(&mut self) { + unsafe { + self.allocator + .destroy_buffer(self.buffer, &mut self.allocation) + } + } +} + +impl std::ops::Deref for Buffer { + type Target = vk::Buffer; + + fn deref(&self) -> &Self::Target { + &self.buffer + } +} + +pub(crate) struct Image { + pub(crate) image: vk::Image, + allocation: vk_mem::Allocation, + allocator: Arc, +} + +impl Image { + pub(crate) fn new( + allocator: Arc, + image_create_info: &vk::ImageCreateInfo, + ) -> Result { + let alloc_info = vk_mem::AllocationCreateInfo { + usage: vk_mem::MemoryUsage::Auto, + ..Default::default() + }; + + let (image, allocation) = + unsafe { allocator.create_image(image_create_info, &alloc_info)? }; + + Ok(Image { + image, + allocation, + allocator, + }) + } +} + +impl std::ops::Deref for Image { + type Target = vk::Image; + + fn deref(&self) -> &Self::Target { + &self.image + } +} + +impl Drop for Image { + fn drop(&mut self) { + unsafe { + self.allocator + .destroy_image(self.image, &mut self.allocation) + }; + } +} + +pub(crate) struct ImageView { + pub(crate) view: vk::ImageView, + pub(crate) _image: Arc, + pub(crate) device: Arc, +} + +impl ImageView { + pub(crate) fn new( + device: Arc, + image: Arc, + create_info: &vk::ImageViewCreateInfo, + ) -> Result { + let view = unsafe { device.create_image_view(create_info, None)? 
}; + + Ok(ImageView { + view, + _image: image, + device: device.clone(), + }) + } +} + +impl Drop for ImageView { + fn drop(&mut self) { + unsafe { self.device.destroy_image_view(self.view, None) }; + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers/sync.rs b/vk-video/src/vulkan_decoder/wrappers/sync.rs new file mode 100644 index 000000000..b0a3061e2 --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers/sync.rs @@ -0,0 +1,85 @@ +use std::sync::Arc; + +use ash::vk; + +use crate::vulkan_decoder::VulkanDecoderError; + +use super::Device; + +pub(crate) struct Fence { + pub(crate) fence: vk::Fence, + device: Arc, +} + +impl Fence { + pub(crate) fn new(device: Arc, signaled: bool) -> Result { + let flags = if signaled { + vk::FenceCreateFlags::SIGNALED + } else { + vk::FenceCreateFlags::empty() + }; + let create_info = vk::FenceCreateInfo::default().flags(flags); + let fence = unsafe { device.create_fence(&create_info, None)? }; + + Ok(Self { device, fence }) + } + + pub(crate) fn wait(&self, timeout: u64) -> Result<(), VulkanDecoderError> { + unsafe { self.device.wait_for_fences(&[self.fence], true, timeout)? }; + Ok(()) + } + + pub(crate) fn reset(&self) -> Result<(), VulkanDecoderError> { + unsafe { self.device.reset_fences(&[self.fence])? }; + Ok(()) + } + + pub(crate) fn wait_and_reset(&self, timeout: u64) -> Result<(), VulkanDecoderError> { + self.wait(timeout)?; + self.reset()?; + + Ok(()) + } +} + +impl Drop for Fence { + fn drop(&mut self) { + unsafe { self.device.destroy_fence(self.fence, None) }; + } +} + +impl std::ops::Deref for Fence { + type Target = vk::Fence; + + fn deref(&self) -> &Self::Target { + &self.fence + } +} + +pub(crate) struct Semaphore { + pub(crate) semaphore: vk::Semaphore, + device: Arc, +} + +impl Semaphore { + pub(crate) fn new(device: Arc) -> Result { + let create_info = vk::SemaphoreCreateInfo::default(); + let semaphore = unsafe { device.create_semaphore(&create_info, None)? 
}; + + Ok(Self { device, semaphore }) + } +} + +impl Drop for Semaphore { + fn drop(&mut self) { + unsafe { self.device.destroy_semaphore(self.semaphore, None) }; + } +} + +impl std::ops::Deref for Semaphore { + type Target = vk::Semaphore; + + fn deref(&self) -> &Self::Target { + &self.semaphore + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers/video.rs b/vk-video/src/vulkan_decoder/wrappers/video.rs new file mode 100644 index 000000000..7dc745c1b --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers/video.rs @@ -0,0 +1,298 @@ +use std::{collections::HashMap, sync::Arc}; + +use ash::vk; +use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; + +use crate::{ + vulkan_decoder::{ + parameter_sets::{VkPictureParameterSet, VkSequenceParameterSet}, + VulkanDecoderError, + }, + VulkanCtx, +}; + +use super::{Device, MemoryAllocation, VideoQueueExt}; + +/// Since `VideoSessionParameters` can only add sps and pps values (inserting sps or pps with an +/// existing id is prohibited), this is an abstraction which provides the capability to replace an +/// existing sps or pps. 
+pub(crate) struct VideoSessionParametersManager { + pub(crate) parameters: VideoSessionParameters, + sps: HashMap, + pps: HashMap<(u8, u8), VkPictureParameterSet>, + device: Arc, + session: vk::VideoSessionKHR, +} + +impl VideoSessionParametersManager { + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + session: vk::VideoSessionKHR, + ) -> Result { + Ok(Self { + parameters: VideoSessionParameters::new( + vulkan_ctx.device.clone(), + session, + &[], + &[], + None, + )?, + sps: HashMap::new(), + pps: HashMap::new(), + device: vulkan_ctx.device.clone(), + session, + }) + } + + pub(crate) fn parameters(&self) -> vk::VideoSessionParametersKHR { + self.parameters.parameters + } + + pub(crate) fn change_session( + &mut self, + session: vk::VideoSessionKHR, + ) -> Result<(), VulkanDecoderError> { + if self.session == session { + return Ok(()); + } + self.session = session; + + let sps = self.sps.values().map(|sps| sps.sps).collect::>(); + let pps = self.pps.values().map(|pps| pps.pps).collect::>(); + + self.parameters = + VideoSessionParameters::new(self.device.clone(), session, &sps, &pps, None)?; + + Ok(()) + } + + // it is probably not optimal to insert sps and pps searately. this could be optimized, so that + // the insertion happens lazily when the parameters are bound to a session. + pub(crate) fn put_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { + let key = sps.seq_parameter_set_id.id(); + match self.sps.entry(key) { + std::collections::hash_map::Entry::Occupied(mut e) => { + e.insert(sps.try_into()?); + + self.parameters = VideoSessionParameters::new( + self.device.clone(), + self.session, + &[self.sps[&key].sps], + &[], + Some(&self.parameters), + )? 
+ } + std::collections::hash_map::Entry::Vacant(e) => { + e.insert(sps.try_into()?); + + self.parameters.add(&[self.sps[&key].sps], &[])?; + } + } + + Ok(()) + } + + pub(crate) fn put_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { + let key = (pps.seq_parameter_set_id.id(), pps.pic_parameter_set_id.id()); + match self.pps.entry(key) { + std::collections::hash_map::Entry::Occupied(mut e) => { + e.insert(pps.try_into()?); + + self.parameters = VideoSessionParameters::new( + self.device.clone(), + self.session, + &[], + &[self.pps[&key].pps], + Some(&self.parameters), + )?; + } + + std::collections::hash_map::Entry::Vacant(e) => { + e.insert(pps.try_into()?); + + self.parameters.add(&[], &[self.pps[&key].pps])?; + } + } + + Ok(()) + } +} + +pub(crate) struct VideoSessionParameters { + pub(crate) parameters: vk::VideoSessionParametersKHR, + update_sequence_count: u32, + device: Arc, +} + +impl VideoSessionParameters { + pub(crate) fn new( + device: Arc, + session: vk::VideoSessionKHR, + initial_sps: &[vk::native::StdVideoH264SequenceParameterSet], + initial_pps: &[vk::native::StdVideoH264PictureParameterSet], + template: Option<&Self>, + ) -> Result { + let parameters_add_info = vk::VideoDecodeH264SessionParametersAddInfoKHR::default() + .std_sp_ss(initial_sps) + .std_pp_ss(initial_pps); + + let mut h264_create_info = vk::VideoDecodeH264SessionParametersCreateInfoKHR::default() + .max_std_sps_count(32) + .max_std_pps_count(32) + .parameters_add_info(¶meters_add_info); + + let create_info = vk::VideoSessionParametersCreateInfoKHR::default() + .flags(vk::VideoSessionParametersCreateFlagsKHR::empty()) + .video_session_parameters_template( + template + .map(|t| t.parameters) + .unwrap_or_else(vk::VideoSessionParametersKHR::null), + ) + .video_session(session) + .push_next(&mut h264_create_info); + + let parameters = unsafe { + device + .video_queue_ext + .create_video_session_parameters_khr(&create_info, None)? 
+ }; + + Ok(Self { + parameters, + update_sequence_count: 0, + device: device.clone(), + }) + } + + pub(crate) fn add( + &mut self, + sps: &[vk::native::StdVideoH264SequenceParameterSet], + pps: &[vk::native::StdVideoH264PictureParameterSet], + ) -> Result<(), VulkanDecoderError> { + let mut parameters_add_info = vk::VideoDecodeH264SessionParametersAddInfoKHR::default() + .std_sp_ss(sps) + .std_pp_ss(pps); + + self.update_sequence_count += 1; + let update_info = vk::VideoSessionParametersUpdateInfoKHR::default() + .update_sequence_count(self.update_sequence_count) + .push_next(&mut parameters_add_info); + + unsafe { + self.device + .video_queue_ext + .update_video_session_parameters_khr(self.parameters, &update_info)? + }; + + Ok(()) + } +} + +impl Drop for VideoSessionParameters { + fn drop(&mut self) { + unsafe { + self.device + .video_queue_ext + .destroy_video_session_parameters_khr(self.parameters, None) + } + } +} + +pub(crate) struct VideoSession { + pub(crate) session: vk::VideoSessionKHR, + pub(crate) device: Arc, + pub(crate) _allocations: Vec, + pub(crate) max_coded_extent: vk::Extent2D, + pub(crate) max_dpb_slots: u32, +} + +impl VideoSession { + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + profile_info: &vk::VideoProfileInfoKHR, + max_coded_extent: vk::Extent2D, + max_dpb_slots: u32, + max_active_references: u32, + std_header_version: &vk::ExtensionProperties, + ) -> Result { + // TODO: this probably works, but this format needs to be detected and set + // based on what the GPU supports + let format = vk::Format::G8_B8R8_2PLANE_420_UNORM; + + let session_create_info = vk::VideoSessionCreateInfoKHR::default() + .queue_family_index(vulkan_ctx.queues.h264_decode.idx as u32) + .video_profile(profile_info) + .picture_format(format) + .max_coded_extent(max_coded_extent) + .reference_picture_format(format) + .max_dpb_slots(max_dpb_slots) + .max_active_reference_pictures(max_active_references) + .std_header_version(std_header_version); + + let video_session 
= unsafe { + vulkan_ctx + .device + .video_queue_ext + .create_video_session_khr(&session_create_info, None)? + }; + + let memory_requirements = unsafe { + vulkan_ctx + .device + .video_queue_ext + .get_video_session_memory_requirements_khr(video_session)? + }; + + let allocations = memory_requirements + .iter() + .map(|req| { + MemoryAllocation::new( + vulkan_ctx.allocator.clone(), + &req.memory_requirements, + &vk_mem::AllocationCreateInfo { + usage: vk_mem::MemoryUsage::Unknown, + ..Default::default() + }, + ) + }) + .collect::, _>>()?; + + let memory_bind_infos = memory_requirements + .into_iter() + .zip(allocations.iter()) + .map(|(req, allocation)| { + let allocation_info = allocation.allocation_info(); + vk::BindVideoSessionMemoryInfoKHR::default() + .memory_bind_index(req.memory_bind_index) + .memory(allocation_info.device_memory) + .memory_offset(allocation_info.offset) + .memory_size(allocation_info.size) + }) + .collect::>(); + + unsafe { + vulkan_ctx + .device + .video_queue_ext + .bind_video_session_memory_khr(video_session, &memory_bind_infos)? 
+ }; + + Ok(VideoSession { + session: video_session, + _allocations: allocations, + device: vulkan_ctx.device.clone(), + max_coded_extent, + max_dpb_slots, + }) + } +} + +impl Drop for VideoSession { + fn drop(&mut self) { + unsafe { + self.device + .video_queue_ext + .destroy_video_session_khr(self.session, None) + }; + } +} diff --git a/vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs b/vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs new file mode 100644 index 000000000..8384443e3 --- /dev/null +++ b/vk-video/src/vulkan_decoder/wrappers/vk_extensions.rs @@ -0,0 +1,228 @@ +use ash::{prelude::VkResult, vk, RawPtr}; + +pub(crate) trait VideoQueueExt { + unsafe fn cmd_begin_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + begin_info: &vk::VideoBeginCodingInfoKHR, + ); + + unsafe fn cmd_end_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + end_info: &vk::VideoEndCodingInfoKHR, + ); + + unsafe fn cmd_control_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + control_info: &vk::VideoCodingControlInfoKHR, + ); + + unsafe fn get_video_session_memory_requirements_khr( + &self, + video_session: vk::VideoSessionKHR, + ) -> VkResult>; + + unsafe fn create_video_session_khr( + &self, + create_info: &vk::VideoSessionCreateInfoKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) -> VkResult; + + unsafe fn bind_video_session_memory_khr( + &self, + video_session: vk::VideoSessionKHR, + memory_bind_infos: &[vk::BindVideoSessionMemoryInfoKHR], + ) -> VkResult<()>; + + unsafe fn destroy_video_session_khr( + &self, + video_session: vk::VideoSessionKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ); + + unsafe fn create_video_session_parameters_khr( + &self, + create_info: &vk::VideoSessionParametersCreateInfoKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) -> VkResult; + + unsafe fn destroy_video_session_parameters_khr( + &self, + parameters: vk::VideoSessionParametersKHR, + 
allocation_callbacks: Option<&vk::AllocationCallbacks>, + ); + + unsafe fn update_video_session_parameters_khr( + &self, + parameters: vk::VideoSessionParametersKHR, + update_info: &vk::VideoSessionParametersUpdateInfoKHR, + ) -> VkResult<()>; +} + +impl VideoQueueExt for ash::khr::video_queue::Device { + unsafe fn cmd_begin_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + begin_info: &vk::VideoBeginCodingInfoKHR, + ) { + unsafe { (self.fp().cmd_begin_video_coding_khr)(command_buffer, begin_info) } + } + + unsafe fn cmd_end_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + end_info: &vk::VideoEndCodingInfoKHR, + ) { + unsafe { (self.fp().cmd_end_video_coding_khr)(command_buffer, end_info) } + } + + unsafe fn cmd_control_video_coding_khr( + &self, + command_buffer: vk::CommandBuffer, + control_info: &vk::VideoCodingControlInfoKHR, + ) { + unsafe { (self.fp().cmd_control_video_coding_khr)(command_buffer, control_info) } + } + + unsafe fn get_video_session_memory_requirements_khr( + &self, + video_session: vk::VideoSessionKHR, + ) -> VkResult> { + let mut memory_requirements_len = 0; + unsafe { + (self.fp().get_video_session_memory_requirements_khr)( + self.device(), + video_session, + &mut memory_requirements_len, + std::ptr::null_mut(), + ) + .result()?; + } + + let mut memory_requirements = vec![ + vk::VideoSessionMemoryRequirementsKHR::default(); + memory_requirements_len as usize + ]; + + unsafe { + (self.fp().get_video_session_memory_requirements_khr)( + self.device(), + video_session, + &mut memory_requirements_len, + memory_requirements.as_mut_ptr(), + ) + .result_with_success(memory_requirements) + } + } + + unsafe fn create_video_session_khr( + &self, + create_info: &vk::VideoSessionCreateInfoKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) -> VkResult { + let mut video_session = vk::VideoSessionKHR::default(); + + unsafe { + (self.fp().create_video_session_khr)( + self.device(), + create_info, + 
allocation_callbacks.as_raw_ptr(), + &mut video_session, + ) + .result_with_success(video_session) + } + } + + unsafe fn bind_video_session_memory_khr( + &self, + video_session: vk::VideoSessionKHR, + memory_bind_infos: &[vk::BindVideoSessionMemoryInfoKHR], + ) -> VkResult<()> { + unsafe { + (self.fp().bind_video_session_memory_khr)( + self.device(), + video_session, + memory_bind_infos.len() as u32, + memory_bind_infos.as_ptr(), + ) + .result() + } + } + + unsafe fn destroy_video_session_khr( + &self, + video_session: vk::VideoSessionKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) { + unsafe { + (self.fp().destroy_video_session_khr)( + self.device(), + video_session, + allocation_callbacks.as_raw_ptr(), + ) + } + } + + unsafe fn create_video_session_parameters_khr( + &self, + create_info: &vk::VideoSessionParametersCreateInfoKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) -> VkResult { + let mut parameters = vk::VideoSessionParametersKHR::default(); + + unsafe { + (self.fp().create_video_session_parameters_khr)( + self.device(), + create_info, + allocation_callbacks.as_raw_ptr(), + &mut parameters, + ) + .result_with_success(parameters) + } + } + + unsafe fn destroy_video_session_parameters_khr( + &self, + parameters: vk::VideoSessionParametersKHR, + allocation_callbacks: Option<&vk::AllocationCallbacks>, + ) { + unsafe { + (self.fp().destroy_video_session_parameters_khr)( + self.device(), + parameters, + allocation_callbacks.as_raw_ptr(), + ) + } + } + + unsafe fn update_video_session_parameters_khr( + &self, + parameters: vk::VideoSessionParametersKHR, + update_info: &vk::VideoSessionParametersUpdateInfoKHR, + ) -> VkResult<()> { + unsafe { + (self.fp().update_video_session_parameters_khr)(self.device(), parameters, update_info) + .result() + } + } +} + +pub(crate) trait VideoDecodeQueueExt { + unsafe fn cmd_decode_video_khr( + &self, + command_buffer: vk::CommandBuffer, + decode_info: &vk::VideoDecodeInfoKHR, + ); +} + 
+impl VideoDecodeQueueExt for ash::khr::video_decode_queue::Device { + unsafe fn cmd_decode_video_khr( + &self, + command_buffer: vk::CommandBuffer, + decode_info: &vk::VideoDecodeInfoKHR, + ) { + unsafe { (self.fp().cmd_decode_video_khr)(command_buffer, decode_info) } + } +} From 579ca0c1deb85048b918067578025f54015a49cc Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Wed, 25 Sep 2024 14:12:51 +0200 Subject: [PATCH 02/13] Improve the API so that it is similar to what we had before --- compositor_pipeline/src/error.rs | 3 + compositor_pipeline/src/pipeline.rs | 73 ++++++++++++++----- .../src/pipeline/decoder/video.rs | 6 +- .../examples/raw_channel_input.rs | 13 +--- .../examples/raw_channel_output.rs | 11 +-- 5 files changed, 66 insertions(+), 40 deletions(-) diff --git a/compositor_pipeline/src/error.rs b/compositor_pipeline/src/error.rs index 591f4fa96..6a18be2f2 100644 --- a/compositor_pipeline/src/error.rs +++ b/compositor_pipeline/src/error.rs @@ -136,6 +136,9 @@ pub enum InputInitError { #[cfg(target_os = "linux")] #[error(transparent)] VulkanDecoderError(#[from] vk_video::DecoderError), + + #[error("Vulkan context is not available. 
Cannot create a vulkan video decoder")] + VulkanContextRequiredForVulkanDecoder, } pub enum ErrorType { diff --git a/compositor_pipeline/src/pipeline.rs b/compositor_pipeline/src/pipeline.rs index da570e7de..2e07e7370 100644 --- a/compositor_pipeline/src/pipeline.rs +++ b/compositor_pipeline/src/pipeline.rs @@ -110,27 +110,61 @@ pub struct Pipeline { is_started: bool, } -pub struct PreinitializedContext { +pub struct GraphicsContext { pub device: Arc, pub queue: Arc, #[cfg(target_os = "linux")] - pub vulkan_ctx: Arc, + pub vulkan_ctx: Option>, } -impl PreinitializedContext { +impl GraphicsContext { #[cfg(target_os = "linux")] - pub fn new(features: wgpu::Features, limits: wgpu::Limits) -> Result { - let vulkan_ctx = Arc::new(vk_video::VulkanCtx::new(features, limits)?); - Ok(PreinitializedContext { - device: vulkan_ctx.wgpu_ctx.device.clone(), - queue: vulkan_ctx.wgpu_ctx.queue.clone(), - vulkan_ctx, - }) + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + use compositor_render::{create_wgpu_ctx, error::InitRendererEngineError}; + + let vulkan_features = features + | wgpu::Features::TEXTURE_BINDING_ARRAY + | wgpu::Features::PUSH_CONSTANTS + | wgpu::Features::TEXTURE_FORMAT_NV12; + + let limits = if limits.max_push_constant_size < 128 { + wgpu::Limits { + max_push_constant_size: 128, + ..limits + } + } else { + limits + }; + + match vk_video::VulkanCtx::new(vulkan_features, limits) { + Ok(ctx) => Ok(GraphicsContext { + device: ctx.wgpu_ctx.device.clone(), + queue: ctx.wgpu_ctx.queue.clone(), + vulkan_ctx: Some(ctx.into()), + }), + + Err(err) => { + info!("Cannot initialize vulkan video decoding context. Reason: {err}. 
Initializing without vulkan video support."); + + let (device, queue) = create_wgpu_ctx(force_gpu, features) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { + device, + queue, + vulkan_ctx: None, + }) + } + } } } -impl std::fmt::Debug for PreinitializedContext { +impl std::fmt::Debug for GraphicsContext { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("PreinitializedContext") .field("device", &self.device) @@ -149,7 +183,7 @@ pub struct Options { pub output_sample_rate: u32, pub wgpu_features: WgpuFeatures, pub load_system_fonts: Option, - pub wgpu_ctx: Option, + pub wgpu_ctx: Option, } #[derive(Clone)] @@ -159,7 +193,7 @@ pub struct PipelineCtx { pub download_dir: Arc, pub event_emitter: Arc, #[cfg(target_os = "linux")] - pub vulkan_ctx: Arc, + pub vulkan_ctx: Option>, } impl std::fmt::Debug for PipelineCtx { @@ -179,10 +213,11 @@ impl Pipeline { Some(ctx) => Some(ctx), None => { if cfg!(target_os = "linux") { - Some(PreinitializedContext::new(opts.wgpu_features | wgpu::Features::PUSH_CONSTANTS | wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, wgpu::Limits { - max_push_constant_size: 128, - ..Default::default() - })?) + Some(GraphicsContext::new( + opts.force_gpu, + opts.wgpu_features, + Default::default(), + )?) 
} else { None } @@ -222,9 +257,7 @@ impl Pipeline { download_dir: download_dir.into(), event_emitter, #[cfg(target_os = "linux")] - vulkan_ctx: preinitialized_ctx - .map(|ctx| ctx.vulkan_ctx) - .expect("This should not fail on linux"), + vulkan_ctx: preinitialized_ctx.and_then(|ctx| ctx.vulkan_ctx), }, }; diff --git a/compositor_pipeline/src/pipeline/decoder/video.rs b/compositor_pipeline/src/pipeline/decoder/video.rs index 2a817461c..fad0da153 100644 --- a/compositor_pipeline/src/pipeline/decoder/video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video.rs @@ -25,8 +25,12 @@ pub fn start_video_decoder_thread( } (VideoCodec::H264, VideoDecoder::VulkanVideo) => { + let Some(vulkan_ctx) = pipeline_ctx.vulkan_ctx.as_ref().map(|ctx| ctx.clone()) else { + return Err(InputInitError::VulkanContextRequiredForVulkanDecoder); + }; + vulkan_video::start_vulkan_video_decoder_thread( - pipeline_ctx.vulkan_ctx.clone(), + vulkan_ctx, chunks_receiver, frame_sender, input_id, diff --git a/integration_tests/examples/raw_channel_input.rs b/integration_tests/examples/raw_channel_input.rs index 5bb660079..9a4272098 100644 --- a/integration_tests/examples/raw_channel_input.rs +++ b/integration_tests/examples/raw_channel_input.rs @@ -17,8 +17,8 @@ use compositor_pipeline::{ OutputOptions, OutputProtocolOptions, }, rtp::RequestedPort, - Options, Pipeline, PipelineOutputEndCondition, PreinitializedContext, - RegisterOutputOptions, VideoCodec, + GraphicsContext, Options, Pipeline, PipelineOutputEndCondition, RegisterOutputOptions, + VideoCodec, }, queue::{PipelineEvent, QueueInputOptions}, }; @@ -44,14 +44,7 @@ fn main() { level: "info,wgpu_hal=warn,wgpu_core=warn".to_string(), }); let config = read_config(); - let ctx = PreinitializedContext::new( - wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, - wgpu::Limits { - max_push_constant_size: 128, - ..Default::default() - }, - ) - .unwrap(); + let ctx = GraphicsContext::new(false, Default::default(), 
Default::default()).unwrap(); let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { diff --git a/integration_tests/examples/raw_channel_output.rs b/integration_tests/examples/raw_channel_output.rs index 1fa196f5f..8d18ca3b3 100644 --- a/integration_tests/examples/raw_channel_output.rs +++ b/integration_tests/examples/raw_channel_output.rs @@ -16,7 +16,7 @@ use compositor_pipeline::{ InputOptions, }, output::{RawAudioOptions, RawDataOutputOptions, RawVideoOptions}, - Options, PipelineOutputEndCondition, PreinitializedContext, RawDataReceiver, + GraphicsContext, Options, PipelineOutputEndCondition, RawDataReceiver, RegisterInputOptions, RegisterOutputOptions, }, queue::{PipelineEvent, QueueInputOptions}, @@ -57,14 +57,7 @@ fn main() { }); let mut config = read_config(); config.queue_options.ahead_of_time_processing = true; - let ctx = PreinitializedContext::new( - wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::PUSH_CONSTANTS, - wgpu::Limits { - max_push_constant_size: 128, - ..Default::default() - }, - ) - .unwrap(); + let ctx = GraphicsContext::new(false, Default::default(), Default::default()).unwrap(); let (wgpu_device, wgpu_queue) = (ctx.device.clone(), ctx.queue.clone()); // no chromium support, so we can ignore _event_loop let (pipeline, _event_loop) = Pipeline::new(Options { From 47fb257e2e5c7c256c61c7f896aa7cc62374bba8 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Fri, 27 Sep 2024 11:12:30 +0200 Subject: [PATCH 03/13] Add proper handling for compilation on macos. 
--- Cargo.toml | 3 + compositor_api/Cargo.toml | 1 + .../src/types/from_register_input.rs | 11 +++- compositor_pipeline/Cargo.toml | 5 +- compositor_pipeline/src/error.rs | 5 +- compositor_pipeline/src/pipeline.rs | 61 ++++++++++--------- .../src/pipeline/decoder/video.rs | 17 +++--- .../src/pipeline/decoder/video/ffmpeg_h264.rs | 6 +- .../pipeline/decoder/video/vulkan_video.rs | 8 ++- compositor_pipeline/src/pipeline/types.rs | 1 + compositor_render/src/lib.rs | 2 +- compositor_render/src/wgpu.rs | 2 +- compositor_render/src/wgpu/ctx.rs | 15 +++-- src/snapshot_tests/utils.rs | 2 +- vk-video/examples/basic.rs | 24 +++++++- vk-video/examples/wgpu.rs | 33 +++++++++- vk-video/src/lib.rs | 1 + 17 files changed, 138 insertions(+), 59 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d6710df22..bea6c7e76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,6 +102,9 @@ http-body-util = "0.1.2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] shared_memory = { workspace = true } +[target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] +compositor_api = { workspace = true, features = ["vk-video"] } + [[bin]] name = "process_helper" path = "src/bin/process_helper/main.rs" diff --git a/compositor_api/Cargo.toml b/compositor_api/Cargo.toml index 5a8911708..69af23089 100644 --- a/compositor_api/Cargo.toml +++ b/compositor_api/Cargo.toml @@ -7,6 +7,7 @@ license = "BUSL-1.1" [features] decklink = ["compositor_pipeline/decklink"] web_renderer = ["compositor_render/web_renderer"] +vk-video = ["compositor_pipeline/vk-video"] [dependencies] compositor_render = { workspace = true } diff --git a/compositor_api/src/types/from_register_input.rs b/compositor_api/src/types/from_register_input.rs index c3daf2995..e3d705a2b 100644 --- a/compositor_api/src/types/from_register_input.rs +++ b/compositor_api/src/types/from_register_input.rs @@ -102,18 +102,23 @@ impl TryFrom for pipeline::RegisterInputOptions 
{ } let rtp_stream = input::rtp::RtpStream { - video: video.as_ref().map(|video| input::rtp::InputVideoStream { + video: video.as_ref().map(|video| Ok(input::rtp::InputVideoStream { options: match video { InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { codec: pipeline::VideoCodec::H264, decoder: pipeline::VideoDecoder::FFmpegH264, }, + #[cfg(feature = "vk-video")] InputRtpVideoOptions::VulkanVideo => decoder::VideoDecoderOptions { decoder: pipeline::VideoDecoder::VulkanVideo, codec: pipeline::VideoCodec::H264, }, - }, - }), + #[cfg(not(feature = "vk-video"))] + InputRtpVideoOptions::VulkanVideo => return Err(TypeError::new( + "This Live Compositor binary was build without Vulkan Video support. Rebuilt it on a platform which supports Vulkan Video." + )), + } + })).transpose()?, audio: audio.map(TryFrom::try_from).transpose()?, }; diff --git a/compositor_pipeline/Cargo.toml b/compositor_pipeline/Cargo.toml index b7f6c193c..199f9e224 100644 --- a/compositor_pipeline/Cargo.toml +++ b/compositor_pipeline/Cargo.toml @@ -7,6 +7,7 @@ license = "BUSL-1.1" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] decklink = ["dep:decklink"] +vk-video = ["dep:vk-video"] [dependencies] compositor_render = { workspace = true } @@ -27,9 +28,11 @@ reqwest = { workspace = true } tracing = { workspace = true } fdk-aac-sys = "0.5.0" rubato = "0.15.0" -vk-video = { workspace = true } wgpu = { workspace = true } glyphon = { workspace = true } [target.x86_64-unknown-linux-gnu.dependencies] decklink = { path = "../decklink", optional = true } + +[target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] +vk-video = { path = "../vk-video/", optional = true } diff --git a/compositor_pipeline/src/error.rs b/compositor_pipeline/src/error.rs index 6a18be2f2..03dfcace6 100644 --- a/compositor_pipeline/src/error.rs +++ 
b/compositor_pipeline/src/error.rs @@ -17,6 +17,7 @@ pub enum InitPipelineError { #[error("Failed to create a download directory.")] CreateDownloadDir(#[source] std::io::Error), + #[cfg(feature = "vk-video")] #[error(transparent)] VulkanCtxError(#[from] vk_video::VulkanCtxError), } @@ -133,11 +134,11 @@ pub enum InputInitError { #[error("Couldn't read decoder init result.")] CannotReadInitResult, - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] #[error(transparent)] VulkanDecoderError(#[from] vk_video::DecoderError), - #[error("Vulkan context is not available. Cannot create a vulkan video decoder")] + #[error("Pipeline couldn't detect a vulkan video compatible device when it was being initialized. Cannot create a vulkan video decoder")] VulkanContextRequiredForVulkanDecoder, } diff --git a/compositor_pipeline/src/pipeline.rs b/compositor_pipeline/src/pipeline.rs index 2e07e7370..d3e4e58d8 100644 --- a/compositor_pipeline/src/pipeline.rs +++ b/compositor_pipeline/src/pipeline.rs @@ -63,6 +63,7 @@ pub use self::types::{ AudioCodec, EncodedChunk, EncodedChunkKind, EncoderOutputEvent, RawDataReceiver, VideoCodec, VideoDecoder, }; +use compositor_render::{create_wgpu_ctx, error::InitRendererEngineError}; pub use pipeline_output::PipelineOutputEndCondition; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -114,34 +115,25 @@ pub struct GraphicsContext { pub device: Arc, pub queue: Arc, - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] pub vulkan_ctx: Option>, } impl GraphicsContext { - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] pub fn new( force_gpu: bool, features: wgpu::Features, limits: wgpu::Limits, ) -> Result { - use compositor_render::{create_wgpu_ctx, error::InitRendererEngineError}; + use compositor_render::{required_wgpu_features, set_required_wgpu_limits}; - let vulkan_features = features - | wgpu::Features::TEXTURE_BINDING_ARRAY - | wgpu::Features::PUSH_CONSTANTS - | wgpu::Features::TEXTURE_FORMAT_NV12; + let 
vulkan_features = + features | required_wgpu_features() | wgpu::Features::TEXTURE_FORMAT_NV12; - let limits = if limits.max_push_constant_size < 128 { - wgpu::Limits { - max_push_constant_size: 128, - ..limits - } - } else { - limits - }; + let limits = set_required_wgpu_limits(limits); - match vk_video::VulkanCtx::new(vulkan_features, limits) { + match vk_video::VulkanCtx::new(vulkan_features, limits.clone()) { Ok(ctx) => Ok(GraphicsContext { device: ctx.wgpu_ctx.device.clone(), queue: ctx.wgpu_ctx.queue.clone(), @@ -151,7 +143,7 @@ impl GraphicsContext { Err(err) => { info!("Cannot initialize vulkan video decoding context. Reason: {err}. Initializing without vulkan video support."); - let (device, queue) = create_wgpu_ctx(force_gpu, features) + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; Ok(GraphicsContext { @@ -162,6 +154,18 @@ impl GraphicsContext { } } } + + #[cfg(not(feature = "vk-video"))] + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { device, queue }) + } } impl std::fmt::Debug for GraphicsContext { @@ -192,7 +196,7 @@ pub struct PipelineCtx { pub output_framerate: Framerate, pub download_dir: Arc, pub event_emitter: Arc, - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] pub vulkan_ctx: Option>, } @@ -211,17 +215,14 @@ impl Pipeline { pub fn new(opts: Options) -> Result<(Self, Arc), InitPipelineError> { let preinitialized_ctx = match opts.wgpu_ctx { Some(ctx) => Some(ctx), - None => { - if cfg!(target_os = "linux") { - Some(GraphicsContext::new( - opts.force_gpu, - opts.wgpu_features, - Default::default(), - )?) 
- } else { - None - } - } + #[cfg(feature = "vk-video")] + None => Some(GraphicsContext::new( + opts.force_gpu, + opts.wgpu_features, + Default::default(), + )?), + #[cfg(not(feature = "vk-video"))] + None => None, }; let wgpu_ctx = preinitialized_ctx @@ -256,7 +257,7 @@ impl Pipeline { output_framerate: opts.queue_options.output_framerate, download_dir: download_dir.into(), event_emitter, - #[cfg(target_os = "linux")] + #[cfg(feature = "vk-video")] vulkan_ctx: preinitialized_ctx.and_then(|ctx| ctx.vulkan_ctx), }, }; diff --git a/compositor_pipeline/src/pipeline/decoder/video.rs b/compositor_pipeline/src/pipeline/decoder/video.rs index fad0da153..5129ee06d 100644 --- a/compositor_pipeline/src/pipeline/decoder/video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video.rs @@ -10,6 +10,7 @@ use crate::{ use super::VideoDecoderOptions; mod ffmpeg_h264; +#[cfg(feature = "vk-video")] mod vulkan_video; pub fn start_video_decoder_thread( @@ -20,17 +21,17 @@ pub fn start_video_decoder_thread( input_id: InputId, ) -> Result<(), InputInitError> { match (options.codec, options.decoder) { - (VideoCodec::H264, VideoDecoder::FFmpegH264) => { - ffmpeg_h264::start_ffmpeg_decoder_thread(chunks_receiver, frame_sender, input_id) - } + (VideoCodec::H264, VideoDecoder::FFmpegH264) => ffmpeg_h264::start_ffmpeg_decoder_thread( + pipeline_ctx, + chunks_receiver, + frame_sender, + input_id, + ), + #[cfg(feature = "vk-video")] (VideoCodec::H264, VideoDecoder::VulkanVideo) => { - let Some(vulkan_ctx) = pipeline_ctx.vulkan_ctx.as_ref().map(|ctx| ctx.clone()) else { - return Err(InputInitError::VulkanContextRequiredForVulkanDecoder); - }; - vulkan_video::start_vulkan_video_decoder_thread( - vulkan_ctx, + pipeline_ctx, chunks_receiver, frame_sender, input_id, diff --git a/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs b/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs index 04df9e654..8f6c2cdd1 100644 --- 
a/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs +++ b/compositor_pipeline/src/pipeline/decoder/video/ffmpeg_h264.rs @@ -2,7 +2,10 @@ use std::time::Duration; use crate::{ error::InputInitError, - pipeline::types::{EncodedChunk, EncodedChunkKind, VideoCodec}, + pipeline::{ + types::{EncodedChunk, EncodedChunkKind, VideoCodec}, + PipelineCtx, + }, queue::PipelineEvent, }; @@ -18,6 +21,7 @@ use ffmpeg_next::{ use tracing::{debug, error, span, trace, warn, Level}; pub fn start_ffmpeg_decoder_thread( + _pipeline_ctx: &PipelineCtx, chunks_receiver: Receiver>, frame_sender: Sender>, input_id: InputId, diff --git a/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs index 241cdf929..8e2363412 100644 --- a/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video/vulkan_video.rs @@ -7,16 +7,20 @@ use vk_video::{Decoder, VulkanCtx}; use crate::{ error::InputInitError, - pipeline::{EncodedChunk, EncodedChunkKind, VideoCodec}, + pipeline::{EncodedChunk, EncodedChunkKind, PipelineCtx, VideoCodec}, queue::PipelineEvent, }; pub fn start_vulkan_video_decoder_thread( - vulkan_ctx: Arc, + pipeline_ctx: &PipelineCtx, chunks_receiver: Receiver>, frame_sender: Sender>, input_id: InputId, ) -> Result<(), InputInitError> { + let Some(vulkan_ctx) = pipeline_ctx.vulkan_ctx.as_ref().map(|ctx| ctx.clone()) else { + return Err(InputInitError::VulkanContextRequiredForVulkanDecoder); + }; + let (init_result_sender, init_result_receiver) = crossbeam_channel::bounded(0); std::thread::Builder::new() diff --git a/compositor_pipeline/src/pipeline/types.rs b/compositor_pipeline/src/pipeline/types.rs index 6f028d402..680097626 100644 --- a/compositor_pipeline/src/pipeline/types.rs +++ b/compositor_pipeline/src/pipeline/types.rs @@ -58,6 +58,7 @@ pub struct RawDataSender { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum VideoDecoder { FFmpegH264, + 
#[cfg(feature = "vk-video")] VulkanVideo, } diff --git a/compositor_render/src/lib.rs b/compositor_render/src/lib.rs index 5c6d93694..5d598473f 100644 --- a/compositor_render/src/lib.rs +++ b/compositor_render/src/lib.rs @@ -19,8 +19,8 @@ pub use state::Renderer; pub use state::RendererOptions; pub use state::RendererSpec; -pub use wgpu::create_wgpu_ctx; pub use wgpu::WgpuFeatures; +pub use wgpu::{create_wgpu_ctx, required_wgpu_features, set_required_wgpu_limits}; pub mod image { pub use crate::transformations::image_renderer::{ImageSource, ImageSpec, ImageType}; diff --git a/compositor_render/src/wgpu.rs b/compositor_render/src/wgpu.rs index d3814eb13..ff9921a65 100644 --- a/compositor_render/src/wgpu.rs +++ b/compositor_render/src/wgpu.rs @@ -6,8 +6,8 @@ pub(crate) mod format; pub(crate) mod texture; pub(crate) mod utils; -pub use ctx::create_wgpu_ctx; pub(crate) use ctx::WgpuCtx; +pub use ctx::{create_wgpu_ctx, required_wgpu_features, set_required_wgpu_limits}; pub use wgpu::Features as WgpuFeatures; #[must_use] diff --git a/compositor_render/src/wgpu/ctx.rs b/compositor_render/src/wgpu/ctx.rs index e6ee5e984..4bf23706f 100644 --- a/compositor_render/src/wgpu/ctx.rs +++ b/compositor_render/src/wgpu/ctx.rs @@ -34,7 +34,7 @@ impl WgpuCtx { Self::new_from_device_queue(device, queue)? } None => { - let (device, queue) = create_wgpu_ctx(force_gpu, features)?; + let (device, queue) = create_wgpu_ctx(force_gpu, features, Default::default())?; Self::new_from_device_queue(device, queue)? 
} }; @@ -94,9 +94,17 @@ pub fn required_wgpu_features() -> wgpu::Features { } } +pub fn set_required_wgpu_limits(limits: wgpu::Limits) -> wgpu::Limits { + wgpu::Limits { + max_push_constant_size: limits.max_push_constant_size.max(128), + ..limits + } +} + pub fn create_wgpu_ctx( force_gpu: bool, features: wgpu::Features, + limits: wgpu::Limits, ) -> Result<(Arc, Arc), CreateWgpuCtxError> { let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { backends: wgpu::Backends::all(), @@ -134,10 +142,7 @@ pub fn create_wgpu_ctx( let (device, queue) = pollster::block_on(adapter.request_device( &wgpu::DeviceDescriptor { label: None, - required_limits: wgpu::Limits { - max_push_constant_size: 128, - ..Default::default() - }, + required_limits: set_required_wgpu_limits(limits), required_features, memory_hints: wgpu::MemoryHints::default(), }, diff --git a/src/snapshot_tests/utils.rs b/src/snapshot_tests/utils.rs index 99379b35b..d79b8cb67 100644 --- a/src/snapshot_tests/utils.rs +++ b/src/snapshot_tests/utils.rs @@ -22,7 +22,7 @@ fn global_wgpu_ctx( ) -> (Arc, Arc) { static CTX: OnceLock<(Arc, Arc)> = OnceLock::new(); - CTX.get_or_init(|| create_wgpu_ctx(force_gpu, features).unwrap()) + CTX.get_or_init(|| create_wgpu_ctx(force_gpu, features, Default::default()).unwrap()) .clone() } diff --git a/vk-video/examples/basic.rs b/vk-video/examples/basic.rs index d75452d81..d57ab696c 100644 --- a/vk-video/examples/basic.rs +++ b/vk-video/examples/basic.rs @@ -1,6 +1,13 @@ -use std::io::Write; - +#[cfg(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +))] fn main() { + use std::io::Write; + let subscriber = tracing_subscriber::FmtSubscriber::builder() .with_max_level(tracing::Level::INFO) .finish(); @@ -37,3 +44,16 @@ fn main() { } } } + +#[cfg(not(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +)))] +fn main() { + println!( + "This crate doesn't work on 
your operating system, because it does not support vulkan" + ); +} diff --git a/vk-video/examples/wgpu.rs b/vk-video/examples/wgpu.rs index a5a8f03bd..c535b0744 100644 --- a/vk-video/examples/wgpu.rs +++ b/vk-video/examples/wgpu.rs @@ -1,6 +1,13 @@ -use std::io::Write; - +#[cfg(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +))] fn main() { + use std::io::Write; + let subscriber = tracing_subscriber::FmtSubscriber::builder() .with_max_level(tracing::Level::INFO) .finish(); @@ -40,11 +47,33 @@ fn main() { } } +#[cfg(not(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +)))] +fn main() { + println!( + "This crate doesn't work on your operating system, because it does not support vulkan" + ); +} + +#[cfg(any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) +))] fn download_wgpu_texture( device: &wgpu::Device, queue: &wgpu::Queue, frame: wgpu::Texture, ) -> Vec { + use std::io::Write; + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); let y_plane_bytes_per_row = (frame.width() as u64 + 255) / 256 * 256; let y_plane_size = y_plane_bytes_per_row * frame.height() as u64; diff --git a/vk-video/src/lib.rs b/vk-video/src/lib.rs index e47c7deda..1b0fff581 100644 --- a/vk-video/src/lib.rs +++ b/vk-video/src/lib.rs @@ -1,3 +1,4 @@ +#![cfg(not(target_os = "macos"))] mod parser; mod vulkan_decoder; From 0bc4e492d6d10c3348c7bc4bc1e35105b9adc0bf Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 30 Sep 2024 13:17:44 +0200 Subject: [PATCH 04/13] Refactor video session resources to a separate module --- vk-video/src/vulkan_decoder.rs | 495 ++---------------- .../src/vulkan_decoder/session_resources.rs | 170 ++++++ .../session_resources/images.rs | 291 ++++++++++ .../session_resources/parameters.rs | 116 ++++ vk-video/src/vulkan_decoder/vulkan_ctx.rs | 
72 ++- vk-video/src/vulkan_decoder/wrappers.rs | 2 + .../src/vulkan_decoder/wrappers/command.rs | 39 -- vk-video/src/vulkan_decoder/wrappers/mem.rs | 41 ++ .../{ => wrappers}/parameter_sets.rs | 33 +- vk-video/src/vulkan_decoder/wrappers/video.rs | 136 +---- 10 files changed, 774 insertions(+), 621 deletions(-) create mode 100644 vk-video/src/vulkan_decoder/session_resources.rs create mode 100644 vk-video/src/vulkan_decoder/session_resources/images.rs create mode 100644 vk-video/src/vulkan_decoder/session_resources/parameters.rs rename vk-video/src/vulkan_decoder/{ => wrappers}/parameter_sets.rs (91%) diff --git a/vk-video/src/vulkan_decoder.rs b/vk-video/src/vulkan_decoder.rs index 9b78b74c4..c2b10aa09 100644 --- a/vk-video/src/vulkan_decoder.rs +++ b/vk-video/src/vulkan_decoder.rs @@ -3,19 +3,18 @@ use std::sync::Arc; use ash::vk; use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; +use session_resources::VideoSessionResources; use tracing::error; use wrappers::*; use crate::parser::{DecodeInformation, DecoderInstruction, ReferenceId}; -mod parameter_sets; +mod session_resources; mod vulkan_ctx; mod wrappers; pub use vulkan_ctx::*; -const MACROBLOCK_SIZE: u32 = 16; - pub struct VulkanDecoder<'a> { vulkan_ctx: Arc, video_session_resources: Option>, @@ -38,12 +37,6 @@ struct CommandBuffers { vulkan_to_wgpu_transfer_buffer: CommandBuffer, } -struct VideoSessionResources<'a> { - video_session: VideoSession, - parameters_manager: VideoSessionParametersManager, - decoding_images: DecodingImages<'a>, -} - /// this cannot outlive the image and semaphore it borrows, but it seems impossible to encode that /// in the lifetimes struct DecodeOutput { @@ -213,7 +206,7 @@ impl VulkanDecoder<'_> { Some(dpb_idx) => self .video_session_resources .as_mut() - .map(|s| s.decoding_images.free_reference_picture(dpb_idx)), + .map(|s| s.free_reference_picture(dpb_idx)), None => return Err(VulkanDecoderError::NonExistantReferenceRequested), }; } @@ -228,119 +221,31 @@ impl 
VulkanDecoder<'_> { } fn process_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { - let profile = H264ProfileInfo::decode_h264_yuv420(); - - let width = match sps.frame_cropping { - None => (sps.pic_width_in_mbs_minus1 + 1) * MACROBLOCK_SIZE, - Some(_) => return Err(VulkanDecoderError::FrameCroppingNotSupported), - }; - - let height = match sps.frame_mbs_flags { - h264_reader::nal::sps::FrameMbsFlags::Frames => { - (sps.pic_height_in_map_units_minus1 + 1) * MACROBLOCK_SIZE - } - h264_reader::nal::sps::FrameMbsFlags::Fields { .. } => { - return Err(VulkanDecoderError::FieldsNotSupported) - } - }; - - let max_coded_extent = vk::Extent2D { width, height }; - // +1 for current frame - let max_dpb_slots = sps.max_num_ref_frames + 1; - let max_active_references = sps.max_num_ref_frames; - - if let Some(VideoSessionResources { - video_session, - parameters_manager: parameters, - .. - }) = &mut self.video_session_resources - { - if video_session.max_coded_extent.width >= width - && video_session.max_coded_extent.height >= height - && video_session.max_dpb_slots >= max_dpb_slots - { - // no need to change the session - parameters.put_sps(sps)?; - return Ok(()); - } - } - - let video_session = VideoSession::new( - &self.vulkan_ctx, - &profile.profile_info, - max_coded_extent, - max_dpb_slots, - max_active_references, - &self.vulkan_ctx.video_capabilities.std_header_version, - )?; - - let parameters = self - .video_session_resources - .take() - .map(|r| r.parameters_manager); - - let mut parameters = match parameters { - Some(mut parameters) => { - parameters.change_session(video_session.session)?; - parameters + match self.video_session_resources.as_mut() { + Some(session) => session.process_sps( + &self.vulkan_ctx, + &self.command_buffers.decode_buffer, + sps, + &self.sync_structures.fence_memory_barrier_completed, + )?, + None => { + self.video_session_resources = Some(VideoSessionResources::new_from_sps( + &self.vulkan_ctx, + 
&self.command_buffers.decode_buffer, + sps, + &self.sync_structures.fence_memory_barrier_completed, + )?) } - None => VideoSessionParametersManager::new(&self.vulkan_ctx, video_session.session)?, - }; - - parameters.put_sps(sps)?; - - // FIXME: usually, sps arrives either at the start of the stream (when all spses are sent - // at the begginning of the stream) or right before an IDR. It is however possible for an - // sps nal to arrive in between P-frames. This would cause us to loose the reference - // pictures we need to decode the stream until we receive a new IDR. Don't know if this is - // an issue worth fixing, I don't think I ever saw a stream like this. - let (decoding_images, memory_barrier) = DecodingImages::new( - &self.vulkan_ctx, - profile, - &self.vulkan_ctx.h264_dpb_format_properties, - &self.vulkan_ctx.h264_dst_format_properties, - max_coded_extent, - max_dpb_slots, - )?; - - self.command_buffers.decode_buffer.begin()?; - - unsafe { - self.vulkan_ctx.device.cmd_pipeline_barrier2( - *self.command_buffers.decode_buffer, - &vk::DependencyInfo::default().image_memory_barriers(&memory_barrier), - ); } - self.command_buffers.decode_buffer.end()?; - - self.command_buffers.decode_buffer.submit( - *self.vulkan_ctx.queues.h264_decode.queue.lock().unwrap(), - &[], - &[], - Some(*self.sync_structures.fence_memory_barrier_completed), - )?; - - // TODO: this shouldn't be a fence - self.sync_structures - .fence_memory_barrier_completed - .wait_and_reset(u64::MAX)?; - - self.video_session_resources = Some(VideoSessionResources { - video_session, - parameters_manager: parameters, - decoding_images, - }); - Ok(()) } fn process_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { self.video_session_resources .as_mut() - .map(|r| &mut r.parameters_manager) .ok_or(VulkanDecoderError::NoSession)? 
- .put_pps(pps)?; + .process_pps(pps)?; Ok(()) } @@ -384,8 +289,11 @@ impl VulkanDecoder<'_> { .min_bitstream_buffer_offset_alignment, ); - let decode_buffer = - self.upload_decode_data_to_buffer(&decode_information.rbsp_bytes, size)?; + let decode_buffer = Buffer::new_with_decode_data( + self.vulkan_ctx.allocator.clone(), + &decode_information.rbsp_bytes, + size, + )?; // decode let video_session_resources = self @@ -533,28 +441,22 @@ impl VulkanDecoder<'_> { .std_picture_info(&std_picture_info) .slice_offsets(&slice_offsets); - let dst_picture_resource_info = match &video_session_resources.decoding_images.dst_image { - Some(image) => image.video_resource_info[0], - None => *new_reference_slot_video_picture_resource_info, - }; + let dst_picture_resource_info = &video_session_resources + .decoding_images + .target_picture_resource_info(new_reference_slot_index) + .unwrap(); // these 3 veriables are for copying the result later - let (dst_image, dst_image_layout, dst_layer) = - match &video_session_resources.decoding_images.dst_image { - Some(image) => (**image.image, vk::ImageLayout::VIDEO_DECODE_DST_KHR, 0), - None => ( - **video_session_resources.decoding_images.dpb_image.image, - vk::ImageLayout::VIDEO_DECODE_DPB_KHR, - new_reference_slot_index, - ), - }; + let (target_image, target_image_layout, target_layer) = video_session_resources + .decoding_images + .target_info(new_reference_slot_index); // fill out the final struct and issue the command let decode_info = vk::VideoDecodeInfoKHR::default() .src_buffer(*decode_buffer) .src_buffer_offset(0) .src_buffer_range(size) - .dst_picture_resource(dst_picture_resource_info) + .dst_picture_resource(*dst_picture_resource_info) .setup_reference_slot(&setup_reference_slot) .reference_slots(&pic_reference_slots) .push_next(&mut decode_h264_picture_info); @@ -586,8 +488,8 @@ impl VulkanDecoder<'_> { self.command_buffers.decode_buffer.end()?; - self.command_buffers.decode_buffer.submit( - 
*self.vulkan_ctx.queues.h264_decode.queue.lock().unwrap(), + self.vulkan_ctx.queues.h264_decode.submit( + &self.command_buffers.decode_buffer, &[], &[( *self.sync_structures.sem_decode_done, @@ -605,10 +507,10 @@ impl VulkanDecoder<'_> { let dimensions = video_session_resources.video_session.max_coded_extent; Ok(DecodeOutput { - image: dst_image, + image: target_image, wait_semaphore: *self.sync_structures.sem_decode_done, - layer: dst_layer as u32, - current_layout: dst_image_layout, + layer: target_layer as u32, + current_layout: target_image_layout, dimensions, _input_buffer: decode_buffer, }) @@ -773,8 +675,8 @@ impl VulkanDecoder<'_> { self.command_buffers.vulkan_to_wgpu_transfer_buffer.end()?; - self.command_buffers.vulkan_to_wgpu_transfer_buffer.submit( - *self.vulkan_ctx.queues.transfer.queue.lock().unwrap(), + self.vulkan_ctx.queues.transfer.submit( + &self.command_buffers.vulkan_to_wgpu_transfer_buffer, &[( decode_output.wait_semaphore, vk::PipelineStageFlags2::TOP_OF_PIPE, @@ -867,8 +769,7 @@ impl VulkanDecoder<'_> { .wait_and_reset(u64::MAX)?; let output = unsafe { - self.download_data_from_buffer( - &mut dst_buffer, + dst_buffer.download_data_from_buffer( decode_output.dimensions.width as usize * decode_output.dimensions.height as usize * 3 @@ -932,55 +833,6 @@ impl VulkanDecoder<'_> { Ok(pic_reference_slots) } - /// ## Safety - /// the buffer has to be mappable and readable - unsafe fn download_data_from_buffer( - &self, - buffer: &mut Buffer, - size: usize, - ) -> Result, VulkanDecoderError> { - let mut output = Vec::new(); - unsafe { - let memory = self - .vulkan_ctx - .allocator - .map_memory(&mut buffer.allocation)?; - let memory_slice = std::slice::from_raw_parts_mut(memory, size); - output.extend_from_slice(memory_slice); - self.vulkan_ctx - .allocator - .unmap_memory(&mut buffer.allocation); - } - - Ok(output) - } - - fn upload_decode_data_to_buffer( - &self, - data: &[u8], - buffer_size: u64, - ) -> Result { - let mut decode_buffer = 
Buffer::new_decode( - self.vulkan_ctx.allocator.clone(), - buffer_size, - &H264ProfileInfo::decode_h264_yuv420(), - )?; - - unsafe { - let mem = self - .vulkan_ctx - .allocator - .map_memory(&mut decode_buffer.allocation)?; - let slice = std::slice::from_raw_parts_mut(mem.cast(), data.len()); - slice.copy_from_slice(data); - self.vulkan_ctx - .allocator - .unmap_memory(&mut decode_buffer.allocation); - } - - Ok(decode_buffer) - } - #[allow(clippy::too_many_arguments)] fn copy_image_to_buffer( &self, @@ -1091,8 +943,8 @@ impl VulkanDecoder<'_> { self.command_buffers.gpu_to_mem_transfer_buffer.end()?; - self.command_buffers.gpu_to_mem_transfer_buffer.submit( - *self.vulkan_ctx.queues.transfer.queue.lock().unwrap(), + self.vulkan_ctx.queues.transfer.submit( + &self.command_buffers.gpu_to_mem_transfer_buffer, wait_semaphores, signal_semaphores, fence, @@ -1102,271 +954,6 @@ impl VulkanDecoder<'_> { } } -impl From for vk::native::StdVideoDecodeH264ReferenceInfo { - fn from(picture_info: crate::parser::PictureInfo) -> Self { - vk::native::StdVideoDecodeH264ReferenceInfo { - flags: vk::native::StdVideoDecodeH264ReferenceInfoFlags { - __bindgen_padding_0: [0; 3], - _bitfield_align_1: [], - _bitfield_1: vk::native::StdVideoDecodeH264ReferenceInfoFlags::new_bitfield_1( - 0, - 0, - picture_info.used_for_long_term_reference.into(), - picture_info.non_existing.into(), - ), - }, - FrameNum: picture_info.FrameNum, - PicOrderCnt: picture_info.PicOrderCnt, - reserved: 0, - } - } -} - -pub(crate) struct DecodingImages<'a> { - pub(crate) dpb_image: DecodingImageBundle<'a>, - pub(crate) dpb_slot_active: Vec, - pub(crate) dst_image: Option>, -} - -pub(crate) struct DecodingImageBundle<'a> { - pub(crate) image: Arc, - pub(crate) _image_view: ImageView, - pub(crate) video_resource_info: Vec>, -} - -impl<'a> DecodingImageBundle<'a> { - #[allow(clippy::too_many_arguments)] - pub(crate) fn new( - vulkan_ctx: &VulkanCtx, - format: &vk::VideoFormatPropertiesKHR<'a>, - dimensions: 
vk::Extent2D, - image_usage: vk::ImageUsageFlags, - profile_info: &H264ProfileInfo, - array_layer_count: u32, - queue_indices: Option<&[u32]>, - layout: vk::ImageLayout, - ) -> Result<(Self, vk::ImageMemoryBarrier2<'a>), VulkanDecoderError> { - let mut profile_list_info = vk::VideoProfileListInfoKHR::default() - .profiles(std::slice::from_ref(&profile_info.profile_info)); - - let mut image_create_info = vk::ImageCreateInfo::default() - .flags(format.image_create_flags) - .image_type(format.image_type) - .format(format.format) - .extent(vk::Extent3D { - width: dimensions.width, - height: dimensions.height, - depth: 1, - }) - .mip_levels(1) - .array_layers(array_layer_count) - .samples(vk::SampleCountFlags::TYPE_1) - .tiling(format.image_tiling) - .usage(image_usage) - .initial_layout(vk::ImageLayout::UNDEFINED) - .push_next(&mut profile_list_info); - - match queue_indices { - Some(indices) => { - image_create_info = image_create_info - .sharing_mode(vk::SharingMode::CONCURRENT) - .queue_family_indices(indices); - } - None => { - image_create_info = image_create_info.sharing_mode(vk::SharingMode::EXCLUSIVE); - } - } - - let image = Arc::new(Image::new( - vulkan_ctx.allocator.clone(), - &image_create_info, - )?); - - let subresource_range = vk::ImageSubresourceRange { - aspect_mask: vk::ImageAspectFlags::COLOR, - base_mip_level: 0, - level_count: 1, - base_array_layer: 0, - layer_count: vk::REMAINING_ARRAY_LAYERS, - }; - - let image_view_create_info = vk::ImageViewCreateInfo::default() - .flags(vk::ImageViewCreateFlags::empty()) - .image(**image) - .view_type(if array_layer_count == 1 { - vk::ImageViewType::TYPE_2D - } else { - vk::ImageViewType::TYPE_2D_ARRAY - }) - .format(format.format) - .components(vk::ComponentMapping::default()) - .subresource_range(subresource_range); - - let image_view = ImageView::new( - vulkan_ctx.device.clone(), - image.clone(), - &image_view_create_info, - )?; - - let video_resource_info = (0..array_layer_count) - .map(|i| { - 
vk::VideoPictureResourceInfoKHR::default() - .coded_offset(vk::Offset2D { x: 0, y: 0 }) - .coded_extent(dimensions) - .base_array_layer(i) - .image_view_binding(image_view.view) - }) - .collect(); - - let image_memory_barrier = vk::ImageMemoryBarrier2::default() - .src_stage_mask(vk::PipelineStageFlags2::NONE) - .src_access_mask(vk::AccessFlags2::NONE) - .dst_stage_mask(vk::PipelineStageFlags2::NONE) - .dst_access_mask(vk::AccessFlags2::NONE) - .old_layout(vk::ImageLayout::UNDEFINED) - .new_layout(layout) - .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .image(**image) - .subresource_range(subresource_range); - - Ok(( - Self { - image, - _image_view: image_view, - video_resource_info, - }, - image_memory_barrier, - )) - } -} - -impl<'a> DecodingImages<'a> { - pub(crate) fn new( - vulkan_ctx: &VulkanCtx, - profile: H264ProfileInfo, - dpb_format: &vk::VideoFormatPropertiesKHR<'a>, - dst_format: &Option>, - dimensions: vk::Extent2D, - max_dpb_slots: u32, - ) -> Result<(Self, Vec>), VulkanDecoderError> { - let dpb_image_usage = if dst_format.is_some() { - dpb_format.image_usage_flags & vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR - } else { - dpb_format.image_usage_flags - & (vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR - | vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR - | vk::ImageUsageFlags::TRANSFER_SRC) - }; - - let queue_indices = [ - vulkan_ctx.queues.transfer.idx as u32, - vulkan_ctx.queues.h264_decode.idx as u32, - ]; - - let (dpb_image, dpb_memory_barrier) = DecodingImageBundle::new( - vulkan_ctx, - dpb_format, - dimensions, - dpb_image_usage, - &profile, - max_dpb_slots, - if dst_format.is_some() { - None - } else { - Some(&queue_indices) - }, - vk::ImageLayout::VIDEO_DECODE_DPB_KHR, - )?; - - let output = dst_format - .map(|dst_format| { - let dst_image_usage = dst_format.image_usage_flags - & (vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR - | vk::ImageUsageFlags::TRANSFER_SRC); - DecodingImageBundle::new( - 
vulkan_ctx, - &dst_format, - dimensions, - dst_image_usage, - &profile, - 1, - Some(&queue_indices), - vk::ImageLayout::VIDEO_DECODE_DST_KHR, - ) - }) - .transpose()?; - - let (dst_image, dst_memory_barrier) = match output { - Some((output_images, output_memory_barrier)) => { - (Some(output_images), Some(output_memory_barrier)) - } - None => (None, None), - }; - - let barriers = [dpb_memory_barrier] - .into_iter() - .chain(dst_memory_barrier) - .collect::>(); - - Ok(( - Self { - dpb_image, - dpb_slot_active: vec![false; max_dpb_slots as usize], - dst_image, - }, - barriers, - )) - } - - fn reference_slot_info(&self) -> Vec { - self.dpb_image - .video_resource_info - .iter() - .enumerate() - .map(|(i, info)| { - vk::VideoReferenceSlotInfoKHR::default() - .picture_resource(info) - .slot_index(if self.dpb_slot_active[i] { - i as i32 - } else { - -1 - }) - }) - .collect() - } - - fn allocate_reference_picture(&mut self) -> Result { - let i = self - .dpb_slot_active - .iter() - .enumerate() - .find(|(_, &v)| !v) - .map(|(i, _)| i) - .ok_or(VulkanDecoderError::NoFreeSlotsInDpb)?; - - self.dpb_slot_active[i] = true; - - Ok(i) - } - - fn video_resource_info(&self, i: usize) -> Option<&vk::VideoPictureResourceInfoKHR> { - self.dpb_image.video_resource_info.get(i) - } - - fn free_reference_picture(&mut self, i: usize) -> Result<(), VulkanDecoderError> { - self.dpb_slot_active[i] = false; - - Ok(()) - } - - fn reset_all_allocations(&mut self) { - self.dpb_slot_active - .iter_mut() - .for_each(|slot| *slot = false); - } -} - pub(crate) struct H264ProfileInfo<'a> { profile_info: vk::VideoProfileInfoKHR<'a>, h264_info_ptr: *mut vk::VideoDecodeH264ProfileInfoKHR<'a>, diff --git a/vk-video/src/vulkan_decoder/session_resources.rs b/vk-video/src/vulkan_decoder/session_resources.rs new file mode 100644 index 000000000..e59c71e09 --- /dev/null +++ b/vk-video/src/vulkan_decoder/session_resources.rs @@ -0,0 +1,170 @@ +use ash::vk; +use h264_reader::nal::{pps::PicParameterSet, 
sps::SeqParameterSet}; +use images::DecodingImages; +use parameters::VideoSessionParametersManager; + +use super::{ + CommandBuffer, Fence, H264ProfileInfo, SeqParameterSetExt, VideoSession, VulkanCtx, + VulkanDecoderError, +}; + +mod images; +mod parameters; + +pub(super) struct VideoSessionResources<'a> { + pub(crate) video_session: VideoSession, + pub(crate) parameters_manager: VideoSessionParametersManager, + pub(crate) decoding_images: DecodingImages<'a>, +} + +impl VideoSessionResources<'_> { + pub(crate) fn new_from_sps( + vulkan_ctx: &VulkanCtx, + decode_buffer: &CommandBuffer, + sps: &SeqParameterSet, + fence_memory_barrier_completed: &Fence, + ) -> Result { + let profile = H264ProfileInfo::decode_h264_yuv420(); + + let width = sps.width()?; + let height = sps.height()?; + + let max_coded_extent = vk::Extent2D { width, height }; + // +1 for current frame + let max_dpb_slots = sps.max_num_ref_frames + 1; + let max_active_references = sps.max_num_ref_frames; + + let video_session = VideoSession::new( + vulkan_ctx, + &profile.profile_info, + max_coded_extent, + max_dpb_slots, + max_active_references, + &vulkan_ctx.video_capabilities.std_header_version, + )?; + + let mut parameters_manager = + VideoSessionParametersManager::new(vulkan_ctx, video_session.session)?; + + parameters_manager.put_sps(sps)?; + + let decoding_images = Self::new_decoding_images( + vulkan_ctx, + max_coded_extent, + max_dpb_slots, + decode_buffer, + fence_memory_barrier_completed, + )?; + + Ok(VideoSessionResources { + video_session, + parameters_manager, + decoding_images, + }) + } + + pub(crate) fn process_sps( + &mut self, + vulkan_ctx: &VulkanCtx, + decode_buffer: &CommandBuffer, + sps: &SeqParameterSet, + fence_memory_barrier_completed: &Fence, + ) -> Result<(), VulkanDecoderError> { + let profile = H264ProfileInfo::decode_h264_yuv420(); + + let width = sps.width()?; + let height = sps.height()?; + + let max_coded_extent = vk::Extent2D { width, height }; + // +1 for current frame + 
let max_dpb_slots = sps.max_num_ref_frames + 1; + let max_active_references = sps.max_num_ref_frames; + + if self.video_session.max_coded_extent.width >= width + && self.video_session.max_coded_extent.height >= height + && self.video_session.max_dpb_slots >= max_dpb_slots + { + // no need to change the session + self.parameters_manager.put_sps(sps)?; + return Ok(()); + } + + self.video_session = VideoSession::new( + vulkan_ctx, + &profile.profile_info, + max_coded_extent, + max_dpb_slots, + max_active_references, + &vulkan_ctx.video_capabilities.std_header_version, + )?; + + self.parameters_manager + .change_session(self.video_session.session)?; + self.parameters_manager.put_sps(sps)?; + + self.decoding_images = Self::new_decoding_images( + vulkan_ctx, + max_coded_extent, + max_dpb_slots, + decode_buffer, + fence_memory_barrier_completed, + )?; + + Ok(()) + } + + pub(crate) fn process_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { + self.parameters_manager.put_pps(pps) + } + + fn new_decoding_images<'a>( + vulkan_ctx: &VulkanCtx, + max_coded_extent: vk::Extent2D, + max_dpb_slots: u32, + decode_buffer: &CommandBuffer, + fence_memory_barrier_completed: &Fence, + ) -> Result, VulkanDecoderError> { + let profile = H264ProfileInfo::decode_h264_yuv420(); + + // FIXME: usually, sps arrives either at the start of the stream (when all spses are sent + // at the begginning of the stream) or right before an IDR. It is however possible for an + // sps nal to arrive in between P-frames. This would cause us to loose the reference + // pictures we need to decode the stream until we receive a new IDR. Don't know if this is + // an issue worth fixing, I don't think I ever saw a stream like this. 
+ let (decoding_images, memory_barrier) = DecodingImages::new( + vulkan_ctx, + profile, + &vulkan_ctx.h264_dpb_format_properties, + &vulkan_ctx.h264_dst_format_properties, + max_coded_extent, + max_dpb_slots, + )?; + + decode_buffer.begin()?; + + unsafe { + vulkan_ctx.device.cmd_pipeline_barrier2( + **decode_buffer, + &vk::DependencyInfo::default().image_memory_barriers(&memory_barrier), + ); + } + + decode_buffer.end()?; + + vulkan_ctx.queues.h264_decode.submit( + decode_buffer, + &[], + &[], + Some(**fence_memory_barrier_completed), + )?; + + // TODO: this shouldn't be a fence + fence_memory_barrier_completed.wait_and_reset(u64::MAX)?; + + Ok(decoding_images) + } + + pub(crate) fn free_reference_picture(&mut self, i: usize) -> Result<(), VulkanDecoderError> { + self.decoding_images.free_reference_picture(i) + } +} diff --git a/vk-video/src/vulkan_decoder/session_resources/images.rs b/vk-video/src/vulkan_decoder/session_resources/images.rs new file mode 100644 index 000000000..c62afbd50 --- /dev/null +++ b/vk-video/src/vulkan_decoder/session_resources/images.rs @@ -0,0 +1,291 @@ +use std::sync::Arc; + +use ash::vk; + +use crate::{ + vulkan_decoder::{H264ProfileInfo, Image, ImageView}, + VulkanCtx, VulkanDecoderError, +}; + +pub(crate) struct DecodingImages<'a> { + pub(crate) dpb_image: DecodingImageBundle<'a>, + pub(crate) dpb_slot_active: Vec, + pub(crate) dst_image: Option>, +} + +pub(crate) struct DecodingImageBundle<'a> { + pub(crate) image: Arc, + pub(crate) _image_view: ImageView, + pub(crate) video_resource_info: Vec>, +} + +impl<'a> DecodingImageBundle<'a> { + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + format: &vk::VideoFormatPropertiesKHR<'a>, + dimensions: vk::Extent2D, + image_usage: vk::ImageUsageFlags, + profile_info: &H264ProfileInfo, + array_layer_count: u32, + queue_indices: Option<&[u32]>, + layout: vk::ImageLayout, + ) -> Result<(Self, vk::ImageMemoryBarrier2<'a>), VulkanDecoderError> { + let mut 
profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(&profile_info.profile_info)); + + let mut image_create_info = vk::ImageCreateInfo::default() + .flags(format.image_create_flags) + .image_type(format.image_type) + .format(format.format) + .extent(vk::Extent3D { + width: dimensions.width, + height: dimensions.height, + depth: 1, + }) + .mip_levels(1) + .array_layers(array_layer_count) + .samples(vk::SampleCountFlags::TYPE_1) + .tiling(format.image_tiling) + .usage(image_usage) + .initial_layout(vk::ImageLayout::UNDEFINED) + .push_next(&mut profile_list_info); + + match queue_indices { + Some(indices) => { + image_create_info = image_create_info + .sharing_mode(vk::SharingMode::CONCURRENT) + .queue_family_indices(indices); + } + None => { + image_create_info = image_create_info.sharing_mode(vk::SharingMode::EXCLUSIVE); + } + } + + let image = Arc::new(Image::new( + vulkan_ctx.allocator.clone(), + &image_create_info, + )?); + + let subresource_range = vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: vk::REMAINING_ARRAY_LAYERS, + }; + + let image_view_create_info = vk::ImageViewCreateInfo::default() + .flags(vk::ImageViewCreateFlags::empty()) + .image(**image) + .view_type(if array_layer_count == 1 { + vk::ImageViewType::TYPE_2D + } else { + vk::ImageViewType::TYPE_2D_ARRAY + }) + .format(format.format) + .components(vk::ComponentMapping::default()) + .subresource_range(subresource_range); + + let image_view = ImageView::new( + vulkan_ctx.device.clone(), + image.clone(), + &image_view_create_info, + )?; + + let video_resource_info = (0..array_layer_count) + .map(|i| { + vk::VideoPictureResourceInfoKHR::default() + .coded_offset(vk::Offset2D { x: 0, y: 0 }) + .coded_extent(dimensions) + .base_array_layer(i) + .image_view_binding(image_view.view) + }) + .collect(); + + let image_memory_barrier = vk::ImageMemoryBarrier2::default() + 
.src_stage_mask(vk::PipelineStageFlags2::NONE) + .src_access_mask(vk::AccessFlags2::NONE) + .dst_stage_mask(vk::PipelineStageFlags2::NONE) + .dst_access_mask(vk::AccessFlags2::NONE) + .old_layout(vk::ImageLayout::UNDEFINED) + .new_layout(layout) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(**image) + .subresource_range(subresource_range); + + Ok(( + Self { + image, + _image_view: image_view, + video_resource_info, + }, + image_memory_barrier, + )) + } + + fn extent(&self) -> vk::Extent3D { + self.image.extent + } +} + +impl<'a> DecodingImages<'a> { + pub(crate) fn target_picture_resource_info( + &'a self, + new_reference_slot_index: usize, + ) -> Option> { + match &self.dst_image { + Some(image) => Some(image.video_resource_info[0]), + None => self.video_resource_info(new_reference_slot_index).copied(), + } + } + + pub(crate) fn target_info( + &self, + new_reference_slot_index: usize, + ) -> (vk::Image, vk::ImageLayout, usize) { + match &self.dst_image { + Some(image) => (**image.image, vk::ImageLayout::VIDEO_DECODE_DST_KHR, 0), + None => ( + **self.dpb_image.image, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + new_reference_slot_index, + ), + } + } + + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + profile: H264ProfileInfo, + dpb_format: &vk::VideoFormatPropertiesKHR<'a>, + dst_format: &Option>, + dimensions: vk::Extent2D, + max_dpb_slots: u32, + ) -> Result<(Self, Vec>), VulkanDecoderError> { + let dpb_image_usage = if dst_format.is_some() { + dpb_format.image_usage_flags & vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + } else { + dpb_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DPB_KHR + | vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC) + }; + + let queue_indices = [ + vulkan_ctx.queues.transfer.idx as u32, + vulkan_ctx.queues.h264_decode.idx as u32, + ]; + + let (dpb_image, dpb_memory_barrier) = DecodingImageBundle::new( + vulkan_ctx, + dpb_format, 
+ dimensions, + dpb_image_usage, + &profile, + max_dpb_slots, + if dst_format.is_some() { + None + } else { + Some(&queue_indices) + }, + vk::ImageLayout::VIDEO_DECODE_DPB_KHR, + )?; + + let output = dst_format + .map(|dst_format| { + let dst_image_usage = dst_format.image_usage_flags + & (vk::ImageUsageFlags::VIDEO_DECODE_DST_KHR + | vk::ImageUsageFlags::TRANSFER_SRC); + DecodingImageBundle::new( + vulkan_ctx, + &dst_format, + dimensions, + dst_image_usage, + &profile, + 1, + Some(&queue_indices), + vk::ImageLayout::VIDEO_DECODE_DST_KHR, + ) + }) + .transpose()?; + + let (dst_image, dst_memory_barrier) = match output { + Some((output_images, output_memory_barrier)) => { + (Some(output_images), Some(output_memory_barrier)) + } + None => (None, None), + }; + + let barriers = [dpb_memory_barrier] + .into_iter() + .chain(dst_memory_barrier) + .collect::>(); + + Ok(( + Self { + dpb_image, + dpb_slot_active: vec![false; max_dpb_slots as usize], + dst_image, + }, + barriers, + )) + } + + #[allow(dead_code)] + pub(crate) fn dbp_extent(&self) -> vk::Extent3D { + self.dpb_image.extent() + } + + #[allow(dead_code)] + pub(crate) fn dst_extent(&self) -> Option { + self.dst_image.as_ref().map(|i| i.extent()) + } + + pub(crate) fn reference_slot_info(&self) -> Vec { + self.dpb_image + .video_resource_info + .iter() + .enumerate() + .map(|(i, info)| { + vk::VideoReferenceSlotInfoKHR::default() + .picture_resource(info) + .slot_index(if self.dpb_slot_active[i] { + i as i32 + } else { + -1 + }) + }) + .collect() + } + + pub(crate) fn allocate_reference_picture(&mut self) -> Result { + let i = self + .dpb_slot_active + .iter() + .enumerate() + .find(|(_, &v)| !v) + .map(|(i, _)| i) + .ok_or(VulkanDecoderError::NoFreeSlotsInDpb)?; + + self.dpb_slot_active[i] = true; + + Ok(i) + } + + pub(crate) fn video_resource_info(&self, i: usize) -> Option<&vk::VideoPictureResourceInfoKHR> { + self.dpb_image.video_resource_info.get(i) + } + + pub(crate) fn free_reference_picture(&mut self, i: 
usize) -> Result<(), VulkanDecoderError> { + self.dpb_slot_active[i] = false; + + Ok(()) + } + + pub(crate) fn reset_all_allocations(&mut self) { + self.dpb_slot_active + .iter_mut() + .for_each(|slot| *slot = false); + } +} diff --git a/vk-video/src/vulkan_decoder/session_resources/parameters.rs b/vk-video/src/vulkan_decoder/session_resources/parameters.rs new file mode 100644 index 000000000..e5f366f67 --- /dev/null +++ b/vk-video/src/vulkan_decoder/session_resources/parameters.rs @@ -0,0 +1,116 @@ +use std::{collections::HashMap, sync::Arc}; + +use ash::vk; +use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; + +use crate::{ + vulkan_decoder::{ + Device, VideoSessionParameters, VkPictureParameterSet, VkSequenceParameterSet, + }, + VulkanCtx, VulkanDecoderError, +}; + +/// Since `VideoSessionParameters` can only add sps and pps values (inserting sps or pps with an +/// existing id is prohibited), this is an abstraction which provides the capability to replace an +/// existing sps or pps. 
+pub(crate) struct VideoSessionParametersManager { + pub(crate) parameters: VideoSessionParameters, + sps: HashMap, + pps: HashMap<(u8, u8), VkPictureParameterSet>, + device: Arc, + session: vk::VideoSessionKHR, +} + +impl VideoSessionParametersManager { + pub(crate) fn new( + vulkan_ctx: &VulkanCtx, + session: vk::VideoSessionKHR, + ) -> Result { + Ok(Self { + parameters: VideoSessionParameters::new( + vulkan_ctx.device.clone(), + session, + &[], + &[], + None, + )?, + sps: HashMap::new(), + pps: HashMap::new(), + device: vulkan_ctx.device.clone(), + session, + }) + } + + pub(crate) fn parameters(&self) -> vk::VideoSessionParametersKHR { + self.parameters.parameters + } + + pub(crate) fn change_session( + &mut self, + session: vk::VideoSessionKHR, + ) -> Result<(), VulkanDecoderError> { + if self.session == session { + return Ok(()); + } + self.session = session; + + let sps = self.sps.values().map(|sps| sps.sps).collect::>(); + let pps = self.pps.values().map(|pps| pps.pps).collect::>(); + + self.parameters = + VideoSessionParameters::new(self.device.clone(), session, &sps, &pps, None)?; + + Ok(()) + } + + // it is probably not optimal to insert sps and pps searately. this could be optimized, so that + // the insertion happens lazily when the parameters are bound to a session. + pub(crate) fn put_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { + let key = sps.seq_parameter_set_id.id(); + match self.sps.entry(key) { + std::collections::hash_map::Entry::Occupied(mut e) => { + e.insert(sps.try_into()?); + + self.parameters = VideoSessionParameters::new( + self.device.clone(), + self.session, + &[self.sps[&key].sps], + &[], + Some(&self.parameters), + )? 
+ } + std::collections::hash_map::Entry::Vacant(e) => { + e.insert(sps.try_into()?); + + self.parameters.add(&[self.sps[&key].sps], &[])?; + } + } + + Ok(()) + } + + pub(crate) fn put_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { + let key = (pps.seq_parameter_set_id.id(), pps.pic_parameter_set_id.id()); + match self.pps.entry(key) { + std::collections::hash_map::Entry::Occupied(mut e) => { + e.insert(pps.try_into()?); + + self.parameters = VideoSessionParameters::new( + self.device.clone(), + self.session, + &[], + &[self.pps[&key].pps], + Some(&self.parameters), + )?; + } + + std::collections::hash_map::Entry::Vacant(e) => { + e.insert(pps.try_into()?); + + self.parameters.add(&[], &[self.pps[&key].pps])?; + } + } + + Ok(()) + } +} diff --git a/vk-video/src/vulkan_decoder/vulkan_ctx.rs b/vk-video/src/vulkan_decoder/vulkan_ctx.rs index a6481f721..03683237f 100644 --- a/vk-video/src/vulkan_decoder/vulkan_ctx.rs +++ b/vk-video/src/vulkan_decoder/vulkan_ctx.rs @@ -6,7 +6,10 @@ use std::{ use ash::{vk, Entry}; use tracing::{error, info}; -use super::{Allocator, CommandPool, DebugMessenger, Device, H264ProfileInfo, Instance}; +use super::{ + Allocator, CommandBuffer, CommandPool, DebugMessenger, Device, H264ProfileInfo, Instance, + VulkanDecoderError, +}; const REQUIRED_EXTENSIONS: &[&CStr] = &[ vk::KHR_VIDEO_QUEUE_NAME, @@ -73,6 +76,7 @@ pub(crate) struct Queue { _video_properties: vk::QueueFamilyVideoPropertiesKHR<'static>, pub(crate) query_result_status_properties: vk::QueueFamilyQueryResultStatusPropertiesKHR<'static>, + device: Arc, } impl Queue { @@ -81,6 +85,48 @@ impl Queue { .query_result_status_support == vk::TRUE } + + pub(crate) fn submit( + &self, + buffer: &CommandBuffer, + wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], + signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], + fence: Option, + ) -> Result<(), VulkanDecoderError> { + fn to_sem_submit_info( + submits: &[(vk::Semaphore, 
vk::PipelineStageFlags2)], + ) -> Vec { + submits + .iter() + .map(|&(sem, stage)| { + vk::SemaphoreSubmitInfo::default() + .semaphore(sem) + .stage_mask(stage) + }) + .collect::>() + } + + let wait_semaphores = to_sem_submit_info(wait_semaphores); + let signal_semaphores = to_sem_submit_info(signal_semaphores); + + let buffer_submit_info = + [vk::CommandBufferSubmitInfo::default().command_buffer(buffer.buffer)]; + + let submit_info = [vk::SubmitInfo2::default() + .wait_semaphore_infos(&wait_semaphores) + .signal_semaphore_infos(&signal_semaphores) + .command_buffer_infos(&buffer_submit_info)]; + + unsafe { + self.device.queue_submit2( + *self.queue.lock().unwrap(), + &submit_info, + fence.unwrap_or(vk::Fence::null()), + )? + }; + + Ok(()) + } } pub(crate) struct Queues { @@ -222,6 +268,16 @@ impl VulkanCtx { .push_next(&mut vk_synch_2_feature); let device = unsafe { instance.create_device(physical_device, &device_create_info, None)? }; + let video_queue_ext = ash::khr::video_queue::Device::new(&instance, &device); + let video_decode_queue_ext = ash::khr::video_decode_queue::Device::new(&instance, &device); + + let device = Arc::new(Device { + device, + video_queue_ext, + video_decode_queue_ext, + _instance: instance.clone(), + }); + let h264_decode_queue = unsafe { device.get_device_queue(queue_indices.h264_decode.idx as u32, 0) }; let transfer_queue = @@ -229,6 +285,7 @@ impl VulkanCtx { let wgpu_queue = unsafe { device.get_device_queue(queue_indices.graphics_transfer_compute.idx as u32, 0) }; + let queues = Queues { transfer: Queue { queue: transfer_queue.into(), @@ -237,6 +294,7 @@ impl VulkanCtx { query_result_status_properties: queue_indices .transfer .query_result_status_properties, + device: device.clone(), }, h264_decode: Queue { queue: h264_decode_queue.into(), @@ -245,6 +303,7 @@ impl VulkanCtx { query_result_status_properties: queue_indices .h264_decode .query_result_status_properties, + device: device.clone(), }, wgpu: Queue { queue: wgpu_queue.into(), 
@@ -253,19 +312,10 @@ impl VulkanCtx { query_result_status_properties: queue_indices .graphics_transfer_compute .query_result_status_properties, + device: device.clone(), }, }; - let video_queue_ext = ash::khr::video_queue::Device::new(&instance, &device); - let video_decode_queue_ext = ash::khr::video_decode_queue::Device::new(&instance, &device); - - let device = Arc::new(Device { - device, - video_queue_ext, - video_decode_queue_ext, - _instance: instance.clone(), - }); - let wgpu_device = unsafe { wgpu_adapter.adapter.device_from_raw( device.device.clone(), diff --git a/vk-video/src/vulkan_decoder/wrappers.rs b/vk-video/src/vulkan_decoder/wrappers.rs index 2d02da14c..5067ae512 100644 --- a/vk-video/src/vulkan_decoder/wrappers.rs +++ b/vk-video/src/vulkan_decoder/wrappers.rs @@ -5,6 +5,7 @@ use ash::Entry; mod command; mod debug; mod mem; +mod parameter_sets; mod sync; mod video; mod vk_extensions; @@ -12,6 +13,7 @@ mod vk_extensions; pub(crate) use command::*; pub(crate) use debug::*; pub(crate) use mem::*; +pub(crate) use parameter_sets::*; pub(crate) use sync::*; pub(crate) use video::*; pub(crate) use vk_extensions::*; diff --git a/vk-video/src/vulkan_decoder/wrappers/command.rs b/vk-video/src/vulkan_decoder/wrappers/command.rs index d76eb5249..8add7a1fa 100644 --- a/vk-video/src/vulkan_decoder/wrappers/command.rs +++ b/vk-video/src/vulkan_decoder/wrappers/command.rs @@ -62,45 +62,6 @@ impl CommandBuffer { Ok(Self { pool, buffer }) } - pub(crate) fn submit( - &self, - queue: vk::Queue, - wait_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], - signal_semaphores: &[(vk::Semaphore, vk::PipelineStageFlags2)], - fence: Option, - ) -> Result<(), VulkanDecoderError> { - fn to_sem_submit_info( - submits: &[(vk::Semaphore, vk::PipelineStageFlags2)], - ) -> Vec { - submits - .iter() - .map(|&(sem, stage)| { - vk::SemaphoreSubmitInfo::default() - .semaphore(sem) - .stage_mask(stage) - }) - .collect::>() - } - - let wait_semaphores = 
to_sem_submit_info(wait_semaphores); - let signal_semaphores = to_sem_submit_info(signal_semaphores); - - let buffer_submit_info = - [vk::CommandBufferSubmitInfo::default().command_buffer(self.buffer)]; - - let submit_info = [vk::SubmitInfo2::default() - .wait_semaphore_infos(&wait_semaphores) - .signal_semaphore_infos(&signal_semaphores) - .command_buffer_infos(&buffer_submit_info)]; - - unsafe { - self.device() - .queue_submit2(queue, &submit_info, fence.unwrap_or(vk::Fence::null()))? - }; - - Ok(()) - } - pub(crate) fn begin(&self) -> Result<(), VulkanDecoderError> { unsafe { self.device().begin_command_buffer( diff --git a/vk-video/src/vulkan_decoder/wrappers/mem.rs b/vk-video/src/vulkan_decoder/wrappers/mem.rs index 37f8e83bf..5876702ec 100644 --- a/vk-video/src/vulkan_decoder/wrappers/mem.rs +++ b/vk-video/src/vulkan_decoder/wrappers/mem.rs @@ -157,6 +157,44 @@ impl Buffer { allocator, }) } + + /// ## Safety + /// the buffer has to be mappable and readable + pub(crate) unsafe fn download_data_from_buffer( + &mut self, + size: usize, + ) -> Result, VulkanDecoderError> { + let mut output = Vec::new(); + unsafe { + let memory = self.allocator.map_memory(&mut self.allocation)?; + let memory_slice = std::slice::from_raw_parts_mut(memory, size); + output.extend_from_slice(memory_slice); + self.allocator.unmap_memory(&mut self.allocation); + } + + Ok(output) + } + + pub(crate) fn new_with_decode_data( + allocator: Arc, + data: &[u8], + buffer_size: u64, + ) -> Result { + let mut decode_buffer = Buffer::new_decode( + allocator.clone(), + buffer_size, + &H264ProfileInfo::decode_h264_yuv420(), + )?; + + unsafe { + let mem = allocator.map_memory(&mut decode_buffer.allocation)?; + let slice = std::slice::from_raw_parts_mut(mem.cast(), data.len()); + slice.copy_from_slice(data); + allocator.unmap_memory(&mut decode_buffer.allocation); + } + + Ok(decode_buffer) + } } impl Drop for Buffer { @@ -180,6 +218,7 @@ pub(crate) struct Image { pub(crate) image: vk::Image, 
allocation: vk_mem::Allocation, allocator: Arc, + pub(crate) extent: vk::Extent3D, } impl Image { @@ -187,6 +226,7 @@ impl Image { allocator: Arc, image_create_info: &vk::ImageCreateInfo, ) -> Result { + let extent = image_create_info.extent; let alloc_info = vk_mem::AllocationCreateInfo { usage: vk_mem::MemoryUsage::Auto, ..Default::default() @@ -199,6 +239,7 @@ impl Image { image, allocation, allocator, + extent, }) } } diff --git a/vk-video/src/vulkan_decoder/parameter_sets.rs b/vk-video/src/vulkan_decoder/wrappers/parameter_sets.rs similarity index 91% rename from vk-video/src/vulkan_decoder/parameter_sets.rs rename to vk-video/src/vulkan_decoder/wrappers/parameter_sets.rs index ebf4267c5..2229662e3 100644 --- a/vk-video/src/vulkan_decoder/parameter_sets.rs +++ b/vk-video/src/vulkan_decoder/wrappers/parameter_sets.rs @@ -1,7 +1,34 @@ use ash::vk; use h264_reader::nal::sps::SeqParameterSet; -use super::VulkanDecoderError; +use crate::VulkanDecoderError; + +const MACROBLOCK_SIZE: u32 = 16; + +pub(crate) trait SeqParameterSetExt { + fn width(&self) -> Result; + fn height(&self) -> Result; +} + +impl SeqParameterSetExt for SeqParameterSet { + fn width(&self) -> Result { + match self.frame_cropping { + None => Ok((self.pic_width_in_mbs_minus1 + 1) * MACROBLOCK_SIZE), + Some(_) => Err(VulkanDecoderError::FrameCroppingNotSupported), + } + } + + fn height(&self) -> Result { + match self.frame_mbs_flags { + h264_reader::nal::sps::FrameMbsFlags::Frames => { + Ok((self.pic_height_in_map_units_minus1 + 1) * MACROBLOCK_SIZE) + } + h264_reader::nal::sps::FrameMbsFlags::Fields { .. 
} => { + Err(VulkanDecoderError::FieldsNotSupported) + } + } + } +} pub(crate) struct VkSequenceParameterSet { pub(crate) sps: vk::native::StdVideoH264SequenceParameterSet, @@ -195,8 +222,8 @@ fn h264_level_idc_to_vk(level_idc: u8) -> u32 { } } -pub(super) struct VkPictureParameterSet { - pub(super) pps: vk::native::StdVideoH264PictureParameterSet, +pub(crate) struct VkPictureParameterSet { + pub(crate) pps: vk::native::StdVideoH264PictureParameterSet, } impl TryFrom<&'_ h264_reader::nal::pps::PicParameterSet> for VkPictureParameterSet { diff --git a/vk-video/src/vulkan_decoder/wrappers/video.rs b/vk-video/src/vulkan_decoder/wrappers/video.rs index 7dc745c1b..e548f6ea8 100644 --- a/vk-video/src/vulkan_decoder/wrappers/video.rs +++ b/vk-video/src/vulkan_decoder/wrappers/video.rs @@ -1,123 +1,11 @@ -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; use ash::vk; -use h264_reader::nal::{pps::PicParameterSet, sps::SeqParameterSet}; -use crate::{ - vulkan_decoder::{ - parameter_sets::{VkPictureParameterSet, VkSequenceParameterSet}, - VulkanDecoderError, - }, - VulkanCtx, -}; +use crate::{vulkan_decoder::VulkanDecoderError, VulkanCtx}; use super::{Device, MemoryAllocation, VideoQueueExt}; -/// Since `VideoSessionParameters` can only add sps and pps values (inserting sps or pps with an -/// existing id is prohibited), this is an abstraction which provides the capability to replace an -/// existing sps or pps. 
-pub(crate) struct VideoSessionParametersManager { - pub(crate) parameters: VideoSessionParameters, - sps: HashMap, - pps: HashMap<(u8, u8), VkPictureParameterSet>, - device: Arc, - session: vk::VideoSessionKHR, -} - -impl VideoSessionParametersManager { - pub(crate) fn new( - vulkan_ctx: &VulkanCtx, - session: vk::VideoSessionKHR, - ) -> Result { - Ok(Self { - parameters: VideoSessionParameters::new( - vulkan_ctx.device.clone(), - session, - &[], - &[], - None, - )?, - sps: HashMap::new(), - pps: HashMap::new(), - device: vulkan_ctx.device.clone(), - session, - }) - } - - pub(crate) fn parameters(&self) -> vk::VideoSessionParametersKHR { - self.parameters.parameters - } - - pub(crate) fn change_session( - &mut self, - session: vk::VideoSessionKHR, - ) -> Result<(), VulkanDecoderError> { - if self.session == session { - return Ok(()); - } - self.session = session; - - let sps = self.sps.values().map(|sps| sps.sps).collect::>(); - let pps = self.pps.values().map(|pps| pps.pps).collect::>(); - - self.parameters = - VideoSessionParameters::new(self.device.clone(), session, &sps, &pps, None)?; - - Ok(()) - } - - // it is probably not optimal to insert sps and pps searately. this could be optimized, so that - // the insertion happens lazily when the parameters are bound to a session. - pub(crate) fn put_sps(&mut self, sps: &SeqParameterSet) -> Result<(), VulkanDecoderError> { - let key = sps.seq_parameter_set_id.id(); - match self.sps.entry(key) { - std::collections::hash_map::Entry::Occupied(mut e) => { - e.insert(sps.try_into()?); - - self.parameters = VideoSessionParameters::new( - self.device.clone(), - self.session, - &[self.sps[&key].sps], - &[], - Some(&self.parameters), - )? 
- } - std::collections::hash_map::Entry::Vacant(e) => { - e.insert(sps.try_into()?); - - self.parameters.add(&[self.sps[&key].sps], &[])?; - } - } - - Ok(()) - } - - pub(crate) fn put_pps(&mut self, pps: &PicParameterSet) -> Result<(), VulkanDecoderError> { - let key = (pps.seq_parameter_set_id.id(), pps.pic_parameter_set_id.id()); - match self.pps.entry(key) { - std::collections::hash_map::Entry::Occupied(mut e) => { - e.insert(pps.try_into()?); - - self.parameters = VideoSessionParameters::new( - self.device.clone(), - self.session, - &[], - &[self.pps[&key].pps], - Some(&self.parameters), - )?; - } - - std::collections::hash_map::Entry::Vacant(e) => { - e.insert(pps.try_into()?); - - self.parameters.add(&[], &[self.pps[&key].pps])?; - } - } - - Ok(()) - } -} - pub(crate) struct VideoSessionParameters { pub(crate) parameters: vk::VideoSessionParametersKHR, update_sequence_count: u32, @@ -296,3 +184,23 @@ impl Drop for VideoSession { }; } } + +impl From for vk::native::StdVideoDecodeH264ReferenceInfo { + fn from(picture_info: crate::parser::PictureInfo) -> Self { + vk::native::StdVideoDecodeH264ReferenceInfo { + flags: vk::native::StdVideoDecodeH264ReferenceInfoFlags { + __bindgen_padding_0: [0; 3], + _bitfield_align_1: [], + _bitfield_1: vk::native::StdVideoDecodeH264ReferenceInfoFlags::new_bitfield_1( + 0, + 0, + picture_info.used_for_long_term_reference.into(), + picture_info.non_existing.into(), + ), + }, + FrameNum: picture_info.FrameNum, + PicOrderCnt: picture_info.PicOrderCnt, + reserved: 0, + } + } +} From cb8e4966dfc2aff255d1dc524b75ad06553ff893 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Tue, 1 Oct 2024 15:40:36 +0200 Subject: [PATCH 05/13] Refactor the reference manager to a new module. 
--- vk-video/src/parser.rs | 462 +--------------------- vk-video/src/parser/reference_manager.rs | 468 +++++++++++++++++++++++ 2 files changed, 472 insertions(+), 458 deletions(-) create mode 100644 vk-video/src/parser/reference_manager.rs diff --git a/vk-video/src/parser.rs b/vk-video/src/parser.rs index 0b11a640e..622e86187 100644 --- a/vk-video/src/parser.rs +++ b/vk-video/src/parser.rs @@ -5,47 +5,16 @@ use std::{ use h264_reader::{ annexb::AnnexBReader, - nal::{ - pps::PicParameterSet, - slice::{DecRefPicMarking, NumRefIdxActive, RefPicListModifications, SliceHeader}, - sps::SeqParameterSet, - Nal, RefNal, - }, + nal::{pps::PicParameterSet, slice::SliceHeader, sps::SeqParameterSet, Nal, RefNal}, push::{AccumulatedNalHandler, NalAccumulator, NalInterest}, }; +use reference_manager::ReferenceContext; use tracing::trace; mod au_splitter; +mod reference_manager; -#[derive(Debug, thiserror::Error)] -pub enum ReferenceManagementError { - #[error("B frames are not supported")] - BFramesNotSupported, - - #[error("Long-term references are not supported")] - LongTermRefsNotSupported, - - #[error("SI frames are not supported")] - SIFramesNotSupported, - - #[error("SP frames are not supported")] - SPFramesNotSupported, - - #[error("Adaptive memory control decoded reference picture marking process is not supported")] - AdaptiveMemCtlNotSupported, - - #[error("Reference picture list modifications are not supported")] - RefPicListModificationsNotSupported, - - #[error("PicOrderCntType {0} is not supperted")] - PicOrderCntTypeNotSupported(u8), - - #[error("pic_order_cnt_lsb is not present in a slice header, but is required for decoding")] - PicOrderCntLsbNotPresent, -} - -#[derive(Debug, Default, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct ReferenceId(usize); +pub use reference_manager::{ReferenceId, ReferenceManagementError}; #[derive(Clone, derivative::Derivative)] #[derivative(Debug)] @@ -102,429 +71,6 @@ pub enum DecoderInstruction { 
Pps(PicParameterSet), } -#[derive(Debug, Default)] -struct ReferenceContext { - pictures: ReferencePictures, - next_reference_id: ReferenceId, - _previous_frame_num: usize, - prev_pic_order_cnt_msb: i32, - prev_pic_order_cnt_lsb: i32, -} - -impl ReferenceContext { - fn get_next_reference_id(&mut self) -> ReferenceId { - let result = self.next_reference_id; - self.next_reference_id = ReferenceId(result.0 + 1); - result - } - - fn reset_state(&mut self) { - *self = Self { - pictures: ReferencePictures::default(), - next_reference_id: ReferenceId::default(), - _previous_frame_num: 0, - prev_pic_order_cnt_msb: 0, - prev_pic_order_cnt_lsb: 0, - }; - } - - fn add_short_term_reference( - &mut self, - header: Arc, - pic_order_cnt: [i32; 2], - ) -> ReferenceId { - let id = self.get_next_reference_id(); - self.pictures.short_term.push(ShortTermReferencePicture { - header, - id, - pic_order_cnt, - }); - id - } - - fn put_picture( - &mut self, - mut slices: Vec, - sps: &SeqParameterSet, - pps: &PicParameterSet, - ) -> Result, ParserError> { - let header = slices.last().unwrap().header.clone(); - let mut rbsp_bytes = Vec::new(); - let mut slice_indices = Vec::new(); - for slice in &mut slices { - if slice.rbsp_bytes.is_empty() { - continue; - } - slice_indices.push(rbsp_bytes.len()); - rbsp_bytes.append(&mut slice.rbsp_bytes); - } - - match header.dec_ref_pic_marking { - Some(DecRefPicMarking::Idr { - long_term_reference_flag, - .. 
- }) => { - if long_term_reference_flag { - Err(ReferenceManagementError::LongTermRefsNotSupported)?; - } - - let decode_info = self.decode_information_for_frame( - header.clone(), - slice_indices, - rbsp_bytes, - sps, - pps, - )?; - - self.reset_state(); - - let reference_id = - self.add_short_term_reference(header, decode_info.picture_info.PicOrderCnt); - - Ok(vec![DecoderInstruction::Idr { - decode_info, - reference_id, - }]) - } - - Some(DecRefPicMarking::SlidingWindow) => { - let num_short_term = self.pictures.short_term.len(); - let num_long_term = self.pictures.long_term.len(); - - let decode_info = self.decode_information_for_frame( - header.clone(), - slice_indices, - rbsp_bytes, - sps, - pps, - )?; - let reference_id = self - .add_short_term_reference(header.clone(), decode_info.picture_info.PicOrderCnt); - - let mut decoder_instructions = vec![DecoderInstruction::DecodeAndStoreAs { - decode_info, - reference_id, - }]; - - if num_short_term + num_long_term == sps.max_num_ref_frames.max(1) as usize - && !self.pictures.short_term.is_empty() - { - let (idx, _) = self - .pictures - .short_term - .iter() - .enumerate() - .min_by_key(|(_, reference)| { - reference - .decode_picture_numbers(header.frame_num as i64, sps) - .unwrap() - .FrameNumWrap - }) - .unwrap(); - - decoder_instructions.push(DecoderInstruction::Drop { - reference_ids: vec![self.pictures.short_term.remove(idx).id], - }) - } - - Ok(decoder_instructions) - } - - Some(DecRefPicMarking::Adaptive(_)) => { - Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
- } - - // this picture is not a reference - None => Ok(vec![DecoderInstruction::Decode { - decode_info: self.decode_information_for_frame( - header, - slice_indices, - rbsp_bytes, - sps, - pps, - )?, - }]), - } - } - - fn decode_information_for_frame( - &mut self, - header: Arc, - slice_indices: Vec, - rbsp_bytes: Vec, - sps: &SeqParameterSet, - pps: &PicParameterSet, - ) -> Result { - let reference_list = match header.slice_type.family { - h264_reader::nal::slice::SliceFamily::P => { - let reference_list = - self.initialize_reference_picture_list_for_frame(&header, sps, pps)?; - - match &header.ref_pic_list_modification { - Some(RefPicListModifications::P { - ref_pic_list_modification_l0, - }) => { - if !ref_pic_list_modification_l0.is_empty() { - Err(ReferenceManagementError::RefPicListModificationsNotSupported)?; - } - } - - None - | Some(RefPicListModifications::I) - | Some(RefPicListModifications::B { .. }) => unreachable!(), - } - - Some(reference_list) - } - h264_reader::nal::slice::SliceFamily::I => None, - h264_reader::nal::slice::SliceFamily::B => { - return Err(ReferenceManagementError::BFramesNotSupported)? - } - h264_reader::nal::slice::SliceFamily::SP => { - return Err(ReferenceManagementError::SPFramesNotSupported)? - } - h264_reader::nal::slice::SliceFamily::SI => { - return Err(ReferenceManagementError::SIFramesNotSupported)? 
- } - }; - - let pic_order_cnt = match sps.pic_order_cnt { - h264_reader::nal::sps::PicOrderCntType::TypeZero { - log2_max_pic_order_cnt_lsb_minus4, - } => { - // this section is very hard to read, but all of this code is just copied from the - // h.264 spec, where it looks almost exactly like this - - let max_pic_order_cnt_lsb = 2_i32.pow(log2_max_pic_order_cnt_lsb_minus4 as u32 + 4); - - let (prev_pic_order_cnt_msb, prev_pic_order_cnt_lsb) = - if header.idr_pic_id.is_some() { - (0, 0) - } else { - (self.prev_pic_order_cnt_msb, self.prev_pic_order_cnt_lsb) - }; - - let (pic_order_cnt_lsb, delta_pic_order_cnt_bottom) = match header - .pic_order_cnt_lsb - .as_ref() - .ok_or(ReferenceManagementError::PicOrderCntLsbNotPresent)? - { - h264_reader::nal::slice::PicOrderCountLsb::Frame(pic_order_cnt_lsb) => { - (*pic_order_cnt_lsb, 0) - } - h264_reader::nal::slice::PicOrderCountLsb::FieldsAbsolute { - pic_order_cnt_lsb, - delta_pic_order_cnt_bottom, - } => (*pic_order_cnt_lsb, *delta_pic_order_cnt_bottom), - h264_reader::nal::slice::PicOrderCountLsb::FieldsDelta(_) => { - Err(ReferenceManagementError::PicOrderCntLsbNotPresent)? 
- } - }; - - let pic_order_cnt_lsb = pic_order_cnt_lsb as i32; - - let pic_order_cnt_msb = if pic_order_cnt_lsb < prev_pic_order_cnt_lsb - && prev_pic_order_cnt_lsb - pic_order_cnt_lsb >= max_pic_order_cnt_lsb / 2 - { - prev_pic_order_cnt_msb + max_pic_order_cnt_lsb - } else if pic_order_cnt_lsb > prev_pic_order_cnt_lsb - && pic_order_cnt_lsb - prev_pic_order_cnt_lsb > max_pic_order_cnt_lsb / 2 - { - prev_pic_order_cnt_msb - max_pic_order_cnt_lsb - } else { - prev_pic_order_cnt_msb - }; - - let pic_order_cnt = if header.field_pic == h264_reader::nal::slice::FieldPic::Frame - { - let top_field_order_cnt = pic_order_cnt_msb + pic_order_cnt_lsb; - - let bottom_field_order_cnt = top_field_order_cnt + delta_pic_order_cnt_bottom; - - top_field_order_cnt.min(bottom_field_order_cnt) - } else { - pic_order_cnt_msb + pic_order_cnt_lsb - }; - - self.prev_pic_order_cnt_msb = pic_order_cnt_msb; - self.prev_pic_order_cnt_lsb = pic_order_cnt_lsb; - - pic_order_cnt - } - - h264_reader::nal::sps::PicOrderCntType::TypeOne { .. } => { - Err(ReferenceManagementError::PicOrderCntTypeNotSupported(1))? - } - - h264_reader::nal::sps::PicOrderCntType::TypeTwo => match header.dec_ref_pic_marking { - None => 2 * header.frame_num as i32 - 1, - Some(DecRefPicMarking::Idr { .. }) | Some(DecRefPicMarking::SlidingWindow) => { - 2 * header.frame_num as i32 - } - Some(DecRefPicMarking::Adaptive(..)) => { - Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
- } - }, - }; - - let pic_order_cnt = [pic_order_cnt; 2]; - - Ok(DecodeInformation { - reference_list, - header: header.clone(), - slice_indices, - rbsp_bytes, - sps_id: sps.id().id(), - pps_id: pps.pic_parameter_set_id.id(), - picture_info: PictureInfo { - non_existing: false, - used_for_long_term_reference: false, - PicOrderCnt: pic_order_cnt, - FrameNum: header.frame_num, - }, - }) - } - - fn initialize_short_term_reference_picture_list_for_frame( - &self, - header: &SliceHeader, - sps: &SeqParameterSet, - ) -> Result, ParserError> { - let mut short_term_reference_list = self - .pictures - .short_term - .iter() - .map(|reference| { - Ok(( - reference, - reference.decode_picture_numbers(header.frame_num.into(), sps)?, - )) - }) - .collect::, ParserError>>()?; - - short_term_reference_list.sort_by_key(|(_, numbers)| -numbers.PicNum); - - let short_term_reference_list = short_term_reference_list - .into_iter() - .map(|(reference, numbers)| ReferencePictureInfo { - id: reference.id, - picture_info: PictureInfo { - FrameNum: numbers.FrameNum as u16, - used_for_long_term_reference: false, - non_existing: false, - PicOrderCnt: reference.pic_order_cnt, - }, - }) - .collect::>(); - - Ok(short_term_reference_list) - } - - fn initialize_long_term_reference_picture_list_for_frame( - &self, - ) -> Result, ReferenceManagementError> { - if !self.pictures.long_term.is_empty() { - panic!("long-term references are not supported!"); - } - - Ok(Vec::new()) - } - - fn initialize_reference_picture_list_for_frame( - &self, - header: &SliceHeader, - sps: &SeqParameterSet, - pps: &PicParameterSet, - ) -> Result, ParserError> { - let num_ref_idx_l0_active = header - .num_ref_idx_active - .as_ref() - .map(|num| match num { - NumRefIdxActive::P { - num_ref_idx_l0_active_minus1, - } => Ok(*num_ref_idx_l0_active_minus1), - NumRefIdxActive::B { .. } => Err(ReferenceManagementError::BFramesNotSupported), - }) - .unwrap_or(Ok(pps.num_ref_idx_l0_default_active_minus1))? 
- + 1; - - let short_term_reference_list = - self.initialize_short_term_reference_picture_list_for_frame(header, sps)?; - - let long_term_reference_list = - self.initialize_long_term_reference_picture_list_for_frame()?; - - let mut reference_list = short_term_reference_list - .into_iter() - .chain(long_term_reference_list) - .collect::>(); - - reference_list.truncate(num_ref_idx_l0_active as usize); - - Ok(reference_list) - } -} - -#[derive(Debug)] -struct ShortTermReferencePicture { - header: Arc, - id: ReferenceId, - pic_order_cnt: [i32; 2], -} - -impl ShortTermReferencePicture { - #[allow(non_snake_case)] - fn decode_picture_numbers( - &self, - current_frame_num: i64, - sps: &SeqParameterSet, - ) -> Result { - if self.header.field_pic != h264_reader::nal::slice::FieldPic::Frame { - return Err(ParserError::FieldsNotSupported); - } - - let MaxFrameNum = sps.max_frame_num(); - - let FrameNum = self.header.frame_num as i64; - - let FrameNumWrap = if FrameNum > current_frame_num { - FrameNum - MaxFrameNum - } else { - FrameNum - }; - - // this assumes we're dealing with a short-term reference frame - let PicNum = FrameNumWrap; - - Ok(ShortTermReferencePictureNumbers { - FrameNum, - FrameNumWrap, - PicNum, - }) - } -} - -#[derive(Debug)] -struct LongTermReferencePicture { - _header: Arc, - _id: ReferenceId, -} - -#[allow(non_snake_case)] -struct ShortTermReferencePictureNumbers { - FrameNum: i64, - - FrameNumWrap: i64, - - PicNum: i64, -} - -#[derive(Debug, Default)] -struct ReferencePictures { - long_term: Vec, - short_term: Vec, -} - #[derive(Debug, thiserror::Error)] pub enum ParserError { #[error(transparent)] diff --git a/vk-video/src/parser/reference_manager.rs b/vk-video/src/parser/reference_manager.rs new file mode 100644 index 000000000..424a74321 --- /dev/null +++ b/vk-video/src/parser/reference_manager.rs @@ -0,0 +1,468 @@ +use std::sync::Arc; + +use h264_reader::nal::{ + pps::PicParameterSet, + slice::{DecRefPicMarking, NumRefIdxActive, 
RefPicListModifications, SliceHeader}, + sps::SeqParameterSet, +}; + +use super::{ + DecodeInformation, DecoderInstruction, ParserError, PictureInfo, ReferencePictureInfo, Slice, + SpsExt, +}; + +#[derive(Debug, thiserror::Error)] +pub enum ReferenceManagementError { + #[error("B frames are not supported")] + BFramesNotSupported, + + #[error("Long-term references are not supported")] + LongTermRefsNotSupported, + + #[error("SI frames are not supported")] + SIFramesNotSupported, + + #[error("SP frames are not supported")] + SPFramesNotSupported, + + #[error("Adaptive memory control decoded reference picture marking process is not supported")] + AdaptiveMemCtlNotSupported, + + #[error("Reference picture list modifications are not supported")] + RefPicListModificationsNotSupported, + + #[error("PicOrderCntType {0} is not supperted")] + PicOrderCntTypeNotSupported(u8), + + #[error("pic_order_cnt_lsb is not present in a slice header, but is required for decoding")] + PicOrderCntLsbNotPresent, +} + +#[derive(Debug, Default, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct ReferenceId(usize); + +#[derive(Debug, Default)] +pub(crate) struct ReferenceContext { + pictures: ReferencePictures, + next_reference_id: ReferenceId, + _previous_frame_num: usize, + prev_pic_order_cnt_msb: i32, + prev_pic_order_cnt_lsb: i32, +} + +impl ReferenceContext { + fn get_next_reference_id(&mut self) -> ReferenceId { + let result = self.next_reference_id; + self.next_reference_id = ReferenceId(result.0 + 1); + result + } + + fn reset_state(&mut self) { + *self = Self { + pictures: ReferencePictures::default(), + next_reference_id: ReferenceId::default(), + _previous_frame_num: 0, + prev_pic_order_cnt_msb: 0, + prev_pic_order_cnt_lsb: 0, + }; + } + + fn add_short_term_reference( + &mut self, + header: Arc, + pic_order_cnt: [i32; 2], + ) -> ReferenceId { + let id = self.get_next_reference_id(); + self.pictures.short_term.push(ShortTermReferencePicture { + header, + id, + 
pic_order_cnt, + }); + id + } + + pub(crate) fn put_picture( + &mut self, + mut slices: Vec, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result, ParserError> { + let header = slices.last().unwrap().header.clone(); + + // maybe this should be done in a different place, but if you think about it, there's not + // really that many places to put this code in + let mut rbsp_bytes = Vec::new(); + let mut slice_indices = Vec::new(); + for slice in &mut slices { + if slice.rbsp_bytes.is_empty() { + continue; + } + slice_indices.push(rbsp_bytes.len()); + rbsp_bytes.append(&mut slice.rbsp_bytes); + } + + match header.dec_ref_pic_marking { + Some(DecRefPicMarking::Idr { + long_term_reference_flag, + .. + }) => { + if long_term_reference_flag { + Err(ReferenceManagementError::LongTermRefsNotSupported)?; + } + + let decode_info = self.decode_information_for_frame( + header.clone(), + slice_indices, + rbsp_bytes, + sps, + pps, + )?; + + self.reset_state(); + + let reference_id = + self.add_short_term_reference(header, decode_info.picture_info.PicOrderCnt); + + Ok(vec![DecoderInstruction::Idr { + decode_info, + reference_id, + }]) + } + + Some(DecRefPicMarking::SlidingWindow) => { + let num_short_term = self.pictures.short_term.len(); + let num_long_term = self.pictures.long_term.len(); + + let decode_info = self.decode_information_for_frame( + header.clone(), + slice_indices, + rbsp_bytes, + sps, + pps, + )?; + let reference_id = self + .add_short_term_reference(header.clone(), decode_info.picture_info.PicOrderCnt); + + let mut decoder_instructions = vec![DecoderInstruction::DecodeAndStoreAs { + decode_info, + reference_id, + }]; + + if num_short_term + num_long_term == sps.max_num_ref_frames.max(1) as usize + && !self.pictures.short_term.is_empty() + { + let (idx, _) = self + .pictures + .short_term + .iter() + .enumerate() + .min_by_key(|(_, reference)| { + reference + .decode_picture_numbers(header.frame_num as i64, sps) + .unwrap() + .FrameNumWrap + }) + 
.unwrap(); + + decoder_instructions.push(DecoderInstruction::Drop { + reference_ids: vec![self.pictures.short_term.remove(idx).id], + }) + } + + Ok(decoder_instructions) + } + + Some(DecRefPicMarking::Adaptive(_)) => { + Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? + } + + // this picture is not a reference + None => Ok(vec![DecoderInstruction::Decode { + decode_info: self.decode_information_for_frame( + header, + slice_indices, + rbsp_bytes, + sps, + pps, + )?, + }]), + } + } + + fn decode_information_for_frame( + &mut self, + header: Arc, + slice_indices: Vec, + rbsp_bytes: Vec, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result { + let reference_list = match header.slice_type.family { + h264_reader::nal::slice::SliceFamily::P => { + let reference_list = + self.initialize_reference_picture_list_for_frame(&header, sps, pps)?; + + match &header.ref_pic_list_modification { + Some(RefPicListModifications::P { + ref_pic_list_modification_l0, + }) => { + if !ref_pic_list_modification_l0.is_empty() { + Err(ReferenceManagementError::RefPicListModificationsNotSupported)?; + } + } + + None + | Some(RefPicListModifications::I) + | Some(RefPicListModifications::B { .. }) => unreachable!(), + } + + Some(reference_list) + } + h264_reader::nal::slice::SliceFamily::I => None, + h264_reader::nal::slice::SliceFamily::B => { + return Err(ReferenceManagementError::BFramesNotSupported)? + } + h264_reader::nal::slice::SliceFamily::SP => { + return Err(ReferenceManagementError::SPFramesNotSupported)? + } + h264_reader::nal::slice::SliceFamily::SI => { + return Err(ReferenceManagementError::SIFramesNotSupported)? 
+ } + }; + + let pic_order_cnt = match sps.pic_order_cnt { + h264_reader::nal::sps::PicOrderCntType::TypeZero { + log2_max_pic_order_cnt_lsb_minus4, + } => { + // this section is very hard to read, but all of this code is just copied from the + // h.264 spec, where it looks almost exactly like this + + let max_pic_order_cnt_lsb = 2_i32.pow(log2_max_pic_order_cnt_lsb_minus4 as u32 + 4); + + let (prev_pic_order_cnt_msb, prev_pic_order_cnt_lsb) = + if header.idr_pic_id.is_some() { + (0, 0) + } else { + (self.prev_pic_order_cnt_msb, self.prev_pic_order_cnt_lsb) + }; + + let (pic_order_cnt_lsb, delta_pic_order_cnt_bottom) = match header + .pic_order_cnt_lsb + .as_ref() + .ok_or(ReferenceManagementError::PicOrderCntLsbNotPresent)? + { + h264_reader::nal::slice::PicOrderCountLsb::Frame(pic_order_cnt_lsb) => { + (*pic_order_cnt_lsb, 0) + } + h264_reader::nal::slice::PicOrderCountLsb::FieldsAbsolute { + pic_order_cnt_lsb, + delta_pic_order_cnt_bottom, + } => (*pic_order_cnt_lsb, *delta_pic_order_cnt_bottom), + h264_reader::nal::slice::PicOrderCountLsb::FieldsDelta(_) => { + Err(ReferenceManagementError::PicOrderCntLsbNotPresent)? 
+ } + }; + + let pic_order_cnt_lsb = pic_order_cnt_lsb as i32; + + let pic_order_cnt_msb = if pic_order_cnt_lsb < prev_pic_order_cnt_lsb + && prev_pic_order_cnt_lsb - pic_order_cnt_lsb >= max_pic_order_cnt_lsb / 2 + { + prev_pic_order_cnt_msb + max_pic_order_cnt_lsb + } else if pic_order_cnt_lsb > prev_pic_order_cnt_lsb + && pic_order_cnt_lsb - prev_pic_order_cnt_lsb > max_pic_order_cnt_lsb / 2 + { + prev_pic_order_cnt_msb - max_pic_order_cnt_lsb + } else { + prev_pic_order_cnt_msb + }; + + let pic_order_cnt = if header.field_pic == h264_reader::nal::slice::FieldPic::Frame + { + let top_field_order_cnt = pic_order_cnt_msb + pic_order_cnt_lsb; + + let bottom_field_order_cnt = top_field_order_cnt + delta_pic_order_cnt_bottom; + + top_field_order_cnt.min(bottom_field_order_cnt) + } else { + pic_order_cnt_msb + pic_order_cnt_lsb + }; + + self.prev_pic_order_cnt_msb = pic_order_cnt_msb; + self.prev_pic_order_cnt_lsb = pic_order_cnt_lsb; + + pic_order_cnt + } + + h264_reader::nal::sps::PicOrderCntType::TypeOne { .. } => { + Err(ReferenceManagementError::PicOrderCntTypeNotSupported(1))? + } + + h264_reader::nal::sps::PicOrderCntType::TypeTwo => match header.dec_ref_pic_marking { + None => 2 * header.frame_num as i32 - 1, + Some(DecRefPicMarking::Idr { .. }) | Some(DecRefPicMarking::SlidingWindow) => { + 2 * header.frame_num as i32 + } + Some(DecRefPicMarking::Adaptive(..)) => { + Err(ReferenceManagementError::AdaptiveMemCtlNotSupported)? 
+ } + }, + }; + + let pic_order_cnt = [pic_order_cnt; 2]; + + Ok(DecodeInformation { + reference_list, + header: header.clone(), + slice_indices, + rbsp_bytes, + sps_id: sps.id().id(), + pps_id: pps.pic_parameter_set_id.id(), + picture_info: PictureInfo { + non_existing: false, + used_for_long_term_reference: false, + PicOrderCnt: pic_order_cnt, + FrameNum: header.frame_num, + }, + }) + } + + fn initialize_short_term_reference_picture_list_for_frame( + &self, + header: &SliceHeader, + sps: &SeqParameterSet, + ) -> Result, ParserError> { + let mut short_term_reference_list = self + .pictures + .short_term + .iter() + .map(|reference| { + Ok(( + reference, + reference.decode_picture_numbers(header.frame_num.into(), sps)?, + )) + }) + .collect::, ParserError>>()?; + + short_term_reference_list.sort_by_key(|(_, numbers)| -numbers.PicNum); + + let short_term_reference_list = short_term_reference_list + .into_iter() + .map(|(reference, numbers)| ReferencePictureInfo { + id: reference.id, + picture_info: PictureInfo { + FrameNum: numbers.FrameNum as u16, + used_for_long_term_reference: false, + non_existing: false, + PicOrderCnt: reference.pic_order_cnt, + }, + }) + .collect::>(); + + Ok(short_term_reference_list) + } + + fn initialize_long_term_reference_picture_list_for_frame( + &self, + ) -> Result, ReferenceManagementError> { + if !self.pictures.long_term.is_empty() { + panic!("long-term references are not supported!"); + } + + Ok(Vec::new()) + } + + fn initialize_reference_picture_list_for_frame( + &self, + header: &SliceHeader, + sps: &SeqParameterSet, + pps: &PicParameterSet, + ) -> Result, ParserError> { + let num_ref_idx_l0_active = header + .num_ref_idx_active + .as_ref() + .map(|num| match num { + NumRefIdxActive::P { + num_ref_idx_l0_active_minus1, + } => Ok(*num_ref_idx_l0_active_minus1), + NumRefIdxActive::B { .. } => Err(ReferenceManagementError::BFramesNotSupported), + }) + .unwrap_or(Ok(pps.num_ref_idx_l0_default_active_minus1))? 
+ + 1; + + let short_term_reference_list = + self.initialize_short_term_reference_picture_list_for_frame(header, sps)?; + + let long_term_reference_list = + self.initialize_long_term_reference_picture_list_for_frame()?; + + let mut reference_list = short_term_reference_list + .into_iter() + .chain(long_term_reference_list) + .collect::>(); + + reference_list.truncate(num_ref_idx_l0_active as usize); + + Ok(reference_list) + } +} + +#[derive(Debug)] +struct ShortTermReferencePicture { + header: Arc, + id: ReferenceId, + pic_order_cnt: [i32; 2], +} + +impl ShortTermReferencePicture { + #[allow(non_snake_case)] + fn decode_picture_numbers( + &self, + current_frame_num: i64, + sps: &SeqParameterSet, + ) -> Result { + if self.header.field_pic != h264_reader::nal::slice::FieldPic::Frame { + return Err(ParserError::FieldsNotSupported); + } + + let MaxFrameNum = sps.max_frame_num(); + + let FrameNum = self.header.frame_num as i64; + + let FrameNumWrap = if FrameNum > current_frame_num { + FrameNum - MaxFrameNum + } else { + FrameNum + }; + + // this assumes we're dealing with a short-term reference frame + let PicNum = FrameNumWrap; + + Ok(ShortTermReferencePictureNumbers { + FrameNum, + FrameNumWrap, + PicNum, + }) + } +} + +#[derive(Debug)] +struct LongTermReferencePicture { + _header: Arc, + _id: ReferenceId, +} + +#[allow(non_snake_case)] +struct ShortTermReferencePictureNumbers { + FrameNum: i64, + + FrameNumWrap: i64, + + PicNum: i64, +} + +#[derive(Debug, Default)] +struct ReferencePictures { + long_term: Vec, + short_term: Vec, +} From d3605dc20fa9fdcea55676318cffd3bfa0e52d20 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Wed, 2 Oct 2024 11:54:11 +0200 Subject: [PATCH 06/13] Add a comment. 
--- vk-video/src/parser/au_splitter.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vk-video/src/parser/au_splitter.rs b/vk-video/src/parser/au_splitter.rs index 29a84d964..ad46ffabd 100644 --- a/vk-video/src/parser/au_splitter.rs +++ b/vk-video/src/parser/au_splitter.rs @@ -40,6 +40,9 @@ impl AUSplitter { } } +// The below code is taken from Membrane's AU Splitter in their h264 parser. The comments contain +// elixir versions of the functions below them. + // defguardp first_mb_in_slice_zero(a) // when a.first_mb_in_slice == 0 and // a.nal_unit_type in [1, 2, 5] From 748a75e41ce01d1b24f67a615da3bc2337256da4 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 13:03:30 +0200 Subject: [PATCH 07/13] Review suggestions --- Cargo.lock | 15 +- Cargo.toml | 1 + .../src/types/from_register_input.rs | 30 ++- compositor_api/src/types/register_input.rs | 3 - compositor_pipeline/Cargo.toml | 1 + integration_tests/examples/vulkan.rs | 211 +++++++++++++----- integration_tests/src/examples.rs | 2 +- vk-video/Cargo.toml | 2 + vk-video/build.rs | 13 ++ vk-video/examples/basic.rs | 16 +- vk-video/examples/wgpu.rs | 24 +- 11 files changed, 196 insertions(+), 122 deletions(-) create mode 100644 vk-video/build.rs diff --git a/Cargo.lock b/Cargo.lock index 2cfcc0e00..c9e9d1f48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -395,6 +395,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chromium_sys" version = "0.1.0" @@ -2044,7 +2050,7 @@ dependencies = [ "arrayvec", "bit-set", "bitflags 2.6.0", - "cfg_aliases", + "cfg_aliases 0.1.1", "codespan-reporting", "hexf-parse", "indexmap 2.0.1", @@ -3969,6 +3975,7 @@ name = "vk-video" version = 
"0.1.0" dependencies = [ "ash", + "cfg_aliases 0.2.1", "derivative", "h264-reader", "thiserror", @@ -4114,7 +4121,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d1c4ba43f80542cf63a0a6ed3134629ae73e8ab51e4b765a67f3aa062eb433" dependencies = [ "arrayvec", - "cfg_aliases", + "cfg_aliases 0.1.1", "document-features", "js-sys", "log", @@ -4141,7 +4148,7 @@ dependencies = [ "arrayvec", "bit-vec", "bitflags 2.6.0", - "cfg_aliases", + "cfg_aliases 0.1.1", "document-features", "indexmap 2.0.1", "log", @@ -4169,7 +4176,7 @@ dependencies = [ "bit-set", "bitflags 2.6.0", "block", - "cfg_aliases", + "cfg_aliases 0.1.1", "core-graphics-types", "d3d12", "glow", diff --git a/Cargo.toml b/Cargo.toml index bea6c7e76..c86786cf6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,6 +102,7 @@ http-body-util = "0.1.2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] shared_memory = { workspace = true } +# platforms that support vulkan are: windows and all non-apple unixes. 
emscripten is something for the web, where vulkan is not available either [target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] compositor_api = { workspace = true, features = ["vk-video"] } diff --git a/compositor_api/src/types/from_register_input.rs b/compositor_api/src/types/from_register_input.rs index e3d705a2b..54b092f3f 100644 --- a/compositor_api/src/types/from_register_input.rs +++ b/compositor_api/src/types/from_register_input.rs @@ -102,23 +102,19 @@ impl TryFrom for pipeline::RegisterInputOptions { } let rtp_stream = input::rtp::RtpStream { - video: video.as_ref().map(|video| Ok(input::rtp::InputVideoStream { - options: match video { - InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { - codec: pipeline::VideoCodec::H264, - decoder: pipeline::VideoDecoder::FFmpegH264, - }, - #[cfg(feature = "vk-video")] - InputRtpVideoOptions::VulkanVideo => decoder::VideoDecoderOptions { - decoder: pipeline::VideoDecoder::VulkanVideo, - codec: pipeline::VideoCodec::H264, - }, - #[cfg(not(feature = "vk-video"))] - InputRtpVideoOptions::VulkanVideo => return Err(TypeError::new( - "This Live Compositor binary was build without Vulkan Video support. Rebuilt it on a platform which supports Vulkan Video." 
- )), - } - })).transpose()?, + video: video + .as_ref() + .map(|video| { + Ok(input::rtp::InputVideoStream { + options: match video { + InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { + codec: pipeline::VideoCodec::H264, + decoder: pipeline::VideoDecoder::FFmpegH264, + }, + }, + }) + }) + .transpose()?, audio: audio.map(TryFrom::try_from).transpose()?, }; diff --git a/compositor_api/src/types/register_input.rs b/compositor_api/src/types/register_input.rs index 635386e61..e66f14fc4 100644 --- a/compositor_api/src/types/register_input.rs +++ b/compositor_api/src/types/register_input.rs @@ -126,7 +126,4 @@ pub enum InputRtpAudioOptions { pub enum InputRtpVideoOptions { #[serde(rename = "ffmpeg_h264")] FfmepgH264, - - #[serde(rename = "vulkan_video")] - VulkanVideo, } diff --git a/compositor_pipeline/Cargo.toml b/compositor_pipeline/Cargo.toml index 199f9e224..4a15b344c 100644 --- a/compositor_pipeline/Cargo.toml +++ b/compositor_pipeline/Cargo.toml @@ -34,5 +34,6 @@ glyphon = { workspace = true } [target.x86_64-unknown-linux-gnu.dependencies] decklink = { path = "../decklink", optional = true } +# platforms that support vulkan are: windows and all non-apple unixes. 
emscripten is something for the web, where vulkan is not available either [target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] vk-video = { path = "../vk-video/", optional = true } diff --git a/integration_tests/examples/vulkan.rs b/integration_tests/examples/vulkan.rs index 675985901..2fcc4b70a 100644 --- a/integration_tests/examples/vulkan.rs +++ b/integration_tests/examples/vulkan.rs @@ -1,10 +1,47 @@ use anyhow::Result; use compositor_api::types::Resolution; -use serde_json::json; -use std::time::Duration; +use compositor_pipeline::{ + pipeline::{ + decoder::VideoDecoderOptions, + encoder::{ + ffmpeg_h264::{EncoderPreset, Options as H264Options}, + VideoEncoderOptions, + }, + input::{ + rtp::{InputVideoStream, RtpReceiverOptions, RtpStream}, + InputOptions, + }, + output::{ + rtp::{RtpConnectionOptions, RtpSenderOptions}, + OutputOptions, OutputProtocolOptions, + }, + rtp::{RequestedPort, TransportProtocol}, + Options, OutputVideoOptions, PipelineOutputEndCondition, Port, RegisterInputOptions, + RegisterOutputOptions, VideoCodec, VideoDecoder, + }, + queue::QueueInputOptions, + Pipeline, +}; +use compositor_render::{ + error::ErrorStack, + scene::{ + Component, ComponentId, HorizontalAlign, InputStreamComponent, RGBAColor, TilesComponent, + VerticalAlign, + }, + InputId, OutputId, +}; +use live_compositor::{ + config::{read_config, LoggerConfig, LoggerFormat}, + logger::{self, FfmpegLogLevel}, +}; +use signal_hook::{consts, iterator::Signals}; +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; use integration_tests::{ - examples::{self, run_example, TestSample}, + examples::{download_all_assets, TestSample}, ffmpeg::{start_ffmpeg_receive, start_ffmpeg_send}, }; @@ -20,82 +57,120 @@ const OUTPUT_PORT: u16 = 8004; const VIDEOS: u16 = 6; fn main() { - run_example(client_code); + ffmpeg_next::format::network::init(); + logger::init_logger(LoggerConfig { + ffmpeg_logger_level: 
FfmpegLogLevel::Info, + format: LoggerFormat::Compact, + level: "info,wgpu_hal=warn,wgpu_core=warn".to_string(), + }); + + download_all_assets().unwrap(); + + client_code().unwrap(); } fn client_code() -> Result<()> { start_ffmpeg_receive(Some(OUTPUT_PORT), None)?; + let config = read_config(); + let (pipeline, event_loop) = Pipeline::new(Options { + queue_options: config.queue_options, + stream_fallback_timeout: config.stream_fallback_timeout, + web_renderer: config.web_renderer, + force_gpu: config.force_gpu, + download_root: config.download_root, + output_sample_rate: config.output_sample_rate, + wgpu_features: config.required_wgpu_features, + load_system_fonts: Some(true), + wgpu_ctx: None, + }) + .unwrap_or_else(|err| { + panic!( + "Failed to start compositor.\n{}", + ErrorStack::new(&err).into_string() + ) + }); + + let pipeline = Arc::new(Mutex::new(pipeline)); + let mut children = Vec::new(); for i in 1..VIDEOS + 1 { - let input_name = format!("input_{i}"); - - examples::post( - &format!("input/{input_name}/register"), - &json!({ - "type": "rtp_stream", - "port": INPUT_PORT + 2 + 2 * i, - "video": { - "decoder": "vulkan_video" - } + let input_id = InputId(format!("input_{i}").into()); + + let input_options = RegisterInputOptions { + input_options: InputOptions::Rtp(RtpReceiverOptions { + port: RequestedPort::Exact(INPUT_PORT + 2 + 2 * i), + transport_protocol: TransportProtocol::Udp, + stream: RtpStream { + video: Some(InputVideoStream { + options: VideoDecoderOptions { + codec: VideoCodec::H264, + decoder: VideoDecoder::VulkanVideo, + }, + }), + audio: None, + }, }), - )?; - - children.push(json!({ - "type": "input_stream", - "input_id": input_name, + queue_options: QueueInputOptions { + offset: Some(Duration::ZERO), + required: false, + buffer_duration: None, + }, + }; + + Pipeline::register_input(&pipeline, input_id.clone(), input_options).unwrap(); + + children.push(Component::InputStream(InputStreamComponent { + id: None, + input_id, })); } - let scene 
= json!({ - "type": "tiles", - "id": "tile", - "padding": 5, - "background_color_rgba": "#444444FF", - "children": children, - "transition": { - "duration_ms": 700, - "easing_function": { - "function_name": "cubic_bezier", - "points": [0.35, 0.22, 0.1, 0.8] - } + let output_options = RegisterOutputOptions { + output_options: OutputOptions { + output_protocol: OutputProtocolOptions::Rtp(RtpSenderOptions { + video: Some(VideoCodec::H264), + audio: None, + connection_options: RtpConnectionOptions::Udp { + port: Port(OUTPUT_PORT), + ip: IP.into(), + }, + }), + video: Some(VideoEncoderOptions::H264(H264Options { + preset: EncoderPreset::Ultrafast, + resolution: VIDEO_RESOLUTION.into(), + raw_options: Vec::new(), + })), + audio: None, }, - }); + video: Some(OutputVideoOptions { + initial: Component::Tiles(TilesComponent { + id: Some(ComponentId("tiles".into())), + padding: 5.0, + background_color: RGBAColor(0x44, 0x44, 0x44, 0xff), + children, + width: None, + height: None, + margin: 0.0, + transition: None, + vertical_align: VerticalAlign::Center, + horizontal_align: HorizontalAlign::Center, + tile_aspect_ratio: (16, 9), + }), - let shader_source = include_str!("./silly.wgsl"); - examples::post( - "shader/shader_example_1/register", - &json!({ - "source": shader_source, + end_condition: PipelineOutputEndCondition::Never, }), - )?; - - examples::post( - "output/output_1/register", - &json!({ - "type": "rtp_stream", - "port": OUTPUT_PORT, - "ip": IP, - "video": { - "resolution": { - "width": VIDEO_RESOLUTION.width, - "height": VIDEO_RESOLUTION.height, - }, - "encoder": { - "type": "ffmpeg_h264", - "preset": "ultrafast" - }, - "initial": { - "root": scene - } - } - }), - )?; + audio: None, + }; - std::thread::sleep(Duration::from_millis(500)); + pipeline + .lock() + .unwrap() + .register_output(OutputId("output_1".into()), output_options) + .unwrap(); - examples::post("start", &json!({}))?; + Pipeline::start(&pipeline); for i in 1..VIDEOS + 1 { start_ffmpeg_send( @@ -105,5 
+180,17 @@ fn client_code() -> Result<()> { TestSample::BigBuckBunny, )?; } + + let event_loop_fallback = || { + let mut signals = Signals::new([consts::SIGINT]).unwrap(); + signals.forever().next(); + }; + if let Err(err) = event_loop.run_with_fallback(&event_loop_fallback) { + panic!( + "Failed to start event loop.\n{}", + ErrorStack::new(&err).into_string() + ) + } + Ok(()) } diff --git a/integration_tests/src/examples.rs b/integration_tests/src/examples.rs index 92db7929a..9a3f08d4a 100644 --- a/integration_tests/src/examples.rs +++ b/integration_tests/src/examples.rs @@ -200,7 +200,7 @@ struct AssetData { path: PathBuf, } -fn download_all_assets() -> Result<()> { +pub fn download_all_assets() -> Result<()> { let assets = [AssetData { url: String::from("https://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4"), path: examples_root_dir().join("examples/assets/BigBuckBunny.mp4"), diff --git a/vk-video/Cargo.toml b/vk-video/Cargo.toml index 31b60e970..becd44adf 100644 --- a/vk-video/Cargo.toml +++ b/vk-video/Cargo.toml @@ -21,3 +21,5 @@ wgpu = "22.1.0" [dev-dependencies] tracing-subscriber = "0.3.18" +[build-dependencies] +cfg_aliases = "0.2.1" diff --git a/vk-video/build.rs b/vk-video/build.rs new file mode 100644 index 000000000..6fc1495c2 --- /dev/null +++ b/vk-video/build.rs @@ -0,0 +1,13 @@ +fn main() { + cfg_aliases::cfg_aliases! 
{ + vulkan: { + any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) + ) + }, + } +} diff --git a/vk-video/examples/basic.rs b/vk-video/examples/basic.rs index d57ab696c..388078fb3 100644 --- a/vk-video/examples/basic.rs +++ b/vk-video/examples/basic.rs @@ -1,10 +1,4 @@ -#[cfg(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -))] +#[cfg(vulkan)] fn main() { use std::io::Write; @@ -45,13 +39,7 @@ fn main() { } } -#[cfg(not(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -)))] +#[cfg(not(vulkan))] fn main() { println!( "This crate doesn't work on your operating system, because it does not support vulkan" diff --git a/vk-video/examples/wgpu.rs b/vk-video/examples/wgpu.rs index c535b0744..36f52be03 100644 --- a/vk-video/examples/wgpu.rs +++ b/vk-video/examples/wgpu.rs @@ -1,10 +1,4 @@ -#[cfg(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -))] +#[cfg(vulkan)] fn main() { use std::io::Write; @@ -47,26 +41,14 @@ fn main() { } } -#[cfg(not(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -)))] +#[cfg(not(vulkan))] fn main() { println!( "This crate doesn't work on your operating system, because it does not support vulkan" ); } -#[cfg(any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) -))] +#[cfg(vulkan)] fn download_wgpu_texture( device: &wgpu::Device, queue: &wgpu::Queue, From 54dedce8da5d9bf73af8aed3d144a2da1e45792d Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 16:38:36 +0200 Subject: [PATCH 08/13] Move GraphicsContext to its own module --- compositor_pipeline/src/pipeline.rs | 70 +----------------- .../src/pipeline/graphics_context.rs | 72 +++++++++++++++++++ 2 files changed, 75 
insertions(+), 67 deletions(-) create mode 100644 compositor_pipeline/src/pipeline/graphics_context.rs diff --git a/compositor_pipeline/src/pipeline.rs b/compositor_pipeline/src/pipeline.rs index d3e4e58d8..6e534d124 100644 --- a/compositor_pipeline/src/pipeline.rs +++ b/compositor_pipeline/src/pipeline.rs @@ -48,6 +48,7 @@ use self::input::InputOptions; pub mod decoder; pub mod encoder; +mod graphics_context; pub mod input; pub mod output; mod pipeline_input; @@ -63,9 +64,10 @@ pub use self::types::{ AudioCodec, EncodedChunk, EncodedChunkKind, EncoderOutputEvent, RawDataReceiver, VideoCodec, VideoDecoder, }; -use compositor_render::{create_wgpu_ctx, error::InitRendererEngineError}; pub use pipeline_output::PipelineOutputEndCondition; +pub use graphics_context::GraphicsContext; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Port(pub u16); @@ -111,72 +113,6 @@ pub struct Pipeline { is_started: bool, } -pub struct GraphicsContext { - pub device: Arc, - pub queue: Arc, - - #[cfg(feature = "vk-video")] - pub vulkan_ctx: Option>, -} - -impl GraphicsContext { - #[cfg(feature = "vk-video")] - pub fn new( - force_gpu: bool, - features: wgpu::Features, - limits: wgpu::Limits, - ) -> Result { - use compositor_render::{required_wgpu_features, set_required_wgpu_limits}; - - let vulkan_features = - features | required_wgpu_features() | wgpu::Features::TEXTURE_FORMAT_NV12; - - let limits = set_required_wgpu_limits(limits); - - match vk_video::VulkanCtx::new(vulkan_features, limits.clone()) { - Ok(ctx) => Ok(GraphicsContext { - device: ctx.wgpu_ctx.device.clone(), - queue: ctx.wgpu_ctx.queue.clone(), - vulkan_ctx: Some(ctx.into()), - }), - - Err(err) => { - info!("Cannot initialize vulkan video decoding context. Reason: {err}. 
Initializing without vulkan video support."); - - let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) - .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; - - Ok(GraphicsContext { - device, - queue, - vulkan_ctx: None, - }) - } - } - } - - #[cfg(not(feature = "vk-video"))] - pub fn new( - force_gpu: bool, - features: wgpu::Features, - limits: wgpu::Limits, - ) -> Result { - let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) - .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; - - Ok(GraphicsContext { device, queue }) - } -} - -impl std::fmt::Debug for GraphicsContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PreinitializedContext") - .field("device", &self.device) - .field("queue", &self.queue) - .finish() - } -} - #[derive(Debug)] pub struct Options { pub queue_options: QueueOptions, diff --git a/compositor_pipeline/src/pipeline/graphics_context.rs b/compositor_pipeline/src/pipeline/graphics_context.rs new file mode 100644 index 000000000..1724a469e --- /dev/null +++ b/compositor_pipeline/src/pipeline/graphics_context.rs @@ -0,0 +1,72 @@ +use crate::error::InitPipelineError; +use std::sync::Arc; + +pub struct GraphicsContext { + pub device: Arc, + pub queue: Arc, + + #[cfg(feature = "vk-video")] + pub vulkan_ctx: Option>, +} + +impl GraphicsContext { + #[cfg(feature = "vk-video")] + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + use compositor_render::{ + create_wgpu_ctx, error::InitRendererEngineError, required_wgpu_features, + set_required_wgpu_limits, + }; + use tracing::info; + + let vulkan_features = + features | required_wgpu_features() | wgpu::Features::TEXTURE_FORMAT_NV12; + + let limits = set_required_wgpu_limits(limits); + + match vk_video::VulkanCtx::new(vulkan_features, limits.clone()) { + Ok(ctx) => Ok(GraphicsContext { + device: ctx.wgpu_ctx.device.clone(), + queue: ctx.wgpu_ctx.queue.clone(), + vulkan_ctx: 
Some(ctx.into()), + }), + + Err(err) => { + info!("Cannot initialize vulkan video decoding context. Reason: {err}. Initializing without vulkan video support."); + + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { + device, + queue, + vulkan_ctx: None, + }) + } + } + } + + #[cfg(not(feature = "vk-video"))] + pub fn new( + force_gpu: bool, + features: wgpu::Features, + limits: wgpu::Limits, + ) -> Result { + let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) + .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; + + Ok(GraphicsContext { device, queue }) + } +} + +impl std::fmt::Debug for GraphicsContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PreinitializedContext") + .field("device", &self.device) + .field("queue", &self.queue) + .finish() + } +} From ffc70010cbd335db893fc208bb6fc3b748db81ba Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 16:43:24 +0200 Subject: [PATCH 09/13] Small review suggestions --- Cargo.toml | 2 +- compositor_pipeline/Cargo.toml | 2 +- .../src/pipeline/graphics_context.rs | 4 ++-- vk-video/build.rs | 18 +++++++++--------- vk-video/src/vulkan_decoder.rs | 10 +++++----- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c86786cf6..564977e5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,7 +102,7 @@ http-body-util = "0.1.2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] shared_memory = { workspace = true } -# platforms that support vulkan are: windows and all non-apple unixes. emscripten is something for the web, where vulkan is not available either +# platforms that support vulkan are: windows and all non-apple unixes. 
emscripten is a web-based platform, where vulkan is not available either [target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] compositor_api = { workspace = true, features = ["vk-video"] } diff --git a/compositor_pipeline/Cargo.toml b/compositor_pipeline/Cargo.toml index 4a15b344c..c7593a088 100644 --- a/compositor_pipeline/Cargo.toml +++ b/compositor_pipeline/Cargo.toml @@ -34,6 +34,6 @@ glyphon = { workspace = true } [target.x86_64-unknown-linux-gnu.dependencies] decklink = { path = "../decklink", optional = true } -# platforms that support vulkan are: windows and all non-apple unixes. emscripten is something for the web, where vulkan is not available either +# platforms that support vulkan are: windows and all non-apple unixes. emscripten is a web-based platform, where vulkan is not available either [target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_os = "ios"), not(target_os = "macos"))))'.dependencies] vk-video = { path = "../vk-video/", optional = true } diff --git a/compositor_pipeline/src/pipeline/graphics_context.rs b/compositor_pipeline/src/pipeline/graphics_context.rs index 1724a469e..fd2fae7bf 100644 --- a/compositor_pipeline/src/pipeline/graphics_context.rs +++ b/compositor_pipeline/src/pipeline/graphics_context.rs @@ -20,7 +20,7 @@ impl GraphicsContext { create_wgpu_ctx, error::InitRendererEngineError, required_wgpu_features, set_required_wgpu_limits, }; - use tracing::info; + use tracing::warn; let vulkan_features = features | required_wgpu_features() | wgpu::Features::TEXTURE_FORMAT_NV12; @@ -35,7 +35,7 @@ impl GraphicsContext { }), Err(err) => { - info!("Cannot initialize vulkan video decoding context. Reason: {err}. Initializing without vulkan video support."); + warn!("Cannot initialize vulkan video decoding context. Reason: {err}. 
Initializing without vulkan video support."); let (device, queue) = create_wgpu_ctx(force_gpu, features, limits) .map_err(InitRendererEngineError::FailedToInitWgpuCtx)?; diff --git a/vk-video/build.rs b/vk-video/build.rs index 6fc1495c2..299b2f9ff 100644 --- a/vk-video/build.rs +++ b/vk-video/build.rs @@ -1,13 +1,13 @@ fn main() { cfg_aliases::cfg_aliases! { - vulkan: { - any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) - ) + vulkan: { + any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) ) - }, - } + ) + }, +} } diff --git a/vk-video/src/vulkan_decoder.rs b/vk-video/src/vulkan_decoder.rs index c2b10aa09..f2152e472 100644 --- a/vk-video/src/vulkan_decoder.rs +++ b/vk-video/src/vulkan_decoder.rs @@ -75,7 +75,7 @@ pub enum VulkanDecoderError { NoFreeSlotsInDpb, #[error("A picture which is not in the decoded pictures buffer was requested as a reference picture")] - NonExistantReferenceRequested, + NonExistentReferenceRequested, #[error("A vulkan decode operation failed with code {0:?}")] DecodeOperationFailed(vk::QueryResultStatusKHR), @@ -207,7 +207,7 @@ impl VulkanDecoder<'_> { .video_session_resources .as_mut() .map(|s| s.free_reference_picture(dpb_idx)), - None => return Err(VulkanDecoderError::NonExistantReferenceRequested), + None => return Err(VulkanDecoderError::NonExistentReferenceRequested), }; } } @@ -815,14 +815,14 @@ impl VulkanDecoder<'_> { { let i = *reference_id_to_dpb_slot_index .get(&ref_info.id) - .ok_or(VulkanDecoderError::NonExistantReferenceRequested)?; + .ok_or(VulkanDecoderError::NonExistentReferenceRequested)?; let reference = *reference_slots .get(i) - .ok_or(VulkanDecoderError::NonExistantReferenceRequested)?; + .ok_or(VulkanDecoderError::NonExistentReferenceRequested)?; if reference.slot_index < 0 || reference.p_picture_resource.is_null() { - return Err(VulkanDecoderError::NonExistantReferenceRequested); + return 
Err(VulkanDecoderError::NonExistentReferenceRequested); } let reference = reference.push_next(dpb_slot_info); From fefb847826ec9be70af1aad958ceb66486920bfd Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 17:05:49 +0200 Subject: [PATCH 10/13] Remove the possibility to configure a codec and a decoder for inputs. --- .../src/types/from_register_input.rs | 1 - compositor_pipeline/src/pipeline/decoder.rs | 2 -- .../src/pipeline/decoder/video.rs | 20 +++++++++---------- .../src/pipeline/input/mp4/mp4_file_reader.rs | 1 - .../src/pipeline/input/rtp/depayloader.rs | 5 +++-- compositor_pipeline/src/pipeline/types.rs | 2 +- integration_tests/examples/vulkan.rs | 3 +-- vk-video/build.rs | 18 ++++++++--------- 8 files changed, 23 insertions(+), 29 deletions(-) diff --git a/compositor_api/src/types/from_register_input.rs b/compositor_api/src/types/from_register_input.rs index 54b092f3f..fa2e0ab51 100644 --- a/compositor_api/src/types/from_register_input.rs +++ b/compositor_api/src/types/from_register_input.rs @@ -108,7 +108,6 @@ impl TryFrom for pipeline::RegisterInputOptions { Ok(input::rtp::InputVideoStream { options: match video { InputRtpVideoOptions::FfmepgH264 => decoder::VideoDecoderOptions { - codec: pipeline::VideoCodec::H264, decoder: pipeline::VideoDecoder::FFmpegH264, }, }, diff --git a/compositor_pipeline/src/pipeline/decoder.rs b/compositor_pipeline/src/pipeline/decoder.rs index 8dd84cd09..6a6c3538f 100644 --- a/compositor_pipeline/src/pipeline/decoder.rs +++ b/compositor_pipeline/src/pipeline/decoder.rs @@ -1,6 +1,5 @@ use crate::{audio_mixer::InputSamples, queue::PipelineEvent}; -use super::types::VideoCodec; use super::types::VideoDecoder; use bytes::Bytes; @@ -19,7 +18,6 @@ pub(super) use video::start_video_decoder_thread; #[derive(Debug, Clone, PartialEq, Eq)] pub struct VideoDecoderOptions { pub decoder: VideoDecoder, - pub codec: VideoCodec, } #[derive(Debug, Clone, PartialEq, Eq)] diff --git 
a/compositor_pipeline/src/pipeline/decoder/video.rs b/compositor_pipeline/src/pipeline/decoder/video.rs index 5129ee06d..4dcc79669 100644 --- a/compositor_pipeline/src/pipeline/decoder/video.rs +++ b/compositor_pipeline/src/pipeline/decoder/video.rs @@ -3,7 +3,7 @@ use crossbeam_channel::{Receiver, Sender}; use crate::{ error::InputInitError, - pipeline::{types::EncodedChunk, PipelineCtx, VideoCodec, VideoDecoder}, + pipeline::{types::EncodedChunk, PipelineCtx, VideoDecoder}, queue::PipelineEvent, }; @@ -20,8 +20,8 @@ pub fn start_video_decoder_thread( frame_sender: Sender>, input_id: InputId, ) -> Result<(), InputInitError> { - match (options.codec, options.decoder) { - (VideoCodec::H264, VideoDecoder::FFmpegH264) => ffmpeg_h264::start_ffmpeg_decoder_thread( + match options.decoder { + VideoDecoder::FFmpegH264 => ffmpeg_h264::start_ffmpeg_decoder_thread( pipeline_ctx, chunks_receiver, frame_sender, @@ -29,13 +29,11 @@ pub fn start_video_decoder_thread( ), #[cfg(feature = "vk-video")] - (VideoCodec::H264, VideoDecoder::VulkanVideo) => { - vulkan_video::start_vulkan_video_decoder_thread( - pipeline_ctx, - chunks_receiver, - frame_sender, - input_id, - ) - } + VideoDecoder::VulkanVideoH264 => vulkan_video::start_vulkan_video_decoder_thread( + pipeline_ctx, + chunks_receiver, + frame_sender, + input_id, + ), } } diff --git a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs index 3811ea005..205c76d6d 100644 --- a/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs +++ b/compositor_pipeline/src/pipeline/input/mp4/mp4_file_reader.rs @@ -234,7 +234,6 @@ impl Mp4FileReader { }; let decoder_options = VideoDecoderOptions { - codec: VideoCodec::H264, decoder: VideoDecoder::FFmpegH264, }; diff --git a/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs b/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs index 4c922cbd3..7f5518e10 100644 --- 
a/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs +++ b/compositor_pipeline/src/pipeline/input/rtp/depayloader.rs @@ -11,6 +11,7 @@ use crate::pipeline::{ decoder::{self, AacDecoderOptions}, rtp::{AUDIO_PAYLOAD_TYPE, VIDEO_PAYLOAD_TYPE}, types::{AudioCodec, EncodedChunk, EncodedChunkKind, VideoCodec}, + VideoDecoder, }; use self::aac::AacDepayloaderNewError; @@ -81,8 +82,8 @@ pub enum VideoDepayloader { impl VideoDepayloader { pub fn new(options: &decoder::VideoDecoderOptions) -> Self { - match options.codec { - VideoCodec::H264 => VideoDepayloader::H264 { + match options.decoder { + VideoDecoder::FFmpegH264 | VideoDecoder::VulkanVideoH264 => VideoDepayloader::H264 { depayloader: H264Packet::default(), buffer: vec![], rollover_state: RolloverState::default(), diff --git a/compositor_pipeline/src/pipeline/types.rs b/compositor_pipeline/src/pipeline/types.rs index 680097626..a2f26c613 100644 --- a/compositor_pipeline/src/pipeline/types.rs +++ b/compositor_pipeline/src/pipeline/types.rs @@ -59,7 +59,7 @@ pub struct RawDataSender { pub enum VideoDecoder { FFmpegH264, #[cfg(feature = "vk-video")] - VulkanVideo, + VulkanVideoH264, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/integration_tests/examples/vulkan.rs b/integration_tests/examples/vulkan.rs index 2fcc4b70a..7409ac83f 100644 --- a/integration_tests/examples/vulkan.rs +++ b/integration_tests/examples/vulkan.rs @@ -105,8 +105,7 @@ fn client_code() -> Result<()> { stream: RtpStream { video: Some(InputVideoStream { options: VideoDecoderOptions { - codec: VideoCodec::H264, - decoder: VideoDecoder::VulkanVideo, + decoder: VideoDecoder::VulkanVideoH264, }, }), audio: None, diff --git a/vk-video/build.rs b/vk-video/build.rs index 299b2f9ff..6fc1495c2 100644 --- a/vk-video/build.rs +++ b/vk-video/build.rs @@ -1,13 +1,13 @@ fn main() { cfg_aliases::cfg_aliases! 
{ - vulkan: { - any( - windows, - all( - unix, - not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + vulkan: { + any( + windows, + all( + unix, + not(any(target_os = "macos", target_os = "ios", target_os = "emscripten")) + ) ) - ) - }, -} + }, + } } From e3aba56c979c126e42595b506444090f6a26fd5a Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Mon, 7 Oct 2024 17:25:32 +0200 Subject: [PATCH 11/13] WIP for manual initialization example. --- .../manual_graphics_initialization.rs | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 integration_tests/examples/manual_graphics_initialization.rs diff --git a/integration_tests/examples/manual_graphics_initialization.rs b/integration_tests/examples/manual_graphics_initialization.rs new file mode 100644 index 000000000..d26ca05ee --- /dev/null +++ b/integration_tests/examples/manual_graphics_initialization.rs @@ -0,0 +1,51 @@ +use compositor_pipeline::{ + pipeline::{GraphicsContext, Options}, + Pipeline, +}; +use live_compositor::config::read_config; + +// This example illustrates how to initialize a GraphicsContext separately to get access to a wgpu +// instance, adapter, queue and device. 
+ +#[cfg(target_os = "linux")] +fn main() { + let graphics_context = + GraphicsContext::new(false, wgpu::Features::default(), wgpu::Limits::default()).unwrap(); + + let _device = graphics_context.device.clone(); + let _queue = graphics_context.queue.clone(); + + let _adapter = graphics_context + .vulkan_ctx + .as_ref() + .unwrap() + .wgpu_ctx + .adapter + .clone(); + + let _instance = graphics_context + .vulkan_ctx + .as_ref() + .unwrap() + .wgpu_ctx + .instance + .clone(); + + let config = read_config(); + + let _pipeline = Pipeline::new(Options { + wgpu_ctx: Some(graphics_context), + queue_options: config.queue_options, + stream_fallback_timeout: config.stream_fallback_timeout, + web_renderer: config.web_renderer, + force_gpu: config.force_gpu, + download_root: config.download_root, + output_sample_rate: config.output_sample_rate, + wgpu_features: config.required_wgpu_features, + load_system_fonts: Some(true), + }) + .unwrap(); +} + +#[cfg(target_os = "macos")] +fn main() {} From d56603a062c015ea7f1ae5a20231adf708404027 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek Date: Fri, 11 Oct 2024 10:40:58 +0200 Subject: [PATCH 12/13] Impl `Debug` for `VulkanCtx` --- compositor_pipeline/src/pipeline/graphics_context.rs | 10 +--------- vk-video/src/vulkan_decoder/vulkan_ctx.rs | 6 ++++++ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/compositor_pipeline/src/pipeline/graphics_context.rs b/compositor_pipeline/src/pipeline/graphics_context.rs index fd2fae7bf..206d5d55b 100644 --- a/compositor_pipeline/src/pipeline/graphics_context.rs +++ b/compositor_pipeline/src/pipeline/graphics_context.rs @@ -1,6 +1,7 @@ use crate::error::InitPipelineError; use std::sync::Arc; +#[derive(Debug)] pub struct GraphicsContext { pub device: Arc, pub queue: Arc, @@ -61,12 +62,3 @@ impl GraphicsContext { Ok(GraphicsContext { device, queue }) } } - -impl std::fmt::Debug for GraphicsContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 
f.debug_struct("PreinitializedContext") - .field("device", &self.device) - .field("queue", &self.queue) - .finish() - } -} diff --git a/vk-video/src/vulkan_decoder/vulkan_ctx.rs b/vk-video/src/vulkan_decoder/vulkan_ctx.rs index 03683237f..953b29ea5 100644 --- a/vk-video/src/vulkan_decoder/vulkan_ctx.rs +++ b/vk-video/src/vulkan_decoder/vulkan_ctx.rs @@ -373,6 +373,12 @@ impl VulkanCtx { } } +impl std::fmt::Debug for VulkanCtx { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("VulkanCtx").finish() + } +} + struct ChosenDevice<'a> { physical_device: vk::PhysicalDevice, queue_indices: QueueIndices<'a>, From e67305c034195901e34db8040b3b17f002236b62 Mon Sep 17 00:00:00 2001 From: Jerzy Wilczek <72213407+jerzywilczek@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:43:08 +0200 Subject: [PATCH 13/13] Reword error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Wojciech BarczyƄski <104033489+WojciechBarczynski@users.noreply.github.com> --- vk-video/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vk-video/src/lib.rs b/vk-video/src/lib.rs index 1b0fff581..5826803b8 100644 --- a/vk-video/src/lib.rs +++ b/vk-video/src/lib.rs @@ -17,10 +17,10 @@ pub struct Decoder<'a> { #[derive(Debug, thiserror::Error)] pub enum DecoderError { - #[error("Error originating in the decoder: {0}")] + #[error("Decoder error: {0}")] VulkanDecoderError(#[from] VulkanDecoderError), - #[error("Error originating in the h264 parser: {0}")] + #[error("H264 parser error: {0}")] ParserError(#[from] ParserError), }