From 4d64e09a971f37db85c39a41a02c71e144047180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonardo=20Gibrowski=20Fa=C3=A9?= Date: Sun, 25 Feb 2024 15:43:48 -0300 Subject: [PATCH 1/5] only swap R and B channels when necessary --- daemon/src/bump_pool.rs | 11 ++---- daemon/src/main.rs | 88 +++++++++++++++++++++++++++++++++++++---- src/imgproc.rs | 61 ++++++++++++++-------------- src/main.rs | 82 ++++++++++++++++++++++---------------- utils/src/ipc.rs | 52 ++++++++++++++++++++++++ 5 files changed, 217 insertions(+), 77 deletions(-) diff --git a/daemon/src/bump_pool.rs b/daemon/src/bump_pool.rs index 09adb8f9..21696650 100644 --- a/daemon/src/bump_pool.rs +++ b/daemon/src/bump_pool.rs @@ -4,10 +4,7 @@ use std::sync::{ }; use smithay_client_toolkit::shm::{raw::RawPool, Shm}; -use wayland_client::{ - protocol::{wl_buffer::WlBuffer, wl_shm}, - QueueHandle, -}; +use wayland_client::{protocol::wl_buffer::WlBuffer, QueueHandle}; use crate::Daemon; @@ -53,7 +50,7 @@ impl BumpPool { width, height, width * 4, - wl_shm::Format::Xrgb8888, + crate::wl_shm_format(), released.clone(), qh, ), @@ -99,7 +96,7 @@ impl BumpPool { self.width, self.height, self.width * 4, - wl_shm::Format::Xrgb8888, + crate::wl_shm_format(), released.clone(), qh, ), @@ -165,7 +162,7 @@ impl BumpPool { width, height, width * 4, - wl_shm::Format::Xrgb8888, + crate::wl_shm_format(), released.clone(), qh, ), diff --git a/daemon/src/main.rs b/daemon/src/main.rs index f5672b01..e39c6351 100644 --- a/daemon/src/main.rs +++ b/daemon/src/main.rs @@ -29,7 +29,8 @@ use std::{ use smithay_client_toolkit::{ compositor::{CompositorHandler, CompositorState, Region}, - delegate_compositor, delegate_layer, delegate_output, delegate_registry, delegate_shm, + delegate_compositor, delegate_layer, delegate_output, delegate_registry, + globals::GlobalData, output::{OutputHandler, OutputState}, registry::{ProvidesRegistryState, RegistryState}, registry_handlers, @@ -42,11 +43,16 @@ use smithay_client_toolkit::{ use wayland_client::{ globals::{registry_queue_init, GlobalList}, - protocol::{wl_buffer::WlBuffer, wl_output, wl_surface}, + protocol::{ + wl_buffer::WlBuffer, + wl_output, + wl_shm::{self, WlShm}, + wl_surface, + }, Connection, Dispatch, QueueHandle, }; -use utils::ipc::{get_socket_path, Answer, ArchivedRequest, BgInfo, Request}; +use utils::ipc::{get_socket_path, Answer, ArchivedRequest, BgInfo, PixelFormat, Request}; use animations::Animator; @@ -64,8 +70,28 @@ fn should_daemon_exit() -> bool { } static POLL_WAKER: OnceLock = OnceLock::new(); +static WL_SHM_FORMAT: OnceLock = OnceLock::new(); +static PIXEL_FORMAT: OnceLock = OnceLock::new(); + +#[inline] +pub fn wl_shm_format() -> wl_shm::Format { + debug_assert!(WL_SHM_FORMAT.get().is_some()); + // SAFETY: this is safe because we initialize it in Daemon::new, before we ever call this in + // the wallpaper structs + *unsafe { WL_SHM_FORMAT.get().unwrap_unchecked() } +} + +#[inline] +pub fn pixel_format() -> PixelFormat { + debug_assert!(PIXEL_FORMAT.get().is_some()); + // SAFETY: this is safe because we initialize it in Daemon::new, before we ever call this in + // the wallpaper structs + *unsafe { PIXEL_FORMAT.get().unwrap_unchecked() } +} +#[inline] pub fn wake_poll() { + debug_assert!(POLL_WAKER.get().is_some()); if let Err(e) = nix::unistd::write(*unsafe { POLL_WAKER.get().unwrap_unchecked() }, &[0]) { error!("failed to write to pipe file descriptor: {e}"); } @@ -234,6 +260,8 @@ struct Daemon { registry_state: RegistryState, output_state: OutputState, shm: Shm, + pixel_format: PixelFormat, + shm_format: wl_shm::Format, // swww stuff wallpapers: Vec>, @@ -252,14 +280,19 @@ impl Daemon { let shm = Shm::bind(globals, qh).expect("wl_shm is not available"); + let pixel_format = PixelFormat::Xrgb; + let shm_format = wl_shm::Format::Xrgb8888; + Self { + layer_shell, // Outputs may be hotplugged at runtime, therefore we need to setup a registry state to // listen for Outputs. registry_state: RegistryState::new(globals), output_state: OutputState::new(globals, qh), compositor_state, shm, - layer_shell, + pixel_format, + shm_format, wallpapers: Vec::new(), animator: Animator::new(), @@ -350,6 +383,7 @@ impl Daemon { .unwrap_or((0, 0)), scale_factor: info.scale_factor, img: wallpaper.get_img_info(), + pixel_format: pixel_format(), }); } } @@ -435,6 +469,11 @@ impl OutputHandler for Daemon { qh: &QueueHandle, output: wl_output::WlOutput, ) { + if PIXEL_FORMAT.get().is_none() { + assert!(PIXEL_FORMAT.set(self.pixel_format).is_ok()); + assert!(WL_SHM_FORMAT.set(self.shm_format).is_ok()); + log::info!("Selected wl_shm format: {:?}", self.shm_format); + } if let Some(output_info) = self.output_state.info(&output) { let surface = self.compositor_state.create_surface(qh); @@ -526,6 +565,44 @@ impl OutputHandler for Daemon { } } +impl Dispatch for Daemon { + fn event( + state: &mut Self, + _proxy: &WlShm, + event: ::Event, + _data: &GlobalData, + _conn: &Connection, + _qhandle: &QueueHandle, + ) { + match event { + wl_shm::Event::Format { format: wenum } => { + match wenum { + wayland_client::WEnum::Value(format) => { + //if format == wl_shm::Format::Bgr888 { + // shm_format = wl_shm::Format::Bgr888; + // pixel_format = PixelFormat::Brg; + // break; + //} else if format == wl_shm::Format::Rgb888 { + // shm_format = wl_shm::Format::Rgb888; + // pixel_format = PixelFormat::Rgb; + /*} else*/ + if format == wl_shm::Format::Xbgr8888 + && state.pixel_format == PixelFormat::Xrgb + { + state.shm_format = wl_shm::Format::Xbgr8888; + state.pixel_format = PixelFormat::Xbgr; + } + } + wayland_client::WEnum::Unknown(v) => { + error!("Received unknown shm format number {v} from server") + } + } + } + e => warn!("Unhandled WlShm event: {e:?}"), + } + } +} + impl ShmHandler for Daemon { fn shm_state(&mut self) -> &mut Shm { &mut self.shm @@ -578,10 +655,7 @@ impl Dispatch> for Daemon { delegate_compositor!(Daemon); delegate_output!(Daemon); -delegate_shm!(Daemon); - delegate_layer!(Daemon); - delegate_registry!(Daemon); impl ProvidesRegistryState for Daemon { diff --git a/src/imgproc.rs b/src/imgproc.rs index 33ee7a33..0d471b07 100644 --- a/src/imgproc.rs +++ b/src/imgproc.rs @@ -14,7 +14,7 @@ use std::{ use utils::{ compression::{BitPack, Compressor}, - ipc::{self, Coord, Position}, + ipc::{self, ArchivedPixelFormat, Coord, Position}, }; use crate::cli::ResizeStrategy; @@ -147,6 +147,7 @@ pub fn frame_to_rgb(frame: image::Frame) -> RgbImage { pub fn compress_frames( mut frames: Frames, dim: (u32, u32), + format: ArchivedPixelFormat, filter: FilterType, resize: ResizeStrategy, color: &[u8; 3], @@ -159,9 +160,9 @@ pub fn compress_frames( let first_duration = first.delay().numer_denom_ms(); let mut first_duration = Duration::from_millis((first_duration.0 / first_duration.1).into()); let first_img = match resize { - ResizeStrategy::No => img_pad(frame_to_rgb(first), dim, color)?, - ResizeStrategy::Crop => img_resize_crop(frame_to_rgb(first), dim, filter)?, - ResizeStrategy::Fit => img_resize_fit(frame_to_rgb(first), dim, filter, color)?, + ResizeStrategy::No => img_pad(frame_to_rgb(first), dim, format, color)?, + ResizeStrategy::Crop => img_resize_crop(frame_to_rgb(first), dim, format, filter)?, + ResizeStrategy::Fit => img_resize_fit(frame_to_rgb(first), dim, format, filter, color)?, }; let mut canvas: Option> = None; @@ -170,9 +171,9 @@ pub fn compress_frames( let duration = Duration::from_millis((dur_num / dur_div).into()); let img = match resize { - ResizeStrategy::No => img_pad(frame_to_rgb(frame), dim, color)?, - ResizeStrategy::Crop => img_resize_crop(frame_to_rgb(frame), dim, filter)?, - ResizeStrategy::Fit => img_resize_fit(frame_to_rgb(frame), dim, filter, color)?, + ResizeStrategy::No => img_pad(frame_to_rgb(frame), dim, format, color)?, + ResizeStrategy::Crop => img_resize_crop(frame_to_rgb(frame), dim, format, filter)?, + ResizeStrategy::Fit => img_resize_fit(frame_to_rgb(frame), dim, format, filter, color)?, }; if let Some(canvas) = canvas.as_ref() { @@ -219,8 +220,13 @@ pub fn make_filter(filter: &cli::Filter) -> fast_image_resize::FilterType { pub fn img_pad( mut img: RgbImage, dimensions: (u32, u32), + format: ArchivedPixelFormat, color: &[u8; 3], ) -> Result, String> { + let mut color = color.to_owned(); + if format.must_swap_r_and_b_channels() { + color.swap(0, 2); + } let (padded_w, padded_h) = dimensions; let (padded_w, padded_h) = (padded_w as usize, padded_h as usize); let mut padded = Vec::with_capacity(padded_h * padded_w * 3); @@ -239,9 +245,7 @@ pub fn img_pad( let raw_img = img.into_vec(); for _ in 0..(((padded_h - img_h) / 2) * padded_w) { - padded.push(color[2]); - padded.push(color[1]); - padded.push(color[0]); + padded.extend(color); } // Calculate left and right border widths. `u32::div` rounds toward 0, so, if `img_w` is odd, @@ -251,27 +255,23 @@ pub fn img_pad( for row in 0..img_h { for _ in 0..left_border_w { - padded.push(color[2]); - padded.push(color[1]); - padded.push(color[0]); + padded.extend(color); } for pixel in raw_img[(row * img_w * 3)..((row + 1) * img_w * 3)].chunks_exact(3) { - padded.push(pixel[2]); - padded.push(pixel[1]); - padded.push(pixel[0]); + if format.must_swap_r_and_b_channels() { + padded.extend(pixel.iter().rev()); + } else { + padded.extend(pixel); + } } for _ in 0..right_border_w { - padded.push(color[2]); - padded.push(color[1]); - padded.push(color[0]); + padded.extend(color); } } while padded.len() < (padded_h * padded_w * 3) { - padded.push(color[2]); - padded.push(color[1]); - padded.push(color[0]); + padded.extend(color); } Ok(padded) @@ -290,6 +290,7 @@ fn rgb_to_brg(rgb: &mut [u8]) { pub fn img_resize_fit( img: RgbImage, dimensions: (u32, u32), + format: ArchivedPixelFormat, filter: FilterType, padding_color: &[u8; 3], ) -> Result, String> { @@ -298,7 +299,7 @@ pub fn img_resize_fit( if (img_w, img_h) != (width, height) { // if our image is already scaled to fit, skip resizing it and just pad it directly if img_w == width || img_h == height { - return img_pad(img, dimensions, padding_color); + return img_pad(img, dimensions, format, padding_color); } let ratio = width as f32 / height as f32; @@ -338,13 +339,14 @@ pub fn img_resize_fit( img_pad( image::RgbImage::from_raw(trg_w, trg_h, dst.into_vec()).unwrap(), dimensions, + format, padding_color, ) } else { let mut res = img.into_vec(); - // The ARGB is 'little endian', so here we must put the order - // of bytes 'in reverse', so it needs to be BGRA. - rgb_to_brg(&mut res); + if format.must_swap_r_and_b_channels() { + rgb_to_brg(&mut res); + } Ok(res) } } @@ -352,6 +354,7 @@ pub fn img_resize_fit( pub fn img_resize_crop( img: RgbImage, dimensions: (u32, u32), + format: ArchivedPixelFormat, filter: FilterType, ) -> Result, String> { let (width, height) = dimensions; @@ -387,9 +390,9 @@ pub fn img_resize_crop( img.into_vec() }; - // The ARGB is 'little endian', so here we must put the order - // of bytes 'in reverse', so it needs to be BGRA. - rgb_to_brg(&mut resized_img); + if format.must_swap_r_and_b_channels() { + rgb_to_brg(&mut resized_img); + } Ok(resized_img) } diff --git a/src/main.rs b/src/main.rs index 6cd8868d..55097fa0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,10 @@ use std::{os::unix::net::UnixStream, path::PathBuf, process::Stdio, time::Durati use utils::{ cache, - ipc::{self, get_socket_path, read_socket, AnimationRequest, Answer, ArchivedAnswer, Request}, + ipc::{ + self, get_socket_path, read_socket, AnimationRequest, Answer, ArchivedAnswer, + ArchivedPixelFormat, Request, + }, }; mod imgproc; @@ -123,11 +126,12 @@ fn make_request(args: &Swww) -> Result, String> { Swww::ClearCache => unreachable!("there is no request for clear-cache"), Swww::Img(img) => { let requested_outputs = split_cmdline_outputs(&img.outputs); - let (dims, outputs) = get_dimensions_and_outputs(&requested_outputs)?; + let (format, dims, outputs) = get_format_dims_and_outputs(&requested_outputs)?; let imgbuf = ImgBuf::new(&img.path)?; if imgbuf.is_animated() { match std::thread::scope::<_, Result<_, String>>(|s1| { - let animations = s1.spawn(|| make_animation_request(img, &dims, &outputs)); + let animations = + s1.spawn(|| make_animation_request(img, &dims, format, &outputs)); let first_frame = imgbuf .into_frames()? .next() @@ -135,7 +139,7 @@ fn make_request(args: &Swww) -> Result, String> { .map_err(|e| format!("unable to decode first frame: {e}"))?; let img_request = - make_img_request(img, frame_to_rgb(first_frame), &dims, &outputs)?; + make_img_request(img, frame_to_rgb(first_frame), format, &dims, &outputs)?; let animations = animations.join().unwrap_or_else(|e| Err(format!("{e:?}"))); let socket = connect_to_socket(5, 100)?; @@ -153,7 +157,7 @@ fn make_request(args: &Swww) -> Result, String> { } else { let img_raw = imgbuf.decode()?; Ok(Some(Request::Img(make_img_request( - img, img_raw, &dims, &outputs, + img, img_raw, format, &dims, &outputs, )?))) } } @@ -171,38 +175,44 @@ fn make_request(args: &Swww) -> Result, String> { fn make_img_request( img: &cli::Img, img_raw: image::RgbImage, + pixel_format: ArchivedPixelFormat, dims: &[(u32, u32)], outputs: &[Vec], ) -> Result { let transition = make_transition(img); let mut unique_requests = Vec::with_capacity(dims.len()); for (dim, outputs) in dims.iter().zip(outputs) { - unique_requests.push(( - ipc::Img { - img: match img.resize { - ResizeStrategy::No => img_pad(img_raw.clone(), *dim, &img.fill_color)?, - ResizeStrategy::Crop => { - img_resize_crop(img_raw.clone(), *dim, make_filter(&img.filter))? - } - ResizeStrategy::Fit => img_resize_fit( - img_raw.clone(), - *dim, - make_filter(&img.filter), - &img.fill_color, - )?, + let path = match img.path.canonicalize() { + Ok(p) => p.to_string_lossy().to_string(), + Err(e) => { + if let Some("-") = img.path.to_str() { + "STDIN".to_string() + } else { + return Err(format!("failed no canonicalize image path: {e}")); } - .into_boxed_slice(), - path: match img.path.canonicalize() { - Ok(p) => p.to_string_lossy().to_string(), - Err(e) => { - if let Some("-") = img.path.to_str() { - "STDIN".to_string() - } else { - return Err(format!("failed no canonicalize image path: {e}")); - } - } - }, - }, + } + }; + + let img = match img.resize { + ResizeStrategy::No => img_pad(img_raw.clone(), *dim, pixel_format, &img.fill_color)?, + ResizeStrategy::Crop => img_resize_crop( + img_raw.clone(), + *dim, + pixel_format, + make_filter(&img.filter), + )?, + ResizeStrategy::Fit => img_resize_fit( + img_raw.clone(), + *dim, + pixel_format, + make_filter(&img.filter), + &img.fill_color, + )?, + } + .into_boxed_slice(); + + unique_requests.push(( + ipc::Img { img, path }, outputs.to_owned().into_boxed_slice(), )); } @@ -211,9 +221,9 @@ fn make_img_request( } #[allow(clippy::type_complexity)] -fn get_dimensions_and_outputs( +fn get_format_dims_and_outputs( requested_outputs: &[String], -) -> Result<(Vec<(u32, u32)>, Vec>), String> { +) -> Result<(ArchivedPixelFormat, Vec<(u32, u32)>, Vec>), String> { let mut outputs: Vec> = Vec::new(); let mut dims: Vec<(u32, u32)> = Vec::new(); let mut imgs: Vec = Vec::new(); @@ -225,7 +235,9 @@ fn get_dimensions_and_outputs( let answer = Answer::receive(&bytes); match answer { ArchivedAnswer::Info(infos) => { + let mut format = ArchivedPixelFormat::Xrgb; for info in infos.iter() { + format = info.pixel_format; let info_img = info.img.de(); let name = info.name.to_string(); if !requested_outputs.is_empty() && !requested_outputs.contains(&name) { @@ -251,7 +263,7 @@ fn get_dimensions_and_outputs( if outputs.is_empty() { Err("none of the requested outputs are valid".to_owned()) } else { - Ok((dims, outputs)) + Ok((format, dims, outputs)) } } ArchivedAnswer::Err(e) => Err(format!("daemon error when sending query: {e}")), @@ -262,6 +274,7 @@ fn get_dimensions_and_outputs( fn make_animation_request( img: &cli::Img, dims: &[(u32, u32)], + pixel_format: ArchivedPixelFormat, outputs: &[Vec], ) -> Result { let filter = make_filter(&img.filter); @@ -286,6 +299,7 @@ fn make_animation_request( animation: compress_frames( imgbuf.into_frames()?, *dim, + pixel_format, filter, img.resize, &img.fill_color, @@ -389,7 +403,7 @@ fn is_daemon_running() -> Result { } fn restore_from_cache(requested_outputs: &[String]) -> Result<(), String> { - let (_, outputs) = get_dimensions_and_outputs(requested_outputs)?; + let (_, _, outputs) = get_format_dims_and_outputs(requested_outputs)?; for output in outputs.iter().flatten() { let img_path = utils::cache::get_previous_image_path(output)?; diff --git a/utils/src/ipc.rs b/utils/src/ipc.rs index 0b8f6ac3..17ce61e9 100644 --- a/utils/src/ipc.rs +++ b/utils/src/ipc.rs @@ -138,15 +138,67 @@ impl fmt::Display for ArchivedBgImg { } } +#[derive(Clone, Copy, Archive, Serialize, PartialEq)] +#[archive_attr(derive(Clone, Copy))] +pub enum PixelFormat { + /// No swap, can copy directly onto WlBuffer + Brg, + /// Swap R and B channels at client, can copy directly onto WlBuffer + Rgb, + /// No swap, must extend pixel with an extra byte when copying + Xbgr, + /// Swap R and B channels at client, must extend pixel with an extra byte when copying + Xrgb, +} + +impl PixelFormat { + #[inline] + #[must_use] + pub fn channels(&self) -> u8 { + match self { + Self::Rgb => 3, + Self::Brg => 3, + Self::Xbgr => 4, + Self::Xrgb => 4, + } + } + + #[inline] + #[must_use] + pub fn can_copy_directly_onto_wl_buffer(&self) -> bool { + match self { + Self::Brg => true, + Self::Rgb => true, + Self::Xbgr => false, + Self::Xrgb => false, + } + } +} + +impl ArchivedPixelFormat { + #[inline] + #[must_use] + pub fn must_swap_r_and_b_channels(&self) -> bool { + match self { + Self::Brg => false, + Self::Rgb => true, + Self::Xbgr => false, + Self::Xrgb => true, + } + } +} + #[derive(Clone, Archive, Serialize)] pub struct BgInfo { pub name: String, pub dim: (u32, u32), pub scale_factor: i32, pub img: BgImg, + pub pixel_format: PixelFormat, } impl BgInfo { + #[inline] #[must_use] pub fn real_dim(&self) -> (u32, u32) { ( From d67bb8bb17307398dba767ff22acaa96db2733ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonardo=20Gibrowski=20Fa=C3=A9?= Date: Sun, 25 Feb 2024 16:53:29 -0300 Subject: [PATCH 2/5] Use 3 channel formats when available In order to do this, we've had to adapt all the functions that deal with images in the client and in the 'utils' crate. We've also had to change the caching strategy to record what pixel type we cached. --- daemon/src/animations/mod.rs | 2 +- daemon/src/animations/transitions.rs | 17 +- daemon/src/bump_pool.rs | 10 +- daemon/src/main.rs | 40 ++-- daemon/src/wallpaper.rs | 14 +- src/imgproc.rs | 6 +- src/main.rs | 3 +- utils/benches/compression.rs | 18 +- utils/src/cache.rs | 18 +- utils/src/compression/comp/sse2.rs | 8 +- utils/src/compression/decomp/mod.rs | 47 ++++- utils/src/compression/decomp/ssse3.rs | 8 +- utils/src/compression/mod.rs | 254 ++++++++++++++++---------- utils/src/ipc.rs | 39 +++- 14 files changed, 318 insertions(+), 166 deletions(-) diff --git a/daemon/src/animations/mod.rs b/daemon/src/animations/mod.rs index 4d113c8d..23e1bb62 100644 --- a/daemon/src/animations/mod.rs +++ b/daemon/src/animations/mod.rs @@ -149,7 +149,7 @@ impl Animator { } let result = wallpapers[i].canvas_change(|canvas| { - decompressor.decompress_archived(frame, canvas) + decompressor.decompress_archived(frame, canvas, crate::pixel_format()) }); if let Err(e) = result { diff --git a/daemon/src/animations/transitions.rs b/daemon/src/animations/transitions.rs index ebb3ad22..c4e063ae 100644 --- a/daemon/src/animations/transitions.rs +++ b/daemon/src/animations/transitions.rs @@ -55,6 +55,7 @@ pub(super) struct Transition { bezier: BezierCurve, wave: (f32, f32), invert_y: bool, + color_channels: usize, } /// All transitions return whether or not they completed @@ -89,6 +90,7 @@ impl Transition { ), wave: transition.wave, invert_y: transition.invert_y, + color_channels: crate::pixel_format().channels() as usize, } } @@ -141,7 +143,10 @@ impl Transition { done = true; for wallpaper in self.wallpapers.iter_mut() { wallpaper.canvas_change(|canvas| { - for (old, new) in canvas.chunks_exact_mut(4).zip(new_img.chunks_exact(3)) { + for (old, new) in canvas + .chunks_exact_mut(self.color_channels) + .zip(new_img.chunks_exact(3)) + { change_cols!(step, old, new, done); } }); @@ -160,7 +165,7 @@ impl Transition { for wallpaper in self.wallpapers.iter_mut() { wallpaper.canvas_change(|canvas| { canvas - .par_chunks_exact_mut(4) + .par_chunks_exact_mut(self.color_channels) .zip(new_img.par_chunks_exact(3)) .for_each(|(old_pix, new_pix)| { for (old_col, new_col) in old_pix.iter_mut().zip(new_pix) { @@ -229,7 +234,7 @@ impl Transition { for wallpaper in self.wallpapers.iter_mut() { wallpaper.canvas_change(|canvas| { canvas - .par_chunks_exact_mut(4) + .par_chunks_exact_mut(self.color_channels) .zip(new_img.par_chunks_exact(3)) .enumerate() .for_each(|(i, (old, new))| { @@ -290,7 +295,7 @@ impl Transition { for wallpaper in self.wallpapers.iter_mut() { wallpaper.canvas_change(|canvas| { canvas - .par_chunks_exact_mut(4) + .par_chunks_exact_mut(self.color_channels) .zip(new_img.par_chunks_exact(3)) .enumerate() .for_each(|(i, (old, new))| { @@ -337,7 +342,7 @@ impl Transition { for wallpaper in self.wallpapers.iter_mut() { wallpaper.canvas_change(|canvas| { canvas - .par_chunks_exact_mut(4) + .par_chunks_exact_mut(self.color_channels) .zip(new_img.par_chunks_exact(3)) .enumerate() .for_each(|(i, (old, new))| { @@ -388,7 +393,7 @@ impl Transition { for wallpaper in self.wallpapers.iter_mut() { wallpaper.canvas_change(|canvas| { canvas - .par_chunks_exact_mut(4) + .par_chunks_exact_mut(self.color_channels) .zip(new_img.par_chunks_exact(3)) .enumerate() .for_each(|(i, (old, new))| { diff --git a/daemon/src/bump_pool.rs b/daemon/src/bump_pool.rs index 21696650..d6be16f8 100644 --- a/daemon/src/bump_pool.rs +++ b/daemon/src/bump_pool.rs @@ -41,7 +41,7 @@ pub(crate) struct BumpPool { impl BumpPool { /// We assume `width` and `height` have already been multiplied by their scale factor pub(crate) fn new(width: i32, height: i32, shm: &Shm, qh: &QueueHandle) -> Self { - let len = width as usize * height as usize * 4; + let len = width as usize * height as usize * crate::pixel_format().channels() as usize; let mut pool = RawPool::new(len, shm).expect("failed to create RawPool"); let released = Arc::new(AtomicBool::new(true)); let buffers = vec![Buffer::new( @@ -49,7 +49,7 @@ impl BumpPool { 0, width, height, - width * 4, + width * crate::pixel_format().channels() as i32, crate::wl_shm_format(), released.clone(), qh, @@ -68,7 +68,7 @@ impl BumpPool { #[inline] fn buffer_len(&self) -> usize { - self.width as usize * self.height as usize * 4 + self.width as usize * self.height as usize * crate::pixel_format().channels() as usize } #[inline] @@ -95,7 +95,7 @@ impl BumpPool { self.buffer_offset(new_buffer_index).try_into().unwrap(), self.width, self.height, - self.width * 4, + self.width * crate::pixel_format().channels() as i32, crate::wl_shm_format(), released.clone(), qh, @@ -161,7 +161,7 @@ impl BumpPool { 0, width, height, - width * 4, + width * crate::pixel_format().channels() as i32, crate::wl_shm_format(), released.clone(), qh, diff --git a/daemon/src/main.rs b/daemon/src/main.rs index e39c6351..c23b7a53 100644 --- a/daemon/src/main.rs +++ b/daemon/src/main.rs @@ -575,29 +575,27 @@ impl Dispatch for Daemon { _qhandle: &QueueHandle, ) { match event { - wl_shm::Event::Format { format: wenum } => { - match wenum { - wayland_client::WEnum::Value(format) => { - //if format == wl_shm::Format::Bgr888 { - // shm_format = wl_shm::Format::Bgr888; - // pixel_format = PixelFormat::Brg; - // break; - //} else if format == wl_shm::Format::Rgb888 { - // shm_format = wl_shm::Format::Rgb888; - // pixel_format = PixelFormat::Rgb; - /*} else*/ - if format == wl_shm::Format::Xbgr8888 - && state.pixel_format == PixelFormat::Xrgb - { - state.shm_format = wl_shm::Format::Xbgr8888; - state.pixel_format = PixelFormat::Xbgr; - } - } - wayland_client::WEnum::Unknown(v) => { - error!("Received unknown shm format number {v} from server") + wl_shm::Event::Format { format: wenum } => match wenum { + wayland_client::WEnum::Value(format) => { + if format == wl_shm::Format::Bgr888 { + state.shm_format = wl_shm::Format::Bgr888; + state.pixel_format = PixelFormat::Brg; + } else if format == wl_shm::Format::Rgb888 + && state.pixel_format != PixelFormat::Brg + { + state.shm_format = wl_shm::Format::Rgb888; + state.pixel_format = PixelFormat::Rgb; + } else if format == wl_shm::Format::Xbgr8888 + && state.pixel_format == PixelFormat::Xrgb + { + state.shm_format = wl_shm::Format::Xbgr8888; + state.pixel_format = PixelFormat::Xbgr; } } - } + wayland_client::WEnum::Unknown(v) => { + error!("Received unknown shm format number {v} from server") + } + }, e => warn!("Unhandled WlShm event: {e:?}"), } } diff --git a/daemon/src/wallpaper.rs b/daemon/src/wallpaper.rs index 992b1da8..1e038cb4 100644 --- a/daemon/src/wallpaper.rs +++ b/daemon/src/wallpaper.rs @@ -201,12 +201,16 @@ impl Wallpaper { .store(false, Ordering::Release); } - pub(super) fn clear(&self, color: [u8; 3]) { + pub(super) fn clear(&self, mut color: [u8; 3]) { + let pixel_format = super::pixel_format(); + + if pixel_format.must_swap_r_and_b_channels() { + color.swap(0, 2); + } + self.canvas_change(|canvas| { - for pixel in canvas.chunks_exact_mut(4) { - pixel[2] = color[0]; - pixel[1] = color[1]; - pixel[0] = color[2]; + for pixel in canvas.chunks_exact_mut(pixel_format.channels().into()) { + pixel[0..3].copy_from_slice(&color); } }) } diff --git a/src/imgproc.rs b/src/imgproc.rs index 0d471b07..823f2f70 100644 --- a/src/imgproc.rs +++ b/src/imgproc.rs @@ -177,7 +177,7 @@ pub fn compress_frames( }; if let Some(canvas) = canvas.as_ref() { - match compressor.compress(canvas, &img) { + match compressor.compress(canvas, &img, format) { Some(bytes) => compressed_frames.push((bytes, duration)), None => match compressed_frames.last_mut() { Some(last) => last.1 += duration, @@ -185,7 +185,7 @@ pub fn compress_frames( }, } } else { - match compressor.compress(&first_img, &img) { + match compressor.compress(&first_img, &img, format) { Some(bytes) => compressed_frames.push((bytes, duration)), None => first_duration += duration, } @@ -195,7 +195,7 @@ pub fn compress_frames( //Add the first frame we got earlier: if let Some(canvas) = canvas.as_ref() { - match compressor.compress(canvas, &first_img) { + match compressor.compress(canvas, &first_img, format) { Some(bytes) => compressed_frames.push((bytes, first_duration)), None => match compressed_frames.last_mut() { Some(last) => last.1 += first_duration, diff --git a/src/main.rs b/src/main.rs index 55097fa0..e12cac81 100644 --- a/src/main.rs +++ b/src/main.rs @@ -282,7 +282,7 @@ fn make_animation_request( for (dim, outputs) in dims.iter().zip(outputs) { //TODO: make cache work for all resize strategies if img.resize == ResizeStrategy::Crop { - match cache::load_animation_frames(&img.path, *dim) { + match cache::load_animation_frames(&img.path, *dim, pixel_format.de()) { Ok(Some(animation)) => { animations.push((animation, outputs.to_owned().into_boxed_slice())); continue; @@ -305,6 +305,7 @@ fn make_animation_request( &img.fill_color, )? .into_boxed_slice(), + pixel_format: pixel_format.de(), }; animations.push((animation, outputs.to_owned().into_boxed_slice())); } diff --git a/utils/benches/compression.rs b/utils/benches/compression.rs index 6398dfc2..8f714f5c 100644 --- a/utils/benches/compression.rs +++ b/utils/benches/compression.rs @@ -42,17 +42,27 @@ pub fn compression_and_decompression(c: &mut Criterion) { let mut compressor = Compressor::new(); let mut comp = c.benchmark_group("compression"); comp.bench_function("Full", |b| { - b.iter(|| black_box(compressor.compress(&prev, &cur).is_some())) + b.iter(|| { + black_box( + compressor + .compress(&prev, &cur, utils::ipc::ArchivedPixelFormat::Xrgb) + .is_some(), + ) + }) }); comp.finish(); - let mut decomp = c.benchmark_group("decompression"); - let bitpack = compressor.compress(&prev, &cur).unwrap(); + let mut decomp = c.benchmark_group("decompression 4 channels"); + let bitpack = compressor + .compress(&prev, &cur, utils::ipc::ArchivedPixelFormat::Xrgb) + .unwrap(); let mut canvas = buf_from(&prev); let mut decompressor = Decompressor::new(); decomp.bench_function("Full", |b| { - b.iter(|| black_box(decompressor.decompress(&bitpack, &mut canvas))) + b.iter(|| { + black_box(decompressor.decompress(&bitpack, &mut canvas, utils::ipc::PixelFormat::Xrgb)) + }) }); decomp.finish(); diff --git a/utils/src/cache.rs b/utils/src/cache.rs index e849b3df..e0b22a5a 100644 --- a/utils/src/cache.rs +++ b/utils/src/cache.rs @@ -12,7 +12,7 @@ use std::{ use rkyv::{Deserialize, Infallible}; -use crate::ipc::Animation; +use crate::ipc::{Animation, PixelFormat}; pub fn store(output_name: &str, img_path: &str) -> Result<(), String> { let mut filepath = cache_dir()?; @@ -26,7 +26,11 @@ pub fn store(output_name: &str, img_path: &str) -> Result<(), String> { } pub fn store_animation_frames(animation: &Animation) -> Result<(), String> { - let filename = animation_filename(&PathBuf::from(&animation.path), animation.dimensions); + let filename = animation_filename( + &PathBuf::from(&animation.path), + animation.dimensions, + animation.pixel_format, + ); let mut filepath = cache_dir()?; filepath.push(&filename); @@ -50,8 +54,9 @@ pub fn store_animation_frames(animation: &Animation) -> Result<(), String> { pub fn load_animation_frames( path: &Path, dimensions: (u32, u32), + pixel_format: PixelFormat, ) -> Result, String> { - let filename = animation_filename(path, dimensions); + let filename = animation_filename(path, dimensions, pixel_format); let cache_dir = cache_dir()?; let mut filepath = cache_dir.clone(); filepath.push(filename); @@ -155,7 +160,7 @@ fn clean_previous_verions(cache_dir: &Path) { // only the images we've cached will have a _v token, indicating their version if let Some(i) = filename.rfind("_v") { - if &filename[i..] != current_version { + if &filename[i + 2..] != current_version { if let Err(e) = std::fs::remove_file(entry.path()) { eprintln!( "WARNING: failed to remove cache file {} of old swww version {:?}", @@ -194,12 +199,13 @@ fn cache_dir() -> Result { } #[must_use] -fn animation_filename(path: &Path, dimensions: (u32, u32)) -> PathBuf { +fn animation_filename(path: &Path, dimensions: (u32, u32), pixel_format: PixelFormat) -> PathBuf { format!( - "{}__{}x{}_v{}", + "{}__{}x{}_{:?}_v{}", path.to_string_lossy().replace('/', "_"), dimensions.0, dimensions.1, + pixel_format, env!("CARGO_PKG_VERSION"), ) .into() diff --git a/utils/src/compression/comp/sse2.rs b/utils/src/compression/comp/sse2.rs index 69dab6bc..d8f6edfe 100644 --- a/utils/src/compression/comp/sse2.rs +++ b/utils/src/compression/comp/sse2.rs @@ -110,7 +110,7 @@ pub(super) unsafe fn pack_bytes(cur: &[u8], goal: &[u8], v: &mut Vec) { #[cfg(test)] mod tests { use super::*; - use crate::compression::unpack_bytes; + use crate::compression::unpack_bytes_4channels; use rand::prelude::random; #[test] @@ -157,7 +157,7 @@ mod tests { unsafe { pack_bytes(&frame1, &frame2, &mut compressed) }; let mut buf = buf_from(&frame1); - unpack_bytes(&mut buf, &compressed); + unpack_bytes_4channels(&mut buf, &compressed); for i in 0..2 { for j in 0..3 { assert_eq!( @@ -196,7 +196,7 @@ mod tests { let mut buf = buf_from(original.last().unwrap()); for i in 0..20 { - unpack_bytes(&mut buf, &compressed[i]); + unpack_bytes_4channels(&mut buf, &compressed[i]); let mut j = 0; let mut l = 0; while j < 3000 { @@ -252,7 +252,7 @@ mod tests { let mut buf = buf_from(original.last().unwrap()); for i in 0..20 { - unpack_bytes(&mut buf, &compressed[i]); + unpack_bytes_4channels(&mut buf, &compressed[i]); let mut j = 0; let mut l = 0; while j < 3000 { diff --git a/utils/src/compression/decomp/mod.rs b/utils/src/compression/decomp/mod.rs index aaedeaa0..b7649765 100644 --- a/utils/src/compression/decomp/mod.rs +++ b/utils/src/compression/decomp/mod.rs @@ -7,13 +7,13 @@ pub(super) mod ssse3; /// diff must be a slice produced by a BitPack /// buf must have the EXACT expected size by the BitPack #[inline(always)] -pub(super) fn unpack_bytes(buf: &mut [u8], diff: &[u8]) { +pub(super) fn unpack_bytes_4channels(buf: &mut [u8], diff: &[u8]) { // use the most efficient implementation available: #[cfg(not(test))] // when testing, we want to use the specific implementation { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if super::cpu::features::ssse3() { - return unsafe { ssse3::unpack_bytes(buf, diff) }; + return unsafe { ssse3::unpack_bytes_4channels(buf, diff) }; } } @@ -58,3 +58,46 @@ pub(super) fn unpack_bytes(buf: &mut [u8], diff: &[u8]) { pix_idx += 1; } } + +#[inline(always)] +pub(super) fn unpack_bytes_3channels(buf: &mut [u8], diff: &[u8]) { + // The very final byte is just padding to let us read 4 bytes at once without going out of + // bounds + let len = diff.len() - 1; + let buf_ptr = buf.as_mut_ptr(); + let diff_ptr = diff.as_ptr(); + + let mut diff_idx = 0; + let mut pix_idx = 0; + while diff_idx + 1 < len { + while unsafe { diff_ptr.add(diff_idx).read() } == u8::MAX { + pix_idx += u8::MAX as usize; + diff_idx += 1; + } + pix_idx += unsafe { diff_ptr.add(diff_idx).read() } as usize; + diff_idx += 1; + + let mut to_cpy = 0; + while unsafe { diff_ptr.add(diff_idx).read() } == u8::MAX { + to_cpy += u8::MAX as usize; + diff_idx += 1; + } + to_cpy += unsafe { diff_ptr.add(diff_idx).read() } as usize; + diff_idx += 1; + + debug_assert!( + diff_idx + to_cpy * 3 <= diff.len(), + "diff_idx: {diff_idx}, to_copy: {to_cpy} diff.len(): {}", + diff.len() + ); + unsafe { + std::ptr::copy_nonoverlapping( + diff_ptr.add(diff_idx), + buf_ptr.add(pix_idx * 3), + to_cpy * 3, + ); + } + diff_idx += to_cpy * 3; + pix_idx += to_cpy + 1; + } +} diff --git a/utils/src/compression/decomp/ssse3.rs b/utils/src/compression/decomp/ssse3.rs index 4b1653bd..8511a18d 100644 --- a/utils/src/compression/decomp/ssse3.rs +++ b/utils/src/compression/decomp/ssse3.rs @@ -1,6 +1,6 @@ #[inline] #[target_feature(enable = "ssse3")] -pub(super) unsafe fn unpack_bytes(buf: &mut [u8], diff: &[u8]) { +pub(super) unsafe fn unpack_bytes_4channels(buf: &mut [u8], diff: &[u8]) { use std::arch::x86_64 as intr; // The very final byte is just padding to let us read 4 bytes at once without going out of @@ -78,7 +78,7 @@ mod tests { unsafe { pack_bytes(&frame1, &frame2, &mut compressed) } let mut buf = buf_from(&frame1); - unsafe { unpack_bytes(&mut buf, &compressed) } + unsafe { unpack_bytes_4channels(&mut buf, &compressed) } for i in 0..2 { for j in 0..3 { assert_eq!( @@ -117,7 +117,7 @@ mod tests { let mut buf = buf_from(original.last().unwrap()); for i in 0..20 { - unsafe { unpack_bytes(&mut buf, &compressed[i]) } + unsafe { unpack_bytes_4channels(&mut buf, &compressed[i]) } let mut j = 0; let mut l = 0; while j < 3000 { @@ -172,7 +172,7 @@ mod tests { let mut buf = buf_from(original.last().unwrap()); for i in 0..20 { - unsafe { unpack_bytes(&mut buf, &compressed[i]) } + unsafe { unpack_bytes_4channels(&mut buf, &compressed[i]) } let mut j = 0; let mut l = 0; while j < 3000 { diff --git a/utils/src/compression/mod.rs b/utils/src/compression/mod.rs index c49a555a..da02198c 100644 --- a/utils/src/compression/mod.rs +++ b/utils/src/compression/mod.rs @@ -3,10 +3,12 @@ //! Our compression strategy is documented in `comp/mod.rs` use comp::pack_bytes; -use decomp::unpack_bytes; +use decomp::{unpack_bytes_3channels, unpack_bytes_4channels}; use std::ffi::{c_char, c_int}; use rkyv::{Archive, Deserialize, Serialize}; + +use crate::ipc::{ArchivedPixelFormat, PixelFormat}; mod comp; mod cpu; mod decomp; @@ -79,7 +81,12 @@ impl Compressor { /// * the len of the diff buffer is larger than 0x7E000000. In practice, this can only /// happen for 64k monitors and beyond #[inline] - pub fn compress(&mut self, prev: &[u8], cur: &[u8]) -> Option { + pub fn compress( + &mut self, + prev: &[u8], + cur: &[u8], + pixel_format: ArchivedPixelFormat, + ) -> Option { assert_eq!( prev.len(), cur.len(), @@ -114,9 +121,16 @@ impl Compressor { ) as usize }; v.truncate(n); + + let expected_buf_size = if pixel_format.channels() == 3 { + cur.len() + } else { + (cur.len() / 3) * 4 + }; + Some(BitPack { inner: v.into_boxed_slice(), - expected_buf_size: (cur.len() / 3) * 4, + expected_buf_size, compressed_size: self.buf.len() as i32, }) } @@ -180,7 +194,12 @@ impl Decompressor { ///returns whether unpacking was successful. Note it can only fail if `buf.len() != ///expected_buf_size` #[inline] - pub fn decompress(&mut self, bitpack: &BitPack, buf: &mut [u8]) -> Result<(), String> { + pub fn decompress( + &mut self, + bitpack: &BitPack, + buf: &mut [u8], + pixel_format: PixelFormat, + ) -> Result<(), String> { if buf.len() != bitpack.expected_buf_size { return Err(format!( "buf has len {}, but expected len is {}", @@ -206,7 +225,12 @@ impl Decompressor { let v = unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), bitpack.compressed_size as usize) }; - unpack_bytes(buf, v); + + if pixel_format.can_copy_directly_onto_wl_buffer() { + unpack_bytes_3channels(buf, v); + } else { + unpack_bytes_4channels(buf, v); + } Ok(()) } @@ -219,6 +243,7 @@ impl Decompressor { &mut self, archived: &ArchivedBitPack, buf: &mut [u8], + pixel_format: PixelFormat, ) -> Result<(), String> { let expected_len: usize = archived .expected_buf_size @@ -252,7 +277,12 @@ impl Decompressor { // SAFETY: the call to self.ensure_capacity guarantees the pointer has the necessary size // to hold all the data let v = unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), cap as usize) }; - unpack_bytes(buf, v); + + if pixel_format.can_copy_directly_onto_wl_buffer() { + unpack_bytes_3channels(buf, v); + } else { + unpack_bytes_4channels(buf, v); + } Ok(()) } @@ -263,7 +293,13 @@ mod tests { use super::*; use rand::prelude::random; - fn buf_from(slice: &[u8]) -> Vec { + const C_FORMS: [ArchivedPixelFormat; 2] = [ArchivedPixelFormat::Xrgb, ArchivedPixelFormat::Rgb]; + const D_FORMS: [PixelFormat; 2] = [PixelFormat::Xrgb, PixelFormat::Rgb]; + + fn buf_from(slice: &[u8], original_channels: usize) -> Vec { + if original_channels == 3 { + return slice.to_vec(); + } let mut v = Vec::new(); for pix in slice.chunks_exact(3) { v.extend_from_slice(pix); @@ -275,65 +311,77 @@ mod tests { #[test] //Use this when annoying problems show up fn small() { - let frame1 = [1, 2, 3, 4, 5, 6]; - let frame2 = [1, 2, 3, 6, 5, 4]; - let compressed = Compressor::new().compress(&frame1, &frame2).unwrap(); - - let mut buf = buf_from(&frame1); - assert!(Decompressor::new() - .decompress(&compressed, &mut buf) - .is_ok()); - for i in 0..2 { - for j in 0..3 { - assert_eq!( - frame2[i * 3 + j], - buf[i * 4 + j], - "\nframe2: {frame2:?}, buf: {buf:?}\n" - ); + for (c_form, d_form) in C_FORMS.into_iter().zip(D_FORMS) { + let frame1 = [1, 2, 3, 4, 5, 6]; + let frame2 = [1, 2, 3, 6, 5, 4]; + let compressed = Compressor::new() + .compress(&frame1, &frame2, c_form) + .unwrap(); + + let mut buf = buf_from(&frame1, c_form.channels().into()); + Decompressor::new() + .decompress(&compressed, &mut buf, d_form) + .unwrap(); + for i in 0..2 { + for j in 0..3 { + assert_eq!( + frame2[i * 3 + j], + buf[i * c_form.channels() as usize + j], + "\nframe2: {frame2:?}, buf: {buf:?}\n" + ); + } } } } #[test] fn total_random() { - for _ in 0..10 { - let mut original = Vec::with_capacity(20); - for _ in 0..20 { - let mut v = Vec::with_capacity(3000); - for _ in 0..3000 { - v.push(random::()); + for (c_form, d_form) in C_FORMS.into_iter().zip(D_FORMS) { + for _ in 0..10 { + let mut original = Vec::with_capacity(20); + for _ in 0..20 { + let mut v = Vec::with_capacity(3000); + for _ in 0..3000 { + v.push(random::()); + } + original.push(v); } - original.push(v); - } - let mut compressed = Vec::with_capacity(20); - let mut compressor = Compressor::new(); - let mut decompressor = Decompressor::new(); - compressed.push( - compressor - .compress(original.last().unwrap(), &original[0]) - .unwrap(), - ); - for i in 1..20 { - compressed.push(compressor.compress(&original[i - 1], &original[i]).unwrap()); - } + let mut compressed = Vec::with_capacity(20); + let mut compressor = Compressor::new(); + let mut decompressor = Decompressor::new(); + compressed.push( + compressor + .compress(original.last().unwrap(), &original[0], c_form) + .unwrap(), + ); + for i in 1..20 { + compressed.push( + compressor + .compress(&original[i - 1], &original[i], c_form) + .unwrap(), + ); + } - let mut buf = buf_from(original.last().unwrap()); - for i in 0..20 { - assert!(decompressor.decompress(&compressed[i], &mut buf).is_ok()); - let mut j = 0; - let mut l = 0; - while j < 3000 { - for k in 0..3 { - assert_eq!( - buf[j + l + k], - original[i][j + k], - "Failed at index: {}", - j + k - ); + let mut buf = buf_from(original.last().unwrap(), c_form.channels().into()); + for i in 0..20 { + decompressor + .decompress(&compressed[i], &mut buf, d_form) + .unwrap(); + let mut j = 0; + let mut l = 0; + while j < 3000 { + for k in 0..3 { + assert_eq!( + buf[j + l + k], + original[i][j + k], + "Failed at index: {}", + j + k + ); + } + j += 3; + l += !d_form.can_copy_directly_onto_wl_buffer() as usize; } - j += 3; - l += 1; } } } @@ -341,53 +389,61 @@ mod tests { #[test] fn full() { - for _ in 0..10 { - let mut original = Vec::with_capacity(20); - for _ in 0..20 { - let mut v = Vec::with_capacity(3000); - for _ in 0..750 { - v.push(random::()); - } - for i in 0..750 { - v.push((i % 255) as u8); - } - for _ in 0..750 { - v.push(random::()); - } - for i in 0..750 { - v.push((i % 255) as u8); + for (c_form, d_form) in C_FORMS.into_iter().zip(D_FORMS) { + for _ in 0..10 { + let mut original = Vec::with_capacity(20); + for _ in 0..20 { + let mut v = Vec::with_capacity(3000); + for _ in 0..750 { + v.push(random::()); + } + for i in 0..750 { + v.push((i % 255) as u8); + } + for _ in 0..750 { + v.push(random::()); + } + for i in 0..750 { + v.push((i % 255) as u8); + } + original.push(v); } - original.push(v); - } - let mut compressor = Compressor::new(); - let mut decompressor = Decompressor::new(); - let mut compressed = Vec::with_capacity(20); - compressed.push( - compressor - .compress(original.last().unwrap(), &original[0]) - .unwrap(), - ); - for i in 1..20 { - compressed.push(compressor.compress(&original[i - 1], &original[i]).unwrap()); - } + let mut compressor = Compressor::new(); + let mut decompressor = Decompressor::new(); + let mut compressed = Vec::with_capacity(20); + compressed.push( + compressor + .compress(original.last().unwrap(), &original[0], c_form) + .unwrap(), + ); + for i in 1..20 { + compressed.push( + compressor + .compress(&original[i - 1], &original[i], c_form) + .unwrap(), + ); + } - let mut buf = buf_from(original.last().unwrap()); - for i in 0..20 { - assert!(decompressor.decompress(&compressed[i], &mut buf).is_ok()); - let mut j = 0; - let mut l = 0; - while j < 3000 { - for k in 0..3 { - assert_eq!( - buf[j + l + k], - original[i][j + k], - "Failed at index: {}", - j + k - ); + let mut buf = buf_from(original.last().unwrap(), c_form.channels().into()); + for i in 0..20 { + decompressor + .decompress(&compressed[i], &mut buf, d_form) + .unwrap(); + let mut j = 0; + let mut l = 0; + while j < 3000 { + for k in 0..3 { + assert_eq!( + buf[j + l + k], + original[i][j + k], + "Failed at index: {}", + j + k + ); + } + j += 3; + l += !d_form.can_copy_directly_onto_wl_buffer() as usize; } - j += 3; - l += 1; } } } diff --git a/utils/src/ipc.rs b/utils/src/ipc.rs index 17ce61e9..f63f15ac 100644 --- a/utils/src/ipc.rs +++ b/utils/src/ipc.rs @@ -138,8 +138,8 @@ impl fmt::Display for ArchivedBgImg { } } -#[derive(Clone, Copy, Archive, Serialize, PartialEq)] -#[archive_attr(derive(Clone, Copy))] +#[derive(Clone, Copy, Debug, Archive, Serialize, Deserialize, PartialEq)] +#[archive_attr(derive(Clone, Copy, Debug))] pub enum PixelFormat { /// No swap, can copy directly onto WlBuffer Brg, @@ -154,7 +154,7 @@ pub enum PixelFormat { impl PixelFormat { #[inline] #[must_use] - pub fn channels(&self) -> u8 { + pub const fn channels(&self) -> u8 { match self { Self::Rgb => 3, Self::Brg => 3, @@ -165,7 +165,18 @@ impl PixelFormat { #[inline] #[must_use] - pub fn can_copy_directly_onto_wl_buffer(&self) -> bool { + pub const fn must_swap_r_and_b_channels(&self) -> bool { + match self { + Self::Brg => false, + Self::Rgb => true, + Self::Xbgr => false, + Self::Xrgb => true, + } + } + + #[inline] + #[must_use] + pub const fn can_copy_directly_onto_wl_buffer(&self) -> bool { match self { Self::Brg => true, Self::Rgb => true, @@ -178,7 +189,18 @@ impl PixelFormat { impl ArchivedPixelFormat { #[inline] #[must_use] - pub fn must_swap_r_and_b_channels(&self) -> bool { + pub const fn channels(&self) -> u8 { + match self { + Self::Rgb => 3, + Self::Brg => 3, + Self::Xbgr => 4, + Self::Xrgb => 4, + } + } + + #[inline] + #[must_use] + pub const fn must_swap_r_and_b_channels(&self) -> bool { match self { Self::Brg => false, Self::Rgb => true, @@ -186,6 +208,12 @@ impl ArchivedPixelFormat { Self::Xrgb => true, } } + + #[inline] + #[must_use] + pub fn de(&self) -> PixelFormat { + self.deserialize(&mut rkyv::Infallible).unwrap() + } } #[derive(Clone, Archive, Serialize)] @@ -260,6 +288,7 @@ pub struct Animation { pub animation: Box<[(BitPack, Duration)]>, pub path: String, pub dimensions: (u32, u32), + pub pixel_format: PixelFormat, } pub type AnimationRequest = Box<[(Animation, Box<[String]>)]>; From 0aad5f23b81f0b8b93bb222cbb50a7ba17850a48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonardo=20Gibrowski=20Fa=C3=A9?= Date: Sun, 25 Feb 2024 18:45:01 -0300 Subject: [PATCH 3/5] specialize transitions for 3channels --- daemon/src/animations/transitions.rs | 253 ++++++++++++++------------- daemon/src/main.rs | 1 + 2 files changed, 128 insertions(+), 126 deletions(-) diff --git a/daemon/src/animations/transitions.rs b/daemon/src/animations/transitions.rs index c4e063ae..5119b75a 100644 --- a/daemon/src/animations/transitions.rs +++ b/daemon/src/animations/transitions.rs @@ -14,34 +14,6 @@ use keyframe::{ functions::BezierCurve, keyframes, mint::Vector2, num_traits::Pow, AnimationSequence, }; -macro_rules! change_cols { - ($step:ident, $old:ident, $new:ident, $done:ident) => { - for (old_col, new_col) in $old.iter_mut().zip($new) { - if old_col.abs_diff(*new_col) < $step { - *old_col = *new_col; - } else if *old_col > *new_col { - *old_col -= $step; - $done = false; - } else { - *old_col += $step; - $done = false; - } - } - }; - - ($step:ident, $old:ident, $new:ident) => { - for (old_col, new_col) in $old.iter_mut().zip($new) { - if old_col.abs_diff(*new_col) < $step { - *old_col = *new_col; - } else if *old_col > *new_col { - *old_col -= $step; - } else { - *old_col += $step; - } - } - }; -} - pub(super) struct Transition { animation_tokens: Vec, wallpapers: Vec>, @@ -139,20 +111,50 @@ impl Transition { let step = self.step; let mut now = Instant::now(); let mut done = false; - while !done { - done = true; - for wallpaper in self.wallpapers.iter_mut() { - wallpaper.canvas_change(|canvas| { - for (old, new) in canvas - .chunks_exact_mut(self.color_channels) - .zip(new_img.chunks_exact(3)) - { - change_cols!(step, old, new, done); - } - }); - wallpaper.draw(); + if self.color_channels == 4 { + while !done { + done = true; + for wallpaper in self.wallpapers.iter_mut() { + wallpaper.canvas_change(|canvas| { + for (old, new) in canvas.chunks_exact_mut(4).zip(new_img.chunks_exact(3)) { + for (old, new) in old.iter_mut().zip(new) { + if old.abs_diff(*new) < step { + *old = *new; + } else if *old > *new { + *old -= step; + done = false; + } else { + *old += step; + done = false; + } + } + } + }); + wallpaper.draw(); + } + self.send_frame(&mut now); + } + } else { + while !done { + done = true; + for wallpaper in self.wallpapers.iter_mut() { + wallpaper.canvas_change(|canvas| { + for (old, new) in canvas.iter_mut().zip(new_img) { + if old.abs_diff(*new) < step { + *old = *new; + } else if *old > *new { + *old -= step; + done = false; + } else { + *old += step; + done = false; + } + } + }); + wallpaper.draw(); + } + self.send_frame(&mut now); } - self.send_frame(&mut now); } } @@ -162,20 +164,9 @@ impl Transition { let mut now = Instant::now(); while start.elapsed().as_secs_f64() < seq.duration() { - for wallpaper in self.wallpapers.iter_mut() { - wallpaper.canvas_change(|canvas| { - canvas - .par_chunks_exact_mut(self.color_channels) - .zip(new_img.par_chunks_exact(3)) - .for_each(|(old_pix, new_pix)| { - for (old_col, new_col) in old_pix.iter_mut().zip(new_pix) { - *old_col = - (*old_col as f64 * (1.0 - step) + *new_col as f64 * step) as u8; - } - }); - }); - wallpaper.draw(); - } + self.draw_all(new_img, |_, old_col, new_col| { + *old_col = (*old_col as f64 * (1.0 - step) + *new_col as f64 * step) as u8; + }); self.send_frame(&mut now); step = seq.now() as f64; seq.advance_to(start.elapsed().as_secs_f64()); @@ -231,22 +222,13 @@ impl Transition { let step = self.step; while start.elapsed().as_secs_f64() < seq.duration() { - for wallpaper in self.wallpapers.iter_mut() { - wallpaper.canvas_change(|canvas| { - canvas - .par_chunks_exact_mut(self.color_channels) - .zip(new_img.par_chunks_exact(3)) - .enumerate() - .for_each(|(i, (old, new))| { - let pix_x = i % width; - let pix_y = height - i / width; - if is_low(pix_x as f64, pix_y as f64, offset) { - change_cols!(step, old, new); - } - }); - }); - wallpaper.draw(); - } + self.draw_all(new_img, |i, old, new| { + let pix_x = i % width; + let pix_y = height - i / width; + if is_low(pix_x as f64, pix_y as f64, offset) { + change_byte(step, old, new); + } + }); self.send_frame(&mut now); offset = seq.now() as f64; @@ -292,22 +274,13 @@ impl Transition { let step = self.step; while start.elapsed().as_secs_f64() < seq.duration() { - for wallpaper in self.wallpapers.iter_mut() { - wallpaper.canvas_change(|canvas| { - canvas - .par_chunks_exact_mut(self.color_channels) - .zip(new_img.par_chunks_exact(3)) - .enumerate() - .for_each(|(i, (old, new))| { - let pix_x = i % width; - let pix_y = height - i / width; - if is_low(pix_x as f64, pix_y as f64, offset, circle_radius) { - change_cols!(step, old, new); - } - }); - }); - wallpaper.draw(); - } + self.draw_all(new_img, |i, old, new| { + let pix_x = i % width; + let pix_y = height - i / width; + if is_low(pix_x as f64, pix_y as f64, offset, circle_radius) { + change_byte(step, old, new); + } + }); self.send_frame(&mut now); offset = seq.now() as f64; @@ -336,31 +309,21 @@ impl Transition { let (width, height) = (width as usize, height as usize); let (center_x, center_y) = (center_x as usize, center_y as usize); + let step = self.step; let (mut seq, start) = self.bezier_seq(0.0, dist_end); let mut now = Instant::now(); while start.elapsed().as_secs_f64() < seq.duration() { - for wallpaper in self.wallpapers.iter_mut() { - wallpaper.canvas_change(|canvas| { - canvas - .par_chunks_exact_mut(self.color_channels) - .zip(new_img.par_chunks_exact(3)) - .enumerate() - .for_each(|(i, (old, new))| { - let pix_x = i % width; - let pix_y = height - i / width; - let diff_x = pix_x.abs_diff(center_x); - let diff_y = pix_y.abs_diff(center_y); - let pix_center_dist = f32::sqrt((diff_x.pow(2) + diff_y.pow(2)) as f32); - if pix_center_dist <= dist_center { - let step = self - .step - .saturating_add((dist_center - pix_center_dist).log2() as u8); - change_cols!(step, old, new); - } - }); - }); - wallpaper.draw(); - } + self.draw_all(new_img, |i, old, new| { + let pix_x = i % width; + let pix_y = height - i / width; + let diff_x = pix_x.abs_diff(center_x); + let diff_y = pix_y.abs_diff(center_y); + let pix_center_dist = f32::sqrt((diff_x.pow(2) + diff_y.pow(2)) as f32); + if pix_center_dist <= dist_center { + let step = step.saturating_add((dist_center - pix_center_dist).log2() as u8); + change_byte(step, old, new); + } + }); self.send_frame(&mut now); dist_center = seq.now(); @@ -387,37 +350,75 @@ impl Transition { let (width, height) = (width as usize, height as usize); let (center_x, center_y) = (center_x as usize, center_y as usize); + let step = self.step; let (mut seq, start) = self.bezier_seq(dist_center, 0.0); let mut now = Instant::now(); while start.elapsed().as_secs_f64() < seq.duration() { + self.draw_all(new_img, |i, old, new| { + let pix_x = i % width; + let pix_y = height - i / width; + let diff_x = pix_x.abs_diff(center_x); + let diff_y = pix_y.abs_diff(center_y); + let pix_center_dist = f32::sqrt((diff_x.pow(2) + diff_y.pow(2)) as f32); + if pix_center_dist >= dist_center { + let step = step.saturating_add((pix_center_dist - dist_center).log2() as u8); + change_byte(step, old, new); + } + }); + self.send_frame(&mut now); + + dist_center = seq.now(); + seq.advance_to(start.elapsed().as_secs_f64()); + } + self.step = 4 + self.step / 4; + self.simple(new_img) + } + + /// Runs pixels_change_fn for every byte in the old img + #[inline(always)] + fn draw_all(&mut self, new_img: &[u8], pixels_change_fn: F) + where + F: FnOnce(usize, &mut u8, &u8) + Copy + Send + Sync, + { + if self.color_channels == 4 { for wallpaper in self.wallpapers.iter_mut() { wallpaper.canvas_change(|canvas| { canvas - .par_chunks_exact_mut(self.color_channels) + .par_chunks_exact_mut(4) .zip(new_img.par_chunks_exact(3)) .enumerate() .for_each(|(i, (old, new))| { - let pix_x = i % width; - let pix_y = height - i / width; - let diff_x = pix_x.abs_diff(center_x); - let diff_y = pix_y.abs_diff(center_y); - let pix_center_dist = f32::sqrt((diff_x.pow(2) + diff_y.pow(2)) as f32); - if pix_center_dist >= dist_center { - let step = self - .step - .saturating_add((pix_center_dist - dist_center).log2() as u8); - change_cols!(step, old, new); + for (old, new) in old.iter_mut().zip(new) { + pixels_change_fn(i, old, new); } }); }); wallpaper.draw(); } - self.send_frame(&mut now); - - dist_center = seq.now(); - seq.advance_to(start.elapsed().as_secs_f64()); + } else { + for wallpaper in self.wallpapers.iter_mut() { + wallpaper.canvas_change(|canvas| { + canvas + .par_iter_mut() + .zip(new_img.par_iter()) + .enumerate() + .for_each(|(i, (old, new))| { + pixels_change_fn(i / 3, old, new); + }); + }); + wallpaper.draw(); + } } - self.step = 4 + self.step / 4; - self.simple(new_img) + } +} + +#[inline(always)] +fn change_byte(step: u8, old: &mut u8, new: &u8) { + if old.abs_diff(*new) < step { + *old = *new; + } else if *old > *new { + *old -= step; + } else { + *old += step; } } diff --git a/daemon/src/main.rs b/daemon/src/main.rs index c23b7a53..d5ba0a68 100644 --- a/daemon/src/main.rs +++ b/daemon/src/main.rs @@ -104,6 +104,7 @@ extern "C" fn signal_handler(_s: i32) { fn main() -> Result<(), String> { rayon::ThreadPoolBuilder::default() .thread_name(|i| format!("rayon thread {i}")) + .stack_size(1 << 18) // 256KiB; we do not need a large stack .build_global() .expect("failed to configure rayon global thread pool"); make_logger(); From 5cd00b2cd83c90f1a4bd4f98998c452fc52bd8d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonardo=20Gibrowski=20Fa=C3=A9?= Date: Wed, 28 Feb 2024 10:48:24 -0300 Subject: [PATCH 4/5] do not use unsafe for the FORMAT statics --- daemon/src/main.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/daemon/src/main.rs b/daemon/src/main.rs index d5ba0a68..e4a14a0f 100644 --- a/daemon/src/main.rs +++ b/daemon/src/main.rs @@ -76,22 +76,22 @@ static PIXEL_FORMAT: OnceLock = OnceLock::new(); #[inline] pub fn wl_shm_format() -> wl_shm::Format { debug_assert!(WL_SHM_FORMAT.get().is_some()); - // SAFETY: this is safe because we initialize it in Daemon::new, before we ever call this in - // the wallpaper structs - *unsafe { WL_SHM_FORMAT.get().unwrap_unchecked() } + *WL_SHM_FORMAT.get().unwrap_or(&wl_shm::Format::Xrgb8888) } #[inline] pub fn pixel_format() -> PixelFormat { debug_assert!(PIXEL_FORMAT.get().is_some()); - // SAFETY: this is safe because we initialize it in Daemon::new, before we ever call this in - // the wallpaper structs - *unsafe { PIXEL_FORMAT.get().unwrap_unchecked() } + *PIXEL_FORMAT.get().unwrap_or(&PixelFormat::Xrgb) } #[inline] pub fn wake_poll() { debug_assert!(POLL_WAKER.get().is_some()); + + // SAFETY: POLL_WAKER is set up in setup_signals_and_pipe, which is called early in main + // and panics if it fails. By the time anyone calls this function, POLL_WAKER will certainly + // already have been initialized. if let Err(e) = nix::unistd::write(*unsafe { POLL_WAKER.get().unwrap_unchecked() }, &[0]) { error!("failed to write to pipe file descriptor: {e}"); } From b6b9a2d0dacc604107bdf701d9d3c8521084b3c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonardo=20Gibrowski=20Fa=C3=A9?= Date: Wed, 28 Feb 2024 18:30:56 -0300 Subject: [PATCH 5/5] prevent UB from a malformed BitPack at decompression --- utils/src/compression/comp/mod.rs | 5 +- utils/src/compression/comp/sse2.rs | 5 +- utils/src/compression/decomp/mod.rs | 102 +++++++++++++++++++++----- utils/src/compression/decomp/ssse3.rs | 19 +++-- utils/src/compression/mod.rs | 23 ++++-- 5 files changed, 116 insertions(+), 38 deletions(-) diff --git a/utils/src/compression/comp/mod.rs b/utils/src/compression/comp/mod.rs index 82b9dc3d..f0024151 100644 --- a/utils/src/compression/comp/mod.rs +++ b/utils/src/compression/comp/mod.rs @@ -112,8 +112,9 @@ pub(super) unsafe fn pack_bytes(cur: &[u8], goal: &[u8], v: &mut Vec) { } if !v.is_empty() { - // add one extra zero to prevent access out of bounds later during decompression - v.push(0) + // add two extra bytes to prevent access out of bounds later during decompression + v.push(0); + v.push(0); } } diff --git a/utils/src/compression/comp/sse2.rs b/utils/src/compression/comp/sse2.rs index d8f6edfe..9c291dd1 100644 --- a/utils/src/compression/comp/sse2.rs +++ b/utils/src/compression/comp/sse2.rs @@ -102,8 +102,9 @@ pub(super) unsafe fn pack_bytes(cur: &[u8], goal: &[u8], v: &mut Vec) { } if !v.is_empty() { - // add one extra zero to prevent access out of bounds later during decompression - v.push(0) + // add two extra bytes to prevent access out of bounds later during decompression + v.push(0); + v.push(0); } } diff --git a/utils/src/compression/decomp/mod.rs b/utils/src/compression/decomp/mod.rs index b7649765..2ffcedd6 100644 --- a/utils/src/compression/decomp/mod.rs +++ b/utils/src/compression/decomp/mod.rs @@ -8,6 +8,10 @@ pub(super) mod ssse3; /// buf must have the EXACT expected size by the BitPack #[inline(always)] pub(super) fn unpack_bytes_4channels(buf: &mut [u8], diff: &[u8]) { + assert!( + diff[diff.len() - 1] | diff[diff.len() - 2] == 0, + "Poorly formed BitPack" + ); // use the most efficient implementation available: #[cfg(not(test))] // when testing, we want to use the specific implementation { @@ -17,15 +21,14 @@ pub(super) fn unpack_bytes_4channels(buf: &mut [u8], diff: &[u8]) { } } - // The very final byte is just padding to let us read 4 bytes at once without going out of - // bounds - let len = diff.len() - 1; + // The final bytes are just padding to prevent us from going out of bounds + let len = diff.len() - 3; let buf_ptr = buf.as_mut_ptr(); let diff_ptr = diff.as_ptr(); let mut diff_idx = 0; let mut pix_idx = 0; - while diff_idx + 1 < len { + while diff_idx < len { while unsafe { diff_ptr.add(diff_idx).read() } == u8::MAX { pix_idx += u8::MAX as usize; diff_idx += 1; @@ -41,14 +44,13 @@ pub(super) fn unpack_bytes_4channels(buf: &mut [u8], diff: &[u8]) { to_cpy += unsafe { diff_ptr.add(diff_idx).read() } as usize; diff_idx += 1; + assert!( + diff_idx + to_cpy * 3 + 1 < diff.len(), + "copying: {}, diff.len(): {}", + diff_idx + to_cpy * 3 + 1, + diff.len() + ); for _ in 0..to_cpy { - // it is much faster to use this assertion for testing than miri - debug_assert!( - diff_idx + 3 < diff.len(), - "diff_idx + 3: {}, diff.len(): {}", - diff_idx + 3, - diff.len() - ); unsafe { std::ptr::copy_nonoverlapping(diff_ptr.add(diff_idx), buf_ptr.add(pix_idx * 4), 4) } @@ -61,15 +63,18 @@ pub(super) fn unpack_bytes_4channels(buf: &mut [u8], diff: &[u8]) { #[inline(always)] pub(super) fn unpack_bytes_3channels(buf: &mut [u8], diff: &[u8]) { - // The very final byte is just padding to let us read 4 bytes at once without going out of - // bounds - let len = diff.len() - 1; + assert!( + diff[diff.len() - 1] | diff[diff.len() - 2] == 0, + "Poorly formed BitPack" + ); + // The final bytes are just padding to prevent us from going out of bounds + let len = diff.len() - 3; let buf_ptr = buf.as_mut_ptr(); let diff_ptr = diff.as_ptr(); let mut diff_idx = 0; let mut pix_idx = 0; - while diff_idx + 1 < len { + while diff_idx < len { while unsafe { diff_ptr.add(diff_idx).read() } == u8::MAX { pix_idx += u8::MAX as usize; diff_idx += 1; @@ -85,9 +90,9 @@ pub(super) fn unpack_bytes_3channels(buf: &mut [u8], diff: &[u8]) { to_cpy += unsafe { diff_ptr.add(diff_idx).read() } as usize; diff_idx += 1; - debug_assert!( + assert!( diff_idx + to_cpy * 3 <= diff.len(), - "diff_idx: {diff_idx}, to_copy: {to_cpy} diff.len(): {}", + "diff_idx: {diff_idx}, to_copy: {to_cpy}, diff.len(): {}", diff.len() ); unsafe { @@ -101,3 +106,66 @@ pub(super) fn unpack_bytes_3channels(buf: &mut [u8], diff: &[u8]) { pix_idx += to_cpy + 1; } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[should_panic] + fn ub_unpack_bytes4_poorly_formed() { + let mut bytes = vec![u8::MAX; 9]; + let diff = vec![u8::MAX; 18]; + unpack_bytes_4channels(&mut bytes, &diff); + } + + #[test] + #[should_panic] + fn ub_unpack_bytes3_poorly_formed() { + let mut bytes = vec![u8::MAX; 9]; + let diff = vec![u8::MAX; 18]; + unpack_bytes_3channels(&mut bytes, &diff); + } + + #[test] + #[should_panic] + fn ub_unpack_bytes4_poorly_formed2() { + let mut bytes = vec![u8::MAX; 9]; + let mut diff = vec![u8::MAX; 18]; + diff[8] = 0; + diff[7] = 0; + unpack_bytes_4channels(&mut bytes, &diff); + } + + #[test] + #[should_panic] + fn ub_unpack_bytes3_poorly_formed2() { + let mut bytes = vec![u8::MAX; 9]; + let mut diff = vec![u8::MAX; 18]; + diff[8] = 0; + diff[7] = 0; + unpack_bytes_3channels(&mut bytes, &diff); + } + + #[test] + #[should_panic] + fn ub_unpack_bytes4_poorly_formed3() { + let mut bytes = vec![u8::MAX; 9]; + let mut diff = vec![u8::MAX; 18]; + diff[8] = 0; + diff[7] = 0; + diff[2] = 0; + unpack_bytes_4channels(&mut bytes, &diff); + } + + #[test] + #[should_panic] + fn ub_unpack_bytes3_poorly_formed3() { + let mut bytes = vec![u8::MAX; 9]; + let mut diff = vec![u8::MAX; 18]; + diff[8] = 0; + diff[7] = 0; + diff[2] = 0; + unpack_bytes_3channels(&mut bytes, &diff); + } +} diff --git a/utils/src/compression/decomp/ssse3.rs b/utils/src/compression/decomp/ssse3.rs index 8511a18d..6b616fe0 100644 --- a/utils/src/compression/decomp/ssse3.rs +++ b/utils/src/compression/decomp/ssse3.rs @@ -3,16 +3,15 @@ pub(super) unsafe fn unpack_bytes_4channels(buf: &mut [u8], diff: &[u8]) { use std::arch::x86_64 as intr; - // The very final byte is just padding to let us read 4 bytes at once without going out of - // bounds - let len = diff.len() - 1; + // The final bytes are just padding to prevent us from going out of bounds + let len = diff.len() - 3; let buf_ptr = buf.as_mut_ptr(); let diff_ptr = diff.as_ptr(); let mask = intr::_mm_set_epi8(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0); let mut diff_idx = 0; let mut pix_idx = 0; - while diff_idx + 1 < len { + while diff_idx < len { while diff_ptr.add(diff_idx).read() == u8::MAX { pix_idx += u8::MAX as usize; diff_idx += 1; @@ -28,6 +27,12 @@ pub(super) unsafe fn unpack_bytes_4channels(buf: &mut [u8], diff: &[u8]) { to_cpy += diff_ptr.add(diff_idx).read() as usize; diff_idx += 1; + assert!( + diff_idx + to_cpy * 3 + 1 < diff.len(), + "copying: {}, diff.len(): {}", + diff_idx + to_cpy * 3 + 1, + diff.len() + ); while to_cpy > 4 { let d = intr::_mm_loadu_si128(diff_ptr.add(diff_idx).cast()); let to_store = intr::_mm_shuffle_epi8(d, mask); @@ -38,12 +43,6 @@ pub(super) unsafe fn unpack_bytes_4channels(buf: &mut [u8], diff: &[u8]) { to_cpy -= 4; } for _ in 0..to_cpy { - debug_assert!( - diff_idx + 3 < diff.len(), - "diff_idx + 3: {}, diff.len(): {}", - diff_idx + 3, - diff.len() - ); std::ptr::copy_nonoverlapping(diff_ptr.add(diff_idx), buf_ptr.add(pix_idx * 4), 4); diff_idx += 3; pix_idx += 1; diff --git a/utils/src/compression/mod.rs b/utils/src/compression/mod.rs index da02198c..a6f4e8ee 100644 --- a/utils/src/compression/mod.rs +++ b/utils/src/compression/mod.rs @@ -207,17 +207,22 @@ impl Decompressor { bitpack.expected_buf_size )); } + self.ensure_capacity(bitpack.compressed_size as usize); // SAFETY: errors will never happen because BitPacked is *always* only produced - // with correct lz4 compression - unsafe { + // with correct lz4 compression, and ptr has the necessary capacity + let size = unsafe { LZ4_decompress_safe( bitpack.inner.as_ptr() as _, self.ptr.as_ptr() as _, bitpack.inner.len() as c_int, bitpack.compressed_size as c_int, - ); + ) + }; + + if size != bitpack.compressed_size { + return Err("BitPack is malformed!".to_string()); } // SAFETY: the call to self.ensure_capacity guarantees the pointer has the necessary size @@ -256,22 +261,26 @@ impl Decompressor { expected_len )); } - let cap: i32 = archived .compressed_size .deserialize(&mut rkyv::Infallible) .unwrap(); + self.ensure_capacity(cap as usize); // SAFETY: errors will never happen because BitPacked is *always* only produced - // with correct lz4 compression - unsafe { + // with correct lz4 compression, and ptr has the necessary capacity + let size = unsafe { LZ4_decompress_safe( archived.inner.as_ptr() as _, self.ptr.as_ptr() as _, archived.inner.len() as c_int, cap as c_int, - ); + ) + }; + + if size != cap { + return Err("BitPack is malformed!".to_string()); } // SAFETY: the call to self.ensure_capacity guarantees the pointer has the necessary size