diff --git a/src/encode_ascii.rs b/src/encode_ascii.rs
index ea4f86e..0ef5b6e 100644
--- a/src/encode_ascii.rs
+++ b/src/encode_ascii.rs
@@ -1,33 +1,23 @@
 use std::io::{Read, Write};
-use std::mem::{transmute, MaybeUninit};
 
 use crate::encode_ascii_mapping::ASCII_TO_QWORD;
 
-fn encode_buffer_ascii(
-    input: &[u8],
-    output_buf: &mut [MaybeUninit<u8>],
-    mut cur: usize,
-) -> Result<usize, std::io::Error> {
-    // SAFETY: `output_buf[cur]`
-    // Accessing the element `cur` of `output_buf` is safe because
-    // - `cur <= 18 * input_buf.len()` because we increment `cur` by at most 18 for each byte read
-    // - `18 * input_buf.len() <= output_buf` as check by the `assert!` below
-    assert!(output_buf.len() >= input.len() * 18 + cur);
+fn encode_buffer_ascii(input: &[u8], output_buf: &mut Vec<u8>) -> Result<(), std::io::Error> {
+    let mut cur = output_buf.len();
+    output_buf.reserve(input.len() * 18 + cur);
     for c in input {
         let (bytes, len) = ASCII_TO_QWORD[*c as usize];
         if len == 0 {
         } else if len <= 8 {
             if (*c == b'\t' || *c == b'\n' || *c == b'\r')
                 && cur > 0
-                // SAFETY: transmuting `output_buf[cur - 1]` from `MaybeInit` to `u8` is safe
-                // since `cur` starts at 0 and we always write an element before increment `cur`
-                // and see `output_buf[cur]` above
-                && unsafe { transmute::<MaybeUninit<u8>, u8>(*output_buf.get_unchecked_mut(cur - 1)) } == b' '
+                // SAFETY: accessing `output_buf[cur - 1]` is safe because we only increase cur
+                // after writing to `output_buf` and `cur > 0` is checked on the previous line
+                && unsafe { *(output_buf.as_ptr().add(cur - 1)) } == b' '
             {
                 cur -= 1;
             }
-            // SAFETY: each byte of the chunk might advance `cur` by up to 18; the `assert!` at
-            // the top of the function ensures we can write up to 18 bytes for each input byte
+            // SAFETY: we reserved 18 bytes in output_buf for each byte in input
             unsafe {
                 let dst = output_buf.as_mut_ptr().add(cur) as *mut u64;
                 dst.write_unaligned(bytes);
@@ -35,19 +25,16 @@ fn encode_buffer_ascii(
         } else {
             // handle only ASCII character encoded as more than 7 elements + space
             assert_eq!(*c, b'%');
-            // SAFETY: source and destination derived from references, slices are of the
-            // correct length (replace with `MaybeUninit::copy_from_slice()` once stabilized)
-            // and see `output_buf[cur]` above
-            unsafe {
-                transmute::<&mut [MaybeUninit<u8>], &mut [u8]>(
-                    output_buf.get_unchecked_mut(cur..cur + 18),
-                )
-            }
-            .copy_from_slice(b"----- -..-. ----- ");
+            // SAFETY: we reserved 18 bytes in output_buf for each byte in input
+            unsafe { std::slice::from_raw_parts_mut(output_buf.as_mut_ptr().add(cur), 18) }
+                .copy_from_slice(b"----- -..-. ----- ");
         }
         cur += len;
     }
-    Ok(cur)
+    // SAFETY: the first `cur` bytes of `output_buf` are initialized because we only increase cur
+    // after writing to `output_buf`
+    unsafe { output_buf.set_len(cur) };
+    Ok(())
 }
 
 /// Encode ASCII characters from a [byte slice][slice] into a [String].
@@ -69,15 +56,10 @@ fn encode_buffer_ascii(
 /// assert_eq!(morse, "-- --- .-. ... . / -.-. --- -.. .");
 /// ```
 pub fn encode_string_ascii(input: &[u8]) -> String {
-    let mut output_buf = vec![MaybeUninit::uninit(); input.len() * 18];
-    let cur = encode_buffer_ascii(input, &mut output_buf, 0).unwrap();
-    output_buf.truncate(cur);
-    // SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeInit` to
-    // `u8` is safe since `cur` starts at 0 and we always write an element before increment
-    // `cur`
-    let mut output_buf: Vec<u8> = unsafe { transmute(output_buf) };
+    let mut output_buf = Vec::new();
+    encode_buffer_ascii(input, &mut output_buf).unwrap();
     if output_buf.last() == Some(&b' ') {
-        output_buf.pop().unwrap();
+        output_buf.pop();
     }
     String::from_utf8(output_buf).unwrap()
 }
@@ -108,36 +90,22 @@ pub fn encode_string_ascii(input: &[u8]) -> String {
 /// ```
 pub fn encode_stream_ascii(input: &mut impl Read, output: &mut impl Write) {
     let mut input_buf = vec![0u8; 1 << 15];
-    let mut output_buf = vec![MaybeUninit::uninit(); 19 << 15];
-    let mut cur = 0;
+    let mut output_buf = Vec::new();
     loop {
         let bytes_read = input.read(&mut input_buf).unwrap();
         if bytes_read == 0 {
             break;
         }
-        cur = encode_buffer_ascii(&input_buf[..bytes_read], &mut output_buf, cur).unwrap();
-        if cur == 0 {
-            continue;
-        }
-        // SAFETY: transmuting `output_buf[cur - 1]` from `MaybeInit` to `u8` is safe
-        // since `cur` starts at 0 and we always write an element before increment `cur`
-        // and see `output_buf[cur]` above
-        if unsafe { transmute::<MaybeUninit<u8>, u8>(*output_buf.get_unchecked(cur - 1)) } == b' ' {
-            cur -= 1;
-            // SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeInit` to
-            // `u8` is safe since `cur` starts at 0 and we always write an element before increment
-            // `cur`
-            let init: &[u8] = unsafe { transmute(&output_buf[..cur]) };
-            output.write_all(init).unwrap();
-            output_buf[0].write(b' ');
-            cur = 1;
+        encode_buffer_ascii(&input_buf[..bytes_read], &mut output_buf).unwrap();
+        if output_buf.is_empty() {
+        } else if output_buf.last() == Some(&b' ') {
+            output_buf.pop();
+            output.write_all(&output_buf).unwrap();
+            output_buf.clear();
+            output_buf.push(b' ');
         } else {
-            // SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeInit` to
-            // `u8` is safe since `cur` starts at 0 and we always write an element before increment
-            // `cur`
-            let init: &[u8] = unsafe { transmute(&output_buf[..cur]) };
-            output.write_all(init).unwrap();
-            cur = 0;
+            output.write_all(&output_buf).unwrap();
+            output_buf.clear();
         }
     }
 }