Skip to content

Commit

Permalink
Simplify encode_buffer_ascii interface using Vec
Browse files Browse the repository at this point in the history
  • Loading branch information
qsantos committed Jun 19, 2024
1 parent 426b518 commit c4de554
Showing 1 changed file with 27 additions and 59 deletions.
86 changes: 27 additions & 59 deletions src/encode_ascii.rs
Original file line number Diff line number Diff line change
@@ -1,53 +1,40 @@
use std::io::{Read, Write};
use std::mem::{transmute, MaybeUninit};

use crate::encode_ascii_mapping::ASCII_TO_QWORD;

fn encode_buffer_ascii(
input: &[u8],
output_buf: &mut [MaybeUninit<u8>],
mut cur: usize,
) -> Result<usize, std::io::Error> {
// SAFETY: `output_buf[cur]`
// Accessing the element `cur` of `output_buf` is safe because
// - `cur <= 18 * input.len()` because we increment `cur` by at most 18 for each byte read
// - `18 * input.len() <= output_buf.len()` as checked by the `assert!` below
assert!(output_buf.len() >= input.len() * 18 + cur);
fn encode_buffer_ascii(input: &[u8], output_buf: &mut Vec<u8>) -> Result<(), std::io::Error> {
let mut cur = output_buf.len();
output_buf.reserve(input.len() * 18 + cur);
for c in input {
let (bytes, len) = ASCII_TO_QWORD[*c as usize];
if len == 0 {
} else if len <= 8 {
if (*c == b'\t' || *c == b'\n' || *c == b'\r')
&& cur > 0
// SAFETY: transmuting `output_buf[cur - 1]` from `MaybeUninit<u8>` to `u8` is safe
// since `cur` starts at 0 and we always write an element before incrementing `cur`
// and see `output_buf[cur]` above
&& unsafe { transmute::<MaybeUninit<u8>, u8>(*output_buf.get_unchecked_mut(cur - 1)) } == b' '
// SAFETY: accessing `output_buf[cur - 1]` is safe because we only increase cur
// after writing to `output_buf` and `cur > 0` is checked on the previous line
&& unsafe { *(output_buf.as_ptr().add(cur - 1)) } == b' '
{
cur -= 1;
}
// SAFETY: each byte of the chunk might advance `cur` by up to 18; the `assert!` at
// the top of the function ensures we can write up to 18 bytes for each input byte
// SAFETY: we reserved 18 bytes in output_buf for each byte in input
unsafe {
let dst = output_buf.as_mut_ptr().add(cur) as *mut u64;
dst.write_unaligned(bytes);
}
} else {
// handle the only ASCII character encoded as more than 7 elements + space
assert_eq!(*c, b'%');
// SAFETY: source and destination derived from references, slices are of the
// correct length (replace with `MaybeUninit::copy_from_slice()` once stabilized)
// and see `output_buf[cur]` above
unsafe {
transmute::<&mut [MaybeUninit<u8>], &mut [u8]>(
output_buf.get_unchecked_mut(cur..cur + 18),
)
}
.copy_from_slice(b"----- -..-. ----- ");
// SAFETY: we reserved 18 bytes in output_buf for each byte in input
unsafe { std::slice::from_raw_parts_mut(output_buf.as_mut_ptr().add(cur), 18) }
.copy_from_slice(b"----- -..-. ----- ");
}
cur += len;
}
Ok(cur)
// SAFETY: the first `cur` bytes of `output_buf` are initialized because we only increase cur
// after writing to `output_buf`
unsafe { output_buf.set_len(cur) };
Ok(())
}

/// Encode ASCII characters from a [byte slice][slice] into a [String].
Expand All @@ -69,15 +56,10 @@ fn encode_buffer_ascii(
/// assert_eq!(morse, "-- --- .-. ... . / -.-. --- -.. .");
/// ```
pub fn encode_string_ascii(input: &[u8]) -> String {
let mut output_buf = vec![MaybeUninit::uninit(); input.len() * 18];
let cur = encode_buffer_ascii(input, &mut output_buf, 0).unwrap();
output_buf.truncate(cur);
// SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeUninit<u8>` to
// `u8` is safe since `cur` starts at 0 and we always write an element before incrementing
// `cur`
let mut output_buf: Vec<u8> = unsafe { transmute(output_buf) };
let mut output_buf = Vec::new();
encode_buffer_ascii(input, &mut output_buf).unwrap();
if output_buf.last() == Some(&b' ') {
output_buf.pop().unwrap();
output_buf.pop();
}
String::from_utf8(output_buf).unwrap()
}
Expand Down Expand Up @@ -108,36 +90,22 @@ pub fn encode_string_ascii(input: &[u8]) -> String {
/// ```
pub fn encode_stream_ascii(input: &mut impl Read, output: &mut impl Write) {
let mut input_buf = vec![0u8; 1 << 15];
let mut output_buf = vec![MaybeUninit::uninit(); 19 << 15];
let mut cur = 0;
let mut output_buf = Vec::new();
loop {
let bytes_read = input.read(&mut input_buf).unwrap();
if bytes_read == 0 {
break;
}
cur = encode_buffer_ascii(&input_buf[..bytes_read], &mut output_buf, cur).unwrap();
if cur == 0 {
continue;
}
// SAFETY: transmuting `output_buf[cur - 1]` from `MaybeUninit<u8>` to `u8` is safe
// since `cur` starts at 0 and we always write an element before incrementing `cur`
// and see `output_buf[cur]` above
if unsafe { transmute::<MaybeUninit<u8>, u8>(*output_buf.get_unchecked(cur - 1)) } == b' ' {
cur -= 1;
// SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeUninit<u8>` to
// `u8` is safe since `cur` starts at 0 and we always write an element before incrementing
// `cur`
let init: &[u8] = unsafe { transmute(&output_buf[..cur]) };
output.write_all(init).unwrap();
output_buf[0].write(b' ');
cur = 1;
encode_buffer_ascii(&input_buf[..bytes_read], &mut output_buf).unwrap();
if output_buf.is_empty() {
} else if output_buf.last() == Some(&b' ') {
output_buf.pop();
output.write_all(&output_buf).unwrap();
output_buf.clear();
output_buf.push(b' ');
} else {
// SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeUninit<u8>` to
// `u8` is safe since `cur` starts at 0 and we always write an element before incrementing
// `cur`
let init: &[u8] = unsafe { transmute(&output_buf[..cur]) };
output.write_all(init).unwrap();
cur = 0;
output.write_all(&output_buf).unwrap();
output_buf.clear();
}
}
}
Expand Down

0 comments on commit c4de554

Please sign in to comment.