From e64de8a3f1ade59bd6ca442c9ac3a85e5a42ac38 Mon Sep 17 00:00:00 2001 From: Xiaoyu Lu Date: Wed, 16 Dec 2020 13:51:56 +0800 Subject: [PATCH] add efi-str crate Signed-off-by: Xiaoyu Lu --- Cargo.toml | 1 + efi-str/.gitignore | 5 ++ efi-str/Cargo.toml | 15 ++++ efi-str/src/encoder.rs | 180 +++++++++++++++++++++++++++++++++++++++ efi-str/src/lib.rs | 32 +++++++ efi-str/src/macros.rs | 11 +++ efi-str/src/os_str.rs | 168 ++++++++++++++++++++++++++++++++++++ efi-str/src/os_string.rs | 102 ++++++++++++++++++++++ 8 files changed, 514 insertions(+) create mode 100644 efi-str/.gitignore create mode 100644 efi-str/Cargo.toml create mode 100644 efi-str/src/encoder.rs create mode 100644 efi-str/src/lib.rs create mode 100644 efi-str/src/macros.rs create mode 100644 efi-str/src/os_str.rs create mode 100644 efi-str/src/os_string.rs diff --git a/Cargo.toml b/Cargo.toml index d5593824..574cfa29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ panic = "abort" # disable stack unwinding on panic cpuio = "0.3.0" spin = "0.4.9" r-efi = {path = "r-efi"} +efi-str = { path = "efi-str" } [dependencies.lazy_static] version = "1.0" diff --git a/efi-str/.gitignore b/efi-str/.gitignore new file mode 100644 index 00000000..3362db75 --- /dev/null +++ b/efi-str/.gitignore @@ -0,0 +1,5 @@ +# Cargo places all built files here. +/target/ + +# Libraries should not commit their .lock files. 
+Cargo.lock diff --git a/efi-str/Cargo.toml b/efi-str/Cargo.toml new file mode 100644 index 00000000..0a251266 --- /dev/null +++ b/efi-str/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "efi-str" +version = "0.1.0" +authors = ["Xiaoyu Lu "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +# no features by default +#default = ["string"] +default = [] + +string = [] + +[dependencies] diff --git a/efi-str/src/encoder.rs b/efi-str/src/encoder.rs new file mode 100644 index 00000000..fd1852ed --- /dev/null +++ b/efi-str/src/encoder.rs @@ -0,0 +1,180 @@ +//! This library is for encoding UCS-2 in uefi +//! +//! Copyright (c) 2020, Intel Corporation. All rights reserved.
// SPDX-License-Identifier: BSD-2-Clause-Patent

/// Errors reported by the UCS-2 encoder and decoder.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// A destination buffer is too small for the converted output.
    BufferOverFlow,
    /// The input contains a character that needs a 4-byte UTF-8 sequence
    /// (code point above U+FFFF) and so does not fit in one UCS-2 code unit.
    MultiByte,
    /// The input is not well-formed UTF-8.
    NotUtf8String,
    /// Not produced by `encode_fnc`; presumably reserved for unreachable states.
    Impossible,
}

/// Encode a UTF-8 `str` to UCS-2, handing every result to a callback.
///
/// `fnc` is called once per character of `input`, in order:
///
/// * `Ok(unit)` carries the UCS-2 code unit of a character in the range
///   U+0000..=U+FFFF;
/// * `Err(Error::MultiByte)` is passed for a character above U+FFFF.
///
/// If the callback returns `Err`, encoding stops and that error is returned.
/// If it returns `Ok`, encoding continues with the next character, so a
/// callback may choose to skip characters that UCS-2 cannot represent.
///
/// On success the number of input bytes consumed is returned, which is
/// `input.len()`.
///
/// Because `input` is a `&str` it is always valid UTF-8, so this function
/// never produces `Error::NotUtf8String` itself.
pub fn encode_fnc<F>(input: &str, mut fnc: F) -> Result<usize, Error>
where
    F: FnMut(Result<u16, Error>) -> Result<usize, Error>,
{
    let mut consumed = 0;

    for ch in input.chars() {
        let code = ch as u32;
        let unit = if code <= 0xFFFF {
            Ok(code as u16)
        } else {
            Err(Error::MultiByte)
        };
        fnc(unit)?;
        // Always advance, even after reporting `MultiByte`, so a callback
        // that tolerates the error cannot be called forever on the same
        // character.
        consumed += ch.len_utf8();
    }

    Ok(consumed)
}

// Encode UTF-8 str to an u16 array(UCS-2 encode string).
+/// +/// # Example +/// +/// ```rust +/// use efi_str::encoder::*; +/// let mut buffer = [0u16; 1]; +/// assert_eq!(encode("中", &mut buffer).is_ok(), true); +/// ``` +pub fn encode(input: &str, buffer: &mut [u16]) -> Result { + let mut i = 0; + let buffer_len = buffer.len(); + encode_fnc(input, |ret| { + match ret { + Ok(ch) => { + if i > buffer_len + 1 { + return Err(Error::BufferOverFlow); + } + buffer[i] = ch; + i += 1; + } + Err(err) => { return Err(err);} + } + Ok(i) + })?; + Ok(i) +} + +/// Decode an u16 array(UCS2 encode string) to an u8 array(UTF8 encode string) . +/// +/// # Example +/// +/// ```rust +/// use efi_str::encoder::*; +/// let mut u8_buffer = [0u8; 6]; +/// let u16_str = [0x4e2du16, 0x56fdu16]; +/// let len = decode(&u16_str, &mut u8_buffer).unwrap_or(0); +/// assert_eq!(len, 6); +/// assert_eq!(core::str::from_utf8(&u8_buffer[..]), Ok("中国")); +/// ``` +pub fn decode(input: &[u16], buffer: &mut [u8]) -> Result { + let buffer_size = buffer.len(); + let mut index = 0; + + for &ch in input.iter() { + match ch { + 0x0000..=0x007F => { + // 1 byte + if index + 1 > buffer_size { + return Err(Error::BufferOverFlow); + } + buffer[index] = ch as u8; + index += 1; + } + 0x0080..=0x07FF => { + // 2 byte + if index + 2 > buffer_size { + return Err(Error::BufferOverFlow); + } + let ch0_6 = ((ch << 10) >> 10) as u8; + let ch6_12 = ((ch << 5) >> 11) as u8; + //let ch12_16 = ((ch << 0) >> 12) as u8; + buffer[index] = 0b1100_0000 + ch6_12 as u8; + buffer[index + 1] = 0b1000_0000 + ch0_6 as u8; + + } + 0x800..=0xFFFF => { + // 3 byte + if index + 3 > buffer_size { + return Err(Error::BufferOverFlow); + } + let ch0_6 = ((ch << 10) >> (10+0)) as u8; + let ch6_12 = ((ch << 4) >> (4+6)) as u8; + let ch12_16 = ((ch << 0) >> 12) as u8; + buffer[index] = 0b1110_0000 + ch12_16; + buffer[index + 1] = 0b1000_0000 + ch6_12; + buffer[index + 2] = 0b1000_0000 + ch0_6; + index += 3; + } + } + } + + Ok(index) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn 
test_encode() { + let input = "中国"; + let mut buffer = [0u16; 2]; + assert_eq!(encode(input, &mut buffer).is_ok(), true); + assert_eq!(buffer[0], 0x4e2du16); + assert_eq!(buffer[1], 0x56fdu16); + } + + #[test] + fn test_decode() { + let mut u8_buffer = [0u8; 6]; + let u16_str = [0x4e2du16, 0x56fdu16]; // 中国 + let len = decode(&u16_str, &mut u8_buffer).unwrap_or(0); + assert_eq!(len, 6); + assert_eq!(core::str::from_utf8(&u8_buffer[..]), Ok("中国")); + } +} diff --git a/efi-str/src/lib.rs b/efi-str/src/lib.rs new file mode 100644 index 00000000..29762668 --- /dev/null +++ b/efi-str/src/lib.rs @@ -0,0 +1,32 @@ +#![cfg_attr(not(test), no_std)] + +pub mod encoder; + +#[macro_use] +mod macros; + +mod os_str; +pub use os_str::OsStr; + +#[cfg(feature="string")] +mod os_string; +#[cfg(feature="string")] +pub use os_string::OsString; + +#[cfg(test)] +mod tests { + use crate::OsStr; + #[test] + fn test_os_str() { + let path = [0x4e2du16, 0x56fdu16, 0x0u16]; + let path_osstr = OsStr::from_char16_with_nul(&path[..] as *const [u16] as *const u16); + println!("path is {}", path_osstr); + let path_osstr_nul = OsStr::from_u16_slice(&path[..]); + let path_osstr = OsStr::from_u16_slice_with_nul(&path[..]); + println!("path is {}", path_osstr); + assert_eq!(path_osstr, "中国"); + assert_eq!(path_osstr_nul, "中国\0"); + assert_ne!(path_osstr, path_osstr_nul); + assert_ne!("中1", path_osstr); + } +} diff --git a/efi-str/src/macros.rs b/efi-str/src/macros.rs new file mode 100644 index 00000000..971b065e --- /dev/null +++ b/efi-str/src/macros.rs @@ -0,0 +1,11 @@ +#[cfg(not(test))] +#[macro_export] +macro_rules! 
ucs2_str { + ($source_str:expr) => ({ + let mut ucs2_str = [0u16; $source_str.len() + 1]; + let result = $crate::encoder::encode($source_str, &mut ucs2_str); + result.unwrap(); + ucs2_str + } + ) +} diff --git a/efi-str/src/os_str.rs b/efi-str/src/os_str.rs new file mode 100644 index 00000000..4f11acb6 --- /dev/null +++ b/efi-str/src/os_str.rs @@ -0,0 +1,168 @@ +use core::fmt; +use core::slice::Iter; + +pub struct OsStr([u16]); + +#[cfg(feature = "string")] +use crate::os_string::OsString; + +impl OsStr { + pub fn new + ?Sized>(s: &S) -> &OsStr { + s.as_ref() + } + + pub fn as_u16_slice(&self) -> & [u16] { + &self.0[..] + } + + #[cfg(feature = "string")] + pub fn to_os_string(&self) -> OsString { + let mut s = OsString::new(); + s.push(self); + s + } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn from_u16_slice_with_nul(s: &[u16]) -> &OsStr {unsafe { + let len = OsStr::char16_with_null_len(s as *const [u16] as *const u16); + &*(&s[0..len] as *const [u16] as *const OsStr) + }} + + pub fn from_u16_slice(s: &[u16]) -> &OsStr { + unsafe { &*(s as *const [u16] as *const OsStr) } + } + + pub fn from_u16_slice_mut(s: &mut [u16]) -> &mut OsStr { + unsafe { &mut *(s as *mut [u16] as *mut OsStr) } + } + + unsafe fn char16_with_null_len(s: *const u16) -> usize { + let mut len = 0; + loop { + let v = (( *(((s as u64) + (2 * len as u64)) as *const u16) ) & 0xffu16) as u32; + + if v == 0 { + break; + } + len += 1; + } + len + } + + pub fn from_char16_with_nul(s: *const u16) -> &'static Self { + let s = unsafe{core::slice::from_raw_parts(s, Self::char16_with_null_len(s))}; + OsStr::from_u16_slice(s) + } + + pub fn from_char16_with_nul_mut(s: *mut u16) ->&'static mut Self { + let s = unsafe{core::slice::from_raw_parts_mut(s as *mut u16, Self::char16_with_null_len(s))}; + OsStr::from_u16_slice_mut(s) + } + + pub fn iter(&self) -> Iter<'_, u16> { + self.0.iter() + } + + fn format_fn(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let len = self.0.len(); + const 
BUFFER_LEN: usize = 42; + + let mut buffer = [0u8; BUFFER_LEN * 3 + 1]; + + let mut end_index; + let mut res: core::result::Result<(), core::fmt::Error> = Ok(()); + for i in 0..((len + BUFFER_LEN) / BUFFER_LEN) { + if (i + 1) * BUFFER_LEN >= len { + end_index = len; + } else { + end_index = (i + 1) * BUFFER_LEN; + } + let ret = crate::encoder::decode(&(self.0[i * BUFFER_LEN..end_index]), &mut buffer); + if let Ok(length) = ret { + res = write!( + f, + "{}", + core::str::from_utf8(&buffer[..length]).expect("error encoder") + ); + res? + } + } + res + } +} + +impl fmt::Debug for &OsStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.format_fn(f) + } +} + +impl fmt::Debug for &mut OsStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.format_fn(f) + } +} + +impl fmt::Display for &OsStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.format_fn(f) + } +} + +impl fmt::Display for &mut OsStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.format_fn(f) + } +} + +impl AsRef for OsStr { + fn as_ref(&self) -> &OsStr { + self + } +} + + +impl core::cmp::PartialEq for OsStr { + fn eq(&self, other: &str) -> bool { + if self.0.len() == other.chars().count() { + let mut i = 0; + for c in other.chars() { + if c as u32 != self.0[i] as u32 { + return false; + } + i += 1; + } + return true; + } + return false; + } +} + + +impl core::cmp::PartialEq for str{ + fn eq(&self, other: &OsStr) -> bool { + if other.0.len() == self.chars().count() { + let mut i = 0; + for c in self.chars() { + if c as u32 != other.0[i] as u32 { + return false; + } + i += 1; + } + return true; + } + return false; + } +} + +impl core::cmp::PartialEq for OsStr{ + fn eq(&self, other: &OsStr) -> bool { + if other.0.len() == self.0.len() { + return self.0 == other.0; + } + return false; + } +} \ No newline at end of file diff --git a/efi-str/src/os_string.rs b/efi-str/src/os_string.rs new file mode 100644 index 00000000..a715371f 
--- /dev/null +++ b/efi-str/src/os_string.rs @@ -0,0 +1,102 @@ +extern crate alloc; +pub use alloc::vec::Vec; +use core::fmt; + +use crate::OsStr; + +pub struct OsString(Vec); + +impl OsString { + pub fn new() -> OsString { + OsString(Vec::new()) + } + + pub fn as_mut_ptr(&mut self) -> *mut u16 { + self.0.as_mut_ptr() + } + + pub fn as_ptr(&self) -> *const u16 { + self.0.as_ptr() + } + + pub fn push>(&mut self, s: T) { + for v in s.as_ref().iter() { + self.0.push(*v) + } + } +} + +impl Default for OsString { + fn default() -> OsString { + OsString::new() + } +} + +impl From<&str> for OsString { + + // Get OsString object from &str + // if error occur, immediately return. + fn from(s: &str) -> OsString { + let mut res = OsString::new(); + + let add_char = |ret| { + match ret { + Ok(ch) => { + res.0.push(ch); + Ok(res.0.len()) + } + Err(err) => { + Err(err) + } + } + }; + + crate::encoder::encode_fnc(s, add_char).unwrap_or(0); + res.0.push(0u16); + res + } +} + +impl From<&OsStr> for OsString { + + // Get OsString object from &str + // if error occur, immediately return. + fn from(s: &OsStr) -> OsString { + let mut res = OsString::new(); + res.push(s); + res + } +} + +impl fmt::Display for OsString { + // TODO: directly output u16 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let len = self.0.len(); + let mut vec : Vec = Vec::new(); + vec.resize(len * 3 + 1, 0u8); + let _res = crate::encoder::decode(&(self.0), vec.as_mut_slice()); + write!(f, "{}", core::str::from_utf8(&vec[..]).unwrap()) + } +} + +impl core::ops::Deref for OsString { + type Target = OsStr; + + fn deref(&self) -> &OsStr { + &self[..] + } +} + +impl core::ops::Index for OsString { + type Output = OsStr; + fn index(&self, _index: core::ops::RangeFull) -> &OsStr { + OsStr::from_u16_slice(&(self.0[..])) + } +} + + +impl AsRef for OsString { + fn as_ref(&self) -> &OsStr { + self + } +}