Skip to content

Commit

Permalink
Merge pull request #38 from kivikakk/opt
Browse files Browse the repository at this point in the history
String->Vec<u8> optimisations
  • Loading branch information
Ashe Connor authored Sep 13, 2017
2 parents f92f400 + b06cc6c commit 4a3a6b8
Show file tree
Hide file tree
Showing 16 changed files with 453 additions and 437 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ entities = "1.0.0"
unicode_categories = "0.1.1"
clap = { version = "2.22.2", optional = true }
clippy = { version = "~0.0.123", optional = true }
twoway = "0.1.3"

[features]
default = ["clap"]
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F)
iter_nodes(root, &|node| {
match &mut node.data.borrow_mut().value {
&mut NodeValue::Text(ref mut text) => {
*text = text.replace("my", "your");
let orig = std::mem::replace(text, vec![]);
*text = String::from_utf8(orig).unwrap().replace("my", "your").as_bytes().to_vec();
}
_ => (),
}
Expand Down
40 changes: 22 additions & 18 deletions src/arena_tree.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*!
Included from https://github.com/SimonSapin/rust-forest/blob/5783c8be8680b84c0438638bdee07d4e4aca40ac/arena-tree/lib.rs.
/*!
Included from https://github.com/SimonSapin/rust-forest/blob/
5783c8be8680b84c0438638bdee07d4e4aca40ac/arena-tree/lib.rs.
MIT license (per Cargo.toml).
A DOM-like tree data structure based on `&Node` references.
Expand Down Expand Up @@ -282,7 +283,7 @@ impl<'a, T> Iterator for Descendants<'a, T> {
match self.0.next() {
Some(NodeEdge::Start(node)) => return Some(node),
Some(NodeEdge::End(_)) => {}
None => return None
None => return None,
}
}
}
Expand Down Expand Up @@ -353,12 +354,14 @@ macro_rules! traverse_iterator {
}

traverse_iterator! {
#[doc = "An iterator of the start and end edges of a given node and its descendants, in tree order."]
#[doc = "An iterator of the start and end edges of a given
node and its descendants, in tree order."]
Traverse: first_child, next_sibling
}

traverse_iterator! {
#[doc = "An iterator of the start and end edges of a given node and its descendants, in reverse tree order."]
#[doc = "An iterator of the start and end edges of a given
node and its descendants, in reverse tree order."]
ReverseTraverse: last_child, previous_sibling
}

Expand All @@ -384,26 +387,27 @@ fn it_works() {
arena.alloc(Node::new((new_counter, DropTracker(&drop_counter))))
};

let a = new(); // 1
a.append(new()); // 2
a.append(new()); // 3
a.prepend(new()); // 4
let b = new(); // 5
let a = new(); // 1
a.append(new()); // 2
a.append(new()); // 3
a.prepend(new()); // 4
let b = new(); // 5
b.append(a);
a.insert_before(new()); // 6
a.insert_before(new()); // 7
a.insert_after(new()); // 8
a.insert_after(new()); // 9
let c = new(); // 10
a.insert_before(new()); // 6
a.insert_before(new()); // 7
a.insert_after(new()); // 8
a.insert_after(new()); // 9
let c = new(); // 10
b.append(c);

assert_eq!(drop_counter.get(), 0);
c.previous_sibling.get().unwrap().detach();
assert_eq!(drop_counter.get(), 0);

assert_eq!(b.descendants().map(|node| node.data.0).collect::<Vec<_>>(), [
5, 6, 7, 1, 4, 2, 3, 9, 10
]);
assert_eq!(
b.descendants().map(|node| node.data.0).collect::<Vec<_>>(),
[5, 6, 7, 1, 4, 2, 3, 9, 10]
);
}

assert_eq!(drop_counter.get(), 10);
Expand Down
53 changes: 27 additions & 26 deletions src/cm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,14 +365,14 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
}

if ncb.info.is_empty() &&
(ncb.literal.len() > 2 && !isspace(ncb.literal.as_bytes()[0]) &&
!(isspace(ncb.literal.as_bytes()[ncb.literal.len() - 1]) &&
isspace(ncb.literal.as_bytes()[ncb.literal.len() - 2]))) &&
(ncb.literal.len() > 2 && !isspace(ncb.literal[0]) &&
!(isspace(ncb.literal[ncb.literal.len() - 1]) &&
isspace(ncb.literal[ncb.literal.len() - 2]))) &&
!first_in_list_item
{
write!(self, " ").unwrap();
write!(self.prefix, " ").unwrap();
write!(self, "{}", ncb.literal).unwrap();
self.write_all(&ncb.literal).unwrap();
let new_len = self.prefix.len() - 4;
self.prefix.truncate(new_len);
} else {
Expand All @@ -381,10 +381,11 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
write!(self, "`").unwrap();
}
if !ncb.info.is_empty() {
write!(self, " {}", ncb.info).unwrap();
write!(self, " ").unwrap();
self.write_all(&ncb.info).unwrap();
}
self.cr();
write!(self, "{}", ncb.literal).unwrap();
self.write_all(&ncb.literal).unwrap();
self.cr();
for _ in 0..numticks {
write!(self, "`").unwrap();
Expand All @@ -396,7 +397,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
NodeValue::HtmlBlock(ref nhb) => {
if entering {
self.blankline();
self.write_all(nhb.literal.as_bytes()).unwrap();
self.write_all(&nhb.literal).unwrap();
self.blankline();
}
}
Expand All @@ -414,7 +415,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
}
NodeValue::Text(ref literal) => {
if entering {
self.output(literal.as_bytes(), allow_wrap, Escaping::Normal);
self.output(literal, allow_wrap, Escaping::Normal);
}
}
NodeValue::LineBreak => {
Expand All @@ -440,11 +441,11 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
for _ in 0..numticks {
write!(self, "`").unwrap();
}
if literal.is_empty() || literal.as_bytes()[0] == b'`' {
if literal.is_empty() || literal[0] == b'`' {
write!(self, " ").unwrap();
}
self.output(literal.as_bytes(), allow_wrap, Escaping::Literal);
if literal.is_empty() || literal.as_bytes()[literal.len() - 1] == b'`' {
self.output(literal, allow_wrap, Escaping::Literal);
if literal.is_empty() || literal[literal.len() - 1] == b'`' {
write!(self, " ").unwrap();
}
for _ in 0..numticks {
Expand All @@ -454,7 +455,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
}
NodeValue::HtmlInline(ref literal) => {
if entering {
self.write_all(literal.as_bytes()).unwrap();
self.write_all(literal).unwrap();
}
}
NodeValue::Strong => {
Expand Down Expand Up @@ -501,10 +502,10 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
if is_autolink(node, nl) {
if entering {
write!(self, "<").unwrap();
if &nl.url[..7] == "mailto:" {
self.write_all(nl.url[7..].as_bytes()).unwrap();
if &nl.url[..7] == b"mailto:" {
self.write_all(&nl.url[7..]).unwrap();
} else {
self.write_all(nl.url.as_bytes()).unwrap();
self.write_all(&nl.url).unwrap();
}
write!(self, ">").unwrap();
return false;
Expand All @@ -513,10 +514,10 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
write!(self, "[").unwrap();
} else {
write!(self, "](").unwrap();
self.output(nl.url.as_bytes(), false, Escaping::URL);
self.output(&nl.url, false, Escaping::URL);
if !nl.title.is_empty() {
write!(self, " \"").unwrap();
self.output(nl.title.as_bytes(), false, Escaping::Title);
self.output(&nl.title, false, Escaping::Title);
write!(self, "\"").unwrap();
}
write!(self, ")").unwrap();
Expand All @@ -527,10 +528,10 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
write!(self, "![").unwrap();
} else {
write!(self, "](").unwrap();
self.output(nl.url.as_bytes(), false, Escaping::URL);
self.output(&nl.url, false, Escaping::URL);
if !nl.title.is_empty() {
self.output(&[b' ', b'"'], allow_wrap, Escaping::Literal);
self.output(nl.title.as_bytes(), false, Escaping::Title);
self.output(&nl.title, false, Escaping::Title);
write!(self, "\"").unwrap();
}
write!(self, ")").unwrap();
Expand Down Expand Up @@ -592,10 +593,10 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
}
}

fn longest_backtick_sequence(literal: &str) -> usize {
fn longest_backtick_sequence(literal: &[u8]) -> usize {
let mut longest = 0;
let mut current = 0;
for c in literal.as_bytes() {
for c in literal {
if *c == b'`' {
current += 1;
} else {
Expand All @@ -611,10 +612,10 @@ fn longest_backtick_sequence(literal: &str) -> usize {
longest
}

fn shortest_unused_sequence(literal: &str, f: u8) -> usize {
fn shortest_unused_sequence(literal: &[u8], f: u8) -> usize {
let mut used = 1;
let mut current = 0;
for c in literal.as_bytes() {
for c in literal {
if *c == f {
current += 1;
} else {
Expand Down Expand Up @@ -656,12 +657,12 @@ fn is_autolink<'a>(node: &'a AstNode<'a>, nl: &NodeLink) -> bool {
}
};

let mut real_url: &str = &nl.url;
if &real_url[..7] == "mailto:" {
let mut real_url: &[u8] = &nl.url;
if &real_url[..7] == b"mailto:" {
real_url = &real_url[7..];
}

real_url == link_text
real_url == &*link_text
}

fn table_escape<'a>(node: &'a AstNode<'a>, c: u8) -> bool {
Expand Down
51 changes: 26 additions & 25 deletions src/entity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use ctype::isdigit;
use entities::ENTITIES;
use std::char;
use std::cmp::min;
use std::str;

pub const ENTITY_MIN_LENGTH: usize = 2;
pub const ENTITY_MAX_LENGTH: usize = 31;
Expand All @@ -11,86 +12,86 @@ fn isxdigit(ch: &u8) -> bool {
(*ch >= b'0' && *ch <= b'9') || (*ch >= b'a' && *ch <= b'f') || (*ch >= b'A' && *ch <= b'F')
}

pub fn unescape(text: &str) -> Option<(String, usize)> {
if text.len() >= 3 && text.as_bytes()[0] == b'#' {
pub fn unescape(text: &[u8]) -> Option<(Vec<u8>, usize)> {
if text.len() >= 3 && text[0] == b'#' {
let mut codepoint: u32 = 0;
let mut i = 0;

let num_digits = if isdigit(text.as_bytes()[1]) {
let num_digits = if isdigit(text[1]) {
i = 1;
while i < text.len() && isdigit(text.as_bytes()[i]) {
codepoint = (codepoint * 10) + (text.as_bytes()[i] as u32 - '0' as u32);
codepoint = min(codepoint, 0x110000);
while i < text.len() && isdigit(text[i]) {
codepoint = (codepoint * 10) + (text[i] as u32 - '0' as u32);
codepoint = min(codepoint, 0x11_0000);
i += 1;
}
i - 1
} else if text.as_bytes()[1] == b'x' || text.as_bytes()[1] == b'X' {
} else if text[1] == b'x' || text[1] == b'X' {
i = 2;
while i < text.len() && isxdigit(&text.as_bytes()[i]) {
codepoint = (codepoint * 16) + ((text.as_bytes()[i] as u32 | 32) % 39 - 9);
codepoint = min(codepoint, 0x110000);
while i < text.len() && isxdigit(&text[i]) {
codepoint = (codepoint * 16) + ((text[i] as u32 | 32) % 39 - 9);
codepoint = min(codepoint, 0x11_0000);
i += 1;
}
i - 2
} else {
0
};

if num_digits >= 1 && num_digits <= 8 && i < text.len() && text.as_bytes()[i] == b';' {
if num_digits >= 1 && num_digits <= 8 && i < text.len() && text[i] == b';' {
if codepoint == 0 || (codepoint >= 0xD800 && codepoint <= 0xE000) ||
codepoint >= 0x110000
{
codepoint = 0xFFFD;
}
return Some((
char::from_u32(codepoint).unwrap_or('\u{FFFD}').to_string(),
char::from_u32(codepoint).unwrap_or('\u{FFFD}').to_string().into_bytes(),
i + 1,
));
}
}

let size = min(text.len(), ENTITY_MAX_LENGTH);
for i in ENTITY_MIN_LENGTH..size {
if text.as_bytes()[i] == b' ' {
if text[i] == b' ' {
return None;
}

if text.as_bytes()[i] == b';' {
return lookup(&text[..i]).map(|e| (e.to_string(), i + 1));
if text[i] == b';' {
return lookup(&text[..i]).map(|e| (e.to_vec(), i + 1));
}
}

None
}

fn lookup(text: &str) -> Option<&str> {
let entity_str = format!("&{};", text);
fn lookup(text: &[u8]) -> Option<&[u8]> {
let entity_str = format!("&{};", unsafe {str::from_utf8_unchecked(text) });

let entity = ENTITIES.iter().find(|e| e.entity == entity_str);

match entity {
Some(e) => Some(e.characters),
Some(e) => Some(e.characters.as_bytes()),
None => None,
}
}

pub fn unescape_html(src: &str) -> String {
pub fn unescape_html(src: &[u8]) -> Vec<u8> {
let size = src.len();
let mut i = 0;
let mut v = String::with_capacity(size);
let mut v = Vec::with_capacity(size);

while i < size {
let org = i;
while i < size && src.as_bytes()[i] != b'&' {
while i < size && src[i] != b'&' {
i += 1;
}

if i > org {
if org == 0 && i >= size {
return src.to_string();
return src.to_vec();
}

v += &src[org..i];
v.extend_from_slice(&src[org..i]);
}

if i >= size {
Expand All @@ -100,10 +101,10 @@ pub fn unescape_html(src: &str) -> String {
i += 1;
match unescape(&src[i..]) {
Some((chs, size)) => {
v += &chs;
v.extend_from_slice(&chs);
i += size;
}
None => v.push('&'),
None => v.push(b'&'),
}
}

Expand Down
Loading

0 comments on commit 4a3a6b8

Please sign in to comment.