Skip to content

Commit

Permalink
perf: Delay generate repr (toml-rs#202)
Browse files Browse the repository at this point in the history
When parsing, we create the `Value`, which will cause a default repr to
be generated.  We then overwrite it with the actual repr.

Similarly, when indexing, we generate a repr for the `Key` but usually
throw that key away.

This makes it so we only generate a repr when needed, and allows the
user to force a default repr.

The downside is that anything with a default repr will be re-generated
each time the user renders the document, if they render it multiple times.
This is probably rare.  As we expand our formatting support though, we should
consider a "fill in details where none exist" step, so that this cost goes away.
  • Loading branch information
epage authored Sep 13, 2021
1 parent 168d51c commit c3a29ec
Show file tree
Hide file tree
Showing 11 changed files with 308 additions and 248 deletions.
11 changes: 1 addition & 10 deletions src/easy/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,22 +314,13 @@ impl_into_value!(Table: Table);
///
/// This trait is sealed and not intended for implementation outside of the
/// `toml` crate.
pub trait Index: Sealed {
pub trait Index: crate::private::Sealed {
#[doc(hidden)]
fn index<'a>(&self, val: &'a Value) -> Option<&'a Value>;
#[doc(hidden)]
fn index_mut<'a>(&self, val: &'a mut Value) -> Option<&'a mut Value>;
}

/// Marker used to seal traits in this module: it is an implementation detail,
/// must not be implemented by downstream code, and may change in the future
/// (breaking any code that relies on it).
#[doc(hidden)]
pub trait Sealed {}

// Owned and unsized types that are allowed behind the seal.
impl Sealed for String {}
impl Sealed for str {}
impl Sealed for usize {}

// A reference is sealed whenever its target is.
impl<'r, T> Sealed for &'r T where T: Sealed + ?Sized {}

impl Index for usize {
fn index<'a>(&self, val: &'a Value) -> Option<&'a Value> {
match *val {
Expand Down
221 changes: 217 additions & 4 deletions src/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ use std::fmt::{Display, Formatter, Result, Write};

use itertools::Itertools;

use crate::datetime::*;
use crate::document::Document;
use crate::inline_table::DEFAULT_INLINE_KEY_DECOR;
use crate::key::Key;
use crate::repr::{DecorDisplay, Formatted, Repr};
use crate::repr::{DecorDisplay, Formatted, Repr, ValueRepr};
use crate::table::{DEFAULT_KEY_DECOR, DEFAULT_KEY_PATH_DECOR, DEFAULT_TABLE_DECOR};
use crate::value::DEFAULT_VALUE_DECOR;
use crate::{Array, InlineTable, Item, Table, Value};
Expand All @@ -28,20 +29,232 @@ impl Display for Repr {
}
}

impl<T> Display for Formatted<T> {
impl<T> Display for Formatted<T>
where
T: ValueRepr,
{
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
let repr = self.to_repr();
write!(
f,
"{}",
self.decor().display(self.repr(), DEFAULT_VALUE_DECOR)
self.decor().display(repr.as_ref(), DEFAULT_VALUE_DECOR)
)
}
}

impl ValueRepr for String {
    /// Builds the default representation of a string value.
    ///
    /// Neither the quoting style nor the literal/standard choice is forced;
    /// both are left for `to_string_repr` to pick.
    fn to_repr(&self) -> Repr {
        let (style, literal) = (None, None);
        to_string_repr(self.as_str(), style, literal)
    }
}

/// Renders `value` as TOML string syntax and wraps the text in a `Repr`.
///
/// * `style` - quoting layout to use; when `None` it is taken from
///   `infer_style(value)`.
/// * `literal` - `true` to emit a literal (single-quoted) string, `false` to
///   emit a standard (double-quoted, escaped) string; when `None` it is taken
///   from `infer_style(value)`.
///
/// When `literal` is `true` the value is copied verbatim between the
/// delimiters (no escaping is performed), so the caller is responsible for
/// only forcing it on values that are representable that way.
///
/// When `literal` is `false`, backspace, tab, form feed, carriage return,
/// `"` and `\` get their short escapes, and any other character at or below
/// U+001F, as well as U+007F, is written as `\uXXXX`. A newline is kept raw
/// only for `StringStyle::NewlineTripple`; both one-line styles are delimited
/// by a single `"` pair and therefore escape it as `\n`.
pub(crate) fn to_string_repr(
    value: &str,
    style: Option<StringStyle>,
    literal: Option<bool>,
) -> Repr {
    // Fill in whichever half the caller left unspecified.
    let (style, literal) = match (style, literal) {
        (Some(style), Some(literal)) => (style, literal),
        (None, Some(literal)) => (infer_style(value).0, literal),
        (Some(style), None) => (style, infer_style(value).1),
        (None, None) => infer_style(value),
    };

    let mut output = String::with_capacity(value.len() * 2);
    if literal {
        output.push_str(style.literal_start());
        output.push_str(value);
        output.push_str(style.literal_end());
    } else {
        output.push_str(style.standard_start());
        for ch in value.chars() {
            match ch {
                '\u{8}' => output.push_str("\\b"),
                '\u{9}' => output.push_str("\\t"),
                '\u{a}' => match style {
                    StringStyle::NewlineTripple => output.push('\n'),
                    // `OnelineTripple` is rendered with the same single `"`
                    // delimiters as `OnelineSingle` when not literal, so the
                    // newline has to be escaped for it as well (this used to
                    // hit `unreachable!()` and panic).
                    StringStyle::OnelineTripple | StringStyle::OnelineSingle => {
                        output.push_str("\\n")
                    }
                },
                '\u{c}' => output.push_str("\\f"),
                '\u{d}' => output.push_str("\\r"),
                '\u{22}' => output.push_str("\\\""),
                '\u{5c}' => output.push_str("\\\\"),
                c if c <= '\u{1f}' || c == '\u{7f}' => {
                    write!(output, "\\u{:04X}", c as u32)
                        .expect("formatting into a String cannot fail");
                }
                c => output.push(c),
            }
        }
        output.push_str(style.standard_end());
    }

    Repr::new_unchecked(output)
}

/// Quoting layout used when rendering a TOML string.
///
/// The delimiters each variant maps to are defined by the
/// `literal_start`/`literal_end`/`standard_start`/`standard_end` methods.
/// (The `Tripple` spelling is part of the identifiers and is kept as-is.)
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum StringStyle {
/// Triple-quoted, with a newline emitted right after the opening delimiter.
NewlineTripple,
/// Triple-quoted on one line when literal (`'''...'''`); when not literal
/// it is rendered with a single pair of `"` instead.
OnelineTripple,
/// Single-quoted on one line: `'...'` when literal, `"..."` otherwise.
OnelineSingle,
}

impl StringStyle {
    /// Opening delimiter of a literal (single-quote based) string.
    fn literal_start(self) -> &'static str {
        match self {
            Self::OnelineSingle => "'",
            Self::OnelineTripple => "'''",
            Self::NewlineTripple => "'''\n",
        }
    }

    /// Closing delimiter of a literal (single-quote based) string.
    fn literal_end(self) -> &'static str {
        match self {
            Self::OnelineSingle => "'",
            Self::OnelineTripple | Self::NewlineTripple => "'''",
        }
    }

    /// Opening delimiter of a standard (double-quote based) string.
    fn standard_start(self) -> &'static str {
        match self {
            // `OnelineTripple` may reach this point when the style was picked
            // for a literal such as '''it's one line''' but `literal == false`;
            // it then shares the plain one-line delimiter.
            Self::OnelineSingle | Self::OnelineTripple => "\"",
            Self::NewlineTripple => "\"\"\"\n",
        }
    }

    /// Closing delimiter of a standard (double-quote based) string.
    fn standard_end(self) -> &'static str {
        match self {
            // Same fallback as in `standard_start`: a non-literal
            // `OnelineTripple` closes with a single `"`.
            Self::OnelineSingle | Self::OnelineTripple => "\"",
            Self::NewlineTripple => "\"\"\"",
        }
    }
}

/// Chooses a quoting style for `value` and whether it should be written as a
/// literal string.
///
/// Returns `(style, literal)`:
/// * `style` is `NewlineTripple` as soon as the value contains a `\n`,
///   otherwise a one-line style.
/// * `literal` is `true` only when the value contains a backslash (so a
///   literal string avoids escaping it) *and* the value can be written
///   verbatim: no run of three or more `'`, no trailing `'`, and no control
///   characters other than tab and newline.
///
/// For a one-line literal containing `'`, the style is upgraded to
/// `OnelineTripple` so the quote can appear unescaped.
///
/// Everything is decided in a single pass over the characters; nothing is
/// copied or allocated.
fn infer_style(value: &str) -> (StringStyle, bool) {
    let mut ty = StringStyle::OnelineSingle;
    // Longest completed run of consecutive single quotes, and the current run.
    let mut max_found_singles: usize = 0;
    let mut found_singles: usize = 0;
    let mut prefer_literal = false;
    let mut can_be_pretty = true;

    for ch in value.chars() {
        if can_be_pretty {
            if ch == '\'' {
                found_singles += 1;
                if found_singles >= 3 {
                    // `'''` would terminate a multi-line literal string.
                    can_be_pretty = false;
                }
            } else {
                max_found_singles = max_found_singles.max(found_singles);
                found_singles = 0;
            }
            match ch {
                '\t' => {}
                '\\' => prefer_literal = true,
                '\n' => ty = StringStyle::NewlineTripple,
                // Escape codes are needed if any ASCII control characters
                // are present, including \b \f \r.
                c if c <= '\u{1f}' || c == '\u{7f}' => can_be_pretty = false,
                _ => {}
            }
        } else if ch == '\n' {
            // The string cannot be a literal any more, but it can still be
            // multi-line.
            ty = StringStyle::NewlineTripple;
        }
    }

    if found_singles > 0 && value.ends_with('\'') {
        // We cannot escape the ending quote so we must use """
        can_be_pretty = false;
    }
    if !prefer_literal {
        can_be_pretty = false;
    }
    if !can_be_pretty {
        debug_assert!(ty != StringStyle::OnelineTripple);
        return (ty, false);
    }

    max_found_singles = max_found_singles.max(found_singles);
    debug_assert!(max_found_singles < 3);
    if ty == StringStyle::OnelineSingle && max_found_singles >= 1 {
        // no newlines, but must use ''' because it has ' in it
        ty = StringStyle::OnelineTripple;
    }
    (ty, true)
}

impl ValueRepr for i64 {
    /// Default representation of an integer: its `Display` text.
    fn to_repr(&self) -> Repr {
        let digits = self.to_string();
        Repr::new_unchecked(digits)
    }
}

impl ValueRepr for f64 {
    /// Default representation of a float, delegated to `to_f64_repr`.
    fn to_repr(&self) -> Repr {
        let value: f64 = *self;
        to_f64_repr(value)
    }
}

/// Formats a float as text and wraps it in a `Repr`.
///
/// * NaN becomes `nan` / `-nan` depending on the sign bit.
/// * Zero becomes `0.0` / `-0.0` depending on the sign bit.
/// * Any other value with no fractional part gets an explicit `.0` suffix.
/// * Everything else uses the plain `Display` output.
fn to_f64_repr(f: f64) -> Repr {
    let negative = f.is_sign_negative();

    let text = if f.is_nan() {
        let spelled = if negative { "-nan" } else { "nan" };
        spelled.to_owned()
    } else if f == 0.0 {
        let spelled = if negative { "-0.0" } else { "0.0" };
        spelled.to_owned()
    } else if f % 1.0 == 0.0 {
        // Whole number: `Display` prints no decimal point, so add one.
        format!("{}.0", f)
    } else {
        f.to_string()
    };

    Repr::new_unchecked(text)
}

impl ValueRepr for bool {
    /// Default representation of a boolean: its `Display` text
    /// (`true` / `false`).
    fn to_repr(&self) -> Repr {
        let word = self.to_string();
        Repr::new_unchecked(word)
    }
}

impl ValueRepr for Datetime {
    /// Default representation of a datetime: whatever its `to_string()`
    /// produces.
    fn to_repr(&self) -> Repr {
        let text = self.to_string();
        Repr::new_unchecked(text)
    }
}

impl Display for Key {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
// HACK: For now, leaving off decor since we don't know the defaults to use in this context
self.repr().fmt(f)
self.to_repr().as_ref().fmt(f)
}
}

Expand Down
11 changes: 1 addition & 10 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::{value, InlineTable, Item, Table, Value};
// copied from
// https://github.com/serde-rs/json/blob/master/src/value/index.rs

pub trait Index: private::Sealed {
pub trait Index: crate::private::Sealed {
/// Return `Option::None` if the key is not already in the array or table.
#[doc(hidden)]
fn index<'v>(&self, v: &'v Item) -> Option<&'v Item>;
Expand Down Expand Up @@ -151,12 +151,3 @@ impl<'s> ops::IndexMut<&'s str> for Document {
self.root.index_mut(key)
}
}

// Sealing module: keeps users from implementing the Index trait themselves.
mod private {
    /// Marker trait; only the types listed in this module implement it.
    pub trait Sealed {}

    impl Sealed for String {}
    impl Sealed for str {}
    impl Sealed for usize {}

    // A reference is sealed whenever its target is.
    impl<'r, T> Sealed for &'r T where T: Sealed + ?Sized {}
}
34 changes: 21 additions & 13 deletions src/key.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use std::borrow::Cow;
use std::str::FromStr;

use combine::stream::position::Stream;

use crate::encode::{to_string_repr, StringStyle};
use crate::parser;
use crate::parser::is_unquoted_char;
use crate::repr::{Decor, InternalString, Repr};
use crate::value::{to_string_repr, StringStyle};

/// Key as part of a Key/Value Pair or a table header.
///
Expand All @@ -32,26 +33,25 @@ use crate::value::{to_string_repr, StringStyle};
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Hash, Clone)]
pub struct Key {
key: InternalString,
pub(crate) repr: Repr,
pub(crate) repr: Option<Repr>,
pub(crate) decor: Decor,
}

impl Key {
/// Create a new table key
pub fn new(key: impl AsRef<str>) -> Self {
let key = key.as_ref();
let repr = to_key_repr(key);
Self::new_unchecked(repr, key.to_owned())
}

pub(crate) fn new_unchecked(repr: Repr, key: InternalString) -> Self {
Self {
key,
repr,
key: key.as_ref().into(),
repr: None,
decor: Default::default(),
}
}

pub(crate) fn with_repr_unchecked(mut self, repr: Repr) -> Self {
self.repr = Some(repr);
self
}

/// While creating the `Key`, add `Decor` to it
pub fn with_decor(mut self, decor: Decor) -> Self {
self.decor = decor;
Expand All @@ -64,8 +64,11 @@ impl Key {
}

/// Returns the key raw representation.
pub fn repr(&self) -> &Repr {
&self.repr
pub fn to_repr(&self) -> Cow<Repr> {
self.repr
.as_ref()
.map(Cow::Borrowed)
.unwrap_or_else(|| Cow::Owned(to_key_repr(&self.key)))
}

/// Returns the surrounding whitespace
Expand All @@ -78,14 +81,19 @@ impl Key {
&self.decor
}

/// Auto formats the key.
pub fn fmt(&mut self) {
self.repr = Some(to_key_repr(&self.key));
}

fn try_parse(s: &str) -> Result<Key, parser::TomlError> {
use combine::EasyParser;
let result = parser::key_parser().easy_parse(Stream::new(s));
match result {
Ok((_, ref rest)) if !rest.input.is_empty() => {
Err(parser::TomlError::from_unparsed(rest.positioner, s))
}
Ok(((raw, key), _)) => Ok(Key::new_unchecked(Repr::new_unchecked(raw), key)),
Ok(((raw, key), _)) => Ok(Key::new(key).with_repr_unchecked(Repr::new_unchecked(raw))),
Err(e) => Err(parser::TomlError::new(e, s)),
}
}
Expand Down
13 changes: 13 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,16 @@ pub use crate::table::{
Entry, IntoIter, Iter, IterMut, OccupiedEntry, Table, TableLike, VacantEntry,
};
pub use crate::value::Value;

// Prevent users from implementing some of this crate's traits.
pub(crate) mod private {
    /// Marker trait; only the types listed in this module implement it.
    pub trait Sealed {}

    // Index-like types.
    impl Sealed for usize {}
    impl Sealed for str {}
    impl Sealed for String {}

    // Scalar value types.
    impl Sealed for bool {}
    impl Sealed for i64 {}
    impl Sealed for f64 {}
    impl Sealed for crate::Datetime {}

    // A reference is sealed whenever its target is.
    impl<'r, T> Sealed for &'r T where T: Sealed + ?Sized {}
}
Loading

0 comments on commit c3a29ec

Please sign in to comment.