Skip to content

Commit

Permalink
Adds newline counts within long strings for location metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
desaikd committed Feb 5, 2025
1 parent 4ecd122 commit e5e4a26
Showing 1 changed file with 30 additions and 17 deletions.
47 changes: 30 additions & 17 deletions src/lazy/text/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,33 +281,39 @@ impl<'top> TextBuffer<'top> {
/// Matches one or more whitespace characters.
pub fn match_whitespace1(&mut self) -> IonMatchResult<'top> {
let result = take_while(1.., WHITESPACE_BYTES).parse_next(self)?;
self.update_location_metadata(result);
self.update_location_metadata(result.data);
Ok(*self)
}

/// Updates the location metadata based on the matched whitespace bytes in the consumed buffer
fn update_location_metadata(&mut self, result: TextBuffer<'top>) {
// If the bytes contain '\r\n' in this order then this must be coming from windows line ending pattern and hence should be counted as 1.
let crlf_count = result.data.windows(2).filter(|window| window == b"\r\n").count();

// Subtract the crlf_count from total count of all newline characters to get the correct number of newline match count.
let newline_match_count = result.data.iter().filter(|b| NEWLINE_BYTES.contains(b)).count() - crlf_count;

// Gets index for the last occurrence of the newline byte and subtracts from the result length to get non newline bytes length
let last_index_newline_byte = result.data.iter().rposition(|b| NEWLINE_BYTES.contains(b)).unwrap_or(0);
let non_newline_match_length = result.data.len() - last_index_newline_byte - 1;
self.row += newline_match_count;
fn update_location_metadata(&mut self, data: &'top [u8]) {
if !data.is_empty() {
// If the bytes contain '\r\n' in this order then this must be coming from windows line ending pattern and hence should be counted as 1.
let crlf_count = data.windows(2).filter(|window| window == b"\r\n").count();

// Subtract the crlf_count from total count of all newline characters to get the correct number of newline match count.
let newline_match_count = data.iter().filter(|b| NEWLINE_BYTES.contains(b)).count() - crlf_count;

// Gets index for the last occurrence of the newline byte and subtracts from the result length to get non newline bytes length
let last_index_newline_byte = data.iter().rposition(|b| NEWLINE_BYTES.contains(b)).unwrap_or(0);
let non_newline_match_length = data.len() - last_index_newline_byte - 1;
self.row += newline_match_count;

// Stores this newline offset as previous newline offset for calculating column position since this has already been matched/parsed
if self.offset < non_newline_match_length {
// this means that the input is not yet consumed hence get the input length + the current offset
self.prev_newline_offset = self.offset + data.len() - non_newline_match_length;
} else {
self.prev_newline_offset = self.offset - non_newline_match_length;

// Stores this newline offset as previous newline offset for calculating column position since this has already been matched/parsed
self.prev_newline_offset = self.offset - non_newline_match_length;
}
}
}

/// Matches zero or more whitespace characters.
pub fn match_whitespace0(&mut self) -> IonMatchResult<'top> {
let result = take_while(0.., WHITESPACE_BYTES).parse_next(self)?;
if !result.data.is_empty() {
self.update_location_metadata(result);
}
self.update_location_metadata(result.data);
Ok(*self)
}

Expand Down Expand Up @@ -1645,6 +1651,8 @@ impl<'top> TextBuffer<'top> {
let delimiter_head = delimiter.as_bytes()[0];
// Whether we've encountered any escapes while looking for the delimiter
let mut contained_escapes = false;
// This input may contain newline characters hence update the location metadata.
self.update_location_metadata(self.bytes());
// The input left to search
let mut remaining = *self;
loop {
Expand Down Expand Up @@ -2905,6 +2913,11 @@ mod tests {
MatchTest::new_1_0(input).expect_match_location(match_length(TextBuffer::match_whitespace0), expected_location);
}

// Checks the location reported after matching a triple-quoted long string whose
// body contains line breaks: one `\n` immediately followed by one `\r\n`.
// NOTE(review): the expected `(3, 11)` is presumably (row, column) — two line
// breaks past the first row (a `\r\n` pair counting as a single break), then the
// 10 bytes `\t hello'''` that follow the last line break. Confirm the tuple's
// meaning and its 1-based numbering against `expect_match_location`.
#[test]
fn expect_newline_long_text() {
MatchTest::new_1_0("'''long \n\r\n\t hello'''").expect_match_location(match_length(TextBuffer::match_string), (3, 11));
}

#[test]
fn expect_foo() {
MatchTest::new_1_0("\"hello\"").expect_match(match_length(TextBuffer::match_string));
Expand Down

0 comments on commit e5e4a26

Please sign in to comment.