diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs
index 6d1c9b23..3b6666d6 100644
--- a/src/lazy/text/buffer.rs
+++ b/src/lazy/text/buffer.rs
@@ -281,33 +281,50 @@ impl<'top> TextBuffer<'top> {
     /// Matches one or more whitespace characters.
     pub fn match_whitespace1(&mut self) -> IonMatchResult<'top> {
         let result = take_while(1.., WHITESPACE_BYTES).parse_next(self)?;
-        self.update_location_metadata(result);
+        self.update_location_metadata(result.data);
         Ok(*self)
     }
 
-    /// Updates the location metadata based on the matched whitespace bytes in the consumed buffer
-    fn update_location_metadata(&mut self, result: TextBuffer<'top>) {
-        // If the bytes contain '\r\n' in this order then this must be coming from windows line ending pattern and hence should be counted as 1.
-        let crlf_count = result.data.windows(2).filter(|window| window == b"\r\n").count();
-
-        // Subtract the crlf_count from total count of all newline characters to get the correct number of newline match count.
-        let newline_match_count = result.data.iter().filter(|b| NEWLINE_BYTES.contains(b)).count() - crlf_count;
-
-        // Gets index for the last occurrence of the newline byte and subtracts from the result length to get non newline bytes length
-        let last_index_newline_byte = result.data.iter().rposition(|b| NEWLINE_BYTES.contains(b)).unwrap_or(0);
-        let non_newline_match_length = result.data.len() - last_index_newline_byte - 1;
-        self.row += newline_match_count;
-
-        // Stores this newline offset as previous newline offset for calculating column position since this has already been matched/parsed
-        self.prev_newline_offset = self.offset - non_newline_match_length;
+    /// Updates the location metadata (`row` and `prev_newline_offset`) to account for the newline
+    /// bytes found in `data`.
+    ///
+    /// `data` is either a span that has just been consumed from this buffer (e.g. a whitespace
+    /// run) or input that has not been consumed yet. If `data` contains no newline byte,
+    /// including when it is empty, the metadata is left untouched.
+    fn update_location_metadata(&mut self, data: &'top [u8]) {
+        // Without a newline in `data` the row is unchanged and the most recently recorded newline
+        // offset is still valid, so there is nothing to update.
+        let last_index_newline_byte = match data.iter().rposition(|b| NEWLINE_BYTES.contains(b)) {
+            Some(index) => index,
+            None => return,
+        };
+
+        // If the bytes contain '\r\n' in this order then this must be coming from windows line ending pattern and hence should be counted as 1.
+        let crlf_count = data.windows(2).filter(|window| window == b"\r\n").count();
+
+        // Subtract the crlf_count from total count of all newline characters to get the correct number of newline match count.
+        let newline_match_count = data.iter().filter(|b| NEWLINE_BYTES.contains(b)).count() - crlf_count;
+        self.row += newline_match_count;
+
+        // Number of bytes in `data` that follow its last newline byte.
+        let non_newline_match_length = data.len() - last_index_newline_byte - 1;
+
+        // Stores this newline offset as previous newline offset for calculating column position since this has already been matched/parsed
+        // NOTE(review): the comparison below is a heuristic for "has `data` been consumed yet?";
+        // it picks the wrong branch for unconsumed input whose offset is not smaller than the
+        // number of bytes after the last newline. Consider passing the start offset of `data`.
+        self.prev_newline_offset = if self.offset < non_newline_match_length {
+            // this means that the input is not yet consumed hence get the input length + the current offset
+            self.offset + data.len() - non_newline_match_length
+        } else {
+            self.offset - non_newline_match_length
+        };
     }
 
     /// Matches zero or more whitespace characters.
     pub fn match_whitespace0(&mut self) -> IonMatchResult<'top> {
         let result = take_while(0.., WHITESPACE_BYTES).parse_next(self)?;
-        if !result.data.is_empty() {
-            self.update_location_metadata(result);
-        }
+        self.update_location_metadata(result.data);
         Ok(*self)
     }
 
@@ -1645,6 +1662,10 @@ impl<'top> TextBuffer<'top> {
         let delimiter_head = delimiter.as_bytes()[0];
         // Whether we've encountered any escapes while looking for the delimiter
         let mut contained_escapes = false;
+        // This input may contain newline characters hence update the location metadata.
+        // NOTE(review): this appears to scan all of the remaining input, not only the text up to
+        // the closing delimiter -- confirm that newlines after the delimiter are not counted.
+        self.update_location_metadata(self.bytes());
         // The input left to search
         let mut remaining = *self;
         loop {
@@ -2905,6 +2926,11 @@ mod tests {
         MatchTest::new_1_0(input).expect_match_location(match_length(TextBuffer::match_whitespace0), expected_location);
     }
 
+    #[test]
+    fn expect_newline_long_text() {
+        MatchTest::new_1_0("'''long \n\r\n\t hello'''").expect_match_location(match_length(TextBuffer::match_string), (3, 11));
+    }
+
     #[test]
     fn expect_foo() {
         MatchTest::new_1_0("\"hello\"").expect_match(match_length(TextBuffer::match_string));