From 0d827dd8df9472310d1e7c52ac1ad6aaa2ebcce4 Mon Sep 17 00:00:00 2001 From: Michael Kirk Date: Tue, 7 May 2024 14:04:52 -0700 Subject: [PATCH 1/2] Err rather than return invalid output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces a serious performance regression for decode: $ cargo bench --bench="*" -- --baseline=perf2 Compiling polyline v0.10.2 (/Users/mkirk/src/georust/polyline) Finished `bench` profile [optimized] target(s) in 0.77s Running benches/benchmarks.rs (target/release/deps/benchmarks-cc8f3ea04be06866) encode 10_000 coordinates at precision 1e-5 time: [105.25 µs 105.30 µs 105.37 µs] change: [-0.1263% -0.0235% +0.0878%] (p = 0.68 > 0.05) No change in performance detected. Found 4 outliers among 100 measurements (4.00%) 1 (1.00%) low mild 1 (1.00%) high mild 2 (2.00%) high severe encode 10_000 coordinates at precision 1e-6 time: [129.27 µs 130.11 µs 130.80 µs] change: [-0.9966% -0.2021% +0.6397%] (p = 0.63 > 0.05) No change in performance detected. decode 10_000 coordinates at precision 1e-5 time: [164.64 µs 165.83 µs 167.02 µs] change: [+109.57% +111.90% +114.24%] (p = 0.00 < 0.05) Performance has regressed. decode 10_000 coordinates at precision 1e-6 time: [173.15 µs 174.21 µs 175.17 µs] change: [+85.462% +87.344% +89.230%] (p = 0.00 < 0.05) Performance has regressed. Co-authored-by: mattiZed --- src/lib.rs | 66 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e04b4f6..6d0fae8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -99,17 +99,19 @@ where /// Decodes a Google Encoded Polyline. /// +/// Returns an error if the polyline is invalid or if the decoded coordinates are out of bounds. +/// /// # Examples /// /// ``` /// use polyline; /// -/// let decodedPolyline = polyline::decode_polyline(&"_p~iF~ps|U_ulLnnqC_mqNvxq`@", 5); +/// let decoded_polyline = polyline::decode_polyline(&"_p~iF~ps|U_ulLnnqC_mqNvxq`@", 5); /// ``` pub fn decode_polyline(polyline: &str, precision: u32) -> Result, String> { let mut index = 0; - let mut lat: i64 = 0; - let mut lng: i64 = 0; + let mut scaled_lat: i64 = 0; + let mut scaled_lon: i64 = 0; let mut coordinates = vec![]; let base: i32 = 10; let factor = i64::from(base.pow(precision)); @@ -121,13 +123,25 @@ pub fn decode_polyline(polyline: &str, precision: u32) -> Result if new_index >= chars.len() { break; } - let (longitude_change, new_index) = trans(chars, new_index)?; - index = new_index; + scaled_lat += latitude_change; + let lat = scaled_lat as f64 / factor as f64; + if !(MIN_LATITUDE..MAX_LATITUDE).contains(&lat) { + return Err(format!( + "Invalid latitude: {lat} from character range {index}..{new_index}" + )); + } - lat += latitude_change; - lng += longitude_change; + let (longitude_change, new_new_index) = trans(chars, new_index)?; + scaled_lon += longitude_change; + let lon = scaled_lon as f64 / factor as f64; + if !(MIN_LONGITUDE..MAX_LONGITUDE).contains(&lon) { + return Err(format!( + "Invalid longitude: {lon} from character range {new_index}..{new_new_index}" + )); + } - coordinates.push([lng as f64 / factor as f64, lat as f64 / factor as f64]); + index = new_new_index; + coordinates.push([lon, lat]); } Ok(coordinates.into()) @@ -138,9 +152,12 @@ fn trans(chars: &[u8], mut index: usize) -> Result<(i64, usize), String> { let mut result = 0; let mut byte; loop { + if index >= chars.len() { + return Err(format!("Invalid polyline missing valid termination")); + } byte = chars[index] as u64; if byte < 63 || (shift > 64 - 5) { - return Err(format!("Cannot decode character at index {}", index)); + return Err(format!("Cannot decode character at index {index}")); } byte -= 63; result |= (byte & 0x1f) << shift; @@ -232,33 +249,36 @@ mod tests { } #[test] - // emoji is decodable but messes up data - // TODO: handle this case in the future? fn broken_string() { let s = "_p~iF~ps|U_u🗑lLnnqC_mqNvxq`@"; - let res = vec![ - [-120.2, 38.5], - [-120.95, 2306360.53104], - [-126.453, 2306363.08304], - ] - .into(); - assert_eq!(decode_polyline(s, 5).unwrap(), res); + let err = decode_polyline(s, 5).unwrap_err(); + assert_eq!( + err, + "Invalid latitude: 2306360.53104 from character range 10..18" + ); } #[test] - #[should_panic] fn invalid_string() { let s = "invalid_polyline_that_should_be_handled_gracefully"; - decode_polyline(s, 6).unwrap(); + let err = decode_polyline(s, 5).unwrap_err(); + assert_eq!(err, "Cannot decode character at index 12"); + } + + #[test] + fn another_invalid_string() { + let s = "ugh_ugh"; + let err = decode_polyline(s, 5).unwrap_err(); + assert_eq!(err, "Invalid polyline missing valid termination"); } #[test] - #[should_panic] - // Can't have a latitude > 90.0 fn bad_coords() { + // Can't have a latitude > 90.0 let res: LineString = vec![[-120.2, 38.5], [-120.95, 40.7], [-126.453, 430.252]].into(); - encode_coordinates(res, 5).unwrap(); + let err = encode_coordinates(res, 5).unwrap_err(); + assert_eq!(err, "Invalid latitude: 430.252 at position 2"); } #[test] From c57292a9dd9d8d78b0140ae4ae32a24192dacef1 Mon Sep 17 00:00:00 2001 From: Michael Kirk Date: Tue, 7 May 2024 16:54:55 -0700 Subject: [PATCH 2/2] Somewhat ameliorate perf regression from validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's still slower than before the validation checks, but now only 10-20% slower rather than 90-110% slower $ cargo bench --bench="*" -- --baseline=perf2 Compiling polyline v0.10.2 (/Users/mkirk/src/georust/polyline) Finished `bench` profile [optimized] target(s) in 0.77s Running benches/benchmarks.rs (target/release/deps/benchmarks-cc8f3ea04be06866) encode 10_000 coordinates at precision 1e-5 time: [105.11 µs 105.16 µs 105.23 µs] change: [-0.2235% -0.1070% +0.0226%] (p = 0.09 > 0.05) No change in performance detected. Found 7 outliers among 100 measurements (7.00%) 5 (5.00%) high mild 2 (2.00%) high severe encode 10_000 coordinates at precision 1e-6 time: [123.98 µs 124.32 µs 124.77 µs] change: [-3.0982% -2.4787% -1.8225%] (p = 0.00 < 0.05) Performance has improved. decode 10_000 coordinates at precision 1e-5 time: [86.887 µs 87.835 µs 88.768 µs] change: [+10.484% +11.820% +13.207%] (p = 0.00 < 0.05) Performance has regressed. decode 10_000 coordinates at precision 1e-6 time: [110.52 µs 111.44 µs 112.35 µs] change: [+19.484% +20.773% +22.034%] (p = 0.00 < 0.05) Performance has regressed. --- src/lib.rs | 52 +++++++++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6d0fae8..09f9cea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,7 @@ use geo_types::{Coord, LineString}; use std::char; +use std::iter::{Enumerate, Peekable}; const MIN_LONGITUDE: f64 = -180.0; const MAX_LONGITUDE: f64 = 180.0; @@ -109,59 +110,51 @@ where /// let decoded_polyline = polyline::decode_polyline(&"_p~iF~ps|U_ulLnnqC_mqNvxq`@", 5); /// ``` pub fn decode_polyline(polyline: &str, precision: u32) -> Result, String> { - let mut index = 0; let mut scaled_lat: i64 = 0; let mut scaled_lon: i64 = 0; let mut coordinates = vec![]; let base: i32 = 10; let factor = i64::from(base.pow(precision)); - let chars = polyline.as_bytes(); + let mut chars = polyline.as_bytes().iter().copied().enumerate().peekable(); - while index < chars.len() { - let (latitude_change, new_index) = trans(chars, index)?; - if new_index >= chars.len() { - break; - } + while let Some((lat_start, _)) = chars.peek().copied() { + let latitude_change = decode_next(&mut chars)?; scaled_lat += latitude_change; let lat = scaled_lat as f64 / factor as f64; if !(MIN_LATITUDE..MAX_LATITUDE).contains(&lat) { - return Err(format!( - "Invalid latitude: {lat} from character range {index}..{new_index}" - )); + return Err(format!("Invalid latitude: {lat} at index: {lat_start}")); } - let (longitude_change, new_new_index) = trans(chars, new_index)?; + let Some((lon_start, _)) = chars.peek().copied() else { + return Err(format!( + "No longitude to go with latitude at index: {lat_start}" + )); + }; + let longitude_change = decode_next(&mut chars)?; scaled_lon += longitude_change; let lon = scaled_lon as f64 / factor as f64; if !(MIN_LONGITUDE..MAX_LONGITUDE).contains(&lon) { - return Err(format!( - "Invalid longitude: {lon} from character range {new_index}..{new_new_index}" - )); + return Err(format!("Invalid longitude: {lon} at index: {lon_start}")); } - index = new_new_index; coordinates.push([lon, lat]); } Ok(coordinates.into()) } -fn trans(chars: &[u8], mut index: usize) -> Result<(i64, usize), String> { +fn decode_next( + chars: &mut Peekable>>, +) -> Result { let mut shift = 0; let mut result = 0; - let mut byte; - loop { - if index >= chars.len() { - return Err(format!("Invalid polyline missing valid termination")); - } - byte = chars[index] as u64; + while let Some((idx, mut byte)) = chars.next() { if byte < 63 || (shift > 64 - 5) { - return Err(format!("Cannot decode character at index {index}")); + return Err(format!("Cannot decode character at index {idx}")); } byte -= 63; - result |= (byte & 0x1f) << shift; - index += 1; + result |= ((byte & 0x1f) as u64) << shift; shift += 5; if byte < 0x20 { break; @@ -173,7 +166,7 @@ fn trans(chars: &[u8], mut index: usize) -> Result<(i64, usize), String> { } else { result >> 1 } as i64; - Ok((coordinate_change, index)) + Ok(coordinate_change) } #[cfg(test)] @@ -252,10 +245,7 @@ mod tests { fn broken_string() { let s = "_p~iF~ps|U_u🗑lLnnqC_mqNvxq`@"; let err = decode_polyline(s, 5).unwrap_err(); - assert_eq!( - err, - "Invalid latitude: 2306360.53104 from character range 10..18" - ); + assert_eq!(err, "Invalid latitude: 2306360.53104 at index: 10"); } #[test] @@ -269,7 +259,7 @@ mod tests { fn another_invalid_string() { let s = "ugh_ugh"; let err = decode_polyline(s, 5).unwrap_err(); - assert_eq!(err, "Invalid polyline missing valid termination"); + assert_eq!(err, "Invalid latitude: 49775.95019 at index: 0"); } #[test]