From 64310d5fc87edd511b28b4553b0c1362da30c5c0 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 19 Oct 2023 10:28:09 +1100 Subject: [PATCH] Enforce maximum string length BIP-173 states that a bech32 string must not exceed 90 characters. However BOLT-11 states that the string limit may be exceeded. This puts us, architecturally speaking in a conundrum - we want to support lightning but this crate pretty heavily documents itself as an implementation of BIP-173 and BIP-350. The solution we choose is to enforce the string limit in the segwit modules and not in the functions in `lib.rs`. We document this decision in the crate level docs as well as on the individual functions. FTR in `bech32 v0.9.0` the lengths were not enforced either. --- src/lib.rs | 113 +++++++++++++++++++++++++++++++++++++++ src/primitives/decode.rs | 21 ++++++-- src/primitives/mod.rs | 5 ++ src/segwit.rs | 85 +++++++++++++++++++++-------- 4 files changed, 199 insertions(+), 25 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bc6effa50..8cd8b2f48 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,9 @@ //! a data part. A checksum at the end of the string provides error detection to prevent mistakes //! when the string is written off or read out loud. //! +//! Please note, so as to support lighting ([BOLT-11]) we explicitly do not do string length checks +//! in the top level API. We do however enforce the 90 character limit within the `segwit` modules. +//! //! # Usage //! //! - If you are doing segwit stuff you likely want to use the [`segwit`] API. @@ -113,6 +116,8 @@ //! //! # } //! ``` +//! +//! [BOLT-11]: #![cfg_attr(all(not(feature = "std"), not(test)), no_std)] // Experimental features we need. @@ -169,6 +174,8 @@ pub use { /// If your input string has no checksum use the [`CheckedHrpstring`] constructor, which allows /// selecting the checksum algorithm explicitly. /// +/// Note: this function does not enforce any restrictions on the total length of the input string. +/// /// # Returns /// /// The human-readable part and the encoded data with the checksum removed. @@ -214,6 +221,15 @@ pub fn decode(s: &str) -> Result<(Hrp, Vec), DecodeError> { /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "alloc")] #[inline] pub fn encode(hrp: Hrp, data: &[u8]) -> Result { @@ -224,6 +240,15 @@ pub fn encode(hrp: Hrp, data: &[u8]) -> Result /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "alloc")] #[inline] pub fn encode_lower(hrp: Hrp, data: &[u8]) -> Result { @@ -236,6 +261,15 @@ pub fn encode_lower(hrp: Hrp, data: &[u8]) -> Result +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "alloc")] #[inline] pub fn encode_upper(hrp: Hrp, data: &[u8]) -> Result { @@ -248,6 +282,15 @@ pub fn encode_upper(hrp: Hrp, data: &[u8]) -> Result +/// [BIP-350]: +/// [BOLT-11]: #[inline] pub fn encode_to_fmt( fmt: &mut W, @@ -261,6 +304,15 @@ pub fn encode_to_fmt( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[inline] pub fn encode_lower_to_fmt( fmt: &mut W, @@ -279,6 +331,15 @@ pub fn encode_lower_to_fmt( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[inline] pub fn encode_upper_to_fmt( fmt: &mut W, @@ -297,6 +358,15 @@ pub fn encode_upper_to_fmt( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "std")] #[inline] pub fn encode_to_writer( @@ -311,6 +381,15 @@ pub fn encode_to_writer( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "std")] #[inline] pub fn encode_lower_to_writer( @@ -330,6 +409,15 @@ pub fn encode_lower_to_writer( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "std")] #[inline] pub fn encode_upper_to_writer( @@ -493,4 +581,29 @@ mod tests { assert_eq!(got, want); } + + #[test] + fn can_encode_really_long_string() { + // Encode around the bech32 limit, mainly here as documentation. + let tcs = vec![ + // Also shows there are no limit checks on the data slice (since segwit limits this to 40 bytes). + ([0_u8; 50], Hrp::parse_unchecked("abc")), // Encodes to 90 characters. + ([0_u8; 50], Hrp::parse_unchecked("abcd")), // Encodes to 91 characters. + ]; + for (data, hrp) in tcs { + assert!(encode::(&hrp, &data).is_ok()); + } + + // Encode something arbitrarily long. + let data = [0_u8; 1024]; + let hrp = Hrp::parse_unchecked("abc"); + assert!(encode::(&hrp, &data).is_ok()); + } + + #[test] + fn can_decode_long_string() { + // A 91 character long string, greater than the segwit enforced maximum of 90. + let s = "abcd1qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrw9z3s"; + assert!(decode(s).is_ok()); + } } diff --git a/src/primitives/decode.rs b/src/primitives/decode.rs index 5b5cb53f3..07dcae9aa 100644 --- a/src/primitives/decode.rs +++ b/src/primitives/decode.rs @@ -81,6 +81,7 @@ use crate::primitives::gf32::Fe32; use crate::primitives::hrp::{self, Hrp}; use crate::primitives::iter::{Fe32IterExt, FesToBytes}; use crate::primitives::segwit::{self, WitnessLengthError, VERSION_0}; +use crate::primitives::MAX_STRING_LENGTH; use crate::{Bech32, Bech32m}; /// Separator between the hrp and payload (as defined by BIP-173). @@ -89,7 +90,7 @@ const SEP: char = '1'; /// An HRP string that has been parsed but not yet had the checksum checked. /// /// Parsing an HRP string only checks validity of the characters, it does not validate the -/// checksum in any way. +/// checksum in any way. Nor do we check the length of the parsed string. /// /// Unless you are attempting to validate a string with multiple checksums then you likely do not /// want to use this type directly, instead use [`CheckedHrpstring::new(s)`]. @@ -217,7 +218,9 @@ impl<'s> UncheckedHrpstring<'s> { /// > We first describe the general checksummed base32 format called Bech32 and then /// > define Segregated Witness addresses using it. /// -/// This type abstracts over the general checksummed base32 format called Bech32. +/// This type abstracts over the general checksummed base32 format called Bech32. In order to +/// support `BOLT-11` we explicitly do not check the length of the parsed string, this is however, +/// done by the [`SegwitHrpstring`] type. /// /// # Examples /// @@ -331,8 +334,8 @@ impl<'s> CheckedHrpstring<'s> { } } -/// An HRP string that has been parsed, had the checksum validated, had the witness version -/// validated, had the witness data length checked, and the had witness version and checksum +/// An valid length HRP string that has been parsed, had the checksum validated, had the witness +/// version validated, had the witness data length checked, and the had witness version and checksum /// removed. /// /// # Examples @@ -368,6 +371,11 @@ impl<'s> SegwitHrpstring<'s> { /// to get strict BIP conformance (also [`Hrp::is_valid_on_mainnet`] and friends). #[inline] pub fn new(s: &'s str) -> Result { + let len = s.len(); + if len > MAX_STRING_LENGTH { + return Err(SegwitHrpstringError::Unchecked(UncheckedHrpstringError::TooLong(len))); + } + let unchecked = UncheckedHrpstring::new(s)?; if unchecked.data.is_empty() { @@ -658,6 +666,8 @@ impl From for CheckedHrpstringError { #[derive(Debug, Clone, PartialEq, Eq)] #[non_exhaustive] pub enum UncheckedHrpstringError { + /// String exceeds maximum allowed length. + TooLong(usize), /// An error with the characters of the input string. Char(CharError), /// The human-readable part is invalid. @@ -669,6 +679,8 @@ impl fmt::Display for UncheckedHrpstringError { use UncheckedHrpstringError::*; match *self { + TooLong(len) => + write!(f, "string exceeds spec limit of {} chars: {}", MAX_STRING_LENGTH, len), Char(ref e) => write_err!(f, "character error"; e), Hrp(ref e) => write_err!(f, "invalid human-readable part"; e), } @@ -681,6 +693,7 @@ impl std::error::Error for UncheckedHrpstringError { use UncheckedHrpstringError::*; match *self { + TooLong(_) => None, Char(ref e) => Some(e), Hrp(ref e) => Some(e), } diff --git a/src/primitives/mod.rs b/src/primitives/mod.rs index 1d496ba2b..297292991 100644 --- a/src/primitives/mod.rs +++ b/src/primitives/mod.rs @@ -12,6 +12,11 @@ pub mod segwit; use checksum::{Checksum, PackedNull}; +/// The maximum allowed length of a bech32 string (see [`BIP-173`]). +/// +/// [`BIP-173`]: +pub const MAX_STRING_LENGTH: usize = 90; + /// The "null checksum" used on bech32 strings for which we want to do no checksum checking. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum NoChecksum {} diff --git a/src/segwit.rs b/src/segwit.rs index 282c4164f..1a205a170 100644 --- a/src/segwit.rs +++ b/src/segwit.rs @@ -53,7 +53,7 @@ use crate::primitives::iter::{ByteIterExt, Fe32IterExt}; #[cfg(feature = "alloc")] use crate::primitives::segwit; use crate::primitives::segwit::{InvalidWitnessVersionError, WitnessLengthError}; -use crate::primitives::{Bech32, Bech32m}; +use crate::primitives::{Bech32, Bech32m, MAX_STRING_LENGTH}; #[rustfmt::skip] // Keep public re-exports separate. #[doc(inline)] @@ -79,7 +79,8 @@ pub fn decode(s: &str) -> Result<(Hrp, Fe32, Vec), DecodeError> { /// Encodes a segwit address. /// -/// Does validity checks on the `witness_version` and length checks on the `witness_program`. +/// Does validity checks on the `witness_version`, length checks on the `witness_program`, and +/// checks the total encoded string length. /// /// As specified by [`BIP-350`] we use the [`Bech32m`] checksum algorithm for witness versions 1 and /// above, and for witness version 0 we use the original ([`BIP-173`]) [`Bech32`] checksum @@ -101,12 +102,20 @@ pub fn encode( segwit::validate_witness_version(witness_version)?; segwit::validate_witness_program_length(witness_program.len(), witness_version)?; + let encoded_length = encoded_length(hrp, witness_version, witness_program); + if encoded_length > MAX_STRING_LENGTH { + return Err(EncodeError::TooLong(encoded_length)); + } + let mut buf = String::new(); encode_to_fmt_unchecked(&mut buf, hrp, witness_version, witness_program)?; Ok(buf) } /// Encodes a segwit version 0 address. +/// +/// Does validity checks on the `witness_version`, length checks on the `witness_program`, and +/// checks the total encoded string length. #[cfg(feature = "alloc")] #[inline] pub fn encode_v0(hrp: Hrp, witness_program: &[u8]) -> Result { @@ -114,6 +123,9 @@ pub fn encode_v0(hrp: Hrp, witness_program: &[u8]) -> Result Result { @@ -122,8 +134,8 @@ pub fn encode_v1(hrp: Hrp, witness_program: &[u8]) -> Result( fmt: &mut W, @@ -136,8 +148,8 @@ pub fn encode_to_fmt_unchecked( /// Encodes a segwit address to a writer ([`fmt::Write`]) using lowercase characters. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. pub fn encode_lower_to_fmt_unchecked( fmt: &mut W, hrp: Hrp, @@ -164,8 +176,8 @@ pub fn encode_lower_to_fmt_unchecked( /// /// This is provided for use when creating QR codes. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. #[inline] pub fn encode_upper_to_fmt_unchecked( fmt: &mut W, @@ -192,8 +204,8 @@ pub fn encode_upper_to_fmt_unchecked( /// Encodes a segwit address to a writer ([`std::io::Write`]) using lowercase characters. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. #[cfg(feature = "std")] #[inline] pub fn encode_to_writer_unchecked( @@ -207,8 +219,8 @@ pub fn encode_to_writer_unchecked( /// Encodes a segwit address to a writer ([`std::io::Write`]) using lowercase characters. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. #[cfg(feature = "std")] #[inline] pub fn encode_lower_to_writer_unchecked( @@ -237,8 +249,8 @@ pub fn encode_lower_to_writer_unchecked( /// /// This is provided for use when creating QR codes. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. #[cfg(feature = "std")] #[inline] pub fn encode_upper_to_writer_unchecked( @@ -265,13 +277,6 @@ pub fn encode_upper_to_writer_unchecked( } /// Returns the length of the bech32 string after encoding HRP, witness version and program. -/// -/// # Returns -/// -/// Returns the encoded length, ether as `Ok(len)` if valid or as `Err(EncodedLengthError(len))` if -/// invalid (exceeds the maximum of 90 characters as defined in [BIP-173]). -/// -/// [`BIP-173`]: pub fn encoded_length( hrp: Hrp, _witness_version: Fe32, // Emphasize that this is only for segwit. @@ -313,6 +318,8 @@ pub enum EncodeError { WitnessVersion(InvalidWitnessVersionError), /// Invalid witness length. WitnessLength(WitnessLengthError), + /// Encoding HRP, witver, and program into a bech32 string exceeds maximum allowed. + TooLong(usize), /// Writing to formatter failed. Fmt(fmt::Error), } @@ -324,6 +331,8 @@ impl fmt::Display for EncodeError { match *self { WitnessVersion(ref e) => write_err!(f, "witness version"; e), WitnessLength(ref e) => write_err!(f, "witness length"; e), + TooLong(len) => + write!(f, "encoded length {} exceeds spec limit {} chars", len, MAX_STRING_LENGTH), Fmt(ref e) => write_err!(f, "writing to formatter failed"; e), } } @@ -337,6 +346,7 @@ impl std::error::Error for EncodeError { match *self { WitnessVersion(ref e) => Some(e), WitnessLength(ref e) => Some(e), + TooLong(_) => None, Fmt(ref e) => Some(e), } } @@ -465,4 +475,37 @@ mod tests { assert_eq!(got, want); } } + + #[test] + fn cannot_encode_address_too_long() { + let tcs = vec![ + ("anhrpthatis19charsx", 91), + ("anhrpthatisthemaximumallowedlengthofeightythreebytesxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", 155) + ]; + + for (hrp, len) in tcs { + let program = [0_u8; 40]; // Maximum witness program length. + let hrp = Hrp::parse_unchecked(hrp); + match encode(&hrp, VERSION_1, &program) { + Err(e) => assert_eq!(e, EncodeError::TooLong(len)), + Ok(address) => println!("len: {}", address.len()), + } + } + } + + #[test] + fn decode_long_address() { + use crate::primitives::decode::{SegwitHrpstringError, UncheckedHrpstringError}; + + // 90 characters long, maximum allowed. + let address = "anhrpthatisnineteen1pqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqghfyyfz"; + assert!(decode(address).is_ok()); + + // 91 characters long, one over xmaximum allowed. + let address = "anhrpthatistwentychs1pqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgqfrwjz"; + assert_eq!( + decode(address).unwrap_err(), + DecodeError(SegwitHrpstringError::Unchecked(UncheckedHrpstringError::TooLong(91))) + ); + } }