From bc86ca716cb9c87e1e9babe3adb955374d2d1874 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sun, 12 Jan 2025 22:36:45 +0100 Subject: [PATCH 01/37] add XRobotsTag, initial implementation --- rama-http-types/src/lib.rs | 3 + rama-http/src/headers/mod.rs | 4 + rama-http/src/headers/x_robots_tag/element.rs | 46 +++++++ .../src/headers/x_robots_tag/iterator.rs | 19 +++ rama-http/src/headers/x_robots_tag/mod.rs | 69 +++++++++++ rama-http/src/headers/x_robots_tag/rule.rs | 113 ++++++++++++++++++ 6 files changed, 254 insertions(+) create mode 100644 rama-http/src/headers/x_robots_tag/element.rs create mode 100644 rama-http/src/headers/x_robots_tag/iterator.rs create mode 100644 rama-http/src/headers/x_robots_tag/mod.rs create mode 100644 rama-http/src/headers/x_robots_tag/rule.rs diff --git a/rama-http-types/src/lib.rs b/rama-http-types/src/lib.rs index cf60b2e7..806ead8d 100644 --- a/rama-http-types/src/lib.rs +++ b/rama-http-types/src/lib.rs @@ -131,6 +131,9 @@ pub mod header { "x-real-ip", ]; + // non-std web-crawler info headers + static_header!["x-robots-tag",]; + /// Static Header Value that is can be used as `User-Agent` or `Server` header. pub static RAMA_ID_HEADER_VALUE: HeaderValue = HeaderValue::from_static( const_format::formatcp!("{}/{}", rama_utils::info::NAME, rama_utils::info::VERSION), diff --git a/rama-http/src/headers/mod.rs b/rama-http/src/headers/mod.rs index 06dc4a44..c6f1bb8c 100644 --- a/rama-http/src/headers/mod.rs +++ b/rama-http/src/headers/mod.rs @@ -102,4 +102,8 @@ pub mod authorization { pub use ::rama_http_types::headers::HeaderExt; pub(crate) mod util; + +mod x_robots_tag; +pub use x_robots_tag::XRobotsTag; + pub use util::quality_value::{Quality, QualityValue}; diff --git a/rama-http/src/headers/x_robots_tag/element.rs b/rama-http/src/headers/x_robots_tag/element.rs new file mode 100644 index 00000000..515889b7 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/element.rs @@ -0,0 +1,46 @@ +use crate::headers::x_robots_tag::rule::Rule; +use rama_core::error::OpaqueError; +use std::fmt::Formatter; +use std::str::FromStr; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Element { + bot_name: Option, // or `rama_ua::UserAgent` ??? + indexing_rule: Rule, +} + +impl std::fmt::Display for Element { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self.bot_name { + None => write!(f, "{}", self.indexing_rule), + Some(bot) => write!(f, "{}: {}", bot, self.indexing_rule), + } + } +} + +impl FromStr for Element { + type Err = OpaqueError; + + fn from_str(s: &str) -> Result { + let (bot_name, indexing_rule) = match Rule::from_str(s) { + Ok(rule) => (None, Ok(rule)), + Err(e) => match *s.split(":").map(str::trim).collect::>().as_slice() { + [bot_name, rule] => (Some(bot_name.to_owned()), rule.parse()), + [bot_name, rule_name, rule_value] => ( + Some(bot_name.to_owned()), + [rule_name, rule_value][..].try_into(), + ), + _ => (None, Err(e)), + }, + }; + match indexing_rule { + Ok(indexing_rule) => Ok(Element { + bot_name, + indexing_rule, + }), + Err(_) => Err(OpaqueError::from_display( + "Failed to parse XRobotsTagElement", + )), + } + } +} diff --git a/rama-http/src/headers/x_robots_tag/iterator.rs b/rama-http/src/headers/x_robots_tag/iterator.rs new file mode 100644 index 00000000..285cbb16 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/iterator.rs @@ -0,0 +1,19 @@ +use crate::headers::x_robots_tag::Element; + +#[derive(Debug, Clone)] +/// An iterator over the `XRobotsTag` header's elements. +pub struct Iterator(std::vec::IntoIter); + +impl core::iter::Iterator for Iterator { + type Item = Element; + + fn next(&mut self) -> Option { + self.0.next() + } +} + +impl Iterator { + pub fn new(elements: std::vec::IntoIter) -> Self { + Self(elements) + } +} diff --git a/rama-http/src/headers/x_robots_tag/mod.rs b/rama-http/src/headers/x_robots_tag/mod.rs new file mode 100644 index 00000000..a4b8c062 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/mod.rs @@ -0,0 +1,69 @@ +mod rule; + +mod element; + +mod iterator; + +// ----------------------------------------------- \\ + +use crate::headers::Header; +use element::Element; +use http::{HeaderName, HeaderValue}; +use iterator::Iterator as XRobotsTagIterator; +use std::fmt::Formatter; +use std::iter::Iterator; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct XRobotsTag(Vec); + +impl Header for XRobotsTag { + fn name() -> &'static HeaderName { + &crate::header::X_ROBOTS_TAG + } + + fn decode<'i, I>(values: &mut I) -> Result + where + Self: Sized, + I: Iterator, + { + crate::headers::util::csv::from_comma_delimited(values).map(XRobotsTag) + } + + fn encode>(&self, values: &mut E) { + use std::fmt; + struct Format(F); + impl fmt::Display for Format + where + F: Fn(&mut Formatter<'_>) -> fmt::Result, + { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + self.0(f) + } + } + let s = format!( + "{}", + Format(|f: &mut Formatter<'_>| { + crate::headers::util::csv::fmt_comma_delimited(&mut *f, self.0.iter()) + }) + ); + values.extend(Some(HeaderValue::from_str(&s).unwrap())) + } +} + +impl FromIterator for XRobotsTag { + fn from_iter(iter: T) -> Self + where + T: IntoIterator, + { + XRobotsTag(iter.into_iter().collect()) + } +} + +impl IntoIterator for XRobotsTag { + type Item = Element; + type IntoIter = XRobotsTagIterator; + + fn into_iter(self) -> Self::IntoIter { + XRobotsTagIterator::new(self.0.into_iter()) + } +} diff --git a/rama-http/src/headers/x_robots_tag/rule.rs b/rama-http/src/headers/x_robots_tag/rule.rs new file mode 100644 index 00000000..1c090742 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/rule.rs @@ -0,0 +1,113 @@ +use rama_core::error::OpaqueError; +use std::convert::{TryFrom, TryInto}; +use std::fmt::Formatter; +use std::str::FromStr; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub(super) enum Rule { + All, + NoIndex, + NoFollow, + None, + NoSnippet, + IndexIfEmbedded, + MaxSnippet(u32), + MaxImagePreview(MaxImagePreviewSetting), + MaxVideoPreview(Option), + NoTranslate, + NoImageIndex, + UnavailableAfter(String), // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." +} + +impl std::fmt::Display for Rule { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Rule::All => write!(f, "all"), + Rule::NoIndex => write!(f, "noindex"), + Rule::NoFollow => write!(f, "nofollow"), + Rule::None => write!(f, "none"), + Rule::NoSnippet => write!(f, "nosnippet"), + Rule::IndexIfEmbedded => write!(f, "indexifembedded"), + Rule::MaxSnippet(number) => write!(f, "maxsnippet: {}", number), + Rule::MaxImagePreview(setting) => write!(f, "max-image-preview: {}", setting), + Rule::MaxVideoPreview(number) => match number { + Some(number) => write!(f, "max-video-preview: {}", number), + None => write!(f, "max-video-preview: -1"), + }, + Rule::NoTranslate => write!(f, "notranslate"), + Rule::NoImageIndex => write!(f, "noimageindex"), + Rule::UnavailableAfter(date) => write!(f, "unavailable_after: {}", date), + } + } +} + +impl FromStr for Rule { + type Err = OpaqueError; + + fn from_str(s: &str) -> Result { + s.split(":") + .map(str::trim) + .collect::>() + .as_slice() + .try_into() + } +} + +impl TryFrom<&[&str]> for Rule { + type Error = OpaqueError; + + fn try_from(value: &[&str]) -> Result { + match *value { + ["all"] => Ok(Rule::All), + ["no_index"] => Ok(Rule::NoIndex), + ["no_follow"] => Ok(Rule::NoFollow), + ["none"] => Ok(Rule::None), + ["no_snippet"] => Ok(Rule::NoSnippet), + ["indexifembedded"] => Ok(Rule::IndexIfEmbedded), + ["max-snippet", number] => Ok(Rule::MaxSnippet( + number.parse().map_err(OpaqueError::from_display)?, + )), + ["max-image-preview", setting] => Ok(Rule::MaxImagePreview(setting.parse()?)), + ["max-video-preview", number] => Ok(Rule::MaxVideoPreview(match number { + "-1" => None, + n => Some(n.parse().map_err(OpaqueError::from_display)?), + })), + ["notranslate"] => Ok(Rule::NoTranslate), + ["noimageindex"] => Ok(Rule::NoImageIndex), + ["unavailable_after", date] => Ok(Rule::UnavailableAfter(date.to_owned())), + _ => Err(OpaqueError::from_display("Invalid X-Robots-Tag rule")), + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +enum MaxImagePreviewSetting { + None, + Standard, + Large, +} + +impl std::fmt::Display for MaxImagePreviewSetting { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + MaxImagePreviewSetting::None => write!(f, "none"), + MaxImagePreviewSetting::Standard => write!(f, "standard"), + MaxImagePreviewSetting::Large => write!(f, "large"), + } + } +} + +impl FromStr for MaxImagePreviewSetting { + type Err = OpaqueError; + + fn from_str(s: &str) -> Result { + match s { + "none" => Ok(MaxImagePreviewSetting::None), + "standard" => Ok(MaxImagePreviewSetting::Standard), + "large" => Ok(MaxImagePreviewSetting::Large), + _ => Err(OpaqueError::from_display( + "failed to parse MaxImagePreviewSetting", + )), + } + } +} From 4888e89c9758ba6a4884901e89fefe815b5a905d Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Wed, 15 Jan 2025 14:32:51 +0100 Subject: [PATCH 02/37] add value_string.rs --- rama-http/src/headers/util/mod.rs | 2 + rama-http/src/headers/util/value_string.rs | 81 ++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 rama-http/src/headers/util/value_string.rs diff --git a/rama-http/src/headers/util/mod.rs b/rama-http/src/headers/util/mod.rs index af107ff1..277c6dc8 100644 --- a/rama-http/src/headers/util/mod.rs +++ b/rama-http/src/headers/util/mod.rs @@ -1,3 +1,5 @@ pub(crate) mod csv; /// Internal utility functions for headers. pub(crate) mod quality_value; + +pub(crate) mod value_string; diff --git a/rama-http/src/headers/util/value_string.rs b/rama-http/src/headers/util/value_string.rs new file mode 100644 index 00000000..83a8f7e2 --- /dev/null +++ b/rama-http/src/headers/util/value_string.rs @@ -0,0 +1,81 @@ +use std::{ + fmt, + str::{self, FromStr}, +}; + +use bytes::Bytes; +use http::header::HeaderValue; + +use crate::headers::Error; + +/// A value that is both a valid `HeaderValue` and `String`. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct HeaderValueString { + /// Care must be taken to only set this value when it is also + /// a valid `String`, since `as_str` will convert to a `&str` + /// in an unchecked manner. + value: HeaderValue, +} + +impl HeaderValueString { + pub(crate) fn from_val(val: &HeaderValue) -> Result { + if val.to_str().is_ok() { + Ok(HeaderValueString { value: val.clone() }) + } else { + Err(Error::invalid()) + } + } + + pub(crate) fn from_string(src: String) -> Option { + // A valid `str` (the argument)... + let bytes = Bytes::from(src); + HeaderValue::from_maybe_shared(bytes) + .ok() + .map(|value| HeaderValueString { value }) + } + + pub(crate) fn from_static(src: &'static str) -> HeaderValueString { + // A valid `str` (the argument)... + HeaderValueString { + value: HeaderValue::from_static(src), + } + } + + pub(crate) fn as_str(&self) -> &str { + // HeaderValueString is only created from HeaderValues + // that have validated they are also UTF-8 strings. + unsafe { str::from_utf8_unchecked(self.value.as_bytes()) } + } +} + +impl fmt::Debug for HeaderValueString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self.as_str(), f) + } +} + +impl fmt::Display for HeaderValueString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self.as_str(), f) + } +} + +impl<'a> From<&'a HeaderValueString> for HeaderValue { + fn from(src: &'a HeaderValueString) -> HeaderValue { + src.value.clone() + } +} + +#[derive(Debug)] +pub(crate) struct FromStrError(()); + +impl FromStr for HeaderValueString { + type Err = FromStrError; + + fn from_str(src: &str) -> Result { + // A valid `str` (the argument)... + src.parse() + .map(|value| HeaderValueString { value }) + .map_err(|_| FromStrError(())) + } +} From 2dc2e93536f0cac9644d118f46f51526caa656f2 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Wed, 15 Jan 2025 14:34:44 +0100 Subject: [PATCH 03/37] add more context with comments --- rama-http-types/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rama-http-types/src/lib.rs b/rama-http-types/src/lib.rs index 806ead8d..7b779b1e 100644 --- a/rama-http-types/src/lib.rs +++ b/rama-http-types/src/lib.rs @@ -132,7 +132,10 @@ pub mod header { ]; // non-std web-crawler info headers - static_header!["x-robots-tag",]; + // + // More information at + // . + static_header!["x-robots-tag"]; /// Static Header Value that is can be used as `User-Agent` or `Server` header. pub static RAMA_ID_HEADER_VALUE: HeaderValue = HeaderValue::from_static( From e6b7b53486d304915f87f0d7d8629f3b635a852a Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Wed, 15 Jan 2025 14:35:40 +0100 Subject: [PATCH 04/37] add ValidDate, custom rules --- rama-http/src/headers/x_robots_tag/mod.rs | 2 + rama-http/src/headers/x_robots_tag/rule.rs | 19 ++++-- .../src/headers/x_robots_tag/valid_date.rs | 65 +++++++++++++++++++ 3 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 rama-http/src/headers/x_robots_tag/valid_date.rs diff --git a/rama-http/src/headers/x_robots_tag/mod.rs b/rama-http/src/headers/x_robots_tag/mod.rs index a4b8c062..54bb6743 100644 --- a/rama-http/src/headers/x_robots_tag/mod.rs +++ b/rama-http/src/headers/x_robots_tag/mod.rs @@ -4,6 +4,8 @@ mod element; mod iterator; +mod valid_date; + // ----------------------------------------------- \\ use crate::headers::Header; diff --git a/rama-http/src/headers/x_robots_tag/rule.rs b/rama-http/src/headers/x_robots_tag/rule.rs index 1c090742..e2f9a0e1 100644 --- a/rama-http/src/headers/x_robots_tag/rule.rs +++ b/rama-http/src/headers/x_robots_tag/rule.rs @@ -2,9 +2,10 @@ use rama_core::error::OpaqueError; use std::convert::{TryFrom, TryInto}; use std::fmt::Formatter; use std::str::FromStr; +use crate::headers::x_robots_tag::valid_date::ValidDate; #[derive(Clone, Debug, Eq, PartialEq)] -pub(super) enum Rule { +pub enum Rule { All, NoIndex, NoFollow, @@ -16,7 +17,11 @@ pub(super) enum Rule { MaxVideoPreview(Option), NoTranslate, NoImageIndex, - UnavailableAfter(String), // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." + UnavailableAfter(ValidDate), // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." + // custom rules + NoAi, + NoImageAi, + SPC } impl std::fmt::Display for Rule { @@ -37,6 +42,9 @@ impl std::fmt::Display for Rule { Rule::NoTranslate => write!(f, "notranslate"), Rule::NoImageIndex => write!(f, "noimageindex"), Rule::UnavailableAfter(date) => write!(f, "unavailable_after: {}", date), + Rule::NoAi => write!(f, "noai"), + Rule::NoImageAi => write!(f, "noimageai"), + Rule::SPC => write!(f, "spc"), } } } @@ -74,14 +82,17 @@ impl TryFrom<&[&str]> for Rule { })), ["notranslate"] => Ok(Rule::NoTranslate), ["noimageindex"] => Ok(Rule::NoImageIndex), - ["unavailable_after", date] => Ok(Rule::UnavailableAfter(date.to_owned())), + ["unavailable_after", date] => Ok(Rule::UnavailableAfter(date.parse()?)), + ["noai"] => Ok(Rule::NoAi), + ["noimageai"] => Ok(Rule::NoImageAi), + ["spc"] => Ok(Rule::SPC), _ => Err(OpaqueError::from_display("Invalid X-Robots-Tag rule")), } } } #[derive(Clone, Debug, Eq, PartialEq)] -enum MaxImagePreviewSetting { +pub enum MaxImagePreviewSetting { None, Standard, Large, diff --git a/rama-http/src/headers/x_robots_tag/valid_date.rs b/rama-http/src/headers/x_robots_tag/valid_date.rs new file mode 100644 index 00000000..d16e38f2 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/valid_date.rs @@ -0,0 +1,65 @@ +use rama_core::error::OpaqueError; +use regex::Regex; +use std::fmt::{Display, Formatter}; +use std::ops::Deref; +use std::str::FromStr; + +// "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct ValidDate(String); + +impl ValidDate { + pub(crate) fn new(date: &str) -> Option { + let new = Self(date.to_owned()); + match new.is_valid() { + true => Some(new), + false => None, + } + } + + pub(crate) fn date(&self) -> &str { + &self.0 + } + + pub(crate) fn into_date(self) -> String { + self.0 + } + + pub(crate) fn is_valid(&self) -> bool { + let rfc_822 = r"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s(0[1-9]|[12]\d|3[01])\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{2}\s([01]\d|2[0-4]):([0-5]\d|60):([0-5]\d|60)\s(UT|GMT|EST|EDT|CST|CDT|MST|MDT|PST|PDT|[+-]\d{4})$"; + let rfc_850 = r"^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s(0?[1-9]|[12]\d|3[01])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\d{2}\s([01]\d|2[0-4]):([0-5]\d|60):([0-5]\d|60)\s(UT|GMT|EST|EDT|CST|CDT|MST|MDT|PST|PDT|[+-]\d{4})$"; + let iso_8601 = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])\s([01]\d|2[0-4]):([0-5]\d|60):([0-5]\d|60).\d{3}$"; + + check_is_valid(rfc_822, self.date()) + || check_is_valid(rfc_850, self.date()) + || check_is_valid(iso_8601, self.date()) + } +} + +pub(crate) fn check_is_valid(re: &str, date: &str) -> bool { + Regex::new(re) + .and_then(|r| Ok(r.is_match(date))) + .unwrap_or(false) +} + +impl FromStr for ValidDate { + type Err = OpaqueError; + + fn from_str(s: &str) -> Result { + Self::new(s).ok_or_else(|| OpaqueError::from_display("Invalid date format")) + } +} + +impl Display for ValidDate { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", &self) + } +} + +impl Deref for ValidDate { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.date() + } +} From fe4394c4547027761654554b6d0eec1faae82a8c Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Wed, 15 Jan 2025 14:48:41 +0100 Subject: [PATCH 05/37] fix value_string.rs visibility issues --- rama-http/src/headers/util/value_string.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rama-http/src/headers/util/value_string.rs b/rama-http/src/headers/util/value_string.rs index 83a8f7e2..9bf6ba05 100644 --- a/rama-http/src/headers/util/value_string.rs +++ b/rama-http/src/headers/util/value_string.rs @@ -67,7 +67,7 @@ impl<'a> From<&'a HeaderValueString> for HeaderValue { } #[derive(Debug)] -pub(crate) struct FromStrError(()); +pub struct FromStrError(()); impl FromStr for HeaderValueString { type Err = FromStrError; From 09775f71cdd17eeb86d35d0bcadb0a3322f1d6aa Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Fri, 17 Jan 2025 23:18:36 +0100 Subject: [PATCH 06/37] rename Iterator to ElementIter --- .../headers/x_robots_tag/{iterator.rs => element_iter.rs} | 6 +++--- rama-http/src/headers/x_robots_tag/mod.rs | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) rename rama-http/src/headers/x_robots_tag/{iterator.rs => element_iter.rs} (75%) diff --git a/rama-http/src/headers/x_robots_tag/iterator.rs b/rama-http/src/headers/x_robots_tag/element_iter.rs similarity index 75% rename from rama-http/src/headers/x_robots_tag/iterator.rs rename to rama-http/src/headers/x_robots_tag/element_iter.rs index 285cbb16..12f702da 100644 --- a/rama-http/src/headers/x_robots_tag/iterator.rs +++ b/rama-http/src/headers/x_robots_tag/element_iter.rs @@ -2,9 +2,9 @@ use crate::headers::x_robots_tag::Element; #[derive(Debug, Clone)] /// An iterator over the `XRobotsTag` header's elements. -pub struct Iterator(std::vec::IntoIter); +pub struct ElementIter(std::vec::IntoIter); -impl core::iter::Iterator for Iterator { +impl Iterator for ElementIter { type Item = Element; fn next(&mut self) -> Option { @@ -12,7 +12,7 @@ impl core::iter::Iterator for Iterator { } } -impl Iterator { +impl ElementIter { pub fn new(elements: std::vec::IntoIter) -> Self { Self(elements) } diff --git a/rama-http/src/headers/x_robots_tag/mod.rs b/rama-http/src/headers/x_robots_tag/mod.rs index 54bb6743..48083b51 100644 --- a/rama-http/src/headers/x_robots_tag/mod.rs +++ b/rama-http/src/headers/x_robots_tag/mod.rs @@ -2,7 +2,7 @@ mod rule; mod element; -mod iterator; +mod element_iter; mod valid_date; @@ -11,7 +11,7 @@ mod valid_date; use crate::headers::Header; use element::Element; use http::{HeaderName, HeaderValue}; -use iterator::Iterator as XRobotsTagIterator; +use element_iter::ElementIter; use std::fmt::Formatter; use std::iter::Iterator; @@ -63,9 +63,9 @@ impl FromIterator for XRobotsTag { impl IntoIterator for XRobotsTag { type Item = Element; - type IntoIter = XRobotsTagIterator; + type IntoIter = ElementIter; fn into_iter(self) -> Self::IntoIter { - XRobotsTagIterator::new(self.0.into_iter()) + ElementIter::new(self.0.into_iter()) } } From 51b31713ca03800a075c0a69209b61123deaa008 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Fri, 17 Jan 2025 23:20:27 +0100 Subject: [PATCH 07/37] fix visibility issues --- rama-http/src/headers/x_robots_tag/valid_date.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag/valid_date.rs b/rama-http/src/headers/x_robots_tag/valid_date.rs index d16e38f2..07077f05 100644 --- a/rama-http/src/headers/x_robots_tag/valid_date.rs +++ b/rama-http/src/headers/x_robots_tag/valid_date.rs @@ -6,10 +6,10 @@ use std::str::FromStr; // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." #[derive(Clone, Debug, Eq, PartialEq)] -pub(crate) struct ValidDate(String); +pub struct ValidDate(String); impl ValidDate { - pub(crate) fn new(date: &str) -> Option { + pub fn new(date: &str) -> Option { let new = Self(date.to_owned()); match new.is_valid() { true => Some(new), @@ -17,15 +17,15 @@ impl ValidDate { } } - pub(crate) fn date(&self) -> &str { + pub fn date(&self) -> &str { &self.0 } - pub(crate) fn into_date(self) -> String { + pub fn into_date(self) -> String { self.0 } - pub(crate) fn is_valid(&self) -> bool { + pub fn is_valid(&self) -> bool { let rfc_822 = r"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s(0[1-9]|[12]\d|3[01])\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{2}\s([01]\d|2[0-4]):([0-5]\d|60):([0-5]\d|60)\s(UT|GMT|EST|EDT|CST|CDT|MST|MDT|PST|PDT|[+-]\d{4})$"; let rfc_850 = r"^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s(0?[1-9]|[12]\d|3[01])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\d{2}\s([01]\d|2[0-4]):([0-5]\d|60):([0-5]\d|60)\s(UT|GMT|EST|EDT|CST|CDT|MST|MDT|PST|PDT|[+-]\d{4})$"; let iso_8601 = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])\s([01]\d|2[0-4]):([0-5]\d|60):([0-5]\d|60).\d{3}$"; @@ -36,7 +36,7 @@ impl ValidDate { } } -pub(crate) fn check_is_valid(re: &str, date: &str) -> bool { +fn check_is_valid(re: &str, date: &str) -> bool { Regex::new(re) .and_then(|r| Ok(r.is_match(date))) .unwrap_or(false) From 62bd0d66f1415a1038775eca2d119f3e164ff21e Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Fri, 17 Jan 2025 23:22:47 +0100 Subject: [PATCH 08/37] change trait TryFrom<&[&str]> to private function from_iter --- rama-http/src/headers/x_robots_tag/rule.rs | 77 ++++++++++++---------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag/rule.rs b/rama-http/src/headers/x_robots_tag/rule.rs index e2f9a0e1..9796590c 100644 --- a/rama-http/src/headers/x_robots_tag/rule.rs +++ b/rama-http/src/headers/x_robots_tag/rule.rs @@ -1,8 +1,7 @@ +use crate::headers::x_robots_tag::valid_date::ValidDate; use rama_core::error::OpaqueError; -use std::convert::{TryFrom, TryInto}; use std::fmt::Formatter; use std::str::FromStr; -use crate::headers::x_robots_tag::valid_date::ValidDate; #[derive(Clone, Debug, Eq, PartialEq)] pub enum Rule { @@ -21,7 +20,7 @@ pub enum Rule { // custom rules NoAi, NoImageAi, - SPC + SPC, } impl std::fmt::Display for Rule { @@ -53,39 +52,51 @@ impl FromStr for Rule { type Err = OpaqueError; fn from_str(s: &str) -> Result { - s.split(":") - .map(str::trim) - .collect::>() - .as_slice() - .try_into() + Self::from_iter(s.split(":").map(str::trim)) } } -impl TryFrom<&[&str]> for Rule { - type Error = OpaqueError; - - fn try_from(value: &[&str]) -> Result { - match *value { - ["all"] => Ok(Rule::All), - ["no_index"] => Ok(Rule::NoIndex), - ["no_follow"] => Ok(Rule::NoFollow), - ["none"] => Ok(Rule::None), - ["no_snippet"] => Ok(Rule::NoSnippet), - ["indexifembedded"] => Ok(Rule::IndexIfEmbedded), - ["max-snippet", number] => Ok(Rule::MaxSnippet( - number.parse().map_err(OpaqueError::from_display)?, - )), - ["max-image-preview", setting] => Ok(Rule::MaxImagePreview(setting.parse()?)), - ["max-video-preview", number] => Ok(Rule::MaxVideoPreview(match number { - "-1" => None, - n => Some(n.parse().map_err(OpaqueError::from_display)?), - })), - ["notranslate"] => Ok(Rule::NoTranslate), - ["noimageindex"] => Ok(Rule::NoImageIndex), - ["unavailable_after", date] => Ok(Rule::UnavailableAfter(date.parse()?)), - ["noai"] => Ok(Rule::NoAi), - ["noimageai"] => Ok(Rule::NoImageAi), - ["spc"] => Ok(Rule::SPC), +impl<'a> Rule { + fn from_iter(mut value: impl Iterator) -> Result { + match value.next() { + Some("all") => Ok(Rule::All), + Some("no_index") => Ok(Rule::NoIndex), + Some("no_follow") => Ok(Rule::NoFollow), + Some("none") => Ok(Rule::None), + Some("no_snippet") => Ok(Rule::NoSnippet), + Some("indexifembedded") => Ok(Rule::IndexIfEmbedded), + Some("max-snippet") => match value.next() { + Some(number) => Ok(Rule::MaxSnippet(number.parse().map_err(OpaqueError::from_display)?)), + None => Err(OpaqueError::from_display( + "No number specified for 'max-snippet'", + )), + }, + Some("max-image-preview") => match value.next() { + Some(setting) => Ok(Rule::MaxImagePreview(setting.parse()?)), + None => Err(OpaqueError::from_display( + "No setting specified for 'max-image-preview'", + )), + }, + Some("max-video-preview") => match value.next() { + Some(number) => Ok(Rule::MaxVideoPreview(match number { + "-1" => None, + n => Some(n.parse().map_err(OpaqueError::from_display)?), + })), + None => Err(OpaqueError::from_display( + "No number specified for 'max-video-preview'", + )), + }, + Some("notranslate") => Ok(Rule::NoTranslate), + Some("noimageindex") => Ok(Rule::NoImageIndex), + Some("unavailable_after") => match value.next() { + Some(date) => Ok(Rule::UnavailableAfter(date.parse()?)), + None => Err(OpaqueError::from_display( + "No date specified for 'unavailable-after'", + )), + }, + Some("noai") => Ok(Rule::NoAi), + Some("noimageai") => Ok(Rule::NoImageAi), + Some("spc") => Ok(Rule::SPC), _ => Err(OpaqueError::from_display("Invalid X-Robots-Tag rule")), } } From f17550ce2b00af72e4c22d2602a9f475ae1f1005 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Fri, 17 Jan 2025 23:24:43 +0100 Subject: [PATCH 09/37] separate 'split_csv_str' function from 'from_comma_delimited' --- rama-http/src/headers/util/csv.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/rama-http/src/headers/util/csv.rs b/rama-http/src/headers/util/csv.rs index b91f3567..dec5ca88 100644 --- a/rama-http/src/headers/util/csv.rs +++ b/rama-http/src/headers/util/csv.rs @@ -11,24 +11,27 @@ use crate::HeaderValue; pub(crate) fn from_comma_delimited<'i, I, T, E>(values: &mut I) -> Result where I: Iterator, - T: ::std::str::FromStr, - E: ::std::iter::FromIterator, + T: std::str::FromStr, + E: FromIterator, { values .flat_map(|value| { value.to_str().into_iter().flat_map(|string| { - string - .split(',') - .filter_map(|x| match x.trim() { - "" => None, - y => Some(y), - }) - .map(|x| x.parse().map_err(|_| Error::invalid())) + split_csv_str(string) }) }) .collect() } +pub(crate) fn split_csv_str( + string: &str, +) -> impl Iterator> + use<'_, T> { + string.split(',').filter_map(|x| match x.trim() { + "" => None, + y => Some(y.parse().map_err(|_| Error::invalid())), + }) +} + /// Format an array into a comma-delimited string. pub(crate) fn fmt_comma_delimited( f: &mut fmt::Formatter, From dcf3586f757ed847c59d5a9ee26cfeddb62eca51 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Fri, 17 Jan 2025 23:27:40 +0100 Subject: [PATCH 10/37] change bot_name field type to 'HeaderValueString' and indexing_rule field to 'Vec' --- rama-http/src/headers/x_robots_tag/element.rs | 43 ++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag/element.rs b/rama-http/src/headers/x_robots_tag/element.rs index 515889b7..27244034 100644 --- a/rama-http/src/headers/x_robots_tag/element.rs +++ b/rama-http/src/headers/x_robots_tag/element.rs @@ -1,3 +1,5 @@ +use crate::headers::util::csv::{fmt_comma_delimited, split_csv_str}; +use crate::headers::util::value_string::HeaderValueString; use crate::headers::x_robots_tag::rule::Rule; use rama_core::error::OpaqueError; use std::fmt::Formatter; @@ -5,15 +7,46 @@ use std::str::FromStr; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Element { - bot_name: Option, // or `rama_ua::UserAgent` ??? - indexing_rule: Rule, + bot_name: Option, + indexing_rules: Vec, +} + +impl Element { + pub fn new() -> Self { + Self { + bot_name: None, + indexing_rules: Vec::new(), + } + } + + pub fn with_bot_name(bot_name: HeaderValueString) -> Self { + Self { + bot_name: Some(bot_name), + indexing_rules: Vec::new(), + } + } + + pub fn add_indexing_rule(&mut self, indexing_rule: Rule) { + self.indexing_rules.push(indexing_rule); + } + + pub fn bot_name(&self) -> Option<&HeaderValueString> { + self.bot_name.as_ref() + } + + pub fn indexing_rules(&self) -> &[Rule] { + &self.indexing_rules + } } impl std::fmt::Display for Element { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match &self.bot_name { - None => write!(f, "{}", self.indexing_rule), - Some(bot) => write!(f, "{}: {}", bot, self.indexing_rule), + match self.bot_name() { + None => fmt_comma_delimited(f, self.indexing_rules().iter()), + Some(bot) => { + write!(f, "{bot}: ")?; + fmt_comma_delimited(f, self.indexing_rules().iter()) + } } } } From 879394d151127faa2c8aa4c1071a35640d7b4728 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Fri, 17 Jan 2025 23:28:04 +0100 Subject: [PATCH 11/37] implement FromStr for Element --- rama-http/src/headers/x_robots_tag/element.rs | 52 ++++++++++++------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag/element.rs b/rama-http/src/headers/x_robots_tag/element.rs index 27244034..72e04db5 100644 --- a/rama-http/src/headers/x_robots_tag/element.rs +++ b/rama-http/src/headers/x_robots_tag/element.rs @@ -1,7 +1,8 @@ use crate::headers::util::csv::{fmt_comma_delimited, split_csv_str}; use crate::headers::util::value_string::HeaderValueString; use crate::headers::x_robots_tag::rule::Rule; -use rama_core::error::OpaqueError; +use rama_core::error::{ErrorContext, OpaqueError}; +use regex::Regex; use std::fmt::Formatter; use std::str::FromStr; @@ -55,25 +56,36 @@ impl FromStr for Element { type Err = OpaqueError; fn from_str(s: &str) -> Result { - let (bot_name, indexing_rule) = match Rule::from_str(s) { - Ok(rule) => (None, Ok(rule)), - Err(e) => match *s.split(":").map(str::trim).collect::>().as_slice() { - [bot_name, rule] => (Some(bot_name.to_owned()), rule.parse()), - [bot_name, rule_name, rule_value] => ( - Some(bot_name.to_owned()), - [rule_name, rule_value][..].try_into(), - ), - _ => (None, Err(e)), - }, - }; - match indexing_rule { - Ok(indexing_rule) => Ok(Element { - bot_name, - indexing_rule, - }), - Err(_) => Err(OpaqueError::from_display( - "Failed to parse XRobotsTagElement", - )), + let regex = Regex::new(r"^\s*([^:]+?):\s*(.+)$") + .context("Failed to compile a regular expression")?; + + let mut bot_name = None; + let mut rules_str = s; + + if let Some(captures) = regex.captures(s) { + let bot_name_candidate = captures + .get(1) + .context("Failed to capture the target bot name")? + .as_str() + .trim(); + + if bot_name_candidate.parse::().is_err() { + bot_name = HeaderValueString::from_string(bot_name_candidate.to_owned()); + rules_str = captures + .get(2) + .context("Failed to capture the indexing rules")? + .as_str() + .trim(); + } } + + let indexing_rules = split_csv_str(rules_str) + .collect::, _>>() + .context("Failed to parse the indexing rules")?; + + Ok(Self { + bot_name, + indexing_rules, + }) } } From 33b63f32cd5d220f702c4d384121606d330ee595 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Fri, 17 Jan 2025 23:29:53 +0100 Subject: [PATCH 12/37] reformat with rustfmt --- rama-http/src/headers/util/csv.rs | 7 ++++--- rama-http/src/headers/util/value_string.rs | 4 ++-- rama-http/src/headers/x_robots_tag/element.rs | 2 +- rama-http/src/headers/x_robots_tag/mod.rs | 2 +- rama-http/src/headers/x_robots_tag/rule.rs | 4 +++- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/rama-http/src/headers/util/csv.rs b/rama-http/src/headers/util/csv.rs index dec5ca88..9523510d 100644 --- a/rama-http/src/headers/util/csv.rs +++ b/rama-http/src/headers/util/csv.rs @@ -16,9 +16,10 @@ where { values .flat_map(|value| { - value.to_str().into_iter().flat_map(|string| { - split_csv_str(string) - }) + value + .to_str() + .into_iter() + .flat_map(|string| split_csv_str(string)) }) .collect() } diff --git a/rama-http/src/headers/util/value_string.rs b/rama-http/src/headers/util/value_string.rs index 9bf6ba05..ecac531c 100644 --- a/rama-http/src/headers/util/value_string.rs +++ b/rama-http/src/headers/util/value_string.rs @@ -1,6 +1,6 @@ use std::{ - fmt, - str::{self, FromStr}, + fmt, + str::{self, FromStr}, }; use bytes::Bytes; diff --git a/rama-http/src/headers/x_robots_tag/element.rs b/rama-http/src/headers/x_robots_tag/element.rs index 72e04db5..c2d0cea8 100644 --- a/rama-http/src/headers/x_robots_tag/element.rs +++ b/rama-http/src/headers/x_robots_tag/element.rs @@ -68,7 +68,7 @@ impl FromStr for Element { .context("Failed to capture the target bot name")? .as_str() .trim(); - + if bot_name_candidate.parse::().is_err() { bot_name = HeaderValueString::from_string(bot_name_candidate.to_owned()); rules_str = captures diff --git a/rama-http/src/headers/x_robots_tag/mod.rs b/rama-http/src/headers/x_robots_tag/mod.rs index 48083b51..a160abe9 100644 --- a/rama-http/src/headers/x_robots_tag/mod.rs +++ b/rama-http/src/headers/x_robots_tag/mod.rs @@ -10,8 +10,8 @@ mod valid_date; use crate::headers::Header; use element::Element; -use http::{HeaderName, HeaderValue}; use element_iter::ElementIter; +use http::{HeaderName, HeaderValue}; use std::fmt::Formatter; use std::iter::Iterator; diff --git a/rama-http/src/headers/x_robots_tag/rule.rs b/rama-http/src/headers/x_robots_tag/rule.rs index 9796590c..35435f45 100644 --- a/rama-http/src/headers/x_robots_tag/rule.rs +++ b/rama-http/src/headers/x_robots_tag/rule.rs @@ -66,7 +66,9 @@ impl<'a> Rule { Some("no_snippet") => Ok(Rule::NoSnippet), Some("indexifembedded") => Ok(Rule::IndexIfEmbedded), Some("max-snippet") => match value.next() { - Some(number) => Ok(Rule::MaxSnippet(number.parse().map_err(OpaqueError::from_display)?)), + Some(number) => Ok(Rule::MaxSnippet( + number.parse().map_err(OpaqueError::from_display)?, + )), None => Err(OpaqueError::from_display( "No number specified for 'max-snippet'", )), From 9e0b8aecc74068e63e3e2181c253ea49dc8f890a Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Fri, 17 Jan 2025 23:45:10 +0100 Subject: [PATCH 13/37] todo/ fix XRobotsTag::decode() --- rama-http/src/headers/x_robots_tag/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rama-http/src/headers/x_robots_tag/mod.rs b/rama-http/src/headers/x_robots_tag/mod.rs index a160abe9..030c5c8d 100644 --- a/rama-http/src/headers/x_robots_tag/mod.rs +++ b/rama-http/src/headers/x_robots_tag/mod.rs @@ -28,7 +28,8 @@ impl Header for XRobotsTag { Self: Sized, I: Iterator, { - crate::headers::util::csv::from_comma_delimited(values).map(XRobotsTag) + todo!(); + crate::headers::util::csv::from_comma_delimited(values).map(XRobotsTag) // wouldn't really work, need more complex logic } fn encode>(&self, values: &mut E) { From bae6cad668a62a7549189e3c9f984f36fe83cec9 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Mon, 27 Jan 2025 18:01:37 +0100 Subject: [PATCH 14/37] add chrono crate to dependencies --- Cargo.lock | 74 +++++++++++++++++++++++++++++++++++++++++++- rama-http/Cargo.toml | 1 + 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index f6a63914..01758206 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,6 +59,21 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.6.18" @@ -450,6 +465,20 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -1344,6 +1373,29 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core 0.52.0", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "1.5.0" @@ -1833,6 +1885,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "object" version = "0.36.7" @@ -2249,6 +2310,7 @@ dependencies = [ "bitflags", "brotli", "bytes", + "chrono", "const_format", "csv", "flate2", @@ -3514,6 +3576,7 @@ checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] @@ -3621,7 +3684,16 @@ version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" dependencies = [ - "windows-core", + "windows-core 0.58.0", + "windows-targets", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ "windows-targets", ] diff --git a/rama-http/Cargo.toml b/rama-http/Cargo.toml index ac596aa2..f388f1d7 100644 --- a/rama-http/Cargo.toml +++ b/rama-http/Cargo.toml @@ -30,6 +30,7 @@ async-compression = { workspace = true, features = [ base64 = { workspace = true } bitflags = { workspace = true } bytes = { workspace = true } +chrono = "0.4.39" const_format = { workspace = true } csv = { workspace = true } futures-lite = { workspace = true } From f8d78aca35ff42c1b3f3a0410e2b8e226bb34598 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Mon, 27 Jan 2025 18:41:17 +0100 Subject: [PATCH 15/37] rework API --- rama-http/src/headers/mod.rs | 3 +- .../src/headers/x_robots_tag/custom_rule.rs | 31 ++++ rama-http/src/headers/x_robots_tag/element.rs | 91 ------------ .../src/headers/x_robots_tag/element_iter.rs | 19 --- .../x_robots_tag/max_image_preview_setting.rs | 35 +++++ rama-http/src/headers/x_robots_tag/mod.rs | 77 +--------- .../src/headers/x_robots_tag/robots_tag.rs | 102 +++++++++++++ .../x_robots_tag/robots_tag_builder.rs | 61 ++++++++ rama-http/src/headers/x_robots_tag/rule.rs | 137 ------------------ .../src/headers/x_robots_tag/valid_date.rs | 65 --------- 10 files changed, 238 insertions(+), 383 deletions(-) create mode 100644 rama-http/src/headers/x_robots_tag/custom_rule.rs delete mode 100644 rama-http/src/headers/x_robots_tag/element.rs delete mode 100644 rama-http/src/headers/x_robots_tag/element_iter.rs create mode 100644 rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs create mode 100644 rama-http/src/headers/x_robots_tag/robots_tag.rs create mode 100644 rama-http/src/headers/x_robots_tag/robots_tag_builder.rs delete mode 100644 rama-http/src/headers/x_robots_tag/rule.rs delete mode 100644 rama-http/src/headers/x_robots_tag/valid_date.rs diff --git a/rama-http/src/headers/mod.rs b/rama-http/src/headers/mod.rs index c6f1bb8c..6abc2554 100644 --- a/rama-http/src/headers/mod.rs +++ b/rama-http/src/headers/mod.rs @@ -103,7 +103,6 @@ pub use ::rama_http_types::headers::HeaderExt; pub(crate) mod util; -mod x_robots_tag; -pub use x_robots_tag::XRobotsTag; +pub mod x_robots_tag; pub use util::quality_value::{Quality, QualityValue}; diff --git a/rama-http/src/headers/x_robots_tag/custom_rule.rs b/rama-http/src/headers/x_robots_tag/custom_rule.rs new file mode 100644 index 00000000..aeb86a27 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/custom_rule.rs @@ -0,0 +1,31 @@ +use crate::headers::util::value_string::{FromStrError, HeaderValueString}; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct CustomRule { + key: HeaderValueString, + value: Option, +} + +impl CustomRule { + pub fn new(key: &str) -> Result { + Ok(Self { + key: key.parse()?, + value: None, + }) + } + + pub fn with_value(key: &str, value: &str) -> Result { + Ok(Self { + key: key.parse()?, + value: Some(value.parse()?), + }) + } + + pub fn key(&self) -> &HeaderValueString { + &self.key + } + + pub fn value(&self) -> Option<&HeaderValueString> { + self.value.as_ref() + } +} diff --git a/rama-http/src/headers/x_robots_tag/element.rs b/rama-http/src/headers/x_robots_tag/element.rs deleted file mode 100644 index c2d0cea8..00000000 --- a/rama-http/src/headers/x_robots_tag/element.rs +++ /dev/null @@ -1,91 +0,0 @@ -use crate::headers::util::csv::{fmt_comma_delimited, split_csv_str}; -use crate::headers::util::value_string::HeaderValueString; -use crate::headers::x_robots_tag::rule::Rule; -use rama_core::error::{ErrorContext, OpaqueError}; -use regex::Regex; -use std::fmt::Formatter; -use std::str::FromStr; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Element { - bot_name: Option, - indexing_rules: Vec, -} - -impl Element { - pub fn new() -> Self { - Self { - bot_name: None, - indexing_rules: Vec::new(), - } - } - - pub fn with_bot_name(bot_name: HeaderValueString) -> Self { - Self { - bot_name: Some(bot_name), - indexing_rules: Vec::new(), - } - } - - pub fn add_indexing_rule(&mut self, indexing_rule: Rule) { - self.indexing_rules.push(indexing_rule); - } - - pub fn bot_name(&self) -> Option<&HeaderValueString> { - self.bot_name.as_ref() - } - - pub fn indexing_rules(&self) -> &[Rule] { - &self.indexing_rules - } -} - -impl std::fmt::Display for Element { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self.bot_name() { - None => fmt_comma_delimited(f, self.indexing_rules().iter()), - Some(bot) => { - write!(f, "{bot}: ")?; - fmt_comma_delimited(f, self.indexing_rules().iter()) - } - } - } -} - -impl FromStr for Element { - type Err = OpaqueError; - - fn from_str(s: &str) -> Result { - let regex = Regex::new(r"^\s*([^:]+?):\s*(.+)$") - .context("Failed to compile a regular expression")?; - - let mut bot_name = None; - let mut rules_str = s; - - if let Some(captures) = regex.captures(s) { - let bot_name_candidate = captures - .get(1) - .context("Failed to capture the target bot name")? - .as_str() - .trim(); - - if bot_name_candidate.parse::().is_err() { - bot_name = HeaderValueString::from_string(bot_name_candidate.to_owned()); - rules_str = captures - .get(2) - .context("Failed to capture the indexing rules")? - .as_str() - .trim(); - } - } - - let indexing_rules = split_csv_str(rules_str) - .collect::, _>>() - .context("Failed to parse the indexing rules")?; - - Ok(Self { - bot_name, - indexing_rules, - }) - } -} diff --git a/rama-http/src/headers/x_robots_tag/element_iter.rs b/rama-http/src/headers/x_robots_tag/element_iter.rs deleted file mode 100644 index 12f702da..00000000 --- a/rama-http/src/headers/x_robots_tag/element_iter.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::headers::x_robots_tag::Element; - -#[derive(Debug, Clone)] -/// An iterator over the `XRobotsTag` header's elements. -pub struct ElementIter(std::vec::IntoIter); - -impl Iterator for ElementIter { - type Item = Element; - - fn next(&mut self) -> Option { - self.0.next() - } -} - -impl ElementIter { - pub fn new(elements: std::vec::IntoIter) -> Self { - Self(elements) - } -} diff --git a/rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs b/rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs new file mode 100644 index 00000000..c5c5f2df --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs @@ -0,0 +1,35 @@ +use rama_core::error::OpaqueError; +use std::fmt::Formatter; +use std::str::FromStr; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum MaxImagePreviewSetting { + None, + Standard, + Large, +} + +impl std::fmt::Display for MaxImagePreviewSetting { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + MaxImagePreviewSetting::None => write!(f, "none"), + MaxImagePreviewSetting::Standard => write!(f, "standard"), + MaxImagePreviewSetting::Large => write!(f, "large"), + } + } +} + +impl FromStr for MaxImagePreviewSetting { + type Err = OpaqueError; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().trim() { + "none" => Ok(MaxImagePreviewSetting::None), + "standard" => Ok(MaxImagePreviewSetting::Standard), + "large" => Ok(MaxImagePreviewSetting::Large), + _ => Err(OpaqueError::from_display( + "failed to parse MaxImagePreviewSetting", + )), + } + } +} diff --git a/rama-http/src/headers/x_robots_tag/mod.rs b/rama-http/src/headers/x_robots_tag/mod.rs index 030c5c8d..8d5de836 100644 --- a/rama-http/src/headers/x_robots_tag/mod.rs +++ b/rama-http/src/headers/x_robots_tag/mod.rs @@ -1,72 +1,11 @@ -mod rule; +pub mod robots_tag; +pub use robots_tag::RobotsTag; -mod element; +pub mod robots_tag_builder; +pub use robots_tag_builder::RobotsTagBuilder; -mod element_iter; +pub mod max_image_preview_setting; +pub use max_image_preview_setting::MaxImagePreviewSetting; -mod valid_date; - -// ----------------------------------------------- \\ - -use crate::headers::Header; -use element::Element; -use element_iter::ElementIter; -use http::{HeaderName, HeaderValue}; -use std::fmt::Formatter; -use std::iter::Iterator; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct XRobotsTag(Vec); - -impl Header for XRobotsTag { - fn name() -> &'static HeaderName { - &crate::header::X_ROBOTS_TAG - } - - fn decode<'i, I>(values: &mut I) -> Result - where - Self: Sized, - I: Iterator, - { - todo!(); - crate::headers::util::csv::from_comma_delimited(values).map(XRobotsTag) // wouldn't really work, need more complex logic - } - - fn encode>(&self, values: &mut E) { - use std::fmt; - struct Format(F); - impl fmt::Display for Format - where - F: Fn(&mut Formatter<'_>) -> fmt::Result, - { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.0(f) - } - } - let s = format!( - "{}", - Format(|f: &mut Formatter<'_>| { - crate::headers::util::csv::fmt_comma_delimited(&mut *f, self.0.iter()) - }) - ); - values.extend(Some(HeaderValue::from_str(&s).unwrap())) - } -} - -impl FromIterator for XRobotsTag { - fn from_iter(iter: T) -> Self - where - T: IntoIterator, - { - XRobotsTag(iter.into_iter().collect()) - } -} - -impl IntoIterator for XRobotsTag { - type Item = Element; - type IntoIter = ElementIter; - - fn into_iter(self) -> Self::IntoIter { - ElementIter::new(self.0.into_iter()) - } -} +pub mod custom_rule; +pub use custom_rule::CustomRule; diff --git a/rama-http/src/headers/x_robots_tag/robots_tag.rs b/rama-http/src/headers/x_robots_tag/robots_tag.rs new file mode 100644 index 00000000..2eccd50b --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/robots_tag.rs @@ -0,0 +1,102 @@ +use crate::headers::util::value_string::HeaderValueString; +use crate::headers::x_robots_tag::custom_rule::CustomRule; +use crate::headers::x_robots_tag::max_image_preview_setting::MaxImagePreviewSetting; +use crate::headers::x_robots_tag::robots_tag_builder::RobotsTagBuilder; + +macro_rules! getter_setter { + ($field:ident, $type:ty) => { + paste::paste! { + pub fn [<$field>](&self) -> $type { + self.[<$field>] + } + + pub fn [](&mut self, [<$field>]: $type) -> &mut Self { + self.[<$field>] = [<$field>]; + self + } + } + }; + + ($field:ident, $type:ty, optional) => { + paste::paste! { + pub fn [<$field>](&self) -> Option<&$type> { + self.[<$field>].as_ref() + } + + pub fn [](&mut self, [<$field>]: $type) -> &mut Self { + self.[<$field>] = Some([<$field>]); + self + } + } + }; + + ($field:ident, $type:ty, vec) => { + paste::paste! { + pub fn [<$field>](&self) -> &Vec<$type> { + &self.[<$field>] + } + + pub fn [](&mut self, [<$field>]: $type) -> &mut Self { + self.[<$field>].push([<$field>]); + self + } + } + }; +} + +#[derive(Clone, Debug, Eq, PartialEq, Default)] +pub struct RobotsTag { + bot_name: Option, + all: bool, + no_index: bool, + no_follow: bool, + none: bool, + no_snippet: bool, + index_if_embedded: bool, + max_snippet: u32, + max_image_preview: Option, + max_video_preview: Option, + no_translate: bool, + no_image_index: bool, + unavailable_after: Option>, // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." + // custom rules + no_ai: bool, + no_image_ai: bool, + spc: bool, + custom_rules: Vec, +} + +impl RobotsTag { + pub fn new() -> Self { + Default::default() + } + + pub fn with_bot_name(bot_name: Option) -> Self { + Self { + bot_name, + ..Default::default() + } + } + + pub fn builder() -> RobotsTagBuilder { + RobotsTagBuilder::new() + } + + getter_setter!(bot_name, HeaderValueString, optional); + getter_setter!(all, bool); + getter_setter!(no_index, bool); + getter_setter!(no_follow, bool); + getter_setter!(none, bool); + getter_setter!(no_snippet, bool); + getter_setter!(index_if_embedded, bool); + getter_setter!(max_snippet, u32); + getter_setter!(max_image_preview, MaxImagePreviewSetting, optional); + getter_setter!(max_video_preview, u32, optional); + getter_setter!(no_translate, bool); + getter_setter!(no_image_index, bool); + getter_setter!(unavailable_after, chrono::DateTime, optional); + getter_setter!(no_ai, bool); + getter_setter!(no_image_ai, bool); + getter_setter!(spc, bool); + getter_setter!(custom_rules, CustomRule, vec); +} diff --git a/rama-http/src/headers/x_robots_tag/robots_tag_builder.rs b/rama-http/src/headers/x_robots_tag/robots_tag_builder.rs new file mode 100644 index 00000000..00cb9b55 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/robots_tag_builder.rs @@ -0,0 +1,61 @@ +use crate::headers::util::value_string::HeaderValueString; +use crate::headers::x_robots_tag::custom_rule::CustomRule; +use crate::headers::x_robots_tag::max_image_preview_setting::MaxImagePreviewSetting; +use crate::headers::x_robots_tag::robots_tag::RobotsTag; + +macro_rules! builder_field { + ($field:ident, $type:ty) => { + paste::paste! { + pub fn [<$field>](mut self, [<$field>]: $type) -> Self { + self.0.[]([<$field>]); + self + } + + pub fn [](&mut self, [<$field>]: $type) -> &mut Self { + self.0.[]([<$field>]); + self + } + } + }; +} + +#[derive(Clone, Debug, Eq, PartialEq, Default)] +pub struct RobotsTagBuilder(T); + +impl RobotsTagBuilder<()> { + pub fn new() -> Self { + RobotsTagBuilder(()) + } + + pub fn bot_name(self, bot_name: Option) -> RobotsTagBuilder { + RobotsTagBuilder(RobotsTag::with_bot_name(bot_name)) + } +} + +impl RobotsTagBuilder { + pub fn add_custom_rule(&mut self, rule: CustomRule) -> &mut Self { + self.0.add_custom_rules(rule); + self + } + + pub fn build(self) -> RobotsTag { + self.0 + } + + builder_field!(bot_name, HeaderValueString); + builder_field!(all, bool); + builder_field!(no_index, bool); + builder_field!(no_follow, bool); + builder_field!(none, bool); + builder_field!(no_snippet, bool); + builder_field!(index_if_embedded, bool); + builder_field!(max_snippet, u32); + builder_field!(max_image_preview, MaxImagePreviewSetting); + builder_field!(max_video_preview, u32); + builder_field!(no_translate, bool); + builder_field!(no_image_index, bool); + builder_field!(unavailable_after, chrono::DateTime); + builder_field!(no_ai, bool); + builder_field!(no_image_ai, bool); + builder_field!(spc, bool); +} diff --git a/rama-http/src/headers/x_robots_tag/rule.rs b/rama-http/src/headers/x_robots_tag/rule.rs deleted file mode 100644 index 35435f45..00000000 --- a/rama-http/src/headers/x_robots_tag/rule.rs +++ /dev/null @@ -1,137 +0,0 @@ -use crate::headers::x_robots_tag::valid_date::ValidDate; -use rama_core::error::OpaqueError; -use std::fmt::Formatter; -use std::str::FromStr; - -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Rule { - All, - NoIndex, - NoFollow, - None, - NoSnippet, - IndexIfEmbedded, - MaxSnippet(u32), - MaxImagePreview(MaxImagePreviewSetting), - MaxVideoPreview(Option), - NoTranslate, - NoImageIndex, - UnavailableAfter(ValidDate), // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." - // custom rules - NoAi, - NoImageAi, - SPC, -} - -impl std::fmt::Display for Rule { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Rule::All => write!(f, "all"), - Rule::NoIndex => write!(f, "noindex"), - Rule::NoFollow => write!(f, "nofollow"), - Rule::None => write!(f, "none"), - Rule::NoSnippet => write!(f, "nosnippet"), - Rule::IndexIfEmbedded => write!(f, "indexifembedded"), - Rule::MaxSnippet(number) => write!(f, "maxsnippet: {}", number), - Rule::MaxImagePreview(setting) => write!(f, "max-image-preview: {}", setting), - Rule::MaxVideoPreview(number) => match number { - Some(number) => write!(f, "max-video-preview: {}", number), - None => write!(f, "max-video-preview: -1"), - }, - Rule::NoTranslate => write!(f, "notranslate"), - Rule::NoImageIndex => write!(f, "noimageindex"), - Rule::UnavailableAfter(date) => write!(f, "unavailable_after: {}", date), - Rule::NoAi => write!(f, "noai"), - Rule::NoImageAi => write!(f, "noimageai"), - Rule::SPC => write!(f, "spc"), - } - } -} - -impl FromStr for Rule { - type Err = OpaqueError; - - fn from_str(s: &str) -> Result { - Self::from_iter(s.split(":").map(str::trim)) - } -} - -impl<'a> Rule { - fn from_iter(mut value: impl Iterator) -> Result { - match value.next() { - Some("all") => Ok(Rule::All), - Some("no_index") => Ok(Rule::NoIndex), - Some("no_follow") => Ok(Rule::NoFollow), - Some("none") => Ok(Rule::None), - Some("no_snippet") => Ok(Rule::NoSnippet), - Some("indexifembedded") => Ok(Rule::IndexIfEmbedded), - Some("max-snippet") => match value.next() { - Some(number) => Ok(Rule::MaxSnippet( - number.parse().map_err(OpaqueError::from_display)?, - )), - None => Err(OpaqueError::from_display( - "No number specified for 'max-snippet'", - )), - }, - Some("max-image-preview") => match value.next() { - Some(setting) => Ok(Rule::MaxImagePreview(setting.parse()?)), - None => Err(OpaqueError::from_display( - "No setting specified for 'max-image-preview'", - )), - }, - Some("max-video-preview") => match value.next() { - Some(number) => Ok(Rule::MaxVideoPreview(match number { - "-1" => None, - n => Some(n.parse().map_err(OpaqueError::from_display)?), - })), - None => Err(OpaqueError::from_display( - "No number specified for 'max-video-preview'", - )), - }, - Some("notranslate") => Ok(Rule::NoTranslate), - Some("noimageindex") => Ok(Rule::NoImageIndex), - Some("unavailable_after") => match value.next() { - Some(date) => Ok(Rule::UnavailableAfter(date.parse()?)), - None => Err(OpaqueError::from_display( - "No date specified for 'unavailable-after'", - )), - }, - Some("noai") => Ok(Rule::NoAi), - Some("noimageai") => Ok(Rule::NoImageAi), - Some("spc") => Ok(Rule::SPC), - _ => Err(OpaqueError::from_display("Invalid X-Robots-Tag rule")), - } - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum MaxImagePreviewSetting { - None, - Standard, - Large, -} - -impl std::fmt::Display for MaxImagePreviewSetting { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - MaxImagePreviewSetting::None => write!(f, "none"), - MaxImagePreviewSetting::Standard => write!(f, "standard"), - MaxImagePreviewSetting::Large => write!(f, "large"), - } - } -} - -impl FromStr for MaxImagePreviewSetting { - type Err = OpaqueError; - - fn from_str(s: &str) -> Result { - match s { - "none" => Ok(MaxImagePreviewSetting::None), - "standard" => Ok(MaxImagePreviewSetting::Standard), - "large" => Ok(MaxImagePreviewSetting::Large), - _ => Err(OpaqueError::from_display( - "failed to parse MaxImagePreviewSetting", - )), - } - } -} diff --git a/rama-http/src/headers/x_robots_tag/valid_date.rs b/rama-http/src/headers/x_robots_tag/valid_date.rs deleted file mode 100644 index 07077f05..00000000 --- a/rama-http/src/headers/x_robots_tag/valid_date.rs +++ /dev/null @@ -1,65 +0,0 @@ -use rama_core::error::OpaqueError; -use regex::Regex; -use std::fmt::{Display, Formatter}; -use std::ops::Deref; -use std::str::FromStr; - -// "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct ValidDate(String); - -impl ValidDate { - pub fn new(date: &str) -> Option { - let new = Self(date.to_owned()); - match new.is_valid() { - true => Some(new), - false => None, - } - } - - pub fn date(&self) -> &str { - &self.0 - } - - pub fn into_date(self) -> String { - self.0 - } - - pub fn is_valid(&self) -> bool { - let rfc_822 = r"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s(0[1-9]|[12]\d|3[01])\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{2}\s([01]\d|2[0-4]):([0-5]\d|60):([0-5]\d|60)\s(UT|GMT|EST|EDT|CST|CDT|MST|MDT|PST|PDT|[+-]\d{4})$"; - let rfc_850 = r"^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s(0?[1-9]|[12]\d|3[01])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\d{2}\s([01]\d|2[0-4]):([0-5]\d|60):([0-5]\d|60)\s(UT|GMT|EST|EDT|CST|CDT|MST|MDT|PST|PDT|[+-]\d{4})$"; - let iso_8601 = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])\s([01]\d|2[0-4]):([0-5]\d|60):([0-5]\d|60).\d{3}$"; - - check_is_valid(rfc_822, self.date()) - || check_is_valid(rfc_850, self.date()) - || check_is_valid(iso_8601, self.date()) - } -} - -fn check_is_valid(re: &str, date: &str) -> bool { - Regex::new(re) - .and_then(|r| Ok(r.is_match(date))) - .unwrap_or(false) -} - -impl FromStr for ValidDate { - type Err = OpaqueError; - - fn from_str(s: &str) -> Result { - Self::new(s).ok_or_else(|| OpaqueError::from_display("Invalid date format")) - } -} - -impl Display for ValidDate { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", &self) - } -} - -impl Deref for ValidDate { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.date() - } -} From 6a5281478587ba55720e6a304bb1f4ce8e69b252 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Mon, 27 Jan 2025 23:33:42 +0100 Subject: [PATCH 16/37] fix chrono dependency placement --- Cargo.toml | 3 ++- rama-http/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c6ba006c..fb45d72c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ bitflags = "2.4" md5 = "0.7.0" brotli = "7" bytes = "1" +chrono = "0.4.39" clap = { version = "4.5.15", features = ["derive"] } crossterm = "0.27" csv = "1.3.1" @@ -255,7 +256,7 @@ rama-tls = { version = "0.2.0-alpha.7", path = "rama-tls", optional = true } rama-ua = { version = "0.2.0-alpha.7", path = "rama-ua", optional = true } rama-utils = { version = "0.2.0-alpha.7", path = "rama-utils" } serde_html_form = { workspace = true, optional = true } -serde_json = { workspace = true, optional = true } + serde_json = { workspace = true, optional = true } tokio = { workspace = true, features = ["macros", "io-std"], optional = true } tracing = { workspace = true, optional = true } diff --git a/rama-http/Cargo.toml b/rama-http/Cargo.toml index f388f1d7..d34fec63 100644 --- a/rama-http/Cargo.toml +++ b/rama-http/Cargo.toml @@ -30,7 +30,7 @@ async-compression = { workspace = true, features = [ base64 = { workspace = true } bitflags = { workspace = true } bytes = { workspace = true } -chrono = "0.4.39" +chrono = { workspace = true } const_format = { workspace = true } csv = { workspace = true } futures-lite = { workspace = true } From 749c086a6e2dfb8d528e294746cff0f70b0172b8 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Mon, 27 Jan 2025 23:34:40 +0100 Subject: [PATCH 17/37] enhance code, add valid_date.rs --- .../src/headers/x_robots_tag/custom_rule.rs | 10 ++-- .../x_robots_tag/max_image_preview_setting.rs | 2 +- rama-http/src/headers/x_robots_tag/mod.rs | 13 ++---- .../src/headers/x_robots_tag/robots_tag.rs | 44 +++++++++--------- .../x_robots_tag/robots_tag_builder.rs | 35 ++++++++------ .../src/headers/x_robots_tag/valid_date.rs | 46 +++++++++++++++++++ 6 files changed, 102 insertions(+), 48 deletions(-) create mode 100644 rama-http/src/headers/x_robots_tag/valid_date.rs diff --git a/rama-http/src/headers/x_robots_tag/custom_rule.rs b/rama-http/src/headers/x_robots_tag/custom_rule.rs index aeb86a27..bb2ab466 100644 --- a/rama-http/src/headers/x_robots_tag/custom_rule.rs +++ b/rama-http/src/headers/x_robots_tag/custom_rule.rs @@ -1,31 +1,31 @@ use crate::headers::util::value_string::{FromStrError, HeaderValueString}; #[derive(Clone, Debug, Eq, PartialEq)] -pub struct CustomRule { +pub(super) struct CustomRule { key: HeaderValueString, value: Option, } impl CustomRule { - pub fn new(key: &str) -> Result { + pub(super) fn new(key: &str) -> Result { Ok(Self { key: key.parse()?, value: None, }) } - pub fn with_value(key: &str, value: &str) -> Result { + pub(super) fn with_value(key: &str, value: &str) -> Result { Ok(Self { key: key.parse()?, value: Some(value.parse()?), }) } - pub fn key(&self) -> &HeaderValueString { + pub(super) fn key(&self) -> &HeaderValueString { &self.key } - pub fn value(&self) -> Option<&HeaderValueString> { + pub(super) fn value(&self) -> Option<&HeaderValueString> { self.value.as_ref() } } diff --git a/rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs b/rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs index c5c5f2df..628c0233 100644 --- a/rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs +++ b/rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs @@ -3,7 +3,7 @@ use std::fmt::Formatter; use std::str::FromStr; #[derive(Clone, Debug, Eq, PartialEq)] -pub enum MaxImagePreviewSetting { +pub(super) enum MaxImagePreviewSetting { None, Standard, Large, diff --git a/rama-http/src/headers/x_robots_tag/mod.rs b/rama-http/src/headers/x_robots_tag/mod.rs index 8d5de836..a2a205e7 100644 --- a/rama-http/src/headers/x_robots_tag/mod.rs +++ b/rama-http/src/headers/x_robots_tag/mod.rs @@ -1,11 +1,8 @@ -pub mod robots_tag; -pub use robots_tag::RobotsTag; +mod robots_tag; -pub mod robots_tag_builder; -pub use robots_tag_builder::RobotsTagBuilder; +mod robots_tag_builder; -pub mod max_image_preview_setting; -pub use max_image_preview_setting::MaxImagePreviewSetting; +mod max_image_preview_setting; -pub mod custom_rule; -pub use custom_rule::CustomRule; +mod custom_rule; +mod valid_date; diff --git a/rama-http/src/headers/x_robots_tag/robots_tag.rs b/rama-http/src/headers/x_robots_tag/robots_tag.rs index 2eccd50b..02077166 100644 --- a/rama-http/src/headers/x_robots_tag/robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag/robots_tag.rs @@ -2,15 +2,21 @@ use crate::headers::util::value_string::HeaderValueString; use crate::headers::x_robots_tag::custom_rule::CustomRule; use crate::headers::x_robots_tag::max_image_preview_setting::MaxImagePreviewSetting; use crate::headers::x_robots_tag::robots_tag_builder::RobotsTagBuilder; +use crate::headers::x_robots_tag::valid_date::ValidDate; macro_rules! getter_setter { ($field:ident, $type:ty) => { paste::paste! { - pub fn [<$field>](&self) -> $type { + pub(super) fn [<$field>](&self) -> $type { self.[<$field>] } - pub fn [](&mut self, [<$field>]: $type) -> &mut Self { + pub(super) fn [](&mut self, [<$field>]: $type) -> &mut Self { + self.[<$field>] = [<$field>]; + self + } + + pub(super) fn [](mut self, [<$field>]: $type) -> Self { self.[<$field>] = [<$field>]; self } @@ -19,25 +25,17 @@ macro_rules! getter_setter { ($field:ident, $type:ty, optional) => { paste::paste! { - pub fn [<$field>](&self) -> Option<&$type> { + pub(super) fn [<$field>](&self) -> Option<&$type> { self.[<$field>].as_ref() } - pub fn [](&mut self, [<$field>]: $type) -> &mut Self { + pub(super) fn [](&mut self, [<$field>]: $type) -> &mut Self { self.[<$field>] = Some([<$field>]); self } - } - }; - - ($field:ident, $type:ty, vec) => { - paste::paste! { - pub fn [<$field>](&self) -> &Vec<$type> { - &self.[<$field>] - } - pub fn [](&mut self, [<$field>]: $type) -> &mut Self { - self.[<$field>].push([<$field>]); + pub(super) fn [](mut self, [<$field>]: $type) -> Self { + self.[<$field>] = Some([<$field>]); self } } @@ -45,7 +43,7 @@ macro_rules! getter_setter { } #[derive(Clone, Debug, Eq, PartialEq, Default)] -pub struct RobotsTag { +pub(super) struct RobotsTag { bot_name: Option, all: bool, no_index: bool, @@ -58,7 +56,7 @@ pub struct RobotsTag { max_video_preview: Option, no_translate: bool, no_image_index: bool, - unavailable_after: Option>, // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." + unavailable_after: Option, // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." // custom rules no_ai: bool, no_image_ai: bool, @@ -67,18 +65,23 @@ pub struct RobotsTag { } impl RobotsTag { - pub fn new() -> Self { + pub(super) fn new() -> Self { Default::default() } - pub fn with_bot_name(bot_name: Option) -> Self { + pub(super) fn new_with_bot_name(bot_name: Option) -> Self { Self { bot_name, ..Default::default() } } - pub fn builder() -> RobotsTagBuilder { + pub(super) fn add_custom_rule(&mut self, rule: CustomRule) -> &mut Self { + self.custom_rules.push(rule); + self + } + + pub(super) fn builder() -> RobotsTagBuilder { RobotsTagBuilder::new() } @@ -94,9 +97,8 @@ impl RobotsTag { getter_setter!(max_video_preview, u32, optional); getter_setter!(no_translate, bool); getter_setter!(no_image_index, bool); - getter_setter!(unavailable_after, chrono::DateTime, optional); + getter_setter!(unavailable_after, ValidDate, optional); getter_setter!(no_ai, bool); getter_setter!(no_image_ai, bool); getter_setter!(spc, bool); - getter_setter!(custom_rules, CustomRule, vec); } diff --git a/rama-http/src/headers/x_robots_tag/robots_tag_builder.rs b/rama-http/src/headers/x_robots_tag/robots_tag_builder.rs index 00cb9b55..ef10a555 100644 --- a/rama-http/src/headers/x_robots_tag/robots_tag_builder.rs +++ b/rama-http/src/headers/x_robots_tag/robots_tag_builder.rs @@ -2,16 +2,22 @@ use crate::headers::util::value_string::HeaderValueString; use crate::headers::x_robots_tag::custom_rule::CustomRule; use crate::headers::x_robots_tag::max_image_preview_setting::MaxImagePreviewSetting; use crate::headers::x_robots_tag::robots_tag::RobotsTag; +use crate::headers::x_robots_tag::valid_date::ValidDate; macro_rules! builder_field { ($field:ident, $type:ty) => { paste::paste! { - pub fn [<$field>](mut self, [<$field>]: $type) -> Self { + pub(super) fn [<$field>](mut self, [<$field>]: $type) -> Self { self.0.[]([<$field>]); self } - pub fn [](&mut self, [<$field>]: $type) -> &mut Self { + pub(super) fn [](&mut self, [<$field>]: $type) -> &mut Self { + self.0.[]([<$field>]); + self + } + + pub(super) fn [](mut self, [<$field>]: $type) -> Self { self.0.[]([<$field>]); self } @@ -19,27 +25,30 @@ macro_rules! builder_field { }; } -#[derive(Clone, Debug, Eq, PartialEq, Default)] -pub struct RobotsTagBuilder(T); +#[derive(Clone, Debug, Eq, PartialEq)] +pub(super) struct RobotsTagBuilder(T); impl RobotsTagBuilder<()> { - pub fn new() -> Self { + pub(super) fn new() -> Self { RobotsTagBuilder(()) } - pub fn bot_name(self, bot_name: Option) -> RobotsTagBuilder { - RobotsTagBuilder(RobotsTag::with_bot_name(bot_name)) + pub(super) fn bot_name( + self, + bot_name: Option, + ) -> RobotsTagBuilder { + RobotsTagBuilder(RobotsTag::new_with_bot_name(bot_name)) } } impl RobotsTagBuilder { - pub fn add_custom_rule(&mut self, rule: CustomRule) -> &mut Self { - self.0.add_custom_rules(rule); - self + pub(super) fn build(self) -> RobotsTag { + self.0 } - pub fn build(self) -> RobotsTag { - self.0 + pub(super) fn add_custom_rule(&mut self, rule: CustomRule) -> &mut Self { + self.0.add_custom_rule(rule); + self } builder_field!(bot_name, HeaderValueString); @@ -54,7 +63,7 @@ impl RobotsTagBuilder { builder_field!(max_video_preview, u32); builder_field!(no_translate, bool); builder_field!(no_image_index, bool); - builder_field!(unavailable_after, chrono::DateTime); + builder_field!(unavailable_after, ValidDate); builder_field!(no_ai, bool); builder_field!(no_image_ai, bool); builder_field!(spc, bool); diff --git a/rama-http/src/headers/x_robots_tag/valid_date.rs b/rama-http/src/headers/x_robots_tag/valid_date.rs new file mode 100644 index 00000000..929c56e8 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag/valid_date.rs @@ -0,0 +1,46 @@ +use chrono::{DateTime, Utc}; +use rama_core::error::OpaqueError; +use std::ops::Deref; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub(super) struct ValidDate(DateTime); + +impl ValidDate { + pub(super) fn new(date: DateTime) -> Result { + Ok(Self(date)) + } +} + +impl Deref for ValidDate { + type Target = DateTime; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl From for DateTime { + fn from(value: ValidDate) -> Self { + value.0 + } +} + +impl TryFrom> for ValidDate { + type Error = OpaqueError; + + fn try_from(value: DateTime) -> Result { + ValidDate::new(value) + } +} + +impl AsRef> for ValidDate { + fn as_ref(&self) -> &DateTime { + &self.0 + } +} + +impl AsMut> for ValidDate { + fn as_mut(&mut self) -> &mut DateTime { + &mut self.0 + } +} From 3aa02ecfbc83c23a630d2a0ff585d977d869d67a Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Wed, 29 Jan 2025 19:04:51 +0100 Subject: [PATCH 18/37] add x_robots_tag.rs --- rama-http/src/headers/mod.rs | 3 +++ rama-http/src/headers/x_robots_tag.rs | 27 +++++++++++++++++++ .../custom_rule.rs | 0 .../max_image_preview_setting.rs | 0 .../mod.rs | 2 ++ .../robots_tag.rs | 10 +++---- .../robots_tag_builder.rs | 8 +++--- 7 files changed, 41 insertions(+), 9 deletions(-) create mode 100644 rama-http/src/headers/x_robots_tag.rs rename rama-http/src/headers/{x_robots_tag => x_robots_tag_components}/custom_rule.rs (100%) rename rama-http/src/headers/{x_robots_tag => x_robots_tag_components}/max_image_preview_setting.rs (100%) rename rama-http/src/headers/{x_robots_tag => x_robots_tag_components}/mod.rs (73%) rename rama-http/src/headers/{x_robots_tag => x_robots_tag_components}/robots_tag.rs (89%) rename rama-http/src/headers/{x_robots_tag => x_robots_tag_components}/robots_tag_builder.rs (86%) diff --git a/rama-http/src/headers/mod.rs b/rama-http/src/headers/mod.rs index 6abc2554..af7ea92f 100644 --- a/rama-http/src/headers/mod.rs +++ b/rama-http/src/headers/mod.rs @@ -103,6 +103,9 @@ pub use ::rama_http_types::headers::HeaderExt; pub(crate) mod util; +mod x_robots_tag_components; + pub mod x_robots_tag; +pub use x_robots_tag::XRobotsTag; pub use util::quality_value::{Quality, QualityValue}; diff --git a/rama-http/src/headers/x_robots_tag.rs b/rama-http/src/headers/x_robots_tag.rs new file mode 100644 index 00000000..681d37e0 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag.rs @@ -0,0 +1,27 @@ +use headers::Header; +use http::{HeaderName, HeaderValue}; +use crate::headers::Error; +use crate::headers::x_robots_tag_components::RobotsTag; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct XRobotsTag { + elements: Vec, +} + +impl Header for XRobotsTag { + fn name() -> &'static HeaderName { + &crate::header::X_ROBOTS_TAG + } + + fn decode<'i, I>(values: &mut I) -> Result + where + Self: Sized, + I: Iterator + { + todo!() + } + + fn encode>(&self, values: &mut E) { + todo!() + } +} diff --git a/rama-http/src/headers/x_robots_tag/custom_rule.rs b/rama-http/src/headers/x_robots_tag_components/custom_rule.rs similarity index 100% rename from rama-http/src/headers/x_robots_tag/custom_rule.rs rename to rama-http/src/headers/x_robots_tag_components/custom_rule.rs diff --git a/rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs b/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs similarity index 100% rename from rama-http/src/headers/x_robots_tag/max_image_preview_setting.rs rename to rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs diff --git a/rama-http/src/headers/x_robots_tag/mod.rs b/rama-http/src/headers/x_robots_tag_components/mod.rs similarity index 73% rename from rama-http/src/headers/x_robots_tag/mod.rs rename to rama-http/src/headers/x_robots_tag_components/mod.rs index a2a205e7..0e2e6f76 100644 --- a/rama-http/src/headers/x_robots_tag/mod.rs +++ b/rama-http/src/headers/x_robots_tag_components/mod.rs @@ -1,8 +1,10 @@ mod robots_tag; +pub(super) use robots_tag::RobotsTag; mod robots_tag_builder; mod max_image_preview_setting; mod custom_rule; + mod valid_date; diff --git a/rama-http/src/headers/x_robots_tag/robots_tag.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs similarity index 89% rename from rama-http/src/headers/x_robots_tag/robots_tag.rs rename to rama-http/src/headers/x_robots_tag_components/robots_tag.rs index 02077166..f03c3710 100644 --- a/rama-http/src/headers/x_robots_tag/robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs @@ -1,8 +1,8 @@ use crate::headers::util::value_string::HeaderValueString; -use crate::headers::x_robots_tag::custom_rule::CustomRule; -use crate::headers::x_robots_tag::max_image_preview_setting::MaxImagePreviewSetting; -use crate::headers::x_robots_tag::robots_tag_builder::RobotsTagBuilder; -use crate::headers::x_robots_tag::valid_date::ValidDate; +use crate::headers::x_robots_tag_components::custom_rule::CustomRule; +use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; +use crate::headers::x_robots_tag_components::robots_tag_builder::RobotsTagBuilder; +use crate::headers::x_robots_tag_components::valid_date::ValidDate; macro_rules! getter_setter { ($field:ident, $type:ty) => { @@ -43,7 +43,7 @@ macro_rules! getter_setter { } #[derive(Clone, Debug, Eq, PartialEq, Default)] -pub(super) struct RobotsTag { +pub(crate) struct RobotsTag { bot_name: Option, all: bool, no_index: bool, diff --git a/rama-http/src/headers/x_robots_tag/robots_tag_builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs similarity index 86% rename from rama-http/src/headers/x_robots_tag/robots_tag_builder.rs rename to rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs index ef10a555..d2146cfa 100644 --- a/rama-http/src/headers/x_robots_tag/robots_tag_builder.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs @@ -1,8 +1,8 @@ use crate::headers::util::value_string::HeaderValueString; -use crate::headers::x_robots_tag::custom_rule::CustomRule; -use crate::headers::x_robots_tag::max_image_preview_setting::MaxImagePreviewSetting; -use crate::headers::x_robots_tag::robots_tag::RobotsTag; -use crate::headers::x_robots_tag::valid_date::ValidDate; +use crate::headers::x_robots_tag_components::custom_rule::CustomRule; +use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; +use crate::headers::x_robots_tag_components::robots_tag::RobotsTag; +use crate::headers::x_robots_tag_components::valid_date::ValidDate; macro_rules! builder_field { ($field:ident, $type:ty) => { From b4a1fe19fa591170cafeba1aff83d859d2529c44 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Wed, 29 Jan 2025 19:05:20 +0100 Subject: [PATCH 19/37] implement FromStr for ValidDate --- .../valid_date.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) rename rama-http/src/headers/{x_robots_tag => x_robots_tag_components}/valid_date.rs (61%) diff --git a/rama-http/src/headers/x_robots_tag/valid_date.rs b/rama-http/src/headers/x_robots_tag_components/valid_date.rs similarity index 61% rename from rama-http/src/headers/x_robots_tag/valid_date.rs rename to rama-http/src/headers/x_robots_tag_components/valid_date.rs index 929c56e8..bcd25e10 100644 --- a/rama-http/src/headers/x_robots_tag/valid_date.rs +++ b/rama-http/src/headers/x_robots_tag_components/valid_date.rs @@ -1,6 +1,7 @@ use chrono::{DateTime, Utc}; -use rama_core::error::OpaqueError; +use rama_core::error::{ErrorContext, OpaqueError}; use std::ops::Deref; +use std::str::FromStr; #[derive(Clone, Debug, Eq, PartialEq)] pub(super) struct ValidDate(DateTime); @@ -44,3 +45,19 @@ impl AsMut> for ValidDate { &mut self.0 } } + +impl FromStr for ValidDate { + type Err = OpaqueError; + + fn from_str(s: &str) -> Result { + ValidDate::new( + DateTime::parse_from_rfc3339(s) + .or_else(|_| { + DateTime::parse_from_rfc2822(s) + .or_else(|_| DateTime::parse_from_str(s, "%A, %d-%b-%y %T %Z")) + }) + .with_context(|| "Failed to parse date")? + .with_timezone(&Utc), + ) + } +} From a480e139c3a74ce9929802f70de0cf83d6bb2eb8 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Fri, 31 Jan 2025 20:50:38 +0100 Subject: [PATCH 20/37] enhance code --- Cargo.toml | 2 +- rama-http/src/headers/x_robots_tag.rs | 30 +++++++++---------- .../max_image_preview_setting.rs | 15 ++++++---- .../x_robots_tag_components/robots_tag.rs | 25 +++++++++++----- .../robots_tag_builder.rs | 26 ++++++++++------ .../x_robots_tag_components/valid_date.rs | 16 +++------- 6 files changed, 64 insertions(+), 50 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fb45d72c..7fc47265 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -256,7 +256,7 @@ rama-tls = { version = "0.2.0-alpha.7", path = "rama-tls", optional = true } rama-ua = { version = "0.2.0-alpha.7", path = "rama-ua", optional = true } rama-utils = { version = "0.2.0-alpha.7", path = "rama-utils" } serde_html_form = { workspace = true, optional = true } - serde_json = { workspace = true, optional = true } +serde_json = { workspace = true, optional = true } tokio = { workspace = true, features = ["macros", "io-std"], optional = true } tracing = { workspace = true, optional = true } diff --git a/rama-http/src/headers/x_robots_tag.rs b/rama-http/src/headers/x_robots_tag.rs index 681d37e0..50262cb1 100644 --- a/rama-http/src/headers/x_robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag.rs @@ -1,7 +1,7 @@ +use crate::headers::x_robots_tag_components::RobotsTag; +use crate::headers::Error; use headers::Header; use http::{HeaderName, HeaderValue}; -use crate::headers::Error; -use crate::headers::x_robots_tag_components::RobotsTag; #[derive(Debug, Clone, PartialEq, Eq)] pub struct XRobotsTag { @@ -9,19 +9,19 @@ pub struct XRobotsTag { } impl Header for XRobotsTag { - fn name() -> &'static HeaderName { - &crate::header::X_ROBOTS_TAG - } + fn name() -> &'static HeaderName { + &crate::header::X_ROBOTS_TAG + } - fn decode<'i, I>(values: &mut I) -> Result - where - Self: Sized, - I: Iterator - { - todo!() - } + fn decode<'i, I>(values: &mut I) -> Result + where + Self: Sized, + I: Iterator, + { + todo!() + } - fn encode>(&self, values: &mut E) { - todo!() - } + fn encode>(&self, values: &mut E) { + todo!() + } } diff --git a/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs b/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs index 628c0233..f978705b 100644 --- a/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs +++ b/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs @@ -23,13 +23,16 @@ impl FromStr for MaxImagePreviewSetting { type Err = OpaqueError; fn from_str(s: &str) -> Result { - match s.to_lowercase().trim() { - "none" => Ok(MaxImagePreviewSetting::None), - "standard" => Ok(MaxImagePreviewSetting::Standard), - "large" => Ok(MaxImagePreviewSetting::Large), - _ => Err(OpaqueError::from_display( + if s.eq_ignore_ascii_case("none") { + Ok(MaxImagePreviewSetting::None) + } else if s.eq_ignore_ascii_case("standard") { + Ok(MaxImagePreviewSetting::Standard) + } else if s.eq_ignore_ascii_case("large") { + Ok(MaxImagePreviewSetting::Large) + } else { + Err(OpaqueError::from_display( "failed to parse MaxImagePreviewSetting", - )), + )) } } } diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs index f03c3710..47b463cb 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs @@ -42,7 +42,7 @@ macro_rules! getter_setter { }; } -#[derive(Clone, Debug, Eq, PartialEq, Default)] +#[derive(Clone, Debug, Eq, PartialEq)] pub(crate) struct RobotsTag { bot_name: Option, all: bool, @@ -65,17 +65,28 @@ pub(crate) struct RobotsTag { } impl RobotsTag { - pub(super) fn new() -> Self { - Default::default() - } - pub(super) fn new_with_bot_name(bot_name: Option) -> Self { Self { bot_name, - ..Default::default() + all: false, + no_index: false, + no_follow: false, + none: false, + no_snippet: false, + index_if_embedded: false, + max_snippet: 0, + max_image_preview: None, + max_video_preview: None, + no_translate: false, + no_image_index: false, + unavailable_after: None, + no_ai: false, + no_image_ai: false, + spc: false, + custom_rules: vec![], } } - + pub(super) fn add_custom_rule(&mut self, rule: CustomRule) -> &mut Self { self.custom_rules.push(rule); self diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs index d2146cfa..314d49a5 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs @@ -1,3 +1,4 @@ +use rama_core::error::OpaqueError; use crate::headers::util::value_string::HeaderValueString; use crate::headers::x_robots_tag_components::custom_rule::CustomRule; use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; @@ -8,17 +9,17 @@ macro_rules! builder_field { ($field:ident, $type:ty) => { paste::paste! { pub(super) fn [<$field>](mut self, [<$field>]: $type) -> Self { - self.0.[]([<$field>]); + self.content.[]([<$field>]); self } pub(super) fn [](&mut self, [<$field>]: $type) -> &mut Self { - self.0.[]([<$field>]); + self.content.[]([<$field>]); self } pub(super) fn [](mut self, [<$field>]: $type) -> Self { - self.0.[]([<$field>]); + self.content.[]([<$field>]); self } } @@ -26,28 +27,35 @@ macro_rules! builder_field { } #[derive(Clone, Debug, Eq, PartialEq)] -pub(super) struct RobotsTagBuilder(T); +pub(super) struct RobotsTagBuilder { + content: T, + valid: bool +} impl RobotsTagBuilder<()> { pub(super) fn new() -> Self { - RobotsTagBuilder(()) + RobotsTagBuilder{content: (), valid: false } } pub(super) fn bot_name( self, bot_name: Option, ) -> RobotsTagBuilder { - RobotsTagBuilder(RobotsTag::new_with_bot_name(bot_name)) + RobotsTagBuilder{ content: RobotsTag::new_with_bot_name(bot_name), valid: false } } } impl RobotsTagBuilder { - pub(super) fn build(self) -> RobotsTag { - self.0 + pub(super) fn build(self) -> Result { + if self.valid { + Ok(self.content) + } else { + Err(OpaqueError::from_display("not a valid robots tag")) + } } pub(super) fn add_custom_rule(&mut self, rule: CustomRule) -> &mut Self { - self.0.add_custom_rule(rule); + self.content.add_custom_rule(rule); self } diff --git a/rama-http/src/headers/x_robots_tag_components/valid_date.rs b/rama-http/src/headers/x_robots_tag_components/valid_date.rs index bcd25e10..57eaa872 100644 --- a/rama-http/src/headers/x_robots_tag_components/valid_date.rs +++ b/rama-http/src/headers/x_robots_tag_components/valid_date.rs @@ -7,8 +7,8 @@ use std::str::FromStr; pub(super) struct ValidDate(DateTime); impl ValidDate { - pub(super) fn new(date: DateTime) -> Result { - Ok(Self(date)) + pub(super) fn new(date: DateTime) -> Self { + Self(date) } } @@ -26,14 +26,6 @@ impl From for DateTime { } } -impl TryFrom> for ValidDate { - type Error = OpaqueError; - - fn try_from(value: DateTime) -> Result { - ValidDate::new(value) - } -} - impl AsRef> for ValidDate { fn as_ref(&self) -> &DateTime { &self.0 @@ -50,7 +42,7 @@ impl FromStr for ValidDate { type Err = OpaqueError; fn from_str(s: &str) -> Result { - ValidDate::new( + Ok(ValidDate::new( DateTime::parse_from_rfc3339(s) .or_else(|_| { DateTime::parse_from_rfc2822(s) @@ -58,6 +50,6 @@ impl FromStr for ValidDate { }) .with_context(|| "Failed to parse date")? .with_timezone(&Utc), - ) + )) } } From 1f32008848a47a75dd021073125cf133cf687e01 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sat, 1 Feb 2025 23:13:33 +0100 Subject: [PATCH 21/37] implement display for ValidDate --- .../src/headers/x_robots_tag_components/valid_date.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rama-http/src/headers/x_robots_tag_components/valid_date.rs b/rama-http/src/headers/x_robots_tag_components/valid_date.rs index 57eaa872..3db7e44f 100644 --- a/rama-http/src/headers/x_robots_tag_components/valid_date.rs +++ b/rama-http/src/headers/x_robots_tag_components/valid_date.rs @@ -1,5 +1,6 @@ use chrono::{DateTime, Utc}; use rama_core::error::{ErrorContext, OpaqueError}; +use std::fmt::{Display, Formatter}; use std::ops::Deref; use std::str::FromStr; @@ -53,3 +54,9 @@ impl FromStr for ValidDate { )) } } + +impl Display for ValidDate { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + writeln!(f, "{}", self.0) + } +} From 7f62911cba32693eb53f34af24cf2f4677a41174 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sat, 1 Feb 2025 23:16:14 +0100 Subject: [PATCH 22/37] improve error handling for HeaderValueString --- rama-http/src/headers/util/value_string.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/rama-http/src/headers/util/value_string.rs b/rama-http/src/headers/util/value_string.rs index ecac531c..ff5ca78b 100644 --- a/rama-http/src/headers/util/value_string.rs +++ b/rama-http/src/headers/util/value_string.rs @@ -1,13 +1,12 @@ +use crate::headers::Error; +use bytes::Bytes; +use http::header::HeaderValue; +use std::fmt::{Display, Formatter}; use std::{ fmt, str::{self, FromStr}, }; -use bytes::Bytes; -use http::header::HeaderValue; - -use crate::headers::Error; - /// A value that is both a valid `HeaderValue` and `String`. #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct HeaderValueString { @@ -67,7 +66,7 @@ impl<'a> From<&'a HeaderValueString> for HeaderValue { } #[derive(Debug)] -pub struct FromStrError(()); +pub struct FromStrError(&'static str); impl FromStr for HeaderValueString { type Err = FromStrError; @@ -76,6 +75,14 @@ impl FromStr for HeaderValueString { // A valid `str` (the argument)... src.parse() .map(|value| HeaderValueString { value }) - .map_err(|_| FromStrError(())) + .map_err(|_| FromStrError("failed to parse header value from string")) + } +} + +impl Display for FromStrError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + writeln!(f, "{}", self.0) } } + +impl std::error::Error for FromStrError {} From 26ef8c6724d8f7e64e1173a20d4acd4feb15124f Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sat, 1 Feb 2025 23:17:40 +0100 Subject: [PATCH 23/37] initial implementation of encode, decode --- rama-http/src/headers/x_robots_tag.rs | 41 +++++- .../headers/x_robots_tag_components/mod.rs | 4 +- .../x_robots_tag_components/robots_tag.rs | 107 +++++++++++++- .../robots_tag_builder.rs | 78 ---------- .../robots_tag_components/builder.rs | 136 ++++++++++++++++++ .../robots_tag_components/mod.rs | 5 + .../robots_tag_components/parser.rs | 67 +++++++++ 7 files changed, 349 insertions(+), 89 deletions(-) delete mode 100644 rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs create mode 100644 rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs create mode 100644 rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs create mode 100644 rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs diff --git a/rama-http/src/headers/x_robots_tag.rs b/rama-http/src/headers/x_robots_tag.rs index 50262cb1..6ba3f07c 100644 --- a/rama-http/src/headers/x_robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag.rs @@ -1,12 +1,11 @@ +use crate::headers::x_robots_tag_components::robots_tag_components::Parser; use crate::headers::x_robots_tag_components::RobotsTag; use crate::headers::Error; use headers::Header; use http::{HeaderName, HeaderValue}; #[derive(Debug, Clone, PartialEq, Eq)] -pub struct XRobotsTag { - elements: Vec, -} +pub struct XRobotsTag(Vec); impl Header for XRobotsTag { fn name() -> &'static HeaderName { @@ -18,10 +17,42 @@ impl Header for XRobotsTag { Self: Sized, I: Iterator, { - todo!() + let mut elements = Vec::new(); + for value in values { + let mut parser = Parser::new(value.to_str().map_err(|_| Error::invalid())?); + while let Some(result) = parser.next() { + match result { + Ok(robots_tag) => elements.push(robots_tag), + Err(_) => return Err(Error::invalid()), + } + } + } + Ok(XRobotsTag(elements)) } fn encode>(&self, values: &mut E) { - todo!() + use std::fmt; + struct Format(F); + impl fmt::Display for Format + where + F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result, + { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0(f) + } + } + let s = format!( + "{}", + Format(|f: &mut fmt::Formatter<'_>| { + crate::headers::util::csv::fmt_comma_delimited(&mut *f, self.0.iter()) + }) + ); + values.extend(Some(HeaderValue::from_str(&s).unwrap())) + } +} + +impl FromIterator for XRobotsTag { + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().collect()) } } diff --git a/rama-http/src/headers/x_robots_tag_components/mod.rs b/rama-http/src/headers/x_robots_tag_components/mod.rs index 0e2e6f76..97834688 100644 --- a/rama-http/src/headers/x_robots_tag_components/mod.rs +++ b/rama-http/src/headers/x_robots_tag_components/mod.rs @@ -1,10 +1,10 @@ mod robots_tag; pub(super) use robots_tag::RobotsTag; -mod robots_tag_builder; - mod max_image_preview_setting; mod custom_rule; mod valid_date; + +pub(super) mod robots_tag_components; diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs index 47b463cb..0f239fce 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs @@ -1,8 +1,11 @@ use crate::headers::util::value_string::HeaderValueString; use crate::headers::x_robots_tag_components::custom_rule::CustomRule; use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; -use crate::headers::x_robots_tag_components::robots_tag_builder::RobotsTagBuilder; +use crate::headers::x_robots_tag_components::robots_tag_components::Builder; use crate::headers::x_robots_tag_components::valid_date::ValidDate; +use rama_core::error::OpaqueError; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; macro_rules! getter_setter { ($field:ident, $type:ty) => { @@ -86,14 +89,14 @@ impl RobotsTag { custom_rules: vec![], } } - + pub(super) fn add_custom_rule(&mut self, rule: CustomRule) -> &mut Self { self.custom_rules.push(rule); self } - pub(super) fn builder() -> RobotsTagBuilder { - RobotsTagBuilder::new() + pub(super) fn builder() -> Builder { + Builder::new() } getter_setter!(bot_name, HeaderValueString, optional); @@ -112,4 +115,100 @@ impl RobotsTag { getter_setter!(no_ai, bool); getter_setter!(no_image_ai, bool); getter_setter!(spc, bool); + + pub(super) fn is_valid_field_name(field_name: &str) -> bool { + field_name.eq_ignore_ascii_case("all") + || field_name.eq_ignore_ascii_case("noindex") + || field_name.eq_ignore_ascii_case("nofollow") + || field_name.eq_ignore_ascii_case("none") + || field_name.eq_ignore_ascii_case("nosnippet") + || field_name.eq_ignore_ascii_case("indexifembedded") + || field_name.eq_ignore_ascii_case("notranslate") + || field_name.eq_ignore_ascii_case("noimageindex") + || field_name.eq_ignore_ascii_case("noai") + || field_name.eq_ignore_ascii_case("noimageai") + || field_name.eq_ignore_ascii_case("spc") + } + + pub(super) fn from_str(s: &str) -> Result, OpaqueError> { + let mut bot_name = None; + let mut fields = s; + + if let Some((bot_name_candidate, rest)) = s.split_once(':') { + if !RobotsTag::is_valid_field_name(bot_name_candidate) { + bot_name = Some( + HeaderValueString::from_str(bot_name_candidate) + .map_err(OpaqueError::from_std)?, + ); + fields = rest; + } + } + + let mut builder = RobotsTag::builder().bot_name(bot_name); + + for field in fields.split(',') { + match builder.add_field(field) { + Ok(_) => {} + Err(e) if e.to_string().contains("not a valid robots tag field") => { + // re + return Ok(None); + } + Err(e) => return Err(e), + } + } + + Ok(Some(builder.build()?)) + } +} + +impl Display for RobotsTag { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if let Some(bot_name) = self.bot_name() { + write!(f, "{bot_name}: ")?; + } + + let mut _first = true; + + macro_rules! write_field { + ($cond:expr, $fmt:expr) => { + if $cond { + if !_first { + write!(f, ", ")?; + } + write!(f, "{}", $fmt)?; + _first = false; + } + }; + ($cond:expr, $fmt:expr, optional) => { + if let Some(value) = $cond { + if !_first { + write!(f, ", ")?; + } + write!(f, "{}: {}", $fmt, value)?; + _first = false; + } + }; + } + + write_field!(self.all(), "all"); + write_field!(self.no_index(), "noindex"); + write_field!(self.no_follow(), "nofollow"); + write_field!(self.none(), "none"); + write_field!(self.no_snippet(), "nosnippet"); + write_field!(self.index_if_embedded(), "indexifembedded"); + write_field!( + self.max_snippet() != 0, + format!("max-snippet: {}", self.max_snippet()) + ); + write_field!(self.max_image_preview(), "max-image-preview", optional); + write_field!(self.max_video_preview(), "max-video-preview", optional); + write_field!(self.no_translate(), "notranslate"); + write_field!(self.no_image_index(), "noimageindex"); + write_field!(self.unavailable_after(), "unavailable_after", optional); + write_field!(self.no_ai(), "noai"); + write_field!(self.no_image_ai(), "noimageai"); + write_field!(self.spc(), "spc"); + + writeln!(f) + } } diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs deleted file mode 100644 index 314d49a5..00000000 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_builder.rs +++ /dev/null @@ -1,78 +0,0 @@ -use rama_core::error::OpaqueError; -use crate::headers::util::value_string::HeaderValueString; -use crate::headers::x_robots_tag_components::custom_rule::CustomRule; -use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; -use crate::headers::x_robots_tag_components::robots_tag::RobotsTag; -use crate::headers::x_robots_tag_components::valid_date::ValidDate; - -macro_rules! builder_field { - ($field:ident, $type:ty) => { - paste::paste! { - pub(super) fn [<$field>](mut self, [<$field>]: $type) -> Self { - self.content.[]([<$field>]); - self - } - - pub(super) fn [](&mut self, [<$field>]: $type) -> &mut Self { - self.content.[]([<$field>]); - self - } - - pub(super) fn [](mut self, [<$field>]: $type) -> Self { - self.content.[]([<$field>]); - self - } - } - }; -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub(super) struct RobotsTagBuilder { - content: T, - valid: bool -} - -impl RobotsTagBuilder<()> { - pub(super) fn new() -> Self { - RobotsTagBuilder{content: (), valid: false } - } - - pub(super) fn bot_name( - self, - bot_name: Option, - ) -> RobotsTagBuilder { - RobotsTagBuilder{ content: RobotsTag::new_with_bot_name(bot_name), valid: false } - } -} - -impl RobotsTagBuilder { - pub(super) fn build(self) -> Result { - if self.valid { - Ok(self.content) - } else { - Err(OpaqueError::from_display("not a valid robots tag")) - } - } - - pub(super) fn add_custom_rule(&mut self, rule: CustomRule) -> &mut Self { - self.content.add_custom_rule(rule); - self - } - - builder_field!(bot_name, HeaderValueString); - builder_field!(all, bool); - builder_field!(no_index, bool); - builder_field!(no_follow, bool); - builder_field!(none, bool); - builder_field!(no_snippet, bool); - builder_field!(index_if_embedded, bool); - builder_field!(max_snippet, u32); - builder_field!(max_image_preview, MaxImagePreviewSetting); - builder_field!(max_video_preview, u32); - builder_field!(no_translate, bool); - builder_field!(no_image_index, bool); - builder_field!(unavailable_after, ValidDate); - builder_field!(no_ai, bool); - builder_field!(no_image_ai, bool); - builder_field!(spc, bool); -} diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs new file mode 100644 index 00000000..f3371b03 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs @@ -0,0 +1,136 @@ +use crate::headers::util::value_string::HeaderValueString; +use crate::headers::x_robots_tag_components::custom_rule::CustomRule; +use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; +use crate::headers::x_robots_tag_components::robots_tag::RobotsTag; +use crate::headers::x_robots_tag_components::valid_date::ValidDate; +use rama_core::error::OpaqueError; + +macro_rules! builder_field { + ($field:ident, $type:ty) => { + paste::paste! { + pub(in crate::headers::x_robots_tag_components) fn [<$field>](mut self, [<$field>]: $type) -> Self { + self.content.[]([<$field>]); + self.valid = true; + self + } + + pub(in crate::headers::x_robots_tag_components) fn [](&mut self, [<$field>]: $type) -> &mut Self { + self.content.[]([<$field>]); + self.valid = true; + self + } + } + }; +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub(in crate::headers::x_robots_tag_components) struct Builder { + content: T, + valid: bool, +} + +impl Builder<()> { + pub(in crate::headers::x_robots_tag_components) fn new() -> Self { + Builder { + content: (), + valid: false, + } + } + + pub(in crate::headers::x_robots_tag_components) fn bot_name( + &self, + bot_name: Option, + ) -> Builder { + Builder { + content: RobotsTag::new_with_bot_name(bot_name), + valid: false, + } + } +} + +impl Builder { + pub(in crate::headers::x_robots_tag_components) fn build( + self, + ) -> Result { + if self.valid { + Ok(self.content) + } else { + Err(OpaqueError::from_display("not a valid robots tag")) + } + } + + pub(in crate::headers::x_robots_tag_components) fn add_custom_rule( + &mut self, + rule: CustomRule, + ) -> &mut Self { + self.content.add_custom_rule(rule); + self.valid = true; + self + } + + builder_field!(bot_name, HeaderValueString); + builder_field!(all, bool); + builder_field!(no_index, bool); + builder_field!(no_follow, bool); + builder_field!(none, bool); + builder_field!(no_snippet, bool); + builder_field!(index_if_embedded, bool); + builder_field!(max_snippet, u32); + builder_field!(max_image_preview, MaxImagePreviewSetting); + builder_field!(max_video_preview, u32); + builder_field!(no_translate, bool); + builder_field!(no_image_index, bool); + builder_field!(unavailable_after, ValidDate); + builder_field!(no_ai, bool); + builder_field!(no_image_ai, bool); + builder_field!(spc, bool); + + pub(in crate::headers::x_robots_tag_components) fn add_field( + &mut self, + s: &str, + ) -> Result<&mut Self, OpaqueError> { + if let Some((key, value)) = s.split_once(':') { + Ok(if key.eq_ignore_ascii_case("max-snippet") { + self.set_max_snippet(value.parse().map_err(OpaqueError::from_std)?) + } else if key.eq_ignore_ascii_case("max-image-preview") { + self.set_max_image_preview(value.parse()?) + } else if key.eq_ignore_ascii_case("max-video-preview") { + self.set_max_video_preview(value.parse().map_err(OpaqueError::from_std)?) + } else if key.eq_ignore_ascii_case("unavailable_after: ") { + self.set_unavailable_after(value.parse()?) + } else { + return Err(OpaqueError::from_display("not a valid robots tag field")); + }) + } else { + self.add_simple_field(s) + } + } + + fn add_simple_field(&mut self, s: &str) -> Result<&mut Self, OpaqueError> { + Ok(if s.eq_ignore_ascii_case("all") { + self.set_all(true) + } else if s.eq_ignore_ascii_case("noindex") { + self.set_no_index(true) + } else if s.eq_ignore_ascii_case("nofollow") { + self.set_no_follow(true) + } else if s.eq_ignore_ascii_case("none") { + self.set_none(true) + } else if s.eq_ignore_ascii_case("nosnippet") { + self.set_no_snippet(true) + } else if s.eq_ignore_ascii_case("indexifembedded") { + self.set_index_if_embedded(true) + } else if s.eq_ignore_ascii_case("notranslate") { + self.set_no_translate(true) + } else if s.eq_ignore_ascii_case("noimageindex") { + self.set_no_image_index(true) + } else if s.eq_ignore_ascii_case("noai") { + self.set_no_ai(true) + } else if s.eq_ignore_ascii_case("noimageai") { + self.set_no_image_ai(true) + } else if s.eq_ignore_ascii_case("spc") { + self.set_spc(true) + } else { + return Err(OpaqueError::from_display("not a valid robots tag field")); + }) + } +} diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs new file mode 100644 index 00000000..c43d54b2 --- /dev/null +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs @@ -0,0 +1,5 @@ +mod builder; +pub(super) use builder::Builder; + +mod parser; +pub(in crate::headers) use parser::Parser; diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs new file mode 100644 index 00000000..66306f1c --- /dev/null +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs @@ -0,0 +1,67 @@ +use crate::headers::util::value_string::HeaderValueString; +use crate::headers::x_robots_tag_components::RobotsTag; +use rama_core::error::OpaqueError; +use std::str::FromStr; + +pub(in crate::headers) struct Parser<'a> { + remaining: Option<&'a str>, +} + +impl<'a> Parser<'a> { + pub(in crate::headers) fn new(remaining: &'a str) -> Self { + Self { + remaining: Some(remaining.trim()), + } + } +} + +impl<'a> Iterator for Parser<'_> { + type Item = Result; + + fn next(&mut self) -> Option { + let mut remaining = self.remaining?; + + let bot_name = match Self::parse_bot_name(&mut remaining) { + Ok(bot_name) => bot_name, + Err(e) => return Some(Err(e)), + }; + + let mut builder = RobotsTag::builder().bot_name(bot_name); + + while let Some((field, rest)) = remaining.split_once(',') { + let field = field.trim(); + if field.is_empty() { + continue; + } + + match builder.add_field(field) { + Ok(_) => { + remaining = rest.trim(); + } + Err(e) if e.to_string().contains("not a valid robots tag field") => { + self.remaining = Some(remaining.trim()); + return Some(builder.build()); + } + Err(e) => return Some(Err(e)), + } + } + + Some(builder.build()) + } +} + +impl Parser<'_> { + fn parse_bot_name(remaining: &mut &str) -> Result, OpaqueError> { + if let Some((bot_name_candidate, rest)) = remaining.split_once(':') { + if !RobotsTag::is_valid_field_name(bot_name_candidate) { + *remaining = rest.trim(); + return match HeaderValueString::from_str(bot_name_candidate) { + Ok(bot) => Ok(Some(bot)), + Err(e) => Err(OpaqueError::from_std(e)), + }; + } + } + + Ok(None) + } +} From 29bc0f9c143e0f5a8f22e692cb2ed7cbebb37882 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sun, 2 Feb 2025 17:18:16 +0100 Subject: [PATCH 24/37] enhance decode/parse functionalities --- rama-http/src/headers/x_robots_tag.rs | 19 +++++++++---------- .../robots_tag_components/parser.rs | 18 ++++++++++++++---- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag.rs b/rama-http/src/headers/x_robots_tag.rs index 6ba3f07c..e9acf839 100644 --- a/rama-http/src/headers/x_robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag.rs @@ -17,16 +17,15 @@ impl Header for XRobotsTag { Self: Sized, I: Iterator, { - let mut elements = Vec::new(); - for value in values { - let mut parser = Parser::new(value.to_str().map_err(|_| Error::invalid())?); - while let Some(result) = parser.next() { - match result { - Ok(robots_tag) => elements.push(robots_tag), - Err(_) => return Err(Error::invalid()), - } - } - } + let elements = values.try_fold(Vec::new(), |mut acc, value| { + acc.extend(Parser::parse_value(value).map_err(|err| { + tracing::debug!(?err, "x-robots-tag header element decoding failure"); + Error::invalid() + })?); + + Ok(acc) + })?; + Ok(XRobotsTag(elements)) } diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs index 66306f1c..0a1bf832 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs @@ -1,5 +1,6 @@ use crate::headers::util::value_string::HeaderValueString; use crate::headers::x_robots_tag_components::RobotsTag; +use http::HeaderValue; use rama_core::error::OpaqueError; use std::str::FromStr; @@ -9,9 +10,12 @@ pub(in crate::headers) struct Parser<'a> { impl<'a> Parser<'a> { pub(in crate::headers) fn new(remaining: &'a str) -> Self { - Self { - remaining: Some(remaining.trim()), - } + let remaining = match remaining.trim() { + "" => None, + text => Some(text), + }; + + Self { remaining } } } @@ -19,7 +23,7 @@ impl<'a> Iterator for Parser<'_> { type Item = Result; fn next(&mut self) -> Option { - let mut remaining = self.remaining?; + let mut remaining = self.remaining?.trim(); let bot_name = match Self::parse_bot_name(&mut remaining) { Ok(bot_name) => bot_name, @@ -64,4 +68,10 @@ impl Parser<'_> { Ok(None) } + + pub(in crate::headers) fn parse_value( + value: &HeaderValue, + ) -> Result, OpaqueError> { + Parser::new(value.to_str().map_err(OpaqueError::from_std)?).collect::, _>>() + } } From d731bcf983db3a6e9add498b05deb45f38cbb0ad Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sun, 2 Feb 2025 17:28:52 +0100 Subject: [PATCH 25/37] remove magic strings in max_image_preview_setting.rs --- .../max_image_preview_setting.rs | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs b/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs index f978705b..7b05e69c 100644 --- a/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs +++ b/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs @@ -1,6 +1,7 @@ use rama_core::error::OpaqueError; use std::fmt::Formatter; use std::str::FromStr; +use MaxImagePreviewSetting::*; #[derive(Clone, Debug, Eq, PartialEq)] pub(super) enum MaxImagePreviewSetting { @@ -9,26 +10,32 @@ pub(super) enum MaxImagePreviewSetting { Large, } -impl std::fmt::Display for MaxImagePreviewSetting { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +impl MaxImagePreviewSetting { + fn as_str(&self) -> &'static str { match self { - MaxImagePreviewSetting::None => write!(f, "none"), - MaxImagePreviewSetting::Standard => write!(f, "standard"), - MaxImagePreviewSetting::Large => write!(f, "large"), + None => "none", + Standard => "standard", + Large => "large", } } } +impl std::fmt::Display for MaxImagePreviewSetting { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + impl FromStr for MaxImagePreviewSetting { type Err = OpaqueError; fn from_str(s: &str) -> Result { - if s.eq_ignore_ascii_case("none") { - Ok(MaxImagePreviewSetting::None) - } else if s.eq_ignore_ascii_case("standard") { - Ok(MaxImagePreviewSetting::Standard) - } else if s.eq_ignore_ascii_case("large") { - Ok(MaxImagePreviewSetting::Large) + if s.eq_ignore_ascii_case(None.as_str()) { + Ok(None) + } else if s.eq_ignore_ascii_case(Standard.as_str()) { + Ok(Standard) + } else if s.eq_ignore_ascii_case(Large.as_str()) { + Ok(Large) } else { Err(OpaqueError::from_display( "failed to parse MaxImagePreviewSetting", From 0d055a85701e1ca83ad8a1b55f08c81049a92f91 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sun, 2 Feb 2025 17:44:43 +0100 Subject: [PATCH 26/37] remove from_str functionality (moved to Parser::next()) --- .../x_robots_tag_components/robots_tag.rs | 34 +------------------ 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs index 0f239fce..aa216562 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs @@ -3,9 +3,7 @@ use crate::headers::x_robots_tag_components::custom_rule::CustomRule; use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; use crate::headers::x_robots_tag_components::robots_tag_components::Builder; use crate::headers::x_robots_tag_components::valid_date::ValidDate; -use rama_core::error::OpaqueError; use std::fmt::{Display, Formatter}; -use std::str::FromStr; macro_rules! getter_setter { ($field:ident, $type:ty) => { @@ -129,36 +127,6 @@ impl RobotsTag { || field_name.eq_ignore_ascii_case("noimageai") || field_name.eq_ignore_ascii_case("spc") } - - pub(super) fn from_str(s: &str) -> Result, OpaqueError> { - let mut bot_name = None; - let mut fields = s; - - if let Some((bot_name_candidate, rest)) = s.split_once(':') { - if !RobotsTag::is_valid_field_name(bot_name_candidate) { - bot_name = Some( - HeaderValueString::from_str(bot_name_candidate) - .map_err(OpaqueError::from_std)?, - ); - fields = rest; - } - } - - let mut builder = RobotsTag::builder().bot_name(bot_name); - - for field in fields.split(',') { - match builder.add_field(field) { - Ok(_) => {} - Err(e) if e.to_string().contains("not a valid robots tag field") => { - // re - return Ok(None); - } - Err(e) => return Err(e), - } - } - - Ok(Some(builder.build()?)) - } } impl Display for RobotsTag { @@ -209,6 +177,6 @@ impl Display for RobotsTag { write_field!(self.no_image_ai(), "noimageai"); write_field!(self.spc(), "spc"); - writeln!(f) + Ok(()) } } From 266552d169bb3d5f9cdfcff4fa4d68ffc772b2f1 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sun, 2 Feb 2025 17:45:32 +0100 Subject: [PATCH 27/37] fix error checking --- .../x_robots_tag_components/robots_tag_components/builder.rs | 5 +++-- .../x_robots_tag_components/robots_tag_components/parser.rs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs index f3371b03..a77d5b47 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs @@ -3,6 +3,7 @@ use crate::headers::x_robots_tag_components::custom_rule::CustomRule; use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; use crate::headers::x_robots_tag_components::robots_tag::RobotsTag; use crate::headers::x_robots_tag_components::valid_date::ValidDate; +use headers::Error; use rama_core::error::OpaqueError; macro_rules! builder_field { @@ -99,7 +100,7 @@ impl Builder { } else if key.eq_ignore_ascii_case("unavailable_after: ") { self.set_unavailable_after(value.parse()?) } else { - return Err(OpaqueError::from_display("not a valid robots tag field")); + return Err(OpaqueError::from_std(Error::invalid())); }) } else { self.add_simple_field(s) @@ -130,7 +131,7 @@ impl Builder { } else if s.eq_ignore_ascii_case("spc") { self.set_spc(true) } else { - return Err(OpaqueError::from_display("not a valid robots tag field")); + return Err(OpaqueError::from_std(Error::invalid())); }) } } diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs index 0a1bf832..86fe8fd6 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs @@ -42,7 +42,7 @@ impl<'a> Iterator for Parser<'_> { Ok(_) => { remaining = rest.trim(); } - Err(e) if e.to_string().contains("not a valid robots tag field") => { + Err(e) if e.is::() => { self.remaining = Some(remaining.trim()); return Some(builder.build()); } From 64fb0731ac9d71f862b0bb44aa584bf647f0ef3c Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sun, 2 Feb 2025 18:50:20 +0100 Subject: [PATCH 28/37] builder checks validity at compile-time --- .../robots_tag_components/builder.rs | 129 ++++++++++++------ .../robots_tag_components/parser.rs | 16 ++- 2 files changed, 97 insertions(+), 48 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs index a77d5b47..262ed31b 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs @@ -6,91 +6,130 @@ use crate::headers::x_robots_tag_components::valid_date::ValidDate; use headers::Error; use rama_core::error::OpaqueError; -macro_rules! builder_field { +macro_rules! robots_tag_builder_field { ($field:ident, $type:ty) => { paste::paste! { pub(in crate::headers::x_robots_tag_components) fn [<$field>](mut self, [<$field>]: $type) -> Self { - self.content.[]([<$field>]); - self.valid = true; + self.0.[]([<$field>]); self } pub(in crate::headers::x_robots_tag_components) fn [](&mut self, [<$field>]: $type) -> &mut Self { - self.content.[]([<$field>]); - self.valid = true; + self.0.[]([<$field>]); self } } }; } -#[derive(Clone, Debug, Eq, PartialEq)] -pub(in crate::headers::x_robots_tag_components) struct Builder { - content: T, - valid: bool, +macro_rules! no_tag_builder_field { + ($field:ident, $type:ty) => { + paste::paste! { + pub(in crate::headers::x_robots_tag_components) fn [<$field>](self, [<$field>]: $type) -> Builder { + Builder(RobotsTag::new_with_bot_name(self.0.bot_name)).[<$field>]([<$field>]) + } + } + }; } +#[derive(Clone, Debug, Eq, PartialEq)] +pub(in crate::headers::x_robots_tag_components) struct Builder(T); + impl Builder<()> { pub(in crate::headers::x_robots_tag_components) fn new() -> Self { - Builder { - content: (), - valid: false, - } + Builder(()) } pub(in crate::headers::x_robots_tag_components) fn bot_name( &self, bot_name: Option, - ) -> Builder { - Builder { - content: RobotsTag::new_with_bot_name(bot_name), - valid: false, - } + ) -> Builder { + Builder(NoTag { bot_name }) } } -impl Builder { - pub(in crate::headers::x_robots_tag_components) fn build( +pub(in crate::headers::x_robots_tag_components) struct NoTag { + bot_name: Option, +} + +impl Builder { + pub(in crate::headers::x_robots_tag_components) fn bot_name( + mut self, + bot_name: HeaderValueString, + ) -> Self { + self.0.bot_name = Some(bot_name); + self + } + + pub(in crate::headers::x_robots_tag_components) fn set_bot_name( + &mut self, + bot_name: HeaderValueString, + ) -> &mut Self { + self.0.bot_name = Some(bot_name); + self + } + + no_tag_builder_field!(all, bool); + no_tag_builder_field!(no_index, bool); + no_tag_builder_field!(no_follow, bool); + no_tag_builder_field!(none, bool); + no_tag_builder_field!(no_snippet, bool); + no_tag_builder_field!(index_if_embedded, bool); + no_tag_builder_field!(max_snippet, u32); + no_tag_builder_field!(max_image_preview, MaxImagePreviewSetting); + no_tag_builder_field!(max_video_preview, u32); + no_tag_builder_field!(no_translate, bool); + no_tag_builder_field!(no_image_index, bool); + no_tag_builder_field!(unavailable_after, ValidDate); + no_tag_builder_field!(no_ai, bool); + no_tag_builder_field!(no_image_ai, bool); + no_tag_builder_field!(spc, bool); + + pub(in crate::headers::x_robots_tag_components) fn add_field( self, - ) -> Result { - if self.valid { - Ok(self.content) - } else { - Err(OpaqueError::from_display("not a valid robots tag")) - } + s: &str, + ) -> Result, OpaqueError> { + let mut builder = Builder(RobotsTag::new_with_bot_name(self.0.bot_name)); + builder.add_field(s)?; + Ok(builder) + } +} + +impl Builder { + pub(in crate::headers::x_robots_tag_components) fn build(self) -> RobotsTag { + self.0 } pub(in crate::headers::x_robots_tag_components) fn add_custom_rule( &mut self, rule: CustomRule, ) -> &mut Self { - self.content.add_custom_rule(rule); - self.valid = true; + self.0.add_custom_rule(rule); self } - builder_field!(bot_name, HeaderValueString); - builder_field!(all, bool); - builder_field!(no_index, bool); - builder_field!(no_follow, bool); - builder_field!(none, bool); - builder_field!(no_snippet, bool); - builder_field!(index_if_embedded, bool); - builder_field!(max_snippet, u32); - builder_field!(max_image_preview, MaxImagePreviewSetting); - builder_field!(max_video_preview, u32); - builder_field!(no_translate, bool); - builder_field!(no_image_index, bool); - builder_field!(unavailable_after, ValidDate); - builder_field!(no_ai, bool); - builder_field!(no_image_ai, bool); - builder_field!(spc, bool); + robots_tag_builder_field!(bot_name, HeaderValueString); + robots_tag_builder_field!(all, bool); + robots_tag_builder_field!(no_index, bool); + robots_tag_builder_field!(no_follow, bool); + robots_tag_builder_field!(none, bool); + robots_tag_builder_field!(no_snippet, bool); + robots_tag_builder_field!(index_if_embedded, bool); + robots_tag_builder_field!(max_snippet, u32); + robots_tag_builder_field!(max_image_preview, MaxImagePreviewSetting); + robots_tag_builder_field!(max_video_preview, u32); + robots_tag_builder_field!(no_translate, bool); + robots_tag_builder_field!(no_image_index, bool); + robots_tag_builder_field!(unavailable_after, ValidDate); + robots_tag_builder_field!(no_ai, bool); + robots_tag_builder_field!(no_image_ai, bool); + robots_tag_builder_field!(spc, bool); pub(in crate::headers::x_robots_tag_components) fn add_field( &mut self, s: &str, ) -> Result<&mut Self, OpaqueError> { - if let Some((key, value)) = s.split_once(':') { + if let Some((key, value)) = s.trim().split_once(':') { Ok(if key.eq_ignore_ascii_case("max-snippet") { self.set_max_snippet(value.parse().map_err(OpaqueError::from_std)?) } else if key.eq_ignore_ascii_case("max-image-preview") { diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs index 86fe8fd6..7918706d 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs @@ -30,7 +30,17 @@ impl<'a> Iterator for Parser<'_> { Err(e) => return Some(Err(e)), }; - let mut builder = RobotsTag::builder().bot_name(bot_name); + let mut builder = if let Some((field, rest)) = remaining.split_once(',') { + match RobotsTag::builder().bot_name(bot_name).add_field(field) { + Ok(builder) => { + remaining = rest.trim(); + builder + } + Err(_) => return None, + } + } else { + return None; + }; while let Some((field, rest)) = remaining.split_once(',') { let field = field.trim(); @@ -44,13 +54,13 @@ impl<'a> Iterator for Parser<'_> { } Err(e) if e.is::() => { self.remaining = Some(remaining.trim()); - return Some(builder.build()); + return Some(Ok(builder.build())); } Err(e) => return Some(Err(e)), } } - Some(builder.build()) + Some(Ok(builder.build())) } } From c59690ad3384f140e4b65ed41fd095120c82f5a3 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sun, 2 Feb 2025 19:21:38 +0100 Subject: [PATCH 29/37] fix RobotsTag::is_valid_field_name --- .../src/headers/x_robots_tag_components/robots_tag.rs | 8 ++++++-- .../robots_tag_components/builder.rs | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs index aa216562..6a5d5b58 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs @@ -57,7 +57,7 @@ pub(crate) struct RobotsTag { max_video_preview: Option, no_translate: bool, no_image_index: bool, - unavailable_after: Option, // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601." + unavailable_after: Option, // custom rules no_ai: bool, no_image_ai: bool, @@ -115,14 +115,18 @@ impl RobotsTag { getter_setter!(spc, bool); pub(super) fn is_valid_field_name(field_name: &str) -> bool { - field_name.eq_ignore_ascii_case("all") + field_name.trim().eq_ignore_ascii_case("all") || field_name.eq_ignore_ascii_case("noindex") || field_name.eq_ignore_ascii_case("nofollow") || field_name.eq_ignore_ascii_case("none") || field_name.eq_ignore_ascii_case("nosnippet") || field_name.eq_ignore_ascii_case("indexifembedded") + || field_name.eq_ignore_ascii_case("max-snippet") + || field_name.eq_ignore_ascii_case("max-image-preview") + || field_name.eq_ignore_ascii_case("max-video-preview") || field_name.eq_ignore_ascii_case("notranslate") || field_name.eq_ignore_ascii_case("noimageindex") + || field_name.eq_ignore_ascii_case("unavailable_after") || field_name.eq_ignore_ascii_case("noai") || field_name.eq_ignore_ascii_case("noimageai") || field_name.eq_ignore_ascii_case("spc") diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs index 262ed31b..495778b4 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs @@ -68,7 +68,7 @@ impl Builder { self.0.bot_name = Some(bot_name); self } - + no_tag_builder_field!(all, bool); no_tag_builder_field!(no_index, bool); no_tag_builder_field!(no_follow, bool); From 6c1944b70bf5edf84cc8c91c33e14151394f778a Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sun, 2 Feb 2025 22:25:08 +0100 Subject: [PATCH 30/37] add tests for valid_date.rs --- .../x_robots_tag_components/valid_date.rs | 91 ++++++++++++++++++- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/valid_date.rs b/rama-http/src/headers/x_robots_tag_components/valid_date.rs index 3db7e44f..764379e6 100644 --- a/rama-http/src/headers/x_robots_tag_components/valid_date.rs +++ b/rama-http/src/headers/x_robots_tag_components/valid_date.rs @@ -44,10 +44,11 @@ impl FromStr for ValidDate { fn from_str(s: &str) -> Result { Ok(ValidDate::new( - DateTime::parse_from_rfc3339(s) + DateTime::parse_from_rfc3339(s) // check ISO 8601 .or_else(|_| { - DateTime::parse_from_rfc2822(s) - .or_else(|_| DateTime::parse_from_str(s, "%A, %d-%b-%y %T %Z")) + DateTime::parse_from_rfc2822(s) // check RFC 822 + .or_else(|_| DateTime::parse_from_str(s, "%A, %d-%b-%Y %H:%M:%S %Z")) + // check RFC 850 }) .with_context(|| "Failed to parse date")? .with_timezone(&Utc), @@ -60,3 +61,87 @@ impl Display for ValidDate { writeln!(f, "{}", self.0) } } + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! test_valid_date_strings { + ($($str:literal),+) => { + $(assert!(ValidDate::from_str($str).is_ok(), + "'{}': {:?}", + $str, ValidDate::from_str($str).err());)+ + }; + } + + macro_rules! test_invalid_date_strings { + ($($str:literal),+) => { + $(assert!(ValidDate::from_str($str).is_err());)+ + }; + } + + #[test] + fn test_valid_rfc_822() { + test_valid_date_strings!( + "Wed, 02 Oct 2002 08:00:00 EST", + "Wed, 02 Oct 2002 13:00:00 GMT", + "Wed, 02 Oct 2002 15:00:00 +0200", + "Mon, 11 Mar 2019 01:57:00 EST", + "11 Mar 2019 01:57:23 EDT", + "Mon, 11 Mar 2019 01:57:00 -0500", + "Mon, 11 Mar 2019 01:57 A", + "11 Mar 2019 01:00 N", + "11 Mar 2019 01:59 A", + "Mon, 11 Mar 2019 02:00 Z", + "Mon, 11 Mar 2019 02:00:34 Z", + "11 Mar 2019 02:00 PST" + ); + } + + // fails, because it cannot convert timezone from an abbreviation + // #[test] + // fn test_valid_rfc_850() { + // test_valid_date_strings!( + // "Monday, 01-Jan-2001 08:58:35 UTC", + // "Tuesday, 19-Feb-82 10:14:55 PST", + // "Wednesday, 1-Jan-83 00:00:00 PDT", + // "Thursday, 30-Nov-12 16:59:59 MST", + // "Friday, 9-Mar-31 12:00:00 CST", + // "Friday, 19-Dec-99 23:59:59 EST" + // ); + // } + + #[test] + fn test_valid_iso_8601() { + test_valid_date_strings!( + "2025-02-02T14:30:00+00:00", + "2023-06-15T23:59:59-05:00", + "2019-12-31T12:00:00+08:45", + "2020-02-29T00:00:00Z", + "2024-10-10T10:10:10+02:00", + "2022-07-01T16:45:30-07:00", + "2018-01-01T09:00:00+09:30", + "2030-05-20T05:05:05+05:30", + "1999-12-31T23:59:59-03:00", + "2045-11-11T11:11:11+14:00" + ); + } + + #[test] + fn test_invalid_date_times() { + test_invalid_date_strings!( + "2025-02-30T14:30:00+00:00", + "2023-06-15T25:00:00-05:00", + "2019-12-31T12:60:00+08:45", + "2020-02-29T00:00:00", + "Thu, 32 Dec 2023 10:00:00 +0200", + "Mon, 15 Jan 2023 23:59:60 -0500", + "2024-10-10T10:10:10", + "2022-07-01T16:45:30 UTC", + "2018-01-01T09:00:00+09:75", + "2030-05-20T05:05:05+24:00", + "1999-12-31 23:59:59 -03:00", + "2045-11-11T11:11:11 EST" + ); + } +} From 83b118a47319eba09a94f3648f5a6bcd2774b500 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Wed, 5 Feb 2025 22:21:19 +0100 Subject: [PATCH 31/37] change visibility --- rama-http/src/headers/mod.rs | 4 +- .../x_robots_tag_components/custom_rule.rs | 19 ++++++ .../max_image_preview_setting.rs | 2 +- .../headers/x_robots_tag_components/mod.rs | 7 ++- .../x_robots_tag_components/robots_tag.rs | 56 ++++++++++++----- .../robots_tag_components/builder.rs | 63 ++++++++----------- .../robots_tag_components/mod.rs | 4 +- .../robots_tag_components/parser.rs | 8 +-- .../x_robots_tag_components/valid_date.rs | 6 ++ 9 files changed, 105 insertions(+), 64 deletions(-) diff --git a/rama-http/src/headers/mod.rs b/rama-http/src/headers/mod.rs index af7ea92f..3a82b06d 100644 --- a/rama-http/src/headers/mod.rs +++ b/rama-http/src/headers/mod.rs @@ -103,9 +103,9 @@ pub use ::rama_http_types::headers::HeaderExt; pub(crate) mod util; -mod x_robots_tag_components; +pub mod x_robots_tag_components; -pub mod x_robots_tag; +mod x_robots_tag; pub use x_robots_tag::XRobotsTag; pub use util::quality_value::{Quality, QualityValue}; diff --git a/rama-http/src/headers/x_robots_tag_components/custom_rule.rs b/rama-http/src/headers/x_robots_tag_components/custom_rule.rs index bb2ab466..78d67411 100644 --- a/rama-http/src/headers/x_robots_tag_components/custom_rule.rs +++ b/rama-http/src/headers/x_robots_tag_components/custom_rule.rs @@ -28,4 +28,23 @@ impl CustomRule { pub(super) fn value(&self) -> Option<&HeaderValueString> { self.value.as_ref() } + + pub(super) fn as_tuple(&self) -> (&HeaderValueString, &Option) { + (&self.key, &self.value) + } +} + +impl From for CustomRule { + fn from(key: HeaderValueString) -> Self { + Self { key, value: None } + } +} + +impl From<(HeaderValueString, HeaderValueString)> for CustomRule { + fn from(key_value: (HeaderValueString, HeaderValueString)) -> Self { + Self { + key: key_value.0, + value: Some(key_value.1), + } + } } diff --git a/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs b/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs index 7b05e69c..5d0fe378 100644 --- a/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs +++ b/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs @@ -4,7 +4,7 @@ use std::str::FromStr; use MaxImagePreviewSetting::*; #[derive(Clone, Debug, Eq, PartialEq)] -pub(super) enum MaxImagePreviewSetting { +pub enum MaxImagePreviewSetting { None, Standard, Large, diff --git a/rama-http/src/headers/x_robots_tag_components/mod.rs b/rama-http/src/headers/x_robots_tag_components/mod.rs index 97834688..df64d8ec 100644 --- a/rama-http/src/headers/x_robots_tag_components/mod.rs +++ b/rama-http/src/headers/x_robots_tag_components/mod.rs @@ -1,10 +1,13 @@ mod robots_tag; -pub(super) use robots_tag::RobotsTag; +pub use robots_tag::RobotsTag; mod max_image_preview_setting; +pub use max_image_preview_setting::MaxImagePreviewSetting; mod custom_rule; +use custom_rule::CustomRule; mod valid_date; +use valid_date::ValidDate; -pub(super) mod robots_tag_components; +pub mod robots_tag_components; diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs index 6a5d5b58..71c62185 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs @@ -1,23 +1,22 @@ use crate::headers::util::value_string::HeaderValueString; -use crate::headers::x_robots_tag_components::custom_rule::CustomRule; -use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; use crate::headers::x_robots_tag_components::robots_tag_components::Builder; -use crate::headers::x_robots_tag_components::valid_date::ValidDate; +use crate::headers::x_robots_tag_components::{CustomRule, MaxImagePreviewSetting, ValidDate}; +use chrono::{DateTime, Utc}; use std::fmt::{Display, Formatter}; macro_rules! getter_setter { ($field:ident, $type:ty) => { paste::paste! { - pub(super) fn [<$field>](&self) -> $type { + pub fn [<$field>](&self) -> $type { self.[<$field>] } - pub(super) fn [](&mut self, [<$field>]: $type) -> &mut Self { + pub fn [](&mut self, [<$field>]: $type) -> &mut Self { self.[<$field>] = [<$field>]; self } - pub(super) fn [](mut self, [<$field>]: $type) -> Self { + pub fn [](mut self, [<$field>]: $type) -> Self { self.[<$field>] = [<$field>]; self } @@ -26,16 +25,16 @@ macro_rules! getter_setter { ($field:ident, $type:ty, optional) => { paste::paste! { - pub(super) fn [<$field>](&self) -> Option<&$type> { + pub fn [<$field>](&self) -> Option<&$type> { self.[<$field>].as_ref() } - pub(super) fn [](&mut self, [<$field>]: $type) -> &mut Self { + pub fn [](&mut self, [<$field>]: $type) -> &mut Self { self.[<$field>] = Some([<$field>]); self } - pub(super) fn [](mut self, [<$field>]: $type) -> Self { + pub fn [](mut self, [<$field>]: $type) -> Self { self.[<$field>] = Some([<$field>]); self } @@ -44,7 +43,7 @@ macro_rules! getter_setter { } #[derive(Clone, Debug, Eq, PartialEq)] -pub(crate) struct RobotsTag { +pub struct RobotsTag { bot_name: Option, all: bool, no_index: bool, @@ -58,7 +57,6 @@ pub(crate) struct RobotsTag { no_translate: bool, no_image_index: bool, unavailable_after: Option, - // custom rules no_ai: bool, no_image_ai: bool, spc: bool, @@ -88,12 +86,27 @@ impl RobotsTag { } } - pub(super) fn add_custom_rule(&mut self, rule: CustomRule) -> &mut Self { - self.custom_rules.push(rule); + pub fn add_custom_rule_simple(&mut self, key: HeaderValueString) -> &mut Self { + self.custom_rules.push(key.into()); self } - pub(super) fn builder() -> Builder { + pub fn add_custom_rule_composite( + &mut self, + key: HeaderValueString, + value: HeaderValueString, + ) -> &mut Self { + self.custom_rules.push((key, value).into()); + self + } + + pub fn custom_rules( + &self, + ) -> impl Iterator)> { + self.custom_rules.iter().map(|x| x.as_tuple()) + } + + pub fn builder() -> Builder { Builder::new() } @@ -109,7 +122,6 @@ impl RobotsTag { getter_setter!(max_video_preview, u32, optional); getter_setter!(no_translate, bool); getter_setter!(no_image_index, bool); - getter_setter!(unavailable_after, ValidDate, optional); getter_setter!(no_ai, bool); getter_setter!(no_image_ai, bool); getter_setter!(spc, bool); @@ -131,6 +143,20 @@ impl RobotsTag { || field_name.eq_ignore_ascii_case("noimageai") || field_name.eq_ignore_ascii_case("spc") } + + pub fn unavailable_after(&self) -> Option<&DateTime> { + self.unavailable_after.as_deref() + } + + pub fn set_unavailable_after(&mut self, unavailable_after: DateTime) -> &mut Self { + self.unavailable_after = Some(unavailable_after.into()); + self + } + + pub fn with_unavailable_after(mut self, unavailable_after: DateTime) -> Self { + self.unavailable_after = Some(unavailable_after.into()); + self + } } impl Display for RobotsTag { diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs index 495778b4..0f25a117 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs @@ -1,20 +1,18 @@ use crate::headers::util::value_string::HeaderValueString; -use crate::headers::x_robots_tag_components::custom_rule::CustomRule; -use crate::headers::x_robots_tag_components::max_image_preview_setting::MaxImagePreviewSetting; -use crate::headers::x_robots_tag_components::robots_tag::RobotsTag; -use crate::headers::x_robots_tag_components::valid_date::ValidDate; +use crate::headers::x_robots_tag_components::{MaxImagePreviewSetting, RobotsTag, ValidDate}; +use chrono::{DateTime, Utc}; use headers::Error; use rama_core::error::OpaqueError; macro_rules! robots_tag_builder_field { ($field:ident, $type:ty) => { paste::paste! { - pub(in crate::headers::x_robots_tag_components) fn [<$field>](mut self, [<$field>]: $type) -> Self { + pub fn [<$field>](mut self, [<$field>]: $type) -> Self { self.0.[]([<$field>]); self } - pub(in crate::headers::x_robots_tag_components) fn [](&mut self, [<$field>]: $type) -> &mut Self { + pub fn [](&mut self, [<$field>]: $type) -> &mut Self { self.0.[]([<$field>]); self } @@ -25,7 +23,7 @@ macro_rules! robots_tag_builder_field { macro_rules! no_tag_builder_field { ($field:ident, $type:ty) => { paste::paste! { - pub(in crate::headers::x_robots_tag_components) fn [<$field>](self, [<$field>]: $type) -> Builder { + pub fn [<$field>](self, [<$field>]: $type) -> Builder { Builder(RobotsTag::new_with_bot_name(self.0.bot_name)).[<$field>]([<$field>]) } } @@ -33,38 +31,29 @@ macro_rules! no_tag_builder_field { } #[derive(Clone, Debug, Eq, PartialEq)] -pub(in crate::headers::x_robots_tag_components) struct Builder(T); +pub struct Builder(T); impl Builder<()> { - pub(in crate::headers::x_robots_tag_components) fn new() -> Self { + pub fn new() -> Self { Builder(()) } - pub(in crate::headers::x_robots_tag_components) fn bot_name( - &self, - bot_name: Option, - ) -> Builder { + pub fn bot_name(&self, bot_name: Option) -> Builder { Builder(NoTag { bot_name }) } } -pub(in crate::headers::x_robots_tag_components) struct NoTag { +pub struct NoTag { bot_name: Option, } impl Builder { - pub(in crate::headers::x_robots_tag_components) fn bot_name( - mut self, - bot_name: HeaderValueString, - ) -> Self { + pub fn bot_name(mut self, bot_name: HeaderValueString) -> Self { self.0.bot_name = Some(bot_name); self } - pub(in crate::headers::x_robots_tag_components) fn set_bot_name( - &mut self, - bot_name: HeaderValueString, - ) -> &mut Self { + pub fn set_bot_name(&mut self, bot_name: HeaderValueString) -> &mut Self { self.0.bot_name = Some(bot_name); self } @@ -80,15 +69,12 @@ impl Builder { no_tag_builder_field!(max_video_preview, u32); no_tag_builder_field!(no_translate, bool); no_tag_builder_field!(no_image_index, bool); - no_tag_builder_field!(unavailable_after, ValidDate); + no_tag_builder_field!(unavailable_after, DateTime); no_tag_builder_field!(no_ai, bool); no_tag_builder_field!(no_image_ai, bool); no_tag_builder_field!(spc, bool); - pub(in crate::headers::x_robots_tag_components) fn add_field( - self, - s: &str, - ) -> Result, OpaqueError> { + pub fn add_field(self, s: &str) -> Result, OpaqueError> { let mut builder = Builder(RobotsTag::new_with_bot_name(self.0.bot_name)); builder.add_field(s)?; Ok(builder) @@ -96,15 +82,21 @@ impl Builder { } impl Builder { - pub(in crate::headers::x_robots_tag_components) fn build(self) -> RobotsTag { + pub fn build(self) -> RobotsTag { self.0 } - pub(in crate::headers::x_robots_tag_components) fn add_custom_rule( + pub fn add_custom_rule_simple(&mut self, key: HeaderValueString) -> &mut Self { + self.0.add_custom_rule_simple(key); + self + } + + pub fn add_custom_rule_composite( &mut self, - rule: CustomRule, + key: HeaderValueString, + value: HeaderValueString, ) -> &mut Self { - self.0.add_custom_rule(rule); + self.0.add_custom_rule_composite(key, value); self } @@ -120,15 +112,12 @@ impl Builder { robots_tag_builder_field!(max_video_preview, u32); robots_tag_builder_field!(no_translate, bool); robots_tag_builder_field!(no_image_index, bool); - robots_tag_builder_field!(unavailable_after, ValidDate); + robots_tag_builder_field!(unavailable_after, DateTime); robots_tag_builder_field!(no_ai, bool); robots_tag_builder_field!(no_image_ai, bool); robots_tag_builder_field!(spc, bool); - pub(in crate::headers::x_robots_tag_components) fn add_field( - &mut self, - s: &str, - ) -> Result<&mut Self, OpaqueError> { + pub fn add_field(&mut self, s: &str) -> Result<&mut Self, OpaqueError> { if let Some((key, value)) = s.trim().split_once(':') { Ok(if key.eq_ignore_ascii_case("max-snippet") { self.set_max_snippet(value.parse().map_err(OpaqueError::from_std)?) @@ -137,7 +126,7 @@ impl Builder { } else if key.eq_ignore_ascii_case("max-video-preview") { self.set_max_video_preview(value.parse().map_err(OpaqueError::from_std)?) } else if key.eq_ignore_ascii_case("unavailable_after: ") { - self.set_unavailable_after(value.parse()?) + self.set_unavailable_after(value.parse::()?.into()) } else { return Err(OpaqueError::from_std(Error::invalid())); }) diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs index c43d54b2..797103e0 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs @@ -1,5 +1,5 @@ mod builder; -pub(super) use builder::Builder; +pub use builder::Builder; mod parser; -pub(in crate::headers) use parser::Parser; +pub(crate) use parser::Parser; diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs index 7918706d..36cd8ef4 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs @@ -4,12 +4,12 @@ use http::HeaderValue; use rama_core::error::OpaqueError; use std::str::FromStr; -pub(in crate::headers) struct Parser<'a> { +pub(crate) struct Parser<'a> { remaining: Option<&'a str>, } impl<'a> Parser<'a> { - pub(in crate::headers) fn new(remaining: &'a str) -> Self { + pub(crate) fn new(remaining: &'a str) -> Self { let remaining = match remaining.trim() { "" => None, text => Some(text), @@ -79,9 +79,7 @@ impl Parser<'_> { Ok(None) } - pub(in crate::headers) fn parse_value( - value: &HeaderValue, - ) -> Result, OpaqueError> { + pub(crate) fn parse_value(value: &HeaderValue) -> Result, OpaqueError> { Parser::new(value.to_str().map_err(OpaqueError::from_std)?).collect::, _>>() } } diff --git a/rama-http/src/headers/x_robots_tag_components/valid_date.rs b/rama-http/src/headers/x_robots_tag_components/valid_date.rs index 764379e6..0630045c 100644 --- a/rama-http/src/headers/x_robots_tag_components/valid_date.rs +++ b/rama-http/src/headers/x_robots_tag_components/valid_date.rs @@ -27,6 +27,12 @@ impl From for DateTime { } } +impl From> for ValidDate { + fn from(value: DateTime) -> Self { + Self::new(value) + } +} + impl AsRef> for ValidDate { fn as_ref(&self) -> &DateTime { &self.0 From 8344c932ae1c7543fb9e4ce6dfde0ffb332711d0 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sat, 8 Feb 2025 21:44:45 +0100 Subject: [PATCH 32/37] add parsing from rfc 850 --- .../x_robots_tag_components/valid_date.rs | 245 +++++++++++++++++- 1 file changed, 231 insertions(+), 14 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/valid_date.rs b/rama-http/src/headers/x_robots_tag_components/valid_date.rs index 0630045c..556aa742 100644 --- a/rama-http/src/headers/x_robots_tag_components/valid_date.rs +++ b/rama-http/src/headers/x_robots_tag_components/valid_date.rs @@ -1,4 +1,4 @@ -use chrono::{DateTime, Utc}; +use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; use rama_core::error::{ErrorContext, OpaqueError}; use std::fmt::{Display, Formatter}; use std::ops::Deref; @@ -11,6 +11,220 @@ impl ValidDate { pub(super) fn new(date: DateTime) -> Self { Self(date) } + + fn datetime_from_rfc_850(s: &str) -> Result, OpaqueError> { + let (naive_date_time, remainder) = + NaiveDateTime::parse_and_remainder(s, "%A, %d-%b-%y %T") + .with_context(|| "failed to parse naive datetime")?; + + let fixed_offset = Self::offset_from_abbreviation(remainder)?; + + Ok(DateTime::from_naive_utc_and_offset(naive_date_time, fixed_offset)) + } + + fn offset_from_abbreviation(remainder: &str) -> Result { + Ok(match remainder.trim() { + "ACDT" => "+1030", + "ACST" => "+0930", + "ACT" => "−0500", + "ACWST" => "+0845", + "ADT" => "−0300", + "AEDT" => "+1100", + "AEST" => "+1000", + "AFT" => "+0430", + "AKDT" => "−0800", + "AKST" => "−0900", + "ALMT" => "+0600", + "AMST" => "−0300", + "AMT" => "+0400", + "ANAT" => "+1200", + "AQTT" => "+0500", + "ART" => "−0300", + "AST" => "−0400", + "AWST" => "+0800", + "AZOST" => "+0000", + "AZOT" => "−0100", + "AZT" => "+0400", + "BIOT" => "+0600", + "BIT" => "−1200", + "BNT" => "+0800", + "BOT" => "−0400", + "BRST" => "−0200", + "BRT" => "−0300", + "BST" => "+0600", + "BTT" => "+0600", + "CAT" => "+0200", + "CCT" => "+0630", + "CDT" => "−0500", + "CEST" => "+0200", + "CET" => "+0100", + "CHADT" => "+1345", + "CHAST" => "+1245", + "CHOST" => "+0900", + "CHOT" => "+0800", + "CHST" => "+1000", + "CHUT" => "+1000", + "CIST" => "−0800", + "CKT" => "−1000", + "CLST" => "−0300", + "CLT" => "−0400", + "COST" => "−0400", + "COT" => "−0500", + "CST" => "−0600", + "CVT" => "−0100", + "CWST" => "+0845", + "CXT" => "+0700", + "DAVT" => "+0700", + "DDUT" => "+1000", + "DFT" => "+0100", + "EASST" => "−0500", + "EAST" => "−0600", + "EAT" => "+0300", + "ECT" => "−0500", + "EDT" => "−0400", + "EEST" => "+0300", + "EET" => "+0200", + "EGST" => "+0000", + "EGT" => "−0100", + "EST" => "−0500", + "FET" => "+0300", + "FJT" => "+1200", + "FKST" => "−0300", + "FKT" => "−0400", + "FNT" => "−0200", + "GALT" => "−0600", + "GAMT" => "−0900", + "GET" => "+0400", + "GFT" => "−0300", + "GILT" => "+1200", + "GIT" => "−0900", + "GMT" => "+0000", + "GST" => "+0400", + "GYT" => "−0400", + "HAEC" => "+0200", + "HDT" => "−0900", + "HKT" => "+0800", + "HMT" => "+0500", + "HOVST" => "+0800", + "HOVT" => "+0700", + "HST" => "−1000", + "ICT" => "+0700", + "IDLW" => "−1200", + "IDT" => "+0300", + "IOT" => "+0600", + "IRDT" => "+0430", + "IRKT" => "+0800", + "IRST" => "+0330", + "IST" => "+0530", + "JST" => "+0900", + "KALT" => "+0200", + "KGT" => "+0600", + "KOST" => "+1100", + "KRAT" => "+0700", + "KST" => "+0900", + "LHST" => "+1030", + "LINT" => "+1400", + "MAGT" => "+1200", + "MART" => "−0930", + "MAWT" => "+0500", + "MDT" => "−0600", + "MEST" => "+0200", + "MET" => "+0100", + "MHT" => "+1200", + "MIST" => "+1100", + "MIT" => "−0930", + "MMT" => "+0630", + "MSK" => "+0300", + "MST" => "+0800", + "MUT" => "+0400", + "MVT" => "+0500", + "MYT" => "+0800", + "NCT" => "+1100", + "NDT" => "−0230", + "NFT" => "+1100", + "NOVT" => "+0700", + "NPT" => "+0545", + "NST" => "−0330", + "NT" => "−0330", + "NUT" => "−1100", + "NZDST" => "+1300", + "NZDT" => "+1300", + "NZST" => "+1200", + "OMST" => "+0600", + "ORAT" => "+0500", + "PDT" => "−0700", + "PET" => "−0500", + "PETT" => "+1200", + "PGT" => "+1000", + "PHOT" => "+1300", + "PHST" => "+0800", + "PHT" => "+0800", + "PKT" => "+0500", + "PMDT" => "−0200", + "PMST" => "−0300", + "PONT" => "+1100", + "PST" => "−0800", + "PWT" => "+0900", + "PYST" => "−0300", + "PYT" => "−0400", + "RET" => "+0400", + "ROTT" => "−0300", + "SAKT" => "+1100", + "SAMT" => "+0400", + "SAST" => "+0200", + "SBT" => "+1100", + "SCT" => "+0400", + "SDT" => "−1000", + "SGT" => "+0800", + "SLST" => "+0530", + "SRET" => "+1100", + "SRT" => "−0300", + "SST" => "−1100", + "SYOT" => "+0300", + "TAHT" => "−1000", + "TFT" => "+0500", + "THA" => "+0700", + "TJT" => "+0500", + "TKT" => "+1300", + "TLT" => "+0900", + "TMT" => "+0500", + "TOT" => "+1300", + "TRT" => "+0300", + "TST" => "+0800", + "TVT" => "+1200", + "ULAST" => "+0900", + "ULAT" => "+0800", + "UTC" => "+0000", + "UYST" => "−0200", + "UYT" => "−0300", + "UZT" => "+0500", + "VET" => "−0400", + "VLAT" => "+1000", + "VOLT" => "+0300", + "VOST" => "+0600", + "VUT" => "+1100", + "WAKT" => "+1200", + "WAST" => "+0200", + "WAT" => "+0100", + "WEST" => "+0100", + "WET" => "+0000", + "WGST" => "−0200", + "WGT" => "−0300", + "WIB" => "+0700", + "WIT" => "+0900", + "WITA" => "+0800", + "WST" => "+0800", + "YAKT" => "+0900", + "YEKT" => "+0500", + _ => { + return Err(OpaqueError::from_display( + "timezone abbreviation not recognized", + )) + } + } + .parse() + .with_context(|| "failed to parse timezone abbreviation")?) + } } impl Deref for ValidDate { @@ -53,7 +267,7 @@ impl FromStr for ValidDate { DateTime::parse_from_rfc3339(s) // check ISO 8601 .or_else(|_| { DateTime::parse_from_rfc2822(s) // check RFC 822 - .or_else(|_| DateTime::parse_from_str(s, "%A, %d-%b-%Y %H:%M:%S %Z")) + .or_else(|_| Self::datetime_from_rfc_850(s)) // check RFC 850 }) .with_context(|| "Failed to parse date")? @@ -104,18 +318,21 @@ mod tests { ); } - // fails, because it cannot convert timezone from an abbreviation - // #[test] - // fn test_valid_rfc_850() { - // test_valid_date_strings!( - // "Monday, 01-Jan-2001 08:58:35 UTC", - // "Tuesday, 19-Feb-82 10:14:55 PST", - // "Wednesday, 1-Jan-83 00:00:00 PDT", - // "Thursday, 30-Nov-12 16:59:59 MST", - // "Friday, 9-Mar-31 12:00:00 CST", - // "Friday, 19-Dec-99 23:59:59 EST" - // ); - // } + #[test] + fn test_valid_rfc_850() { + test_valid_date_strings!( + "Sunday, 04-Feb-24 23:59:59 GMT", + "Monday, 29-Feb-88 12:34:56 UTC", + "Tuesday, 01-Jan-80 00:00:00 EST", + "Friday, 31-Dec-99 23:59:59 CST", + "Thursday, 24-Feb-00 23:59:59 MST", + "Friday, 01-Mar-19 00:00:01 PST", + "Saturday, 31-Oct-20 13:45:30 EDT", + "Wednesday, 27-Jun-12 23:59:60 CDT", + "Monday, 03-Sep-01 01:02:03 CET", + "Tuesday, 15-Aug-95 18:00:00 PDT" + ); + } #[test] fn test_valid_iso_8601() { From 36c031cb9df68f5de9ca827b42d9309aefe1a064 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sat, 8 Feb 2025 22:48:33 +0100 Subject: [PATCH 33/37] fix timezone lookup logic --- .../x_robots_tag_components/valid_date.rs | 439 +++++++++--------- 1 file changed, 224 insertions(+), 215 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/valid_date.rs b/rama-http/src/headers/x_robots_tag_components/valid_date.rs index 556aa742..cd6e5862 100644 --- a/rama-http/src/headers/x_robots_tag_components/valid_date.rs +++ b/rama-http/src/headers/x_robots_tag_components/valid_date.rs @@ -1,8 +1,10 @@ use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; use rama_core::error::{ErrorContext, OpaqueError}; +use std::collections::HashMap; use std::fmt::{Display, Formatter}; use std::ops::Deref; use std::str::FromStr; +use std::sync::OnceLock; #[derive(Clone, Debug, Eq, PartialEq)] pub(super) struct ValidDate(DateTime); @@ -11,220 +13,6 @@ impl ValidDate { pub(super) fn new(date: DateTime) -> Self { Self(date) } - - fn datetime_from_rfc_850(s: &str) -> Result, OpaqueError> { - let (naive_date_time, remainder) = - NaiveDateTime::parse_and_remainder(s, "%A, %d-%b-%y %T") - .with_context(|| "failed to parse naive datetime")?; - - let fixed_offset = Self::offset_from_abbreviation(remainder)?; - - Ok(DateTime::from_naive_utc_and_offset(naive_date_time, fixed_offset)) - } - - fn offset_from_abbreviation(remainder: &str) -> Result { - Ok(match remainder.trim() { - "ACDT" => "+1030", - "ACST" => "+0930", - "ACT" => "−0500", - "ACWST" => "+0845", - "ADT" => "−0300", - "AEDT" => "+1100", - "AEST" => "+1000", - "AFT" => "+0430", - "AKDT" => "−0800", - "AKST" => "−0900", - "ALMT" => "+0600", - "AMST" => "−0300", - "AMT" => "+0400", - "ANAT" => "+1200", - "AQTT" => "+0500", - "ART" => "−0300", - "AST" => "−0400", - "AWST" => "+0800", - "AZOST" => "+0000", - "AZOT" => "−0100", - "AZT" => "+0400", - "BIOT" => "+0600", - "BIT" => "−1200", - "BNT" => "+0800", - "BOT" => "−0400", - "BRST" => "−0200", - "BRT" => "−0300", - "BST" => "+0600", - "BTT" => "+0600", - "CAT" => "+0200", - "CCT" => "+0630", - "CDT" => "−0500", - "CEST" => "+0200", - "CET" => "+0100", - "CHADT" => "+1345", - "CHAST" => "+1245", - "CHOST" => "+0900", - "CHOT" => "+0800", - "CHST" => "+1000", - "CHUT" => "+1000", - "CIST" => "−0800", - "CKT" => "−1000", - "CLST" => "−0300", - "CLT" => "−0400", - "COST" => "−0400", - "COT" => "−0500", - "CST" => "−0600", - "CVT" => "−0100", - "CWST" => "+0845", - "CXT" => "+0700", - "DAVT" => "+0700", - "DDUT" => "+1000", - "DFT" => "+0100", - "EASST" => "−0500", - "EAST" => "−0600", - "EAT" => "+0300", - "ECT" => "−0500", - "EDT" => "−0400", - "EEST" => "+0300", - "EET" => "+0200", - "EGST" => "+0000", - "EGT" => "−0100", - "EST" => "−0500", - "FET" => "+0300", - "FJT" => "+1200", - "FKST" => "−0300", - "FKT" => "−0400", - "FNT" => "−0200", - "GALT" => "−0600", - "GAMT" => "−0900", - "GET" => "+0400", - "GFT" => "−0300", - "GILT" => "+1200", - "GIT" => "−0900", - "GMT" => "+0000", - "GST" => "+0400", - "GYT" => "−0400", - "HAEC" => "+0200", - "HDT" => "−0900", - "HKT" => "+0800", - "HMT" => "+0500", - "HOVST" => "+0800", - "HOVT" => "+0700", - "HST" => "−1000", - "ICT" => "+0700", - "IDLW" => "−1200", - "IDT" => "+0300", - "IOT" => "+0600", - "IRDT" => "+0430", - "IRKT" => "+0800", - "IRST" => "+0330", - "IST" => "+0530", - "JST" => "+0900", - "KALT" => "+0200", - "KGT" => "+0600", - "KOST" => "+1100", - "KRAT" => "+0700", - "KST" => "+0900", - "LHST" => "+1030", - "LINT" => "+1400", - "MAGT" => "+1200", - "MART" => "−0930", - "MAWT" => "+0500", - "MDT" => "−0600", - "MEST" => "+0200", - "MET" => "+0100", - "MHT" => "+1200", - "MIST" => "+1100", - "MIT" => "−0930", - "MMT" => "+0630", - "MSK" => "+0300", - "MST" => "+0800", - "MUT" => "+0400", - "MVT" => "+0500", - "MYT" => "+0800", - "NCT" => "+1100", - "NDT" => "−0230", - "NFT" => "+1100", - "NOVT" => "+0700", - "NPT" => "+0545", - "NST" => "−0330", - "NT" => "−0330", - "NUT" => "−1100", - "NZDST" => "+1300", - "NZDT" => "+1300", - "NZST" => "+1200", - "OMST" => "+0600", - "ORAT" => "+0500", - "PDT" => "−0700", - "PET" => "−0500", - "PETT" => "+1200", - "PGT" => "+1000", - "PHOT" => "+1300", - "PHST" => "+0800", - "PHT" => "+0800", - "PKT" => "+0500", - "PMDT" => "−0200", - "PMST" => "−0300", - "PONT" => "+1100", - "PST" => "−0800", - "PWT" => "+0900", - "PYST" => "−0300", - "PYT" => "−0400", - "RET" => "+0400", - "ROTT" => "−0300", - "SAKT" => "+1100", - "SAMT" => "+0400", - "SAST" => "+0200", - "SBT" => "+1100", - "SCT" => "+0400", - "SDT" => "−1000", - "SGT" => "+0800", - "SLST" => "+0530", - "SRET" => "+1100", - "SRT" => "−0300", - "SST" => "−1100", - "SYOT" => "+0300", - "TAHT" => "−1000", - "TFT" => "+0500", - "THA" => "+0700", - "TJT" => "+0500", - "TKT" => "+1300", - "TLT" => "+0900", - "TMT" => "+0500", - "TOT" => "+1300", - "TRT" => "+0300", - "TST" => "+0800", - "TVT" => "+1200", - "ULAST" => "+0900", - "ULAT" => "+0800", - "UTC" => "+0000", - "UYST" => "−0200", - "UYT" => "−0300", - "UZT" => "+0500", - "VET" => "−0400", - "VLAT" => "+1000", - "VOLT" => "+0300", - "VOST" => "+0600", - "VUT" => "+1100", - "WAKT" => "+1200", - "WAST" => "+0200", - "WAT" => "+0100", - "WEST" => "+0100", - "WET" => "+0000", - "WGST" => "−0200", - "WGT" => "−0300", - "WIB" => "+0700", - "WIT" => "+0900", - "WITA" => "+0800", - "WST" => "+0800", - "YAKT" => "+0900", - "YEKT" => "+0500", - _ => { - return Err(OpaqueError::from_display( - "timezone abbreviation not recognized", - )) - } - } - .parse() - .with_context(|| "failed to parse timezone abbreviation")?) - } } impl Deref for ValidDate { @@ -267,7 +55,7 @@ impl FromStr for ValidDate { DateTime::parse_from_rfc3339(s) // check ISO 8601 .or_else(|_| { DateTime::parse_from_rfc2822(s) // check RFC 822 - .or_else(|_| Self::datetime_from_rfc_850(s)) + .or_else(|_| datetime_from_rfc_850(s)) // check RFC 850 }) .with_context(|| "Failed to parse date")? @@ -282,6 +70,227 @@ impl Display for ValidDate { } } +fn datetime_from_rfc_850(s: &str) -> Result, OpaqueError> { + let (naive_date_time, remainder) = NaiveDateTime::parse_and_remainder(s, "%A, %d-%b-%y %T") + .with_context(|| "failed to parse naive datetime")?; + + let fixed_offset = offset_from_abbreviation(remainder)?; + + Ok(DateTime::from_naive_utc_and_offset( + naive_date_time, + fixed_offset, + )) +} + +fn offset_from_abbreviation(remainder: &str) -> Result { + get_timezone_map() + .get(remainder.trim()) + .ok_or_else(|| OpaqueError::from_display(format!("invalid abbreviation: {}", remainder)))? + .parse() + .with_context(|| "failed to parse timezone abbreviation") +} + +static TIMEZONE_MAP: OnceLock> = OnceLock::new(); + +fn get_timezone_map() -> &'static HashMap<&'static str, &'static str> { + TIMEZONE_MAP.get_or_init(|| { + let mut map = HashMap::new(); + map.insert("ACDT", "+1030"); + map.insert("ACST", "+0930"); + map.insert("ACT", "−0500"); + map.insert("ACWST", "+0845"); + map.insert("ADT", "−0300"); + map.insert("AEDT", "+1100"); + map.insert("AEST", "+1000"); + map.insert("AFT", "+0430"); + map.insert("AKDT", "−0800"); + map.insert("AKST", "−0900"); + map.insert("ALMT", "+0600"); + map.insert("AMST", "−0300"); + map.insert("AMT", "+0400"); + map.insert("ANAT", "+1200"); + map.insert("AQTT", "+0500"); + map.insert("ART", "−0300"); + map.insert("AST", "−0400"); + map.insert("AWST", "+0800"); + map.insert("AZOST", "+0000"); + map.insert("AZOT", "−0100"); + map.insert("AZT", "+0400"); + map.insert("BIOT", "+0600"); + map.insert("BIT", "−1200"); + map.insert("BNT", "+0800"); + map.insert("BOT", "−0400"); + map.insert("BRST", "−0200"); + map.insert("BRT", "−0300"); + map.insert("BST", "+0600"); + map.insert("BTT", "+0600"); + map.insert("CAT", "+0200"); + map.insert("CCT", "+0630"); + map.insert("CDT", "−0500"); + map.insert("CEST", "+0200"); + map.insert("CET", "+0100"); + map.insert("CHADT", "+1345"); + map.insert("CHAST", "+1245"); + map.insert("CHOST", "+0900"); + map.insert("CHOT", "+0800"); + map.insert("CHST", "+1000"); + map.insert("CHUT", "+1000"); + map.insert("CIST", "−0800"); + map.insert("CKT", "−1000"); + map.insert("CLST", "−0300"); + map.insert("CLT", "−0400"); + map.insert("COST", "−0400"); + map.insert("COT", "−0500"); + map.insert("CST", "−0600"); + map.insert("CVT", "−0100"); + map.insert("CWST", "+0845"); + map.insert("CXT", "+0700"); + map.insert("DAVT", "+0700"); + map.insert("DDUT", "+1000"); + map.insert("DFT", "+0100"); + map.insert("EASST", "−0500"); + map.insert("EAST", "−0600"); + map.insert("EAT", "+0300"); + map.insert("ECT", "−0500"); + map.insert("EDT", "−0400"); + map.insert("EEST", "+0300"); + map.insert("EET", "+0200"); + map.insert("EGST", "+0000"); + map.insert("EGT", "−0100"); + map.insert("EST", "−0500"); + map.insert("FET", "+0300"); + map.insert("FJT", "+1200"); + map.insert("FKST", "−0300"); + map.insert("FKT", "−0400"); + map.insert("FNT", "−0200"); + map.insert("GALT", "−0600"); + map.insert("GAMT", "−0900"); + map.insert("GET", "+0400"); + map.insert("GFT", "−0300"); + map.insert("GILT", "+1200"); + map.insert("GIT", "−0900"); + map.insert("GMT", "+0000"); + map.insert("GST", "+0400"); + map.insert("GYT", "−0400"); + map.insert("HAEC", "+0200"); + map.insert("HDT", "−0900"); + map.insert("HKT", "+0800"); + map.insert("HMT", "+0500"); + map.insert("HOVST", "+0800"); + map.insert("HOVT", "+0700"); + map.insert("HST", "−1000"); + map.insert("ICT", "+0700"); + map.insert("IDLW", "−1200"); + map.insert("IDT", "+0300"); + map.insert("IOT", "+0600"); + map.insert("IRDT", "+0430"); + map.insert("IRKT", "+0800"); + map.insert("IRST", "+0330"); + map.insert("IST", "+0530"); + map.insert("JST", "+0900"); + map.insert("KALT", "+0200"); + map.insert("KGT", "+0600"); + map.insert("KOST", "+1100"); + map.insert("KRAT", "+0700"); + map.insert("KST", "+0900"); + map.insert("LHST", "+1030"); + map.insert("LINT", "+1400"); + map.insert("MAGT", "+1200"); + map.insert("MART", "−0930"); + map.insert("MAWT", "+0500"); + map.insert("MDT", "−0600"); + map.insert("MEST", "+0200"); + map.insert("MET", "+0100"); + map.insert("MHT", "+1200"); + map.insert("MIST", "+1100"); + map.insert("MIT", "−0930"); + map.insert("MMT", "+0630"); + map.insert("MSK", "+0300"); + map.insert("MST", "+0800"); + map.insert("MUT", "+0400"); + map.insert("MVT", "+0500"); + map.insert("MYT", "+0800"); + map.insert("NCT", "+1100"); + map.insert("NDT", "−0230"); + map.insert("NFT", "+1100"); + map.insert("NOVT", "+0700"); + map.insert("NPT", "+0545"); + map.insert("NST", "−0330"); + map.insert("NT", "−0330"); + map.insert("NUT", "−1100"); + map.insert("NZDST", "+1300"); + map.insert("NZDT", "+1300"); + map.insert("NZST", "+1200"); + map.insert("OMST", "+0600"); + map.insert("ORAT", "+0500"); + map.insert("PDT", "−0700"); + map.insert("PET", "−0500"); + map.insert("PETT", "+1200"); + map.insert("PGT", "+1000"); + map.insert("PHOT", "+1300"); + map.insert("PHST", "+0800"); + map.insert("PHT", "+0800"); + map.insert("PKT", "+0500"); + map.insert("PMDT", "−0200"); + map.insert("PMST", "−0300"); + map.insert("PONT", "+1100"); + map.insert("PST", "−0800"); + map.insert("PWT", "+0900"); + map.insert("PYST", "−0300"); + map.insert("PYT", "−0400"); + map.insert("RET", "+0400"); + map.insert("ROTT", "−0300"); + map.insert("SAKT", "+1100"); + map.insert("SAMT", "+0400"); + map.insert("SAST", "+0200"); + map.insert("SBT", "+1100"); + map.insert("SCT", "+0400"); + map.insert("SDT", "−1000"); + map.insert("SGT", "+0800"); + map.insert("SLST", "+0530"); + map.insert("SRET", "+1100"); + map.insert("SRT", "−0300"); + map.insert("SST", "−1100"); + map.insert("SYOT", "+0300"); + map.insert("TAHT", "−1000"); + map.insert("TFT", "+0500"); + map.insert("THA", "+0700"); + map.insert("TJT", "+0500"); + map.insert("TKT", "+1300"); + map.insert("TLT", "+0900"); + map.insert("TMT", "+0500"); + map.insert("TOT", "+1300"); + map.insert("TRT", "+0300"); + map.insert("TST", "+0800"); + map.insert("TVT", "+1200"); + map.insert("ULAST", "+0900"); + map.insert("ULAT", "+0800"); + map.insert("UTC", "+0000"); + map.insert("UYST", "−0200"); + map.insert("UYT", "−0300"); + map.insert("UZT", "+0500"); + map.insert("VET", "−0400"); + map.insert("VLAT", "+1000"); + map.insert("VOLT", "+0300"); + map.insert("VOST", "+0600"); + map.insert("VUT", "+1100"); + map.insert("WAKT", "+1200"); + map.insert("WAST", "+0200"); + map.insert("WAT", "+0100"); + map.insert("WEST", "+0100"); + map.insert("WET", "+0000"); + map.insert("WGST", "−0200"); + map.insert("WGT", "−0300"); + map.insert("WIB", "+0700"); + map.insert("WIT", "+0900"); + map.insert("WITA", "+0800"); + map.insert("WST", "+0800"); + map.insert("YAKT", "+0900"); + map.insert("YEKT", "+0500"); + map + }) +} + #[cfg(test)] mod tests { use super::*; From 8b6fc68612be670ebb0fb24cca38a58ee50d781f Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Sun, 9 Feb 2025 14:01:02 +0100 Subject: [PATCH 34/37] add docs --- .../robots_tag_components/builder.rs | 56 ++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs index 0f25a117..7c2a735d 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs @@ -30,6 +30,30 @@ macro_rules! no_tag_builder_field { }; } +/// Generic structure used for building a [`RobotsTag`] with compile-time validation +/// +/// # States +/// +/// - `Builder<()>` +/// - a new builder without any values +/// - can transform to `Builder` using the [`Builder::bot_name()`] function +/// - `Builder` +/// - holds a `bot_name` field, but still isn't a valid [`RobotsTag`] +/// - can transform to `Builder` by specifying a valid [`RobotsTag`] field +/// - `Builder` +/// - holds a valid [`RobotsTag`] struct, which can be further modified +/// - can be built into a [`RobotsTag`] using the [`Builder::::build()`] function +/// +/// # Examples +/// +/// ``` +/// # use rama_http::headers::x_robots_tag_components::RobotsTag; +/// let robots_tag = RobotsTag::builder() +/// .bot_name(None) +/// .no_follow(true) +/// .build(); +/// assert_eq!(robots_tag.no_follow(), true); +/// ``` #[derive(Clone, Debug, Eq, PartialEq)] pub struct Builder(T); @@ -117,8 +141,38 @@ impl Builder { robots_tag_builder_field!(no_image_ai, bool); robots_tag_builder_field!(spc, bool); + /// Adds a field based on its `&str` representation + /// + /// # Returns and Errors + /// + /// - `Result<&mut Self, OpaqueError>` + /// - `Ok(&mut Self)` + /// - when the field was valid and successfully added + /// - returns `&mut Self` wrapped inside for easier chaining of functions + /// - `Err(OpaqueError)` + /// - is of type [`headers::Error`] when the field name is not valid + /// - for composite rules (key + value), wraps the conversion error for the value + /// + /// # Examples + /// + /// ``` + /// # use std::num::ParseIntError; + /// # use rama_http::headers::x_robots_tag_components::RobotsTag; + /// let mut builder = RobotsTag::builder().bot_name(None).no_follow(true); + /// assert!(builder.add_field("nosnippet").is_ok()); + /// assert!(builder.add_field("max-snippet: 8").is_ok()); + /// assert!(builder.add_field("nonexistent").is_err_and(|e| e.is::())); + /// assert!(builder.add_field("max-video-preview: not_a_number").is_err_and(|e| e.is::())); + /// + /// let robots_tag = builder.build(); + /// + /// assert_eq!(robots_tag.no_snippet(), true); + /// assert_eq!(robots_tag.max_snippet(), 8); + /// ``` pub fn add_field(&mut self, s: &str) -> Result<&mut Self, OpaqueError> { - if let Some((key, value)) = s.trim().split_once(':') { + if let Some((key, value)) = s.split_once(':') { + let key = key.trim(); + let value = value.trim(); Ok(if key.eq_ignore_ascii_case("max-snippet") { self.set_max_snippet(value.parse().map_err(OpaqueError::from_std)?) } else if key.eq_ignore_ascii_case("max-image-preview") { From de26de8c8064cf673018d3d8864fdfda9349e981 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Mon, 10 Feb 2025 19:08:55 +0100 Subject: [PATCH 35/37] make RobotsTag read-only, enhance Builder API --- .../x_robots_tag_components/robots_tag.rs | 122 ++++++------------ .../robots_tag_components/builder.rs | 103 ++++++++++----- .../robots_tag_components/mod.rs | 1 + .../robots_tag_components/parser.rs | 7 +- 4 files changed, 118 insertions(+), 115 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs index 71c62185..fe2df87b 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs @@ -1,25 +1,15 @@ use crate::headers::util::value_string::HeaderValueString; -use crate::headers::x_robots_tag_components::robots_tag_components::Builder; +use crate::headers::x_robots_tag_components::robots_tag_components::{Builder, NoTag}; use crate::headers::x_robots_tag_components::{CustomRule, MaxImagePreviewSetting, ValidDate}; use chrono::{DateTime, Utc}; use std::fmt::{Display, Formatter}; -macro_rules! getter_setter { +macro_rules! getter { ($field:ident, $type:ty) => { paste::paste! { pub fn [<$field>](&self) -> $type { self.[<$field>] } - - pub fn [](&mut self, [<$field>]: $type) -> &mut Self { - self.[<$field>] = [<$field>]; - self - } - - pub fn [](mut self, [<$field>]: $type) -> Self { - self.[<$field>] = [<$field>]; - self - } } }; @@ -28,39 +18,29 @@ macro_rules! getter_setter { pub fn [<$field>](&self) -> Option<&$type> { self.[<$field>].as_ref() } - - pub fn [](&mut self, [<$field>]: $type) -> &mut Self { - self.[<$field>] = Some([<$field>]); - self - } - - pub fn [](mut self, [<$field>]: $type) -> Self { - self.[<$field>] = Some([<$field>]); - self - } } }; } #[derive(Clone, Debug, Eq, PartialEq)] pub struct RobotsTag { - bot_name: Option, - all: bool, - no_index: bool, - no_follow: bool, - none: bool, - no_snippet: bool, - index_if_embedded: bool, - max_snippet: u32, - max_image_preview: Option, - max_video_preview: Option, - no_translate: bool, - no_image_index: bool, - unavailable_after: Option, - no_ai: bool, - no_image_ai: bool, - spc: bool, - custom_rules: Vec, + pub(super) bot_name: Option, + pub(super) all: bool, + pub(super) no_index: bool, + pub(super) no_follow: bool, + pub(super) none: bool, + pub(super) no_snippet: bool, + pub(super) index_if_embedded: bool, + pub(super) max_snippet: u32, + pub(super) max_image_preview: Option, + pub(super) max_video_preview: Option, + pub(super) no_translate: bool, + pub(super) no_image_index: bool, + pub(super) unavailable_after: Option, + pub(super) no_ai: bool, + pub(super) no_image_ai: bool, + pub(super) spc: bool, + pub(super) custom_rules: Vec, } impl RobotsTag { @@ -86,18 +66,8 @@ impl RobotsTag { } } - pub fn add_custom_rule_simple(&mut self, key: HeaderValueString) -> &mut Self { - self.custom_rules.push(key.into()); - self - } - - pub fn add_custom_rule_composite( - &mut self, - key: HeaderValueString, - value: HeaderValueString, - ) -> &mut Self { - self.custom_rules.push((key, value).into()); - self + pub fn builder() -> Builder { + Builder::new() } pub fn custom_rules( @@ -106,25 +76,25 @@ impl RobotsTag { self.custom_rules.iter().map(|x| x.as_tuple()) } - pub fn builder() -> Builder { - Builder::new() - } + getter!(bot_name, HeaderValueString, optional); + getter!(all, bool); + getter!(no_index, bool); + getter!(no_follow, bool); + getter!(none, bool); + getter!(no_snippet, bool); + getter!(index_if_embedded, bool); + getter!(max_snippet, u32); + getter!(max_image_preview, MaxImagePreviewSetting, optional); + getter!(max_video_preview, u32, optional); + getter!(no_translate, bool); + getter!(no_image_index, bool); + getter!(no_ai, bool); + getter!(no_image_ai, bool); + getter!(spc, bool); - getter_setter!(bot_name, HeaderValueString, optional); - getter_setter!(all, bool); - getter_setter!(no_index, bool); - getter_setter!(no_follow, bool); - getter_setter!(none, bool); - getter_setter!(no_snippet, bool); - getter_setter!(index_if_embedded, bool); - getter_setter!(max_snippet, u32); - getter_setter!(max_image_preview, MaxImagePreviewSetting, optional); - getter_setter!(max_video_preview, u32, optional); - getter_setter!(no_translate, bool); - getter_setter!(no_image_index, bool); - getter_setter!(no_ai, bool); - getter_setter!(no_image_ai, bool); - getter_setter!(spc, bool); + pub fn unavailable_after(&self) -> Option<&DateTime> { + self.unavailable_after.as_deref() + } pub(super) fn is_valid_field_name(field_name: &str) -> bool { field_name.trim().eq_ignore_ascii_case("all") @@ -143,20 +113,6 @@ impl RobotsTag { || field_name.eq_ignore_ascii_case("noimageai") || field_name.eq_ignore_ascii_case("spc") } - - pub fn unavailable_after(&self) -> Option<&DateTime> { - self.unavailable_after.as_deref() - } - - pub fn set_unavailable_after(&mut self, unavailable_after: DateTime) -> &mut Self { - self.unavailable_after = Some(unavailable_after.into()); - self - } - - pub fn with_unavailable_after(mut self, unavailable_after: DateTime) -> Self { - self.unavailable_after = Some(unavailable_after.into()); - self - } } impl Display for RobotsTag { diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs index 7c2a735d..8f6d5297 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/builder.rs @@ -5,15 +5,43 @@ use headers::Error; use rama_core::error::OpaqueError; macro_rules! robots_tag_builder_field { + ($field:ident, bool) => { + paste::paste! { + pub fn [<$field>](mut self) -> Self { + self.0.[<$field>] = true; + self + } + + pub fn [](&mut self) -> &mut Self { + self.0.[<$field>] = true; + self + } + } + }; + ($field:ident, $type:ty) => { paste::paste! { pub fn [<$field>](mut self, [<$field>]: $type) -> Self { - self.0.[]([<$field>]); + self.0.[<$field>] = [<$field>]; + self + } + + pub fn [](&mut self, [<$field>]: $type) -> &mut Self { + self.0.[<$field>] = [<$field>]; + self + } + } + }; + + ($field:ident, $type:ty, optional) => { + paste::paste! { + pub fn [<$field>](mut self, [<$field>]: $type) -> Self { + self.0.[<$field>] = Some([<$field>]); self } pub fn [](&mut self, [<$field>]: $type) -> &mut Self { - self.0.[]([<$field>]); + self.0.[<$field>] = Some([<$field>]); self } } @@ -21,6 +49,14 @@ macro_rules! robots_tag_builder_field { } macro_rules! no_tag_builder_field { + ($field:ident, bool) => { + paste::paste! { + pub fn [<$field>](self) -> Builder { + Builder(RobotsTag::new_with_bot_name(self.0.bot_name)).[<$field>]() + } + } + }; + ($field:ident, $type:ty) => { paste::paste! { pub fn [<$field>](self, [<$field>]: $type) -> Builder { @@ -49,29 +85,24 @@ macro_rules! no_tag_builder_field { /// ``` /// # use rama_http::headers::x_robots_tag_components::RobotsTag; /// let robots_tag = RobotsTag::builder() -/// .bot_name(None) -/// .no_follow(true) +/// .no_follow() /// .build(); /// assert_eq!(robots_tag.no_follow(), true); /// ``` #[derive(Clone, Debug, Eq, PartialEq)] pub struct Builder(T); -impl Builder<()> { - pub fn new() -> Self { - Builder(()) - } - - pub fn bot_name(&self, bot_name: Option) -> Builder { - Builder(NoTag { bot_name }) - } -} +impl Builder<()> {} pub struct NoTag { bot_name: Option, } impl Builder { + pub fn new() -> Self { + Self(NoTag { bot_name: None }) + } + pub fn bot_name(mut self, bot_name: HeaderValueString) -> Self { self.0.bot_name = Some(bot_name); self @@ -111,7 +142,7 @@ impl Builder { } pub fn add_custom_rule_simple(&mut self, key: HeaderValueString) -> &mut Self { - self.0.add_custom_rule_simple(key); + self.0.custom_rules.push(key.into()); self } @@ -120,11 +151,21 @@ impl Builder { key: HeaderValueString, value: HeaderValueString, ) -> &mut Self { - self.0.add_custom_rule_composite(key, value); + self.0.custom_rules.push((key, value).into()); + self + } + + pub fn set_unavailable_after(&mut self, unavailable_after: DateTime) -> &mut Self { + self.0.unavailable_after = Some(unavailable_after.into()); self } - robots_tag_builder_field!(bot_name, HeaderValueString); + pub fn unavailable_after(mut self, unavailable_after: DateTime) -> Self { + self.0.unavailable_after = Some(unavailable_after.into()); + self + } + + robots_tag_builder_field!(bot_name, HeaderValueString, optional); robots_tag_builder_field!(all, bool); robots_tag_builder_field!(no_index, bool); robots_tag_builder_field!(no_follow, bool); @@ -132,11 +173,10 @@ impl Builder { robots_tag_builder_field!(no_snippet, bool); robots_tag_builder_field!(index_if_embedded, bool); robots_tag_builder_field!(max_snippet, u32); - robots_tag_builder_field!(max_image_preview, MaxImagePreviewSetting); - robots_tag_builder_field!(max_video_preview, u32); + robots_tag_builder_field!(max_image_preview, MaxImagePreviewSetting, optional); + robots_tag_builder_field!(max_video_preview, u32, optional); robots_tag_builder_field!(no_translate, bool); robots_tag_builder_field!(no_image_index, bool); - robots_tag_builder_field!(unavailable_after, DateTime); robots_tag_builder_field!(no_ai, bool); robots_tag_builder_field!(no_image_ai, bool); robots_tag_builder_field!(spc, bool); @@ -158,7 +198,8 @@ impl Builder { /// ``` /// # use std::num::ParseIntError; /// # use rama_http::headers::x_robots_tag_components::RobotsTag; - /// let mut builder = RobotsTag::builder().bot_name(None).no_follow(true); + /// let mut builder = RobotsTag::builder().no_follow(); + /// /// assert!(builder.add_field("nosnippet").is_ok()); /// assert!(builder.add_field("max-snippet: 8").is_ok()); /// assert!(builder.add_field("nonexistent").is_err_and(|e| e.is::())); @@ -191,27 +232,27 @@ impl Builder { fn add_simple_field(&mut self, s: &str) -> Result<&mut Self, OpaqueError> { Ok(if s.eq_ignore_ascii_case("all") { - self.set_all(true) + self.set_all() } else if s.eq_ignore_ascii_case("noindex") { - self.set_no_index(true) + self.set_no_index() } else if s.eq_ignore_ascii_case("nofollow") { - self.set_no_follow(true) + self.set_no_follow() } else if s.eq_ignore_ascii_case("none") { - self.set_none(true) + self.set_none() } else if s.eq_ignore_ascii_case("nosnippet") { - self.set_no_snippet(true) + self.set_no_snippet() } else if s.eq_ignore_ascii_case("indexifembedded") { - self.set_index_if_embedded(true) + self.set_index_if_embedded() } else if s.eq_ignore_ascii_case("notranslate") { - self.set_no_translate(true) + self.set_no_translate() } else if s.eq_ignore_ascii_case("noimageindex") { - self.set_no_image_index(true) + self.set_no_image_index() } else if s.eq_ignore_ascii_case("noai") { - self.set_no_ai(true) + self.set_no_ai() } else if s.eq_ignore_ascii_case("noimageai") { - self.set_no_image_ai(true) + self.set_no_image_ai() } else if s.eq_ignore_ascii_case("spc") { - self.set_spc(true) + self.set_spc() } else { return Err(OpaqueError::from_std(Error::invalid())); }) diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs index 797103e0..edf87ff1 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/mod.rs @@ -1,5 +1,6 @@ mod builder; pub use builder::Builder; +pub(super) use builder::NoTag; mod parser; pub(crate) use parser::Parser; diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs index 36cd8ef4..1308c182 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag_components/parser.rs @@ -30,8 +30,13 @@ impl<'a> Iterator for Parser<'_> { Err(e) => return Some(Err(e)), }; + let mut builder = RobotsTag::builder(); + let mut builder = if let Some((field, rest)) = remaining.split_once(',') { - match RobotsTag::builder().bot_name(bot_name).add_field(field) { + if let Some(bot_name) = bot_name { + builder.set_bot_name(bot_name); + } + match builder.add_field(field) { Ok(builder) => { remaining = rest.trim(); builder From 51afbf821dad7bceabb5c60b0122c60f4119dd26 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Mon, 10 Feb 2025 19:17:28 +0100 Subject: [PATCH 36/37] remove unused functions --- rama-http/src/headers/util/value_string.rs | 31 ++----------------- .../x_robots_tag_components/custom_rule.rs | 24 +------------- 2 files changed, 4 insertions(+), 51 deletions(-) diff --git a/rama-http/src/headers/util/value_string.rs b/rama-http/src/headers/util/value_string.rs index ff5ca78b..c572decf 100644 --- a/rama-http/src/headers/util/value_string.rs +++ b/rama-http/src/headers/util/value_string.rs @@ -1,5 +1,3 @@ -use crate::headers::Error; -use bytes::Bytes; use http::header::HeaderValue; use std::fmt::{Display, Formatter}; use std::{ @@ -17,29 +15,6 @@ pub struct HeaderValueString { } impl HeaderValueString { - pub(crate) fn from_val(val: &HeaderValue) -> Result { - if val.to_str().is_ok() { - Ok(HeaderValueString { value: val.clone() }) - } else { - Err(Error::invalid()) - } - } - - pub(crate) fn from_string(src: String) -> Option { - // A valid `str` (the argument)... - let bytes = Bytes::from(src); - HeaderValue::from_maybe_shared(bytes) - .ok() - .map(|value| HeaderValueString { value }) - } - - pub(crate) fn from_static(src: &'static str) -> HeaderValueString { - // A valid `str` (the argument)... - HeaderValueString { - value: HeaderValue::from_static(src), - } - } - pub(crate) fn as_str(&self) -> &str { // HeaderValueString is only created from HeaderValues // that have validated they are also UTF-8 strings. @@ -48,13 +23,13 @@ impl HeaderValueString { } impl fmt::Debug for HeaderValueString { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { fmt::Debug::fmt(self.as_str(), f) } } -impl fmt::Display for HeaderValueString { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +impl Display for HeaderValueString { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } diff --git a/rama-http/src/headers/x_robots_tag_components/custom_rule.rs b/rama-http/src/headers/x_robots_tag_components/custom_rule.rs index 78d67411..0e67ec84 100644 --- a/rama-http/src/headers/x_robots_tag_components/custom_rule.rs +++ b/rama-http/src/headers/x_robots_tag_components/custom_rule.rs @@ -1,4 +1,4 @@ -use crate::headers::util::value_string::{FromStrError, HeaderValueString}; +use crate::headers::util::value_string::HeaderValueString; #[derive(Clone, Debug, Eq, PartialEq)] pub(super) struct CustomRule { @@ -7,28 +7,6 @@ pub(super) struct CustomRule { } impl CustomRule { - pub(super) fn new(key: &str) -> Result { - Ok(Self { - key: key.parse()?, - value: None, - }) - } - - pub(super) fn with_value(key: &str, value: &str) -> Result { - Ok(Self { - key: key.parse()?, - value: Some(value.parse()?), - }) - } - - pub(super) fn key(&self) -> &HeaderValueString { - &self.key - } - - pub(super) fn value(&self) -> Option<&HeaderValueString> { - self.value.as_ref() - } - pub(super) fn as_tuple(&self) -> (&HeaderValueString, &Option) { (&self.key, &self.value) } From 00468dcdeb9584b7cf890c9f51df17fbe33a05b0 Mon Sep 17 00:00:00 2001 From: hafihaf123 Date: Tue, 11 Feb 2025 11:13:54 +0100 Subject: [PATCH 37/37] add docs --- .../max_image_preview_setting.rs | 15 +++++++++++++++ .../headers/x_robots_tag_components/robots_tag.rs | 3 +++ .../headers/x_robots_tag_components/valid_date.rs | 10 ++-------- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs b/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs index 5d0fe378..ed601289 100644 --- a/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs +++ b/rama-http/src/headers/x_robots_tag_components/max_image_preview_setting.rs @@ -3,6 +3,21 @@ use std::fmt::Formatter; use std::str::FromStr; use MaxImagePreviewSetting::*; +/// The maximum size of an image preview for this page in a search results. +/// If omitted, search engines may show an image preview of the default size. +/// If you don't want search engines to use larger thumbnail images, +/// specify a max-image-preview value of standard or none. [^source] +/// +/// # Values +/// +/// - `none` +/// - No image preview is to be shown. +/// - `standard` +/// - A default image preview may be shown. +/// - `large` +/// - A larger image preview, up to the width of the viewport, may be shown. +/// +/// [^source]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Robots-Tag#max-image-preview_setting #[derive(Clone, Debug, Eq, PartialEq)] pub enum MaxImagePreviewSetting { None, diff --git a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs index fe2df87b..0ab180c7 100644 --- a/rama-http/src/headers/x_robots_tag_components/robots_tag.rs +++ b/rama-http/src/headers/x_robots_tag_components/robots_tag.rs @@ -22,6 +22,9 @@ macro_rules! getter { }; } +/// A single element of [`XRobotsTag`] corresponding to the valid values for one `bot_name` +/// +/// [List of directives](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Robots-Tag#directives) #[derive(Clone, Debug, Eq, PartialEq)] pub struct RobotsTag { pub(super) bot_name: Option, diff --git a/rama-http/src/headers/x_robots_tag_components/valid_date.rs b/rama-http/src/headers/x_robots_tag_components/valid_date.rs index cd6e5862..ce57ab26 100644 --- a/rama-http/src/headers/x_robots_tag_components/valid_date.rs +++ b/rama-http/src/headers/x_robots_tag_components/valid_date.rs @@ -9,12 +9,6 @@ use std::sync::OnceLock; #[derive(Clone, Debug, Eq, PartialEq)] pub(super) struct ValidDate(DateTime); -impl ValidDate { - pub(super) fn new(date: DateTime) -> Self { - Self(date) - } -} - impl Deref for ValidDate { type Target = DateTime; @@ -31,7 +25,7 @@ impl From for DateTime { impl From> for ValidDate { fn from(value: DateTime) -> Self { - Self::new(value) + Self(value) } } @@ -51,7 +45,7 @@ impl FromStr for ValidDate { type Err = OpaqueError; fn from_str(s: &str) -> Result { - Ok(ValidDate::new( + Ok(ValidDate( DateTime::parse_from_rfc3339(s) // check ISO 8601 .or_else(|_| { DateTime::parse_from_rfc2822(s) // check RFC 822