From ff770ceb2b500831518955431be3852cb9714c1b Mon Sep 17 00:00:00 2001 From: j-mendez Date: Sat, 11 Jan 2025 07:57:59 -0500 Subject: [PATCH] chore(tests): fix transformations build parse --- spider/src/page.rs | 15 +++++++++++++++ spider_transformations/src/transformation/mod.rs | 16 ++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/spider/src/page.rs b/spider/src/page.rs index 046cd7960..7a36ed481 100644 --- a/spider/src/page.rs +++ b/spider/src/page.rs @@ -489,6 +489,21 @@ fn get_error_status( get_error_status_base(should_retry, error_for_status).map(std::sync::Arc::new) } + +/// Instantiate a new page without scraping it and with the base URL parsed (used for testing purposes). +#[cfg(not(feature = "decentralized"))] +pub fn build_with_parse(url: &str, res: PageResponse) -> Page { + let mut page = build(url, res); + page.set_url_parsed_direct_empty(); + page +} + +/// Instantiate a new page without scraping it and with the base URL parsed (used for testing purposes). +#[cfg(feature = "decentralized")] +pub fn build_with_parse(url: &str, res: PageResponse) -> Page { + build(url, res) +} + /// Instantiate a new page without scraping it (used for testing purposes). #[cfg(not(feature = "decentralized"))] pub fn build(url: &str, res: PageResponse) -> Page { diff --git a/spider_transformations/src/transformation/mod.rs b/spider_transformations/src/transformation/mod.rs index 397d7ef31..3a7275bb4 100644 --- a/spider_transformations/src/transformation/mod.rs +++ b/spider_transformations/src/transformation/mod.rs @@ -13,7 +13,7 @@ mod tests { use maud::PreEscaped; use spider::{ bytes::Bytes, - page::build, + page::build_with_parse, tokio::{self, fs::File}, utils::PageResponse, }; @@ -51,7 +51,7 @@ mod tests { let mut page_response = PageResponse::default(); page_response.content = Some(Bytes::from(markup).into()); - let page = build(url, page_response); + let page = build_with_parse(url, page_response); conf.return_format = ReturnFormat::Markdown; @@ -111,7 +111,7 @@ mod tests { let mut page_response = PageResponse::default(); conf.return_format = ReturnFormat::XML; page_response.content = Some(Bytes::from(markup).into()); - let page = build(url, page_response); + let page = build_with_parse(url, page_response); let content = content::transform_content(&page, &conf, &None, &None, &None); assert!( content @@ -129,7 +129,7 @@ mod tests { let mut page_response = PageResponse::default(); page_response.content = Some(Bytes::from(markup).into()); - let page = build(url, page_response); + let page = build_with_parse(url, page_response); conf.return_format = ReturnFormat::Markdown; @@ -154,7 +154,7 @@ mod tests { let mut page_response = PageResponse::default(); page_response.content = Some(Bytes::from(markup).into()); - let page = build(url, page_response); + let page = build_with_parse(url, page_response); conf.return_format = ReturnFormat::Markdown; @@ -179,7 +179,7 @@ mod tests { let mut page_response = PageResponse::default(); page_response.content = Some(Bytes::from(markup).into()); - let page = build(url, page_response); + let page = build_with_parse(url, page_response); conf.return_format = ReturnFormat::Text; @@ -204,7 +204,7 @@ mod tests { let mut page_response = PageResponse::default(); page_response.content = Some(Bytes::from(markup).into()); - let page = build(url, page_response); + let page = build_with_parse(url, page_response); conf.return_format = ReturnFormat::Text; @@ -236,7 +236,7 @@ mod tests { page_response.content = Some(b.into()); - let page = build("https://example.com/example.pdf", page_response); + let page = build_with_parse("https://example.com/example.pdf", page_response); let content = content::transform_content(&page, &conf, &None, &None, &None);