diff --git a/src/attachments.rs b/src/attachments.rs index 4ff9e37..7dfd0de 100644 --- a/src/attachments.rs +++ b/src/attachments.rs @@ -10,14 +10,16 @@ use sha2::{digest::generic_array::functional::FunctionalSequence, Digest, Sha256 use tracing::{debug, trace, warn}; use uuid::Uuid; -use crate::{cohost::attachment_id_to_url, path::AttachmentsPath}; +use crate::{ + cohost::{attachment_id_to_url, Cacheable}, + path::AttachmentsPath, +}; pub trait AttachmentsContext { fn store(&self, input_path: &Path) -> eyre::Result; fn cache_imported(&self, url: &str, post_basename: &str) -> eyre::Result; - fn cache_cohost_file(&self, id: &str) -> eyre::Result; + fn cache_cohost_resource(&self, cacheable: &Cacheable) -> eyre::Result; fn cache_cohost_thumb(&self, id: &str) -> eyre::Result; - fn cache_cohost_emoji(&self, id: &str, url: &str) -> eyre::Result; } pub struct RealAttachmentsContext; @@ -47,14 +49,27 @@ impl AttachmentsContext for RealAttachmentsContext { } #[tracing::instrument(skip(self))] - fn cache_cohost_file(&self, id: &str) -> eyre::Result { - let url = attachment_id_to_url(id); - let dir = &*AttachmentsPath::ROOT; - let path = dir.join(id)?; - create_dir_all(&path)?; - cache_cohost_attachment(&url, &path, None)?; + fn cache_cohost_resource(&self, cacheable: &Cacheable) -> eyre::Result { + match cacheable { + Cacheable::Attachment { id } => { + let url = attachment_id_to_url(id); + let dir = &*AttachmentsPath::ROOT; + let path = dir.join(id)?; + create_dir_all(&path)?; + cache_cohost_attachment(&url, &path, None)?; + + cached_attachment_url(id, dir) + } - cached_attachment_url(id, dir) + Cacheable::Static { filename, url } => { + let dir = &*AttachmentsPath::COHOST_STATIC; + create_dir_all(dir)?; + let path = dir.join(filename)?; + trace!(?path); + + cache_other_cohost_resource(url, &path) + } + } } #[tracing::instrument(skip(self))] @@ -71,16 +86,6 @@ impl AttachmentsContext for RealAttachmentsContext { cached_attachment_url(id, dir) } - - #[tracing::instrument(skip(self))] - fn cache_cohost_emoji(&self, id: &str, url: &str) -> eyre::Result { - let dir = &*AttachmentsPath::EMOJI; - let path = dir.join(id)?; - trace!(?path); - create_dir_all(&path)?; - - cache_imported_attachment(&url, &path) - } } fn cached_attachment_url(id: &str, dir: &AttachmentsPath) -> eyre::Result { @@ -208,3 +213,23 @@ fn cache_cohost_attachment( Ok(path) } + +fn cache_other_cohost_resource(url: &str, path: &AttachmentsPath) -> eyre::Result { + // if we can open the cached file... + if let Ok(mut file) = File::open(path) { + trace!("cache hit: {url}"); + // check if we can read the file. + let mut result = Vec::default(); + file.read_to_end(&mut result)?; + return Ok(path.clone()); + } + + trace!("cache miss"); + debug!("downloading resource"); + + let response = reqwest::blocking::get(url)?; + let result = response.bytes()?.to_vec(); + File::create(path)?.write_all(&result)?; + + Ok(path.clone()) +} diff --git a/src/cohost.rs b/src/cohost.rs index ebe0f00..1bb5ece 100644 --- a/src/cohost.rs +++ b/src/cohost.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use serde::Deserialize; use serde_json::Value; +use tracing::warn; use crate::Author; @@ -200,39 +201,98 @@ pub enum Ast { Text { value: String }, } -pub fn attachment_id_to_url(id: &str) -> String { - format!("https://cohost.org/rc/attachment-redirect/{id}") +#[derive(Debug, PartialEq)] +pub enum Cacheable<'url> { + /// cohost attachment (staging.cohostcdn.org/attachment or an equivalent redirect) + Attachment { id: &'url str }, + /// cohost emote, eggbug logo, or other static asset (cohost.org/static) + Static { filename: &'url str, url: &'url str }, } -pub fn attachment_url_to_id(url: &str) -> Option<&str> { - url.strip_prefix("https://cohost.org/rc/attachment-redirect/") - .or_else(|| url.strip_prefix("https://cohost.org/api/v1/attachments/")) - .or_else(|| url.strip_prefix("https://staging.cohostcdn.org/attachment/")) - .filter(|id_plus| id_plus.len() >= 36) - .map(|id_plus| &id_plus[..36]) +impl<'url> Cacheable<'url> { + pub fn attachment(id: &'url str) -> Self { + Self::Attachment { id } + } + + pub fn r#static(filename: &'url str, url: &'url str) -> Self { + Self::Static { filename, url } + } + + pub fn from_url(url: &'url str) -> Option { + if let Some(attachment_id) = url + .strip_prefix("https://cohost.org/rc/attachment-redirect/") + .or_else(|| url.strip_prefix("https://cohost.org/api/v1/attachments/")) + .or_else(|| url.strip_prefix("https://staging.cohostcdn.org/attachment/")) + .filter(|id_plus| id_plus.len() >= 36) + .map(|id_plus| &id_plus[..36]) + { + return Some(Self::attachment(attachment_id)); + } + if let Some(static_filename) = url.strip_prefix("https://cohost.org/static/") { + if static_filename.is_empty() { + warn!(url, "skipping cohost static path without filename"); + return None; + } + if static_filename.contains(['/', '?']) { + warn!( + url, + "skipping cohost static path with unexpected slash or query string", + ); + return None; + } + return Some(Self::r#static(static_filename, url)); + } + + None + } } -pub fn custom_emoji_url_to_id(url: &str) -> Option<&str> { - url.strip_prefix("https://cohost.org/static/") - .and_then(|basename| basename.rsplit_once(".")) - .map(|(id, _extension)| id) +pub fn attachment_id_to_url(id: &str) -> String { + format!("https://cohost.org/rc/attachment-redirect/{id}") } #[test] -fn test_attachment_url_to_id() { +fn test_cacheable() { assert_eq!( - attachment_url_to_id( - "https://cohost.org/rc/attachment-redirect/44444444-4444-4444-4444-444444444444?query" + Cacheable::from_url( + "https://cohost.org/rc/attachment-redirect/44444444-4444-4444-4444-444444444444?query", ), - Some("44444444-4444-4444-4444-444444444444") + Some(Cacheable::Attachment { + id: "44444444-4444-4444-4444-444444444444", + }), ); assert_eq!( - attachment_url_to_id( - "https://cohost.org/api/v1/attachments/44444444-4444-4444-4444-444444444444?query" + Cacheable::from_url( + "https://cohost.org/api/v1/attachments/44444444-4444-4444-4444-444444444444?query", ), - Some("44444444-4444-4444-4444-444444444444") + Some(Cacheable::Attachment { + id: "44444444-4444-4444-4444-444444444444", + }), + ); + assert_eq!( + Cacheable::from_url( + "https://staging.cohostcdn.org/attachment/44444444-4444-4444-4444-444444444444/file.jpg?query", + ), + Some(Cacheable::Attachment { + id: "44444444-4444-4444-4444-444444444444", + }), + ); + assert_eq!( + Cacheable::from_url("https://cohost.org/static/f0c56e99113f1a0731b4.svg"), + Some(Cacheable::Static { + filename: "f0c56e99113f1a0731b4.svg", + url: "https://cohost.org/static/f0c56e99113f1a0731b4.svg", + }), + ); + assert_eq!(Cacheable::from_url("https://cohost.org/static/"), None); + assert_eq!( + Cacheable::from_url("https://cohost.org/static/f0c56e99113f1a0731b4.svg?query"), + None + ); + assert_eq!( + Cacheable::from_url("https://cohost.org/static/subdir/f0c56e99113f1a0731b4.svg"), + None ); - assert_eq!(attachment_url_to_id("https://staging.cohostcdn.org/attachment/44444444-4444-4444-4444-444444444444/file.jpg?query"), Some("44444444-4444-4444-4444-444444444444")); } impl From<&PostingProject> for Author { diff --git a/src/command/cohost2autost.rs b/src/command/cohost2autost.rs index 5b056b3..8d159e6 100644 --- a/src/command/cohost2autost.rs +++ b/src/command/cohost2autost.rs @@ -17,10 +17,7 @@ use tracing::{info, trace, warn}; use crate::{ attachments::{AttachmentsContext, RealAttachmentsContext}, - cohost::{ - attachment_id_to_url, attachment_url_to_id, custom_emoji_url_to_id, Ask, AskingProject, - Ast, Attachment, Block, Post, - }, + cohost::{attachment_id_to_url, Ask, AskingProject, Ast, Attachment, Block, Cacheable, Post}, css::{parse_inline_style, serialise_inline_style, InlineStyleToken}, dom::{ convert_idl_to_content_attribute, create_element, create_fragment, debug_attributes_seen, @@ -191,7 +188,9 @@ fn convert_single_chost( let template = CohostImgTemplate { data_cohost_src: attachment_id_to_url(&attachmentId), thumb_src: context.cache_cohost_thumb(&attachmentId)?.site_path()?, - src: context.cache_cohost_file(&attachmentId)?.site_path()?, + src: context + .cache_cohost_resource(&Cacheable::attachment(&attachmentId))? + .site_path()?, alt: altText, width, height, @@ -205,7 +204,9 @@ fn convert_single_chost( } => { let template = CohostAudioTemplate { data_cohost_src: attachment_id_to_url(&attachmentId), - src: context.cache_cohost_file(&attachmentId)?.site_path()?, + src: context + .cache_cohost_resource(&Cacheable::attachment(&attachmentId))? + .site_path()?, artist, title, }; @@ -369,14 +370,15 @@ fn process_chost_fragment( for attr in attrs.iter_mut() { if attr_names.contains(&attr.name) { let old_url = attr.value.to_str().to_owned(); - if let Some(id) = attachment_url_to_id(&old_url) { + if let Some(cacheable) = Cacheable::from_url(&old_url) { trace!( - "found cohost attachment url in <{} {}>: {old_url}", + url = old_url, + "found cohost resource url in <{} {}>", name.local, attr.name.local ); attr.value = context - .cache_cohost_file(id)? + .cache_cohost_resource(&cacheable)? .site_path()? .base_relative_url() .into(); @@ -399,11 +401,12 @@ fn process_chost_fragment( for token in parse_inline_style(old_style) { tokens.push(match token { InlineStyleToken::Url(url) => { - if let Some(id) = attachment_url_to_id(&url) { + if let Some(cacheable) = Cacheable::from_url(&url) { + trace!(url, "found cohost resource url in inline style"); has_any_cohost_attachment_urls = true; InlineStyleToken::Url( context - .cache_cohost_file(id)? + .cache_cohost_resource(&cacheable)? .site_path()? .base_relative_url(), ) @@ -464,12 +467,12 @@ fn process_chost_fragment( }); } if let Some(url) = url { - if let Some(id) = custom_emoji_url_to_id(url) { - trace!("found cohost custom emoji url in : {url}"); + if let Some(cacheable) = Cacheable::from_url(url) { + trace!(url, "found cohost resource url in "); attrs.borrow_mut().push(Attribute { name: QualName::attribute("src"), value: context - .cache_cohost_emoji(id, url)? + .cache_cohost_resource(&cacheable)? .site_path()? .base_relative_url() .into(), @@ -508,19 +511,17 @@ fn test_render_markdown_block() -> eyre::Result<()> { ) -> eyre::Result { unreachable!(); } - fn cache_cohost_file(&self, id: &str) -> eyre::Result { - Ok(AttachmentsPath::ROOT.join(&format!("{id}"))?) + fn cache_cohost_resource(&self, cacheable: &Cacheable) -> eyre::Result { + Ok(match cacheable { + Cacheable::Attachment { id } => AttachmentsPath::ROOT.join(&format!("{id}"))?, + Cacheable::Static { filename, .. } => { + AttachmentsPath::COHOST_STATIC.join(&format!("{filename}"))? + } + }) } fn cache_cohost_thumb(&self, id: &str) -> eyre::Result { Ok(AttachmentsPath::THUMBS.join(&format!("{id}"))?) } - fn cache_cohost_emoji( - &self, - _id: &str, - _url: &str, - ) -> eyre::Result { - unreachable!() - } } let n = "\n"; diff --git a/src/path.rs b/src/path.rs index 9bb0aab..6ee467c 100644 --- a/src/path.rs +++ b/src/path.rs @@ -307,8 +307,14 @@ impl AttachmentsPath { LazyLock::new(|| Self::new(AttachmentsKind::ROOT.into()).expect("guaranteed by argument")); pub const THUMBS: LazyLock = LazyLock::new(|| Self::ROOT.join("thumbs").expect("guaranteed by argument")); + #[deprecated(since = "1.2.0", note = "cohost emoji are now stored in COHOST_STATIC")] pub const EMOJI: LazyLock = LazyLock::new(|| Self::ROOT.join("emoji").expect("guaranteed by argument")); + pub const COHOST_STATIC: LazyLock = LazyLock::new(|| { + Self::ROOT + .join("cohost-static") + .expect("guaranteed by argument") + }); pub fn site_path(&self) -> eyre::Result { let mut result = SitePath::ATTACHMENTS.to_owned();