Skip to content

Commit

Permalink
cohost2autost: cache hotlinked cohost static assets
Browse files Browse the repository at this point in the history
  • Loading branch information
delan committed Dec 28, 2024
1 parent df82503 commit 819a72c
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 63 deletions.
65 changes: 45 additions & 20 deletions src/attachments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,16 @@ use sha2::{digest::generic_array::functional::FunctionalSequence, Digest, Sha256
use tracing::{debug, trace, warn};
use uuid::Uuid;

use crate::{cohost::attachment_id_to_url, path::AttachmentsPath};
use crate::{
cohost::{attachment_id_to_url, Cacheable},
path::AttachmentsPath,
};

pub trait AttachmentsContext {
fn store(&self, input_path: &Path) -> eyre::Result<AttachmentsPath>;
fn cache_imported(&self, url: &str, post_basename: &str) -> eyre::Result<AttachmentsPath>;
fn cache_cohost_file(&self, id: &str) -> eyre::Result<AttachmentsPath>;
fn cache_cohost_resource(&self, cacheable: &Cacheable) -> eyre::Result<AttachmentsPath>;
fn cache_cohost_thumb(&self, id: &str) -> eyre::Result<AttachmentsPath>;
fn cache_cohost_emoji(&self, id: &str, url: &str) -> eyre::Result<AttachmentsPath>;
}

pub struct RealAttachmentsContext;
Expand Down Expand Up @@ -47,14 +49,27 @@ impl AttachmentsContext for RealAttachmentsContext {
}

#[tracing::instrument(skip(self))]
fn cache_cohost_file(&self, id: &str) -> eyre::Result<AttachmentsPath> {
let url = attachment_id_to_url(id);
let dir = &*AttachmentsPath::ROOT;
let path = dir.join(id)?;
create_dir_all(&path)?;
cache_cohost_attachment(&url, &path, None)?;
fn cache_cohost_resource(&self, cacheable: &Cacheable) -> eyre::Result<AttachmentsPath> {
match cacheable {
Cacheable::Attachment { id } => {
let url = attachment_id_to_url(id);
let dir = &*AttachmentsPath::ROOT;
let path = dir.join(id)?;
create_dir_all(&path)?;
cache_cohost_attachment(&url, &path, None)?;

cached_attachment_url(id, dir)
}

cached_attachment_url(id, dir)
Cacheable::Static { filename, url } => {
let dir = &*AttachmentsPath::COHOST_STATIC;
create_dir_all(dir)?;
let path = dir.join(filename)?;
trace!(?path);

cache_other_cohost_resource(url, &path)
}
}
}

#[tracing::instrument(skip(self))]
Expand All @@ -71,16 +86,6 @@ impl AttachmentsContext for RealAttachmentsContext {

cached_attachment_url(id, dir)
}

#[tracing::instrument(skip(self))]
fn cache_cohost_emoji(&self, id: &str, url: &str) -> eyre::Result<AttachmentsPath> {
let dir = &*AttachmentsPath::EMOJI;
let path = dir.join(id)?;
trace!(?path);
create_dir_all(&path)?;

cache_imported_attachment(&url, &path)
}
}

fn cached_attachment_url(id: &str, dir: &AttachmentsPath) -> eyre::Result<AttachmentsPath> {
Expand Down Expand Up @@ -208,3 +213,23 @@ fn cache_cohost_attachment(

Ok(path)
}

/// Caches a cohost resource that is not an attachment (such as a static asset) at `path`.
///
/// If a file can already be opened at `path`, this is treated as a cache hit: the file is read
/// to the end to confirm that it is readable, and nothing is downloaded. Otherwise `url` is
/// fetched with a blocking GET request and the response body is written to `path`.
///
/// # Errors
///
/// Returns an error if the cached file can be opened but not read, if the request fails or the
/// server answers with an HTTP error status (4xx or 5xx), or if the file at `path` cannot be
/// created or written.
fn cache_other_cohost_resource(url: &str, path: &AttachmentsPath) -> eyre::Result<AttachmentsPath> {
    // if we can open the cached file...
    if let Ok(mut file) = File::open(path) {
        trace!("cache hit: {url}");
        // check if we can read the file.
        let mut result = Vec::default();
        file.read_to_end(&mut result)?;
        return Ok(path.clone());
    }

    trace!("cache miss");
    debug!("downloading resource");

    // Reject 4xx/5xx responses up front: otherwise the body of an error page would be written
    // to the cache and then served as the asset on every later cache hit.
    let response = reqwest::blocking::get(url)?.error_for_status()?;
    let body = response.bytes()?;
    File::create(path)?.write_all(&body)?;

    Ok(path.clone())
}
100 changes: 80 additions & 20 deletions src/cohost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::collections::HashMap;

use serde::Deserialize;
use serde_json::Value;
use tracing::warn;

use crate::Author;

Expand Down Expand Up @@ -200,39 +201,98 @@ pub enum Ast {
Text { value: String },
}

pub fn attachment_id_to_url(id: &str) -> String {
format!("https://cohost.org/rc/attachment-redirect/{id}")
/// A cohost-hosted resource, identified by the pieces of its URL.
///
/// All fields borrow their strings for the lifetime `'url`; [`Cacheable::from_url`] slices them
/// directly out of the URL it is given, so no allocation is needed to build a value.
#[derive(Debug, PartialEq)]
pub enum Cacheable<'url> {
/// cohost attachment (staging.cohostcdn.org/attachment or an equivalent redirect)
///
/// `id` is the attachment id; when built by [`Cacheable::from_url`], it is the first 36 bytes
/// that follow the recognised attachment URL prefix.
Attachment { id: &'url str },
/// cohost emote, eggbug logo, or other static asset (cohost.org/static)
///
/// When built by [`Cacheable::from_url`], `filename` is the part of the URL after
/// `https://cohost.org/static/` and `url` is the complete original URL.
Static { filename: &'url str, url: &'url str },
}

pub fn attachment_url_to_id(url: &str) -> Option<&str> {
url.strip_prefix("https://cohost.org/rc/attachment-redirect/")
.or_else(|| url.strip_prefix("https://cohost.org/api/v1/attachments/"))
.or_else(|| url.strip_prefix("https://staging.cohostcdn.org/attachment/"))
.filter(|id_plus| id_plus.len() >= 36)
.map(|id_plus| &id_plus[..36])
impl<'url> Cacheable<'url> {
    /// Creates a [`Cacheable::Attachment`] for the given attachment id.
    pub fn attachment(id: &'url str) -> Self {
        Self::Attachment { id }
    }

    /// Creates a [`Cacheable::Static`] from a filename and the full URL it is served at.
    pub fn r#static(filename: &'url str, url: &'url str) -> Self {
        Self::Static { filename, url }
    }

    /// Classifies `url` as a cacheable cohost resource, if it is one.
    ///
    /// * URLs starting with one of the known attachment prefixes yield
    ///   [`Cacheable::Attachment`], with the id being the first 36 bytes after the prefix.
    ///   Anything after those 36 bytes (a file name, a query string) is ignored.
    /// * URLs of the form `https://cohost.org/static/<filename>` yield [`Cacheable::Static`],
    ///   provided `<filename>` is non-empty and contains neither `/` nor `?`; otherwise a
    ///   warning is logged and `None` is returned.
    /// * Every other URL yields `None`.
    ///
    /// This never panics, even when the text after an attachment prefix is shorter than an id
    /// or contains multi-byte characters.
    pub fn from_url(url: &'url str) -> Option<Self> {
        if let Some(attachment_id) = url
            .strip_prefix("https://cohost.org/rc/attachment-redirect/")
            .or_else(|| url.strip_prefix("https://cohost.org/api/v1/attachments/"))
            .or_else(|| url.strip_prefix("https://staging.cohostcdn.org/attachment/"))
            // Checked slicing: `get` returns `None` both when fewer than 36 bytes remain and
            // when byte 36 is not a character boundary, where `&id_plus[..36]` would panic.
            .and_then(|id_plus| id_plus.get(..36))
        {
            return Some(Self::attachment(attachment_id));
        }
        if let Some(static_filename) = url.strip_prefix("https://cohost.org/static/") {
            if static_filename.is_empty() {
                warn!(url, "skipping cohost static path without filename");
                return None;
            }
            // The filename becomes a single path component in the cache directory, so
            // subdirectories and query strings are rejected rather than guessed at.
            if static_filename.contains(['/', '?']) {
                warn!(
                    url,
                    "skipping cohost static path with unexpected slash or query string",
                );
                return None;
            }
            return Some(Self::r#static(static_filename, url));
        }

        None
    }
}

pub fn custom_emoji_url_to_id(url: &str) -> Option<&str> {
url.strip_prefix("https://cohost.org/static/")
.and_then(|basename| basename.rsplit_once("."))
.map(|(id, _extension)| id)
/// Builds the cohost attachment-redirect URL for the attachment with the given id.
///
/// The id is appended verbatim to the endpoint; it is neither validated nor escaped.
pub fn attachment_id_to_url(id: &str) -> String {
    let endpoint = "https://cohost.org/rc/attachment-redirect/";
    [endpoint, id].concat()
}

#[test]
fn test_attachment_url_to_id() {
fn test_cacheable() {
assert_eq!(
attachment_url_to_id(
"https://cohost.org/rc/attachment-redirect/44444444-4444-4444-4444-444444444444?query"
Cacheable::from_url(
"https://cohost.org/rc/attachment-redirect/44444444-4444-4444-4444-444444444444?query",
),
Some("44444444-4444-4444-4444-444444444444")
Some(Cacheable::Attachment {
id: "44444444-4444-4444-4444-444444444444",
}),
);
assert_eq!(
attachment_url_to_id(
"https://cohost.org/api/v1/attachments/44444444-4444-4444-4444-444444444444?query"
Cacheable::from_url(
"https://cohost.org/api/v1/attachments/44444444-4444-4444-4444-444444444444?query",
),
Some("44444444-4444-4444-4444-444444444444")
Some(Cacheable::Attachment {
id: "44444444-4444-4444-4444-444444444444",
}),
);
assert_eq!(
Cacheable::from_url(
"https://staging.cohostcdn.org/attachment/44444444-4444-4444-4444-444444444444/file.jpg?query",
),
Some(Cacheable::Attachment {
id: "44444444-4444-4444-4444-444444444444",
}),
);
assert_eq!(
Cacheable::from_url("https://cohost.org/static/f0c56e99113f1a0731b4.svg"),
Some(Cacheable::Static {
filename: "f0c56e99113f1a0731b4.svg",
url: "https://cohost.org/static/f0c56e99113f1a0731b4.svg",
}),
);
assert_eq!(Cacheable::from_url("https://cohost.org/static/"), None);
assert_eq!(
Cacheable::from_url("https://cohost.org/static/f0c56e99113f1a0731b4.svg?query"),
None
);
assert_eq!(
Cacheable::from_url("https://cohost.org/static/subdir/f0c56e99113f1a0731b4.svg"),
None
);
assert_eq!(attachment_url_to_id("https://staging.cohostcdn.org/attachment/44444444-4444-4444-4444-444444444444/file.jpg?query"), Some("44444444-4444-4444-4444-444444444444"));
}

impl From<&PostingProject> for Author {
Expand Down
47 changes: 24 additions & 23 deletions src/command/cohost2autost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@ use tracing::{info, trace, warn};

use crate::{
attachments::{AttachmentsContext, RealAttachmentsContext},
cohost::{
attachment_id_to_url, attachment_url_to_id, custom_emoji_url_to_id, Ask, AskingProject,
Ast, Attachment, Block, Post,
},
cohost::{attachment_id_to_url, Ask, AskingProject, Ast, Attachment, Block, Cacheable, Post},
css::{parse_inline_style, serialise_inline_style, InlineStyleToken},
dom::{
convert_idl_to_content_attribute, create_element, create_fragment, debug_attributes_seen,
Expand Down Expand Up @@ -191,7 +188,9 @@ fn convert_single_chost(
let template = CohostImgTemplate {
data_cohost_src: attachment_id_to_url(&attachmentId),
thumb_src: context.cache_cohost_thumb(&attachmentId)?.site_path()?,
src: context.cache_cohost_file(&attachmentId)?.site_path()?,
src: context
.cache_cohost_resource(&Cacheable::attachment(&attachmentId))?
.site_path()?,
alt: altText,
width,
height,
Expand All @@ -205,7 +204,9 @@ fn convert_single_chost(
} => {
let template = CohostAudioTemplate {
data_cohost_src: attachment_id_to_url(&attachmentId),
src: context.cache_cohost_file(&attachmentId)?.site_path()?,
src: context
.cache_cohost_resource(&Cacheable::attachment(&attachmentId))?
.site_path()?,
artist,
title,
};
Expand Down Expand Up @@ -369,14 +370,15 @@ fn process_chost_fragment(
for attr in attrs.iter_mut() {
if attr_names.contains(&attr.name) {
let old_url = attr.value.to_str().to_owned();
if let Some(id) = attachment_url_to_id(&old_url) {
if let Some(cacheable) = Cacheable::from_url(&old_url) {
trace!(
"found cohost attachment url in <{} {}>: {old_url}",
url = old_url,
"found cohost resource url in <{} {}>",
name.local,
attr.name.local
);
attr.value = context
.cache_cohost_file(id)?
.cache_cohost_resource(&cacheable)?
.site_path()?
.base_relative_url()
.into();
Expand All @@ -399,11 +401,12 @@ fn process_chost_fragment(
for token in parse_inline_style(old_style) {
tokens.push(match token {
InlineStyleToken::Url(url) => {
if let Some(id) = attachment_url_to_id(&url) {
if let Some(cacheable) = Cacheable::from_url(&url) {
trace!(url, "found cohost resource url in inline style");
has_any_cohost_attachment_urls = true;
InlineStyleToken::Url(
context
.cache_cohost_file(id)?
.cache_cohost_resource(&cacheable)?
.site_path()?
.base_relative_url(),
)
Expand Down Expand Up @@ -464,12 +467,12 @@ fn process_chost_fragment(
});
}
if let Some(url) = url {
if let Some(id) = custom_emoji_url_to_id(url) {
trace!("found cohost custom emoji url in <CustomEmoji url>: {url}");
if let Some(cacheable) = Cacheable::from_url(url) {
trace!(url, "found cohost resource url in <CustomEmoji url>");
attrs.borrow_mut().push(Attribute {
name: QualName::attribute("src"),
value: context
.cache_cohost_emoji(id, url)?
.cache_cohost_resource(&cacheable)?
.site_path()?
.base_relative_url()
.into(),
Expand Down Expand Up @@ -508,19 +511,17 @@ fn test_render_markdown_block() -> eyre::Result<()> {
) -> eyre::Result<AttachmentsPath> {
unreachable!();
}
fn cache_cohost_file(&self, id: &str) -> eyre::Result<AttachmentsPath> {
Ok(AttachmentsPath::ROOT.join(&format!("{id}"))?)
fn cache_cohost_resource(&self, cacheable: &Cacheable) -> eyre::Result<AttachmentsPath> {
Ok(match cacheable {
Cacheable::Attachment { id } => AttachmentsPath::ROOT.join(&format!("{id}"))?,
Cacheable::Static { filename, .. } => {
AttachmentsPath::COHOST_STATIC.join(&format!("{filename}"))?
}
})
}
fn cache_cohost_thumb(&self, id: &str) -> eyre::Result<AttachmentsPath> {
Ok(AttachmentsPath::THUMBS.join(&format!("{id}"))?)
}
fn cache_cohost_emoji(
&self,
_id: &str,
_url: &str,
) -> eyre::Result<crate::path::AttachmentsPath> {
unreachable!()
}
}

let n = "\n";
Expand Down
6 changes: 6 additions & 0 deletions src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,14 @@ impl AttachmentsPath {
LazyLock::new(|| Self::new(AttachmentsKind::ROOT.into()).expect("guaranteed by argument"));
pub const THUMBS: LazyLock<Self> =
LazyLock::new(|| Self::ROOT.join("thumbs").expect("guaranteed by argument"));
#[deprecated(since = "1.2.0", note = "cohost emoji are now stored in COHOST_STATIC")]
pub const EMOJI: LazyLock<Self> =
LazyLock::new(|| Self::ROOT.join("emoji").expect("guaranteed by argument"));
pub const COHOST_STATIC: LazyLock<Self> = LazyLock::new(|| {
Self::ROOT
.join("cohost-static")
.expect("guaranteed by argument")
});

pub fn site_path(&self) -> eyre::Result<SitePath> {
let mut result = SitePath::ATTACHMENTS.to_owned();
Expand Down

0 comments on commit 819a72c

Please sign in to comment.