From 2f45cc764fab1fefdd1ca6f979d5d21d25089e5c Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Fri, 15 Dec 2023 14:40:11 +0100 Subject: [PATCH 01/20] Wipperoni --- .DS_Store | Bin 0 -> 8196 bytes lib/src/config.rs | 2 +- lib/src/config/backends.rs | 62 ++++++++++++++++++++++++++++++ lib/src/execute.rs | 75 ++++++++++++++++++++++++++++++++++--- lib/src/lib.rs | 6 ++- lib/src/linking.rs | 4 +- lib/src/logging.rs | 40 +++++++++++++++----- lib/src/service.rs | 6 +-- lib/src/session.rs | 16 +++++++- 9 files changed, 188 insertions(+), 23 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4fad5728a9ef73ac297d95e13a0997acc347b64b GIT binary patch literal 8196 zcmeHMziSjh6#gc=Bu7L@p~>1U_DbSku!hr45KF9yv+H+ zdRq?V4j;-PD|bRs2A%V#CLJoXUV2eL6sRhYawx`4bg_qFaDHD|yyeIGd9&H-gi zu6+G=`uo?%+hM#jGd|e$z6~VzS&D@*E_N|OD?rEck7|a`;o+B;2j71h4pZ~=^O8w@ zR=~%;Y$CML!(PB+?r51jT*SwJckR^}e-bTmtI+^2PB|%O?I1@xz>7Y=DI-4K`xhRb zsq@!ajRttpvBl%+rgV;vI|k20e7py5Jv3vG;eLEVjv~i=FUUPwuWhCz~Mdb2-@DJUqGnNk8vXIi(_X zIX;i%!z!o1W%AgeHbdONBdYV1S0nP`SfG&_>hu}eKF6bH6TN0ul#I_;;qymic2{M8 z5^#@}{a@ctF;UJ8xzSu_GB@-yO`o+>wk8mj;R literal 0 HcmV?d00001 diff --git a/lib/src/config.rs b/lib/src/config.rs index 370c7a24..d84d4f7f 100644 --- a/lib/src/config.rs +++ b/lib/src/config.rs @@ -46,7 +46,7 @@ pub use self::geolocation::Geolocation; /// Types and deserializers for object store configuration settings. mod object_store; -pub use crate::object_store::ObjectStores; +pub use crate::object_store::{ObjectKey, ObjectStoreKey, ObjectStores}; /// Types and deserializers for secret store configuration settings. mod secret_store; diff --git a/lib/src/config/backends.rs b/lib/src/config/backends.rs index 4c1bfd7a..25036bc4 100644 --- a/lib/src/config/backends.rs +++ b/lib/src/config/backends.rs @@ -5,7 +5,22 @@ use { std::{collections::HashMap, sync::Arc}, }; +use std::pin::Pin; + +use futures::Future; +use http::{Request, Response}; +use hyper::Body; + pub use self::client_cert_info::{ClientCertError, ClientCertInfo}; +use crate::ExecuteCtx; + +pub enum BackendExperiment { + /// Simulate sending an HTTP request to this endpoint but never actually reach out. + InMemory(InMemoryBackend), + + /// Use the backend to send and actual HTTP request. + Live(Backend), +} /// A single backend definition. #[derive(Clone, Debug)] @@ -16,6 +31,53 @@ pub struct Backend { pub use_sni: bool, pub grpc: bool, pub client_cert: Option, + // Instead of sending an HTTP request for this backend, + // use the following handler to produce a response. + // pub handler: Option, +} + +// #[derive(Clone, Debug)] +pub struct InMemoryBackend { + backend: Backend, + handler: BackendHandler, +} + +// #[derive(Clone)] +pub struct Handler { + handler: BackendHandler, +} + +impl Clone for Handler { + fn clone(&self) -> Self { + Self { + handler: self.handler.clone(), + } + } +} + +pub type BackendHandler = + Arc) -> Pin>>>>>; //Pin>>>; + +// fn make_handler( +// ctx: ExecuteCtx, +// ) -> impl Fn(Request) -> Pin>>> { +// // move |y: i32| Box::pin(async_add(x, y)) + +// // move |req| ctx.handle_request(req, "127.0.0.1".parse().unwrap()) +// todo!() +// } + +// async fn async_add(x: i32, y: i32) -> i32 { +// x + y +// } + +// #[derive(Clone, Debug)] +pub enum Wat { + /// Just an opaque handler + Handler(), + + /// A loaded fastly service backend. + Chained(ExecuteCtx), } /// A map of [`Backend`] definitions, keyed by their name. 
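Note on the `BackendHandler` alias introduced above: it is the usual boxed async-closure shape, a `Fn` from a request to a pinned, boxed future that resolves to a response. A minimal, self-contained sketch of building one follows; the `make_echo_handler` helper is illustrative only, and the exact trait bounds in the patch may differ.

use std::{future::Future, pin::Pin, sync::Arc};

use http::{Request, Response};
use hyper::Body;

// Same general shape as the `BackendHandler` alias in this patch.
type BackendHandler =
    Arc<Box<dyn Fn(Request<Body>) -> Pin<Box<dyn Future<Output = Response<Body>>>>>>;

// Hypothetical constructor: wraps a trivial "echo" closure in the alias above.
fn make_echo_handler() -> BackendHandler {
    Arc::new(Box::new(|req: Request<Body>| {
        // Annotating the future as a trait object makes the closure's return
        // type line up with the alias.
        let fut: Pin<Box<dyn Future<Output = Response<Body>>>> =
            Box::pin(async move { Response::new(req.into_body()) });
        fut
    }))
}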
diff --git a/lib/src/execute.rs b/lib/src/execute.rs index bfbec0fe..2195a9c8 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -1,11 +1,10 @@ //! Guest code execution. -use std::time::SystemTime; - +use crate::{config::UnknownImportBehavior, logging::LogEndpoint}; +use std::{collections::BTreeMap, sync::RwLock, time::SystemTime}; +use tokio::sync::mpsc::{Receiver, Sender as MspcSender}; +use tracing::field::Iter; use wasmtime::GuestProfiler; - -use crate::config::UnknownImportBehavior; - use { crate::{ body::Body, @@ -78,6 +77,48 @@ pub struct ExecuteCtx { /// this must refer to a directory, while in run mode it names /// a file. guest_profile_path: Arc>, + /// Endpoints that should be monitored. Allows reading logged message lines to that endpoint. + endpoints: Arc, +} + +#[derive(Clone)] +pub struct Endpoints { + // #[allow(clippy::type_complexity)] + // todo iterator + pub endpoints: Arc, MspcSender>>>>, +} + +impl Endpoints { + pub fn new() -> Self { + Self { + endpoints: Arc::new(RwLock::new(BTreeMap::new())), + } + } + + /// Registers a listener for the given endpoint name. + /// Any messages the invocation sends to this endpoint will be captured by this listener. + pub fn register_listener>>(&self, name: T) -> EndpointListener { + let (sender, receiver) = tokio::sync::mpsc::channel(1000); // todo arbitrary limit right now. + self.endpoints.write().unwrap().insert(name.into(), sender); + + EndpointListener { receiver } + } +} + +pub struct EndpointListener { + receiver: Receiver>, +} + +impl EndpointListener { + // Todo blocking for now. + pub fn messages(&mut self) -> Vec> { + let mut messages = vec![]; + while let Some(msg) = self.receiver.blocking_recv() { + messages.push(msg); + } + + messages + } } impl ExecuteCtx { @@ -133,6 +174,7 @@ impl ExecuteCtx { epoch_increment_thread, epoch_increment_stop, guest_profile_path: Arc::new(guest_profile_path), + endpoints: Arc::new(Endpoints::new()), }) } @@ -230,6 +272,12 @@ impl ExecuteCtx { &self.tls_config } + /// Set the endpoints for this execution context. + pub fn with_endpoints(mut self, endpoints: Endpoints) -> Self { + self.endpoints = Arc::new(endpoints); + self + } + /// Asynchronously handle a request. 
/// /// This method fully instantiates the wasm module housed within the `ExecuteCtx`, @@ -314,6 +362,7 @@ impl ExecuteCtx { .status(hyper::StatusCode::INTERNAL_SERVER_ERROR) .body(Body::from(body.as_bytes())) .unwrap() + .into() } }; @@ -342,6 +391,7 @@ impl ExecuteCtx { self.config_path.clone(), self.object_store.clone(), self.secret_stores.clone(), + self.endpoints.clone(), ); let guest_profile_path = self.guest_profile_path.as_deref().map(|path| { @@ -437,6 +487,7 @@ impl ExecuteCtx { self.config_path.clone(), self.object_store.clone(), self.secret_stores.clone(), + self.endpoints.clone(), ); let profiler = self.guest_profile_path.is_some().then(|| { @@ -483,6 +534,20 @@ impl ExecuteCtx { } } +// pub struct ExecutionResult { +// pub response: Response, +// pub logs: Vec, +// } + +// impl From> for ExecutionResult { +// fn from(response: Response) -> Self { +// Self { +// response, +// logs: vec![], +// } +// } +// } + fn write_profile(store: &mut wasmtime::Store, guest_profile_path: Option<&PathBuf>) { if let (Some(profile), Some(path)) = (store.data_mut().take_guest_profiler(), guest_profile_path) diff --git a/lib/src/lib.rs b/lib/src/lib.rs index a2383aff..aabd258b 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -32,6 +32,10 @@ mod upstream; mod wiggle_abi; pub use { - error::Error, execute::ExecuteCtx, service::ViceroyService, upstream::BackendConnector, + error::Error, + execute::{EndpointListener, Endpoints, ExecuteCtx}, + http, hyper, + service::ViceroyService, + upstream::BackendConnector, wasmtime::ProfilingStrategy, }; diff --git a/lib/src/linking.rs b/lib/src/linking.rs index 24fd90a6..a4de2f3e 100644 --- a/lib/src/linking.rs +++ b/lib/src/linking.rs @@ -132,13 +132,13 @@ fn make_wasi_ctx(ctx: &ExecuteCtx, session: &Session) -> Result); +/// A logging endpoint. +pub struct LogEndpoint { + name: Vec, + sender: Option>>, +} lazy_static! { /// The underlying writer to use for all log messages. It defaults to `stdout`, @@ -18,9 +22,12 @@ lazy_static! { } impl LogEndpoint { - /// Allocate a new `LogEndpoint` with the given name. - pub fn new(name: &[u8]) -> LogEndpoint { - LogEndpoint(name.to_owned()) + /// Allocate a new `LogEndpoint` with the given name and optional sender. + pub fn new(name: &[u8], sender: Option>>) -> LogEndpoint { + LogEndpoint { + name: name.to_owned(), + sender, + } } /// Write a log entry to this endpoint. @@ -43,11 +50,18 @@ impl LogEndpoint { } // Accumulate log entry into a buffer before writing, while escaping newlines - let mut to_write = - Vec::with_capacity(msg.len() + self.0.len() + LOG_ENDPOINT_DELIM.len() + 1); + // A listener just takes the messages line by line, while the stdout format is with delimiters. 
+ let mut to_write = if self.sender.is_some() { + Vec::with_capacity(msg.len()) + } else { + let buf_len = msg.len() + self.name.len() + LOG_ENDPOINT_DELIM.len() + 1; + let mut buf = Vec::with_capacity(buf_len); + + buf.extend_from_slice(&self.name); + buf.extend_from_slice(LOG_ENDPOINT_DELIM); + buf + }; - to_write.extend_from_slice(&self.0); - to_write.extend_from_slice(LOG_ENDPOINT_DELIM); for &byte in msg { if byte == b'\n' { to_write.extend_from_slice(br"\n"); @@ -57,7 +71,13 @@ impl LogEndpoint { } to_write.push(b'\n'); - LOG_WRITER.lock().unwrap().write_all(&to_write) + if let Some(ref sender) = self.sender { + sender.try_send(to_write).expect("todo"); + // sender.blocking_send(to_write).expect("todo"); + Ok(()) + } else { + LOG_WRITER.lock().unwrap().write_all(&to_write) + } } } diff --git a/lib/src/service.rs b/lib/src/service.rs index eb9201be..8bbf9a3b 100644 --- a/lib/src/service.rs +++ b/lib/src/service.rs @@ -63,9 +63,9 @@ impl ViceroyService { /// each time a new request is sent. This function will only return if an error occurs. // FIXME KTM 2020-06-22: Once `!` is stabilized, this should be `Result`. pub async fn serve(self, addr: SocketAddr) -> Result<(), hyper::Error> { - let server = hyper::Server::bind(&addr).serve(self); - event!(Level::INFO, "Listening on http://{}", server.local_addr()); - server.await?; + // let server = hyper::Server::bind(&addr).serve(self); + // event!(Level::INFO, "Listening on http://{}", server.local_addr()); + // server.await?; Ok(()) } } diff --git a/lib/src/session.rs b/lib/src/session.rs index 41b9d866..3fa37b46 100644 --- a/lib/src/session.rs +++ b/lib/src/session.rs @@ -7,6 +7,7 @@ pub use async_item::{ AsyncItem, PeekableTask, PendingKvDeleteTask, PendingKvInsertTask, PendingKvLookupTask, }; +use crate::execute::Endpoints; use { self::downstream::DownstreamResponse, crate::{ @@ -144,6 +145,7 @@ impl Session { config_path: Arc>, object_store: Arc, secret_stores: Arc, + endpoints: Arc, ) -> Session { let (parts, body) = req.into_parts(); let downstream_req_original_headers = parts.headers.clone(); @@ -180,6 +182,18 @@ impl Session { config_path, req_id, } + .write_endpoints(endpoints) + } + + // Todo: Haxx, it's better to run this code in the constructor and initialize `log_endpoints_by_name` and `log_endpoints`. + fn write_endpoints(mut self, endpoints: Arc) -> Self { + for (name, sender) in endpoints.endpoints.read().unwrap().iter() { + let endpoint = LogEndpoint::new(name, Some(sender.clone())); + let handle = self.log_endpoints.push(endpoint); + self.log_endpoints_by_name.insert(name.to_owned(), handle); + } + + self } // ----- Downstream Request API ----- @@ -528,7 +542,7 @@ impl Session { if let Some(handle) = self.log_endpoints_by_name.get(name).copied() { return handle; } - let endpoint = LogEndpoint::new(name); + let endpoint = LogEndpoint::new(name, None); let handle = self.log_endpoints.push(endpoint); self.log_endpoints_by_name.insert(name.to_owned(), handle); handle From e33e7c30efe8907799db58f40a789e1a13ce4f3c Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Tue, 9 Jan 2024 18:09:37 +0100 Subject: [PATCH 02/20] In memory backends. 
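A backend can now carry an optional `Handler` wrapping a user-supplied `InMemoryBackendHandler`; when it is set, `send_request` in `upstream.rs` awaits `handler.handle(req)` instead of building a hyper client. A rough usage sketch, where `CannedResponse` is illustrative and the import path assumes the `config` re-exports added in this patch:

use async_trait::async_trait;
use http::{Request, Response};
use hyper::Body;
use viceroy_lib::config::{Handler, InMemoryBackendHandler};

// Illustrative handler that answers every request with a fixed body.
struct CannedResponse;

#[async_trait]
impl InMemoryBackendHandler for CannedResponse {
    async fn handle(&self, _req: Request<Body>) -> Response<Body> {
        Response::new(Body::from("hello from the in-memory backend"))
    }
}

// Attached to a backend definition roughly as:
//     Backend { handler: Some(Handler::new(Box::new(CannedResponse))), ..rest }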
--- Cargo.lock | 1 + Cargo.toml | 12 ++-- cli/Cargo.toml | 19 +++--- cli/tests/integration/common/backends.rs | 1 + lib/Cargo.toml | 8 +-- lib/src/config.rs | 4 +- lib/src/config/backends.rs | 75 +++++++++--------------- lib/src/execute.rs | 3 +- lib/src/lib.rs | 1 + lib/src/service.rs | 6 +- lib/src/session.rs | 2 +- lib/src/upstream.rs | 34 ++++++----- lib/src/wiggle_abi/req_impl.rs | 4 +- 13 files changed, 78 insertions(+), 92 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 03aa88fc..b2d7f176 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2263,6 +2263,7 @@ name = "viceroy-lib" version = "0.9.4" dependencies = [ "anyhow", + "async-trait", "bytes", "bytesize", "cfg-if", diff --git a/Cargo.toml b/Cargo.toml index e5b15096..b6d09a92 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,20 +1,15 @@ [workspace] -members = [ - "cli", - "lib", -] +members = ["cli", "lib"] resolver = "2" # Exclude our integration test fixtures, which need to be compiled to wasm # (managed by the Makefile) -exclude = [ - "test-fixtures", -] +exclude = ["test-fixtures"] # Specify `cli` as the default workspace member to operate on. This means that # commands like `cargo run` will run the CLI binary by default. # See: https://doc.rust-lang.org/cargo/reference/workspaces.html#package-selection -default-members = [ "cli" ] +default-members = ["cli"] [profile.dev] # Since some of the integration tests involve compiling Wasm, a little optimization goes a long way @@ -36,6 +31,7 @@ tracing = "0.1.37" tracing-futures = "0.2.5" futures = "0.3.24" url = "2.3.1" +async-trait = "0.1.74" # Wasmtime dependencies wasi-common = "13.0.0" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 98fdaebe..3bcdbf2e 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -15,14 +15,14 @@ categories = [ "development-tools", "network-programming", "simulation", - "wasm" + "wasm", ] include = [ - "../README.md", - "../CHANGELOG.md", - "../SECURITY.md", - "../doc/logo.png", - "src/**/*" + "../README.md", + "../CHANGELOG.md", + "../SECURITY.md", + "../doc/logo.png", + "src/**/*", ] [[bin]] @@ -38,7 +38,12 @@ serde_json = { workspace = true } clap = { workspace = true } rustls = { workspace = true } rustls-pemfile = { workspace = true } -tls-listener = { version = "^0.7.0", features = ["rustls", "hyper-h1", "tokio-net", "rt"] } +tls-listener = { version = "^0.7.0", features = [ + "rustls", + "hyper-h1", + "tokio-net", + "rt", +] } tokio = { workspace = true } tokio-rustls = { workspace = true } tracing = { workspace = true } diff --git a/cli/tests/integration/common/backends.rs b/cli/tests/integration/common/backends.rs index e2e284d8..538433dd 100644 --- a/cli/tests/integration/common/backends.rs +++ b/cli/tests/integration/common/backends.rs @@ -75,6 +75,7 @@ impl TestBackends { use_sni: backend.use_sni, grpc: false, client_cert: None, + handler: None, }; backends.insert(name.to_string(), Arc::new(backend_config)); } diff --git a/lib/Cargo.toml b/lib/Cargo.toml index b98b4a76..d75be487 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -9,12 +9,7 @@ documentation = "https://docs.rs/viceroy-lib" homepage = "https://github.com/fastly/Viceroy" repository = "https://github.com/fastly/Viceroy" keywords = ["wasm", "fastly"] -categories = [ - "development-tools", - "network-programming", - "simulation", - "wasm" -] +categories = ["development-tools", "network-programming", "simulation", "wasm"] include = [ "../CHANGELOG.md", "../SECURITY.md", @@ -57,6 +52,7 @@ wasmtime = { workspace = true } wasmtime-wasi = { workspace = true } wasmtime-wasi-nn = { 
workspace = true } wiggle = { workspace = true } +async-trait.workspace = true [dev-dependencies] tempfile = "3.6.0" diff --git a/lib/src/config.rs b/lib/src/config.rs index d84d4f7f..48150e5e 100644 --- a/lib/src/config.rs +++ b/lib/src/config.rs @@ -29,7 +29,9 @@ pub type Dictionaries = HashMap; /// Types and deserializers for backend configuration settings. mod backends; -pub use self::backends::{Backend, ClientCertError, ClientCertInfo}; +pub use self::backends::{ + Backend, ClientCertError, ClientCertInfo, Handler, InMemoryBackendHandler, +}; pub type Backends = HashMap>; diff --git a/lib/src/config/backends.rs b/lib/src/config/backends.rs index 25036bc4..24d1b27d 100644 --- a/lib/src/config/backends.rs +++ b/lib/src/config/backends.rs @@ -1,26 +1,14 @@ mod client_cert_info; +use async_trait::async_trait; +use http::{Request, Response}; +use hyper::Body; use { hyper::{header::HeaderValue, Uri}, std::{collections::HashMap, sync::Arc}, }; -use std::pin::Pin; - -use futures::Future; -use http::{Request, Response}; -use hyper::Body; - pub use self::client_cert_info::{ClientCertError, ClientCertInfo}; -use crate::ExecuteCtx; - -pub enum BackendExperiment { - /// Simulate sending an HTTP request to this endpoint but never actually reach out. - InMemory(InMemoryBackend), - - /// Use the backend to send and actual HTTP request. - Live(Backend), -} /// A single backend definition. #[derive(Clone, Debug)] @@ -31,53 +19,43 @@ pub struct Backend { pub use_sni: bool, pub grpc: bool, pub client_cert: Option, - // Instead of sending an HTTP request for this backend, - // use the following handler to produce a response. - // pub handler: Option, -} -// #[derive(Clone, Debug)] -pub struct InMemoryBackend { - backend: Backend, - handler: BackendHandler, + /// Handler that will be called instead of making an HTTP call. + pub handler: Option, } -// #[derive(Clone)] +#[derive(Clone)] pub struct Handler { - handler: BackendHandler, + handler: Arc>, } -impl Clone for Handler { - fn clone(&self) -> Self { +impl Handler { + pub fn new(handler: Box) -> Self { Self { - handler: self.handler.clone(), + handler: Arc::new(handler), } } } -pub type BackendHandler = - Arc) -> Pin>>>>>; //Pin>>>; - -// fn make_handler( -// ctx: ExecuteCtx, -// ) -> impl Fn(Request) -> Pin>>> { -// // move |y: i32| Box::pin(async_add(x, y)) - -// // move |req| ctx.handle_request(req, "127.0.0.1".parse().unwrap()) -// todo!() -// } +impl std::ops::Deref for Handler { + type Target = dyn InMemoryBackendHandler; -// async fn async_add(x: i32, y: i32) -> i32 { -// x + y -// } + fn deref(&self) -> &Self::Target { + &**self.handler + } +} -// #[derive(Clone, Debug)] -pub enum Wat { - /// Just an opaque handler - Handler(), +impl std::fmt::Debug for Handler { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Handler") + .field("handler", &"opaque handler function".to_string()) + .finish() + } +} - /// A loaded fastly service backend. - Chained(ExecuteCtx), +#[async_trait] +pub trait InMemoryBackendHandler: Send + Sync + 'static { + async fn handle(&self, req: Request) -> Response; } /// A map of [`Backend`] definitions, keyed by their name. @@ -211,6 +189,7 @@ mod deserialization { grpc, // NOTE: Update when we support client certs in static backends client_cert: None, + handler: None, }) } } diff --git a/lib/src/execute.rs b/lib/src/execute.rs index 2195a9c8..79ad8bd2 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -1,9 +1,8 @@ //! Guest code execution. 
-use crate::{config::UnknownImportBehavior, logging::LogEndpoint}; +use crate::config::UnknownImportBehavior; use std::{collections::BTreeMap, sync::RwLock, time::SystemTime}; use tokio::sync::mpsc::{Receiver, Sender as MspcSender}; -use tracing::field::Iter; use wasmtime::GuestProfiler; use { crate::{ diff --git a/lib/src/lib.rs b/lib/src/lib.rs index aabd258b..23bfe9e8 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -32,6 +32,7 @@ mod upstream; mod wiggle_abi; pub use { + async_trait, error::Error, execute::{EndpointListener, Endpoints, ExecuteCtx}, http, hyper, diff --git a/lib/src/service.rs b/lib/src/service.rs index 8bbf9a3b..eb9201be 100644 --- a/lib/src/service.rs +++ b/lib/src/service.rs @@ -63,9 +63,9 @@ impl ViceroyService { /// each time a new request is sent. This function will only return if an error occurs. // FIXME KTM 2020-06-22: Once `!` is stabilized, this should be `Result`. pub async fn serve(self, addr: SocketAddr) -> Result<(), hyper::Error> { - // let server = hyper::Server::bind(&addr).serve(self); - // event!(Level::INFO, "Listening on http://{}", server.local_addr()); - // server.await?; + let server = hyper::Server::bind(&addr).serve(self); + event!(Level::INFO, "Listening on http://{}", server.local_addr()); + server.await?; Ok(()) } } diff --git a/lib/src/session.rs b/lib/src/session.rs index 3fa37b46..bdbef33e 100644 --- a/lib/src/session.rs +++ b/lib/src/session.rs @@ -86,7 +86,7 @@ pub struct Session { /// Populated prior to guest execution, and never modified. geolocation: Arc, /// The backends dynamically added by the program. This is separated from - /// `backends` because we do not want one session to effect the backends + /// `backends` because we do not want one session to affect the backends /// available to any other session. dynamic_backends: Backends, /// The TLS configuration for this execution. diff --git a/lib/src/upstream.rs b/lib/src/upstream.rs index 4bcbcb4f..fd36420f 100644 --- a/lib/src/upstream.rs +++ b/lib/src/upstream.rs @@ -265,24 +265,30 @@ pub fn send_request( req.headers_mut().insert(hyper::header::HOST, host); *req.uri_mut() = uri; + let handler = backend.handler.clone(); // Haxx let h2only = backend.grpc; + async move { - let mut builder = Client::builder(); + let basic_response = if let Some(handler) = handler { + handler.handle(req).await + } else { + let mut builder = Client::builder(); - if req.version() == Version::HTTP_2 { - builder.http2_only(true); - } + if req.version() == Version::HTTP_2 { + builder.http2_only(true); + } - let basic_response = builder - .set_host(false) - .http2_only(h2only) - .build(connector) - .request(req) - .await - .map_err(|e| { - eprintln!("Error: {:?}", e); - e - })?; + builder + .set_host(false) + .http2_only(h2only) + .build(connector) + .request(req) + .await + .map_err(|e| { + eprintln!("Error: {:?}", e); + e + })? 
+ }; if try_decompression && basic_response diff --git a/lib/src/wiggle_abi/req_impl.rs b/lib/src/wiggle_abi/req_impl.rs index 3d6737a7..21ea0635 100644 --- a/lib/src/wiggle_abi/req_impl.rs +++ b/lib/src/wiggle_abi/req_impl.rs @@ -2,12 +2,11 @@ use super::types::SendErrorDetail; use super::SecretStoreError; -use crate::config::ClientCertInfo; +use crate::config::{Backend, ClientCertInfo}; use crate::secret_store::SecretLookup; use { crate::{ - config::Backend, error::Error, session::{AsyncItem, PeekableTask, Session, ViceroyRequestMetadata}, upstream, @@ -389,6 +388,7 @@ impl FastlyHttpReq for Session { use_sni, grpc, client_cert, + handler: None, }; if !self.add_backend(name, new_backend) { From 741713ace9b9f668e5bd50b0f80d97e2a11b24ff Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Sun, 4 Feb 2024 19:36:41 +0000 Subject: [PATCH 03/20] Allow intercepting dynamic backends creation. --- .DS_Store | Bin 8196 -> 8196 bytes lib/src/config.rs | 3 ++- lib/src/config/backends.rs | 5 +++++ lib/src/execute.rs | 16 +++++++++++++++- lib/src/session.rs | 6 +++++- lib/src/wiggle_abi/req_impl.rs | 5 +++++ 6 files changed, 32 insertions(+), 3 deletions(-) diff --git a/.DS_Store b/.DS_Store index 4fad5728a9ef73ac297d95e13a0997acc347b64b..6fdc147e9cb3865fc137bac1c43c337fbb17bb25 100644 GIT binary patch delta 402 zcmZp1XmQw}DiEjX&d0#Oz`~%%kj{|FP?DSP;*yk;p9B=+&^r`neQU*0M^yO~yz&JZ zhQZ1CxdlKy3=D<>lbZz;nZ!jWFB52GX0F~fSx-T3SWnjWwVvYDQ*C5qH?4F delta 402 zcmZp1XmQw}Di9~mWyHY1z`~%%kj{|FP?DSP;*yk;p9B=+5V`#9)1U8098u*{@X8lt z7zQWj=N16; mod backends; pub use self::backends::{ - Backend, ClientCertError, ClientCertInfo, Handler, InMemoryBackendHandler, + Backend, ClientCertError, ClientCertInfo, DynamicBackendRegistrar, Handler, + InMemoryBackendHandler, }; pub type Backends = HashMap>; diff --git a/lib/src/config/backends.rs b/lib/src/config/backends.rs index 24d1b27d..a6d136a1 100644 --- a/lib/src/config/backends.rs +++ b/lib/src/config/backends.rs @@ -58,6 +58,11 @@ pub trait InMemoryBackendHandler: Send + Sync + 'static { async fn handle(&self, req: Request) -> Response; } +// TODO: Should probably be more like an interceptor, registrar conveys the wrong idea. +pub trait DynamicBackendRegistrar: Send + Sync + 'static { + fn register(&self, backend: Backend) -> Backend; +} + /// A map of [`Backend`] definitions, keyed by their name. #[derive(Clone, Debug, Default)] pub struct BackendsConfig(pub HashMap>); diff --git a/lib/src/execute.rs b/lib/src/execute.rs index 79ad8bd2..113b92c0 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -1,6 +1,6 @@ //! Guest code execution. -use crate::config::UnknownImportBehavior; +use crate::config::{DynamicBackendRegistrar, UnknownImportBehavior}; use std::{collections::BTreeMap, sync::RwLock, time::SystemTime}; use tokio::sync::mpsc::{Receiver, Sender as MspcSender}; use wasmtime::GuestProfiler; @@ -34,6 +34,7 @@ use { }; pub const EPOCH_INTERRUPTION_PERIOD: Duration = Duration::from_micros(50); + /// Execution context used by a [`ViceroyService`](struct.ViceroyService.html). /// /// This is all of the state needed to instantiate a module, in order to respond to an HTTP @@ -49,6 +50,8 @@ pub struct ExecuteCtx { module: Module, /// The backends for this execution. backends: Arc, + /// If set, intercepts dynamic backend registrations. + dynamic_backend_registrar: Option>>, /// The device detection mappings for this execution. device_detection: Arc, /// The geolocation mappings for this execution. 
@@ -174,6 +177,7 @@ impl ExecuteCtx { epoch_increment_stop, guest_profile_path: Arc::new(guest_profile_path), endpoints: Arc::new(Endpoints::new()), + dynamic_backend_registrar: None, }) } @@ -277,6 +281,14 @@ impl ExecuteCtx { self } + pub fn with_dynamic_backend_registrar( + mut self, + registrar: Box, + ) -> Self { + self.dynamic_backend_registrar = Some(Arc::new(registrar)); + self + } + /// Asynchronously handle a request. /// /// This method fully instantiates the wasm module housed within the `ExecuteCtx`, @@ -391,6 +403,7 @@ impl ExecuteCtx { self.object_store.clone(), self.secret_stores.clone(), self.endpoints.clone(), + self.dynamic_backend_registrar.clone(), ); let guest_profile_path = self.guest_profile_path.as_deref().map(|path| { @@ -487,6 +500,7 @@ impl ExecuteCtx { self.object_store.clone(), self.secret_stores.clone(), self.endpoints.clone(), + self.dynamic_backend_registrar.clone(), ); let profiler = self.guest_profile_path.is_some().then(|| { diff --git a/lib/src/session.rs b/lib/src/session.rs index bdbef33e..86e5aca8 100644 --- a/lib/src/session.rs +++ b/lib/src/session.rs @@ -7,7 +7,7 @@ pub use async_item::{ AsyncItem, PeekableTask, PendingKvDeleteTask, PendingKvInsertTask, PendingKvLookupTask, }; -use crate::execute::Endpoints; +use crate::{config::DynamicBackendRegistrar, execute::Endpoints}; use { self::downstream::DownstreamResponse, crate::{ @@ -89,6 +89,8 @@ pub struct Session { /// `backends` because we do not want one session to affect the backends /// available to any other session. dynamic_backends: Backends, + /// If set, the registrar is used to register dynamic backends. + pub(crate) dynamic_backend_registrar: Option>>, /// The TLS configuration for this execution. /// /// Populated prior to guest execution, and never modified. 
@@ -146,6 +148,7 @@ impl Session { object_store: Arc, secret_stores: Arc, endpoints: Arc, + dynamic_backend_registrar: Option>>, ) -> Session { let (parts, body) = req.into_parts(); let downstream_req_original_headers = parts.headers.clone(); @@ -181,6 +184,7 @@ impl Session { secrets_by_name: PrimaryMap::new(), config_path, req_id, + dynamic_backend_registrar, } .write_endpoints(endpoints) } diff --git a/lib/src/wiggle_abi/req_impl.rs b/lib/src/wiggle_abi/req_impl.rs index 21ea0635..446085e7 100644 --- a/lib/src/wiggle_abi/req_impl.rs +++ b/lib/src/wiggle_abi/req_impl.rs @@ -391,6 +391,11 @@ impl FastlyHttpReq for Session { handler: None, }; + let new_backend = match self.dynamic_backend_registrar { + Some(ref registrar) => registrar.register(new_backend), + None => new_backend, + }; + if !self.add_backend(name, new_backend) { return Err(Error::BackendNameRegistryError(name.to_string())); } From ee51447e94fc8faf894bdab48d1819b2eb26b570 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Sun, 4 Feb 2024 19:41:39 +0000 Subject: [PATCH 04/20] Some cleanup --- lib/src/execute.rs | 14 -------------- lib/src/logging.rs | 1 - 2 files changed, 15 deletions(-) diff --git a/lib/src/execute.rs b/lib/src/execute.rs index 113b92c0..20c57d5b 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -547,20 +547,6 @@ impl ExecuteCtx { } } -// pub struct ExecutionResult { -// pub response: Response, -// pub logs: Vec, -// } - -// impl From> for ExecutionResult { -// fn from(response: Response) -> Self { -// Self { -// response, -// logs: vec![], -// } -// } -// } - fn write_profile(store: &mut wasmtime::Store, guest_profile_path: Option<&PathBuf>) { if let (Some(profile), Some(path)) = (store.data_mut().take_guest_profiler(), guest_profile_path) diff --git a/lib/src/logging.rs b/lib/src/logging.rs index 240f3a50..a58d4a20 100644 --- a/lib/src/logging.rs +++ b/lib/src/logging.rs @@ -73,7 +73,6 @@ impl LogEndpoint { if let Some(ref sender) = self.sender { sender.try_send(to_write).expect("todo"); - // sender.blocking_send(to_write).expect("todo"); Ok(()) } else { LOG_WRITER.lock().unwrap().write_all(&to_write) From da776b59cd4cf764e14f28e82324c0a1773b2e07 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Sat, 24 Feb 2024 19:06:14 +0100 Subject: [PATCH 05/20] Non-failing cache stubs. --- lib/src/cache_state.rs | 42 ++++++++++++++++++++++++++++++++++++ lib/src/execute.rs | 10 ++++++++- lib/src/lib.rs | 1 + lib/src/session.rs | 6 +++++- lib/src/wiggle_abi/cache.rs | 30 ++++++++++---------------- lib/src/wiggle_abi/entity.rs | 3 ++- 6 files changed, 70 insertions(+), 22 deletions(-) create mode 100644 lib/src/cache_state.rs diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs new file mode 100644 index 00000000..8881562b --- /dev/null +++ b/lib/src/cache_state.rs @@ -0,0 +1,42 @@ +use crate::wiggle_abi::types; +use std::{ + collections::BTreeMap, + sync::{Arc, RwLock}, + time::Instant, +}; + +struct CacheEntry { + /// The cached bytes. + body: Vec, + + /// Vary used to create this entry. + vary: BTreeMap, + + /// Max-age this entry has been created with. + max_age: Option, + + /// Stale-while-revalidate this entry has been created with. + swr: Option, + + /// Instant this entry has been created. + created_at: Instant, + + /// The user metadata stored alongside the cache. + user_metadata: Vec, +} + +#[derive(Clone, Default)] +pub struct CacheState { + // #[allow(clippy::type_complexity)] + // Cache key to entry map. + cache_entries: Arc, CacheEntry>>>, + + /// Handle to cache key for ABI interop. 
+ handle_map: Arc>>>, +} + +impl CacheState { + pub(crate) fn new() -> Self { + Self::default() + } +} diff --git a/lib/src/execute.rs b/lib/src/execute.rs index 20c57d5b..f885f6be 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -1,6 +1,9 @@ //! Guest code execution. -use crate::config::{DynamicBackendRegistrar, UnknownImportBehavior}; +use crate::{ + cache_state::CacheState, + config::{DynamicBackendRegistrar, UnknownImportBehavior}, +}; use std::{collections::BTreeMap, sync::RwLock, time::SystemTime}; use tokio::sync::mpsc::{Receiver, Sender as MspcSender}; use wasmtime::GuestProfiler; @@ -81,6 +84,8 @@ pub struct ExecuteCtx { guest_profile_path: Arc>, /// Endpoints that should be monitored. Allows reading logged message lines to that endpoint. endpoints: Arc, + /// Cache state to use. + cache_state: Arc, } #[derive(Clone)] @@ -178,6 +183,7 @@ impl ExecuteCtx { guest_profile_path: Arc::new(guest_profile_path), endpoints: Arc::new(Endpoints::new()), dynamic_backend_registrar: None, + cache_state: Arc::new(CacheState::new()), }) } @@ -404,6 +410,7 @@ impl ExecuteCtx { self.secret_stores.clone(), self.endpoints.clone(), self.dynamic_backend_registrar.clone(), + self.cache_state.clone(), ); let guest_profile_path = self.guest_profile_path.as_deref().map(|path| { @@ -501,6 +508,7 @@ impl ExecuteCtx { self.secret_stores.clone(), self.endpoints.clone(), self.dynamic_backend_registrar.clone(), + self.cache_state.clone(), ); let profiler = self.guest_profile_path.is_some().then(|| { diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 23bfe9e8..c55504bd 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -20,6 +20,7 @@ pub mod logging; pub mod session; mod async_io; +mod cache_state; mod downstream; mod execute; mod headers; diff --git a/lib/src/session.rs b/lib/src/session.rs index 86e5aca8..307a62ac 100644 --- a/lib/src/session.rs +++ b/lib/src/session.rs @@ -7,7 +7,7 @@ pub use async_item::{ AsyncItem, PeekableTask, PendingKvDeleteTask, PendingKvInsertTask, PendingKvLookupTask, }; -use crate::{config::DynamicBackendRegistrar, execute::Endpoints}; +use crate::{cache_state::CacheState, config::DynamicBackendRegistrar, execute::Endpoints}; use { self::downstream::DownstreamResponse, crate::{ @@ -129,6 +129,8 @@ pub struct Session { config_path: Arc>, /// The ID for the client request being processed. req_id: u64, + /// The cache state to use. + pub(crate) cache_state: Arc, } impl Session { @@ -149,6 +151,7 @@ impl Session { secret_stores: Arc, endpoints: Arc, dynamic_backend_registrar: Option>>, + cache_state: Arc, ) -> Session { let (parts, body) = req.into_parts(); let downstream_req_original_headers = parts.headers.clone(); @@ -185,6 +188,7 @@ impl Session { config_path, req_id, dynamic_backend_registrar, + cache_state, } .write_endpoints(endpoints) } diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index f2972c13..bc74dc1a 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -1,7 +1,10 @@ use crate::session::Session; use super::fastly_cache::FastlyCache; -use super::{types, Error}; +use super::{ + types::{self, CacheHandle}, + Error, +}; #[allow(unused_variables)] impl FastlyCache for Session { @@ -11,9 +14,8 @@ impl FastlyCache for Session { options_mask: types::CacheLookupOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + // self.cache_state.get(cache_key.) 
+ Ok(CacheHandle::from(123)) } fn insert<'a>( @@ -22,9 +24,7 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + Ok(types::BodyHandle::from(123)) } fn transaction_lookup<'a>( @@ -33,9 +33,7 @@ impl FastlyCache for Session { options_mask: types::CacheLookupOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + Ok(types::CacheHandle::from(123)) } fn transaction_insert<'a>( @@ -44,9 +42,7 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + Ok(types::BodyHandle::from(123)) } fn transaction_insert_and_stream_back<'a>( @@ -78,15 +74,11 @@ impl FastlyCache for Session { } fn close(&mut self, handle: types::CacheHandle) -> Result<(), Error> { - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + Ok(()) } fn get_state(&mut self, handle: types::CacheHandle) -> Result { - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + Ok(types::CacheLookupState::MUST_INSERT_OR_UPDATE) } fn get_user_metadata<'a>( diff --git a/lib/src/wiggle_abi/entity.rs b/lib/src/wiggle_abi/entity.rs index 1641ade8..d23f7ef6 100644 --- a/lib/src/wiggle_abi/entity.rs +++ b/lib/src/wiggle_abi/entity.rs @@ -3,7 +3,7 @@ //! [ref]: https://docs.rs/cranelift-entity/latest/cranelift_entity/trait.EntityRef.html use super::types::{ - AsyncItemHandle, BodyHandle, DictionaryHandle, EndpointHandle, ObjectStoreHandle, + AsyncItemHandle, BodyHandle, CacheHandle, DictionaryHandle, EndpointHandle, ObjectStoreHandle, PendingRequestHandle, RequestHandle, ResponseHandle, SecretHandle, SecretStoreHandle, }; @@ -49,3 +49,4 @@ wiggle_entity!(ObjectStoreHandle); wiggle_entity!(SecretStoreHandle); wiggle_entity!(SecretHandle); wiggle_entity!(AsyncItemHandle); +wiggle_entity!(CacheHandle); From f1d023cb3fc34fe8d8e40124e966b9df524f1468 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Mon, 4 Mar 2024 17:23:46 +0100 Subject: [PATCH 06/20] WIP caches --- lib/src/cache_state.rs | 152 +++++++++++++++++++++++++++++---- lib/src/execute.rs | 5 ++ lib/src/lib.rs | 1 + lib/src/logging.rs | 2 + lib/src/wiggle_abi/cache.rs | 163 ++++++++++++++++++++++++++++++++++-- 5 files changed, 296 insertions(+), 27 deletions(-) diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs index 8881562b..c0784ff5 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/cache_state.rs @@ -1,42 +1,158 @@ -use crate::wiggle_abi::types; +use cranelift_entity::PrimaryMap; +use http::HeaderMap; + +use crate::{body::Body, session::Session, wiggle_abi::types}; use std::{ - collections::BTreeMap, - sync::{Arc, RwLock}, + collections::{BTreeMap, HashMap}, + sync::{ + atomic::{AtomicI32, AtomicU32, Ordering}, + Arc, RwLock, + }, time::Instant, }; -struct CacheEntry { +/// Handle used for a non-existing cache entry. +pub fn not_found_handle() -> types::CacheHandle { + types::CacheHandle::from(u32::MAX) +} + +type PrimaryCacheKey = Vec; + +#[derive(Debug)] +pub struct CacheKey { + /// The primary bytes of the cache key. + primary: Vec, + + /// A list of secondary keys (based on vary). 
+ /// Note that, right now, this list must be ordered in order to map the same entry. + secondary: Vec, +} + +#[derive(Debug)] +pub struct CacheEntry { /// The cached bytes. - body: Vec, + pub body: types::BodyHandle, /// Vary used to create this entry. - vary: BTreeMap, + pub vary: BTreeMap, /// Max-age this entry has been created with. - max_age: Option, + pub max_age: Option, /// Stale-while-revalidate this entry has been created with. - swr: Option, + pub swr: Option, /// Instant this entry has been created. - created_at: Instant, + pub created_at: Instant, /// The user metadata stored alongside the cache. - user_metadata: Vec, + pub user_metadata: Vec, } -#[derive(Clone, Default)] +impl CacheEntry { + pub fn vary_matches(&self, headers: &HeaderMap) -> bool { + self.vary.iter().all(|(vary_key, vary_value)| { + headers + .get(vary_key) + .map(|v| v == vary_value) + .unwrap_or(false) + }) + } +} + +#[derive(Clone, Default, Debug)] pub struct CacheState { - // #[allow(clippy::type_complexity)] - // Cache key to entry map. - cache_entries: Arc, CacheEntry>>>, + /// Cache entries, indexable by handle. + pub cache_entries: Arc>>, - /// Handle to cache key for ABI interop. - handle_map: Arc>>>, + /// Primary cache key to a list of variants. + pub key_candidates: Arc>>>, + // cache_entries: Arc>>, + // /// Sequence + // handle_sequence: Arc, } +// Requires: +// - lookup: key to handle +// - insert: key to bodyhandle +// - state: handle to state +// - body: handle to body handle + impl CacheState { - pub(crate) fn new() -> Self { - Self::default() + pub fn new() -> Self { + // 0 is reserved for missing cache entries. + // let handle_sequence = Arc::new(AtomicU32::new(1)); + + Self { + // handle_sequence, + ..Default::default() + } } } + +// /// Get handle for the given key. +// /// Will return the empty handle (0) if no entry is found. +// pub(crate) fn get_handle(&self, key: &Vec) -> types::CacheHandle { +// self.handles +// .read() +// .unwrap() +// .get(key) +// .map(ToOwned::to_owned) +// .unwrap_or(none_handle()) +// } + +// pub(crate) fn get_state(&self, key: &types::CacheHandle) -> CacheLookupState { +// self.cache_entries.read().unwrap().get(key).map(|entry| { +// // check: +// // - expired (STALE) +// // - Not found (MUST_INSERT_OR_UPDATE) +// // - Found and usable (USABLE) +// // - ??? 
(FOUND) +// }) + +// // handle: types::CacheHandle + +// // self.handles +// // .read() +// // .expect("[Get] Handle lookup to succeed") +// // .get(&handle.inner()) +// // .and_then(|key| { +// // self.cache_entries +// // .read() +// // .expect("[Get] Entry lookup to succeed") +// // .get(key) +// // }) +// // todo!() +// } + +// pub(crate) fn insert( +// &self, +// key: Vec, +// max_age: u64, +// body_handle: types::BodyHandle, +// ) -> types::CacheHandle { +// let entry = CacheEntry { +// body: body_handle, +// vary: BTreeMap::new(), +// max_age: Some(max_age), +// swr: None, +// created_at: Instant::now(), +// user_metadata: vec![], +// }; + +// todo!() + +// // let handle_index = self.handle_sequence.fetch_add(1, Ordering::Relaxed); +// // self.handles +// // .write() +// // .expect("[Insert] Handle lookup to succeed") +// // .insert(handle_index, key.clone()); + +// // self.cache_entries +// // .write() +// // .expect("[Insert] Cache entry lock to succeed") +// // .insert(key, entry); + +// // types::CacheHandle::from(handle_index) +// } +// } diff --git a/lib/src/execute.rs b/lib/src/execute.rs index f885f6be..0ee23a2a 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -295,6 +295,11 @@ impl ExecuteCtx { self } + pub fn with_caches(mut self, cache_state: Arc) -> Self { + self.cache_state = cache_state; + self + } + /// Asynchronously handle a request. /// /// This method fully instantiates the wasm module housed within the `ExecuteCtx`, diff --git a/lib/src/lib.rs b/lib/src/lib.rs index c55504bd..191e4f37 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -34,6 +34,7 @@ mod wiggle_abi; pub use { async_trait, + cache_state::CacheState, error::Error, execute::{EndpointListener, Endpoints, ExecuteCtx}, http, hyper, diff --git a/lib/src/logging.rs b/lib/src/logging.rs index a58d4a20..54934ddc 100644 --- a/lib/src/logging.rs +++ b/lib/src/logging.rs @@ -71,6 +71,8 @@ impl LogEndpoint { } to_write.push(b'\n'); + dbg!(std::str::from_utf8(&to_write)); + if let Some(ref sender) = self.sender { sender.try_send(to_write).expect("todo"); Ok(()) diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index bc74dc1a..bfddd473 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -1,11 +1,88 @@ +use std::collections::BTreeMap; +use std::time::Instant; + +use tracing::{event, Level}; + +use crate::body::Body; +use crate::cache_state::{not_found_handle, CacheEntry}; use crate::session::Session; use super::fastly_cache::FastlyCache; use super::{ - types::{self, CacheHandle}, + types::{self}, Error, }; +// pub struct CacheWriteOptions<'a> { +// pub max_age_ns: CacheDurationNs, +// pub request_headers: RequestHandle, +// pub vary_rule_ptr: wiggle::GuestPtr<'a, u8>, +// pub vary_rule_len: u32, +// pub initial_age_ns: CacheDurationNs, +// pub stale_while_revalidate_ns: CacheDurationNs, +// pub surrogate_keys_ptr: wiggle::GuestPtr<'a, u8>, +// pub surrogate_keys_len: u32, +// pub length: CacheObjectLength, +// pub user_metadata_ptr: wiggle::GuestPtr<'a, u8>, +// pub user_metadata_len: u32, +// } + +// Q: What does this do? Indicate which fields are safe to read? 
+// pub struct CacheWriteOptionsMask( +// ::Internal, +// ); +// impl CacheWriteOptionsMask { +// #[allow(deprecated, non_upper_case_globals)] +// pub const RESERVED: Self = Self::from_bits_retain(1); +// #[allow(deprecated, non_upper_case_globals)] +// pub const REQUEST_HEADERS: Self = Self::from_bits_retain(2); +// #[allow(deprecated, non_upper_case_globals)] +// pub const VARY_RULE: Self = Self::from_bits_retain(4); +// #[allow(deprecated, non_upper_case_globals)] +// pub const INITIAL_AGE_NS: Self = Self::from_bits_retain(8); +// #[allow(deprecated, non_upper_case_globals)] +// pub const STALE_WHILE_REVALIDATE_NS: Self = Self::from_bits_retain(16); +// #[allow(deprecated, non_upper_case_globals)] +// pub const SURROGATE_KEYS: Self = Self::from_bits_retain(32); +// #[allow(deprecated, non_upper_case_globals)] +// pub const LENGTH: Self = Self::from_bits_retain(64); +// #[allow(deprecated, non_upper_case_globals)] +// pub const USER_METADATA: Self = Self::from_bits_retain(128); +// #[allow(deprecated, non_upper_case_globals)] +// pub const SENSITIVE_DATA: Self = Self::from_bits_retain(256); +// } + +// pub struct CacheLookupOptionsMask( +// ::Internal, +// ); +// impl CacheLookupOptionsMask { +// #[allow(deprecated, non_upper_case_globals)] +// pub const RESERVED: Self = Self::from_bits_retain(1); +// #[allow(deprecated, non_upper_case_globals)] +// pub const REQUEST_HEADERS: Self = Self::from_bits_retain(2); +// } + +// pub struct CacheLookupOptions { +// pub request_headers: RequestHandle, +// } + +// pub struct CacheGetBodyOptions { +// pub from: u64, +// pub to: u64, +// } + +// pub struct CacheGetBodyOptionsMask( +// ::Internal, +// ); +// impl CacheGetBodyOptionsMask { +// #[allow(deprecated, non_upper_case_globals)] +// pub const RESERVED: Self = Self::from_bits_retain(1); +// #[allow(deprecated, non_upper_case_globals)] +// pub const FROM: Self = Self::from_bits_retain(2); +// #[allow(deprecated, non_upper_case_globals)] +// pub const TO: Self = Self::from_bits_retain(4); +// } + #[allow(unused_variables)] impl FastlyCache for Session { fn lookup<'a>( @@ -14,35 +91,99 @@ impl FastlyCache for Session { options_mask: types::CacheLookupOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { - // self.cache_state.get(cache_key.) - Ok(CacheHandle::from(123)) + println!("WAS IS DAS {}", options_mask); + // panic!("WTF: {}", options_mask); + // let primary_key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); + + // // Q: I guess the bitmask indicates if this is a safe operation? + // // -> Doesn't matter for us anyways, we're just using headers all the time afaict. + // let options: types::CacheLookupOptions = options.read().unwrap(); + // let req_parts = self.take_request_parts(options.request_headers)?; + + // let candidates_lock = self.cache_state.key_candidates.read().unwrap(); + // let candidates = candidates_lock.get(&primary_key); + + // if let Some(candidates) = candidates { + // // Eh, maybe a lock on the entire thing? + // // We don't need perf anyways, contenting locks doesn't matter. 
+ // let entry_lock = self.cache_state.cache_entries.read().unwrap(); + + // for candidate_handle in candidates { + // if let Some(candidate_entry) = entry_lock.get(*candidate_handle) { + // if candidate_entry.vary_matches(&req_parts.headers) { + // return Ok(*candidate_handle); + // } + // } + // } + // } + + Ok(not_found_handle()) } + // pub struct CacheWriteOptions<'a> { + // pub max_age_ns: CacheDurationNs, + // pub request_headers: RequestHandle, + // pub vary_rule_ptr: wiggle::GuestPtr<'a, u8>, + // pub vary_rule_len: u32, + // pub initial_age_ns: CacheDurationNs, + // pub stale_while_revalidate_ns: CacheDurationNs, + // pub surrogate_keys_ptr: wiggle::GuestPtr<'a, u8>, + // pub surrogate_keys_len: u32, + // pub length: CacheObjectLength, + // pub user_metadata_ptr: wiggle::GuestPtr<'a, u8>, + // pub user_metadata_len: u32, + // } fn insert<'a>( &mut self, cache_key: &wiggle::GuestPtr<'a, [u8]>, options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { - Ok(types::BodyHandle::from(123)) + println!("WRITE MASK: {}", options_mask); + + let options: types::CacheWriteOptions = options.read().unwrap(); + let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); + let max_age_s = options.max_age_ns / 1000; + let swr_s = options.stale_while_revalidate_ns / 1000; + let body_handle = self.insert_body(Body::empty()); + + let entry = CacheEntry { + body: body_handle, + vary: BTreeMap::new(), + max_age: Some(max_age_s), + swr: Some(swr_s), + created_at: Instant::now(), + user_metadata: vec![], + }; + + // self.cache_state + // .insert(key, options.max_age_ns / 1000, body_handle); + + Ok(body_handle) + + // todo!() } + /// Stub delegating to regular lookup. fn transaction_lookup<'a>( &mut self, cache_key: &wiggle::GuestPtr<'a, [u8]>, options_mask: types::CacheLookupOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { - Ok(types::CacheHandle::from(123)) + self.lookup(cache_key, options_mask, options) } + /// Stub delegating to regular insert. fn transaction_insert<'a>( &mut self, handle: types::CacheHandle, options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { - Ok(types::BodyHandle::from(123)) + // self.insert(handle, options_mask, options) + + Ok(self.insert_body(Body::empty())) } fn transaction_insert_and_stream_back<'a>( @@ -78,6 +219,11 @@ impl FastlyCache for Session { } fn get_state(&mut self, handle: types::CacheHandle) -> Result { + // match self.cache_state.get(handle) { + // Some(_entry) => Ok(types::CacheLookupState::FOUND), + // None => Ok(types::CacheLookupState::MUST_INSERT_OR_UPDATE), + // } + Ok(types::CacheLookupState::MUST_INSERT_OR_UPDATE) } @@ -132,9 +278,8 @@ impl FastlyCache for Session { } fn get_age_ns(&mut self, handle: types::CacheHandle) -> Result { - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + // self.cache_state.get(key) + todo!() } fn get_hits(&mut self, handle: types::CacheHandle) -> Result { From 46fcf2a38b9c72a9f5a3cdd5aaa7c9342f3f540a Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Tue, 5 Mar 2024 17:04:34 +0100 Subject: [PATCH 07/20] Playing around with cache body management. 
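Cache bodies are tied to a `Session` via `BodyHandle`s, so on drop the session copies them into `CacheState::bodies`, and the next session's `load_caches` reinserts them and rewrites the stale handles. Lookup then picks a candidate entry whose stored vary headers match the incoming request. The vary predicate, as a standalone sketch mirroring `CacheEntry::vary_matches` in the diff below:

use std::collections::BTreeMap;

use http::HeaderMap;

// Every recorded vary header must have the same value (or the same absence)
// on the incoming request for the entry to match.
fn vary_matches(vary: &BTreeMap<String, Option<String>>, headers: &HeaderMap) -> bool {
    vary.iter().all(|(vary_key, vary_value)| {
        headers.get(vary_key).and_then(|h| h.to_str().ok()) == vary_value.as_deref()
    })
}

fn main() {
    let mut vary = BTreeMap::new();
    vary.insert("accept-encoding".to_string(), Some("gzip".to_string()));

    let mut headers = HeaderMap::new();
    headers.insert("accept-encoding", "gzip".parse().unwrap());

    assert!(vary_matches(&vary, &headers));
    assert!(!vary_matches(&vary, &HeaderMap::new()));
}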
--- lib/src/cache_state.rs | 138 +++++++++++++++----- lib/src/logging.rs | 2 - lib/src/session.rs | 59 ++++++++- lib/src/wiggle_abi/body_impl.rs | 3 + lib/src/wiggle_abi/cache.rs | 220 ++++++++++++++++++++++++++------ lib/src/wiggle_abi/req_impl.rs | 2 +- 6 files changed, 353 insertions(+), 71 deletions(-) diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs index c0784ff5..13867bd5 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/cache_state.rs @@ -1,13 +1,10 @@ +use crate::{body::Body, wiggle_abi::types}; use cranelift_entity::PrimaryMap; use http::HeaderMap; - -use crate::{body::Body, session::Session, wiggle_abi::types}; use std::{ collections::{BTreeMap, HashMap}, - sync::{ - atomic::{AtomicI32, AtomicU32, Ordering}, - Arc, RwLock, - }, + fmt::Display, + sync::{Arc, RwLock}, time::Instant, }; @@ -18,23 +15,13 @@ pub fn not_found_handle() -> types::CacheHandle { type PrimaryCacheKey = Vec; -#[derive(Debug)] -pub struct CacheKey { - /// The primary bytes of the cache key. - primary: Vec, - - /// A list of secondary keys (based on vary). - /// Note that, right now, this list must be ordered in order to map the same entry. - secondary: Vec, -} - #[derive(Debug)] pub struct CacheEntry { /// The cached bytes. - pub body: types::BodyHandle, + pub body_handle: types::BodyHandle, /// Vary used to create this entry. - pub vary: BTreeMap, + pub vary: BTreeMap>, /// Max-age this entry has been created with. pub max_age: Option, @@ -52,10 +39,7 @@ pub struct CacheEntry { impl CacheEntry { pub fn vary_matches(&self, headers: &HeaderMap) -> bool { self.vary.iter().all(|(vary_key, vary_value)| { - headers - .get(vary_key) - .map(|v| v == vary_value) - .unwrap_or(false) + headers.get(vary_key).and_then(|h| h.to_str().ok()) == vary_value.as_deref() }) } } @@ -67,9 +51,15 @@ pub struct CacheState { /// Primary cache key to a list of variants. pub key_candidates: Arc>>>, - // cache_entries: Arc>>, - // /// Sequence - // handle_sequence: Arc, + + pub surrogates_to_handles: Arc>>>, + + /// The way cache bodies are handled makes it super hard to move cache state between + /// executions. We can't fork the SDK, so we need to abide by the interface. + /// + /// We change the session to be aware of `CacheState`, which will write a copy of the body when + /// execution ends. On load, these bodies will be written back into the next session. + pub bodies: Arc>>>, } // Requires: @@ -80,13 +70,101 @@ pub struct CacheState { impl CacheState { pub fn new() -> Self { - // 0 is reserved for missing cache entries. - // let handle_sequence = Arc::new(AtomicU32::new(1)); + Default::default() + } + + // This handling is necessary due to incredibly painful body handling. + pub async fn format_pretty(&self) -> String { + let formatted_bodies = self.format_bodies().await; + let formatter = CacheStateFormatter { + state: self, + formatted_bodies, + }; + + formatter.to_string() + } - Self { - // handle_sequence, - ..Default::default() + async fn format_bodies(&self) -> HashMap { + let mut formatted_bodies = HashMap::new(); + let mut new_bodies = HashMap::new(); + + // We need to remove all bodies once, read them into byte vectors in order to format them, + // then recreate the bodies and write them back into the map. Excellent. 
+ let bodies = self.bodies.write().unwrap().drain().collect::>(); + for (key, body) in bodies { + if let Some(body) = body { + let formatted = match body.read_into_vec().await { + Ok(bytes) => { + dbg!(&bytes); + new_bodies.insert(key, Some(Body::from(bytes.clone()))); + String::from_utf8(bytes).unwrap_or("Invalid UTF-8".to_owned()) + } + Err(err) => format!("Invalid body: {err}"), + }; + + formatted_bodies.insert(key, formatted); + } else { + new_bodies.insert(key, None); + } } + + // Write back the bodies. + self.bodies.write().unwrap().extend(new_bodies); + formatted_bodies + } +} + +struct CacheStateFormatter<'state> { + state: &'state CacheState, + formatted_bodies: HashMap, +} + +impl<'state> Display for CacheStateFormatter<'state> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn indent(level: usize) -> String { + " ".repeat(level) + } + + dbg!(&self.formatted_bodies); + + writeln!(f, "Cache State:")?; + writeln!(f, "{}Entries:", indent(1))?; + + for (handle, entry) in self.state.cache_entries.read().unwrap().iter() { + writeln!( + f, + "{}[{}]: {:?} | {:?}", + indent(2), + handle, + entry.max_age, + entry.swr + )?; + writeln!(f, "{}Vary:", indent(3))?; + + for (key, value) in entry.vary.iter() { + writeln!(f, "{}{key}: {:?}", indent(4), value)?; + } + + writeln!(f, "{}User Metadata:", indent(3))?; + writeln!( + f, + "{}{}", + indent(4), + std::str::from_utf8(&entry.user_metadata).unwrap_or("Invalid UITF-8") + )?; + + writeln!(f, "{}Body:", indent(3))?; + + let body = self + .formatted_bodies + .get(&entry.body_handle) + .map(|s| s.as_str()) + .unwrap_or(""); + + writeln!(f, "{}{}", indent(4), body)?; + } + + Ok(()) } } diff --git a/lib/src/logging.rs b/lib/src/logging.rs index 54934ddc..a58d4a20 100644 --- a/lib/src/logging.rs +++ b/lib/src/logging.rs @@ -71,8 +71,6 @@ impl LogEndpoint { } to_write.push(b'\n'); - dbg!(std::str::from_utf8(&to_write)); - if let Some(ref sender) = self.sender { sender.try_send(to_write).expect("todo"); Ok(()) diff --git a/lib/src/session.rs b/lib/src/session.rs index 307a62ac..0fcfadbc 100644 --- a/lib/src/session.rs +++ b/lib/src/session.rs @@ -133,6 +133,33 @@ pub struct Session { pub(crate) cache_state: Arc, } +impl Drop for Session { + fn drop(&mut self) { + // let mut bodies = self.cache_state.bodies.write().unwrap(); + + let get_bodies = self + .cache_state + .cache_entries + .write() + .unwrap() + .iter() + .map(|e| e.1.body_handle) + .collect::>(); + + let mut handles_to_bodies = HashMap::new(); + + for handle in get_bodies { + handles_to_bodies.insert(handle, dbg!(self.take_body(handle).ok())); + } + + self.cache_state + .bodies + .write() + .unwrap() + .extend(handles_to_bodies); + } +} + impl Session { /// Create an empty session. #[allow(clippy::too_many_arguments)] @@ -190,6 +217,7 @@ impl Session { dynamic_backend_registrar, cache_state, } + .load_caches() .write_endpoints(endpoints) } @@ -204,6 +232,33 @@ impl Session { self } + // Restores cache state from the given cache state. The only thing required right now is to re-process the bodies. + fn load_caches(mut self) -> Self { + // First, extract the handle to body mapping. + let bodies = self + .cache_state + .bodies + .write() + .unwrap() + .drain() + .collect::>(); + + // Re-insert the bodies into the session. This requires replacing the handles used in the cache entries, unfortunately. 
+ for (handle, body) in bodies { + let body = body.unwrap_or_default(); + let new_handle = self.insert_body(body); + + for (_, entry) in self.cache_state.cache_entries.write().unwrap().iter_mut() { + if entry.body_handle == handle { + entry.body_handle = new_handle; + break; + } + } + } + + self + } + // ----- Downstream Request API ----- /// Retrieve the downstream client IP address associated with this session. @@ -263,7 +318,7 @@ impl Session { /// [handle]: ../wiggle_abi/types/struct.BodyHandle.html /// [body]: ../body/struct.Body.html pub fn insert_body(&mut self, body: Body) -> BodyHandle { - self.async_items.push(Some(AsyncItem::Body(body))).into() + dbg!(self.async_items.push(Some(AsyncItem::Body(body))).into()) } /// Get a reference to a [`Body`][body], given its [`BodyHandle`][handle]. @@ -304,6 +359,7 @@ impl Session { /// [err]: ../error/enum.HandleError.html /// [handle]: ../wiggle_abi/types/struct.BodyHandle.html pub fn take_body(&mut self, handle: BodyHandle) -> Result { + dbg!("Taking body", handle); self.async_items .get_mut(handle.into()) .and_then(Option::take) @@ -315,6 +371,7 @@ impl Session { /// /// Returns a [`HandleError`][crate::error::HandleError] if the handle is not associated with a body in the session. pub fn drop_body(&mut self, handle: BodyHandle) -> Result<(), HandleError> { + dbg!("Dropping body", handle); self.async_items .get_mut(handle.into()) .and_then(Option::take) diff --git a/lib/src/wiggle_abi/body_impl.rs b/lib/src/wiggle_abi/body_impl.rs index d2d85e16..dca3cf79 100644 --- a/lib/src/wiggle_abi/body_impl.rs +++ b/lib/src/wiggle_abi/body_impl.rs @@ -110,6 +110,8 @@ impl FastlyHttpBody for Session { } fn close(&mut self, body_handle: BodyHandle) -> Result<(), Error> { + dbg!("CLOSING", body_handle); + // Drop the body and pass up an error if the handle does not exist if self.is_streaming_body(body_handle) { // Make sure a streaming body gets a `finish` message @@ -120,6 +122,7 @@ impl FastlyHttpBody for Session { } fn abandon(&mut self, body_handle: BodyHandle) -> Result<(), Error> { + dbg!("ABANDONING", body_handle); // Drop the body without a `finish` message Ok(self.drop_body(body_handle)?) } diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index bfddd473..a3529b84 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -1,3 +1,4 @@ +use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::time::Instant; @@ -91,31 +92,33 @@ impl FastlyCache for Session { options_mask: types::CacheLookupOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { - println!("WAS IS DAS {}", options_mask); - // panic!("WTF: {}", options_mask); - // let primary_key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); - - // // Q: I guess the bitmask indicates if this is a safe operation? - // // -> Doesn't matter for us anyways, we're just using headers all the time afaict. - // let options: types::CacheLookupOptions = options.read().unwrap(); - // let req_parts = self.take_request_parts(options.request_headers)?; - - // let candidates_lock = self.cache_state.key_candidates.read().unwrap(); - // let candidates = candidates_lock.get(&primary_key); - - // if let Some(candidates) = candidates { - // // Eh, maybe a lock on the entire thing? - // // We don't need perf anyways, contenting locks doesn't matter. 
- // let entry_lock = self.cache_state.cache_entries.read().unwrap(); - - // for candidate_handle in candidates { - // if let Some(candidate_entry) = entry_lock.get(*candidate_handle) { - // if candidate_entry.vary_matches(&req_parts.headers) { - // return Ok(*candidate_handle); - // } - // } - // } - // } + let primary_key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); + + // Q: I guess the bitmask indicates if this is a safe operation? + // -> Doesn't matter for us anyways, we're just using headers all the time afaict. + let options: types::CacheLookupOptions = options.read().unwrap(); + let req_parts = self.request_parts(options.request_headers)?; + + let candidates_lock = self.cache_state.key_candidates.read().unwrap(); + let candidates = candidates_lock.get(&primary_key); + + if let Some(candidates) = candidates { + dbg!("CHECKING CANDIDATE"); + // Eh, maybe a lock on the entire thing? + // We don't need perf anyways, contenting locks doesn't matter. + let entry_lock = self.cache_state.cache_entries.read().unwrap(); + + for candidate_handle in candidates { + if let Some(candidate_entry) = entry_lock.get(*candidate_handle) { + if candidate_entry.vary_matches(&req_parts.headers) { + dbg!("WOULD_MATCH"); + dbg!(candidate_entry); + // return Ok(*candidate_handle); + return Ok(not_found_handle()); + } + } + } + } Ok(not_found_handle()) } @@ -133,6 +136,26 @@ impl FastlyCache for Session { // pub user_metadata_ptr: wiggle::GuestPtr<'a, u8>, // pub user_metadata_len: u32, // } + // impl CacheWriteOptionsMask { + // #[allow(deprecated, non_upper_case_globals)] + // pub const RESERVED: Self = Self::from_bits_retain(1); + // #[allow(deprecated, non_upper_case_globals)] + // pub const REQUEST_HEADERS: Self = Self::from_bits_retain(2); + // #[allow(deprecated, non_upper_case_globals)] + // pub const VARY_RULE: Self = Self::from_bits_retain(4); + // #[allow(deprecated, non_upper_case_globals)] + // pub const INITIAL_AGE_NS: Self = Self::from_bits_retain(8); + // #[allow(deprecated, non_upper_case_globals)] + // pub const STALE_WHILE_REVALIDATE_NS: Self = Self::from_bits_retain(16); + // #[allow(deprecated, non_upper_case_globals)] + // pub const SURROGATE_KEYS: Self = Self::from_bits_retain(32); + // #[allow(deprecated, non_upper_case_globals)] + // pub const LENGTH: Self = Self::from_bits_retain(64); + // #[allow(deprecated, non_upper_case_globals)] + // pub const USER_METADATA: Self = Self::from_bits_retain(128); + // #[allow(deprecated, non_upper_case_globals)] + // pub const SENSITIVE_DATA: Self = Self::from_bits_retain(256); + // } fn insert<'a>( &mut self, cache_key: &wiggle::GuestPtr<'a, [u8]>, @@ -140,28 +163,144 @@ impl FastlyCache for Session { options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { println!("WRITE MASK: {}", options_mask); + // TODO: Skipped over all the sanity checks usually done by similar code (see `req_impl`). let options: types::CacheWriteOptions = options.read().unwrap(); let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); - let max_age_s = options.max_age_ns / 1000; - let swr_s = options.stale_while_revalidate_ns / 1000; - let body_handle = self.insert_body(Body::empty()); + // Cache write must contain max-age. + let max_age_s = options.max_age_ns / 1000 / 1000 / 1000; + + // Swr might not be set, check bitmask for. Else we'd always get Some(0). 
+ let swr_s = + if options_mask.contains(types::CacheWriteOptionsMask::STALE_WHILE_REVALIDATE_NS) { + Some(options.stale_while_revalidate_ns / 1000 / 1000 / 1000) + } else { + None + }; + + let surrogate_keys = if options_mask.contains(types::CacheWriteOptionsMask::SURROGATE_KEYS) + { + if options.surrogate_keys_len == 0 { + return Err(Error::InvalidArgument); + } + + let byte_slice = options + .surrogate_keys_ptr + .as_array(options.surrogate_keys_len) + .to_vec()?; + + match String::from_utf8(byte_slice) { + Ok(s) => s + .split_whitespace() + .map(ToOwned::to_owned) + .collect::>(), + + Err(_) => return Err(Error::InvalidArgument), + } + } else { + vec![] + }; + + let user_metadata = if options_mask.contains(types::CacheWriteOptionsMask::USER_METADATA) { + if options.user_metadata_len == 0 { + return Err(Error::InvalidArgument); + } + + let byte_slice = options + .user_metadata_ptr + .as_array(options.user_metadata_len) + .to_vec()?; + + byte_slice + } else { + vec![] + }; + + let vary = if options_mask.contains(types::CacheWriteOptionsMask::VARY_RULE) { + if options.vary_rule_len == 0 { + return Err(Error::InvalidArgument); + } + + let byte_slice = options + .vary_rule_ptr + .as_array(options.vary_rule_len) + .to_vec()?; + + let vary_rules = match String::from_utf8(byte_slice) { + Ok(s) => s + .split_whitespace() + .map(ToOwned::to_owned) + .collect::>(), + + Err(_) => return Err(Error::InvalidArgument), + }; + + if options_mask.contains(types::CacheWriteOptionsMask::REQUEST_HEADERS) { + let req_parts = self.request_parts(options.request_headers)?; + let mut map = BTreeMap::new(); + + // Extract necessary vary headers. + for vary in vary_rules { + // If you think this sucks... then you'd be right. Just supposed to work right now. + let value = req_parts + .headers + .get(&vary) + .map(|h| h.to_str().unwrap().to_string()); + + map.insert(vary, value); + } + + map + } else { + // Or invalid argument? + BTreeMap::new() + } + } else { + BTreeMap::new() + }; + + // TODO: Check cache entry overwrite - variants must be unique. + + let body_handle = dbg!(self.insert_body(Body::empty())); let entry = CacheEntry { - body: body_handle, - vary: BTreeMap::new(), + body_handle, + vary, max_age: Some(max_age_s), - swr: Some(swr_s), + swr: swr_s, created_at: Instant::now(), - user_metadata: vec![], + user_metadata, }; - // self.cache_state - // .insert(key, options.max_age_ns / 1000, body_handle); + dbg!("BASE ENTRY"); + + // Write entry. + let entry_handle = self.cache_state.cache_entries.write().unwrap().push(entry); + + // Write handle key candidate mapping. + match self.cache_state.key_candidates.write().unwrap().entry(key) { + Entry::Vacant(vacant) => { + vacant.insert(vec![entry_handle]); + } + Entry::Occupied(mut occupied) => { + occupied.get_mut().push(entry_handle); + } + } + + // Write surrogates. + let mut surrogates_write_lock = self.cache_state.surrogates_to_handles.write().unwrap(); + for surrogate_key in surrogate_keys { + match surrogates_write_lock.entry(surrogate_key) { + Entry::Vacant(vacant) => { + vacant.insert(vec![entry_handle]); + } + Entry::Occupied(mut occupied) => { + occupied.get_mut().push(entry_handle); + } + }; + } Ok(body_handle) - - // todo!() } /// Stub delegating to regular lookup. 
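// A small self-contained sketch of the vary handling in the insert/lookup code
// above: the vary rule is a whitespace-separated list of header names, the values
// those headers carry on the *inserting* request are frozen into the entry, and a
// later lookup only matches if the same headers carry the same values. Plain
// HashMap/BTreeMap stand in for the crate's HeaderMap; names here are illustrative.
use std::collections::{BTreeMap, HashMap};

fn vary_map(rule: &str, headers: &HashMap<String, String>) -> BTreeMap<String, Option<String>> {
    rule.split_whitespace()
        .map(|name| (name.to_owned(), headers.get(name).cloned()))
        .collect()
}

fn vary_matches(vary: &BTreeMap<String, Option<String>>, headers: &HashMap<String, String>) -> bool {
    vary.iter()
        .all(|(name, value)| headers.get(name).cloned() == *value)
}

fn main() {
    let insert_req = HashMap::from([("accept-encoding".to_owned(), "gzip".to_owned())]);
    let vary = vary_map("accept-encoding user-agent", &insert_req);

    // Same accept-encoding, still no user-agent header: the entry matches.
    assert!(vary_matches(&vary, &insert_req));

    // Different accept-encoding: the entry must not be served for this request.
    let other_req = HashMap::from([("accept-encoding".to_owned(), "br".to_owned())]);
    assert!(!vary_matches(&vary, &other_req));
}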
@@ -215,6 +354,7 @@ impl FastlyCache for Session { } fn close(&mut self, handle: types::CacheHandle) -> Result<(), Error> { + dbg!("CLOSING"); Ok(()) } @@ -234,6 +374,7 @@ impl FastlyCache for Session { user_metadata_out_len: u32, nwritten_out: &wiggle::GuestPtr<'a, u32>, ) -> Result<(), Error> { + dbg!("USER META DEAD"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) @@ -245,6 +386,7 @@ impl FastlyCache for Session { options_mask: types::CacheGetBodyOptionsMask, options: &types::CacheGetBodyOptions, ) -> Result { + dbg!("BODY DEAD"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) @@ -254,6 +396,7 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { + dbg!("LEN DEAD"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) @@ -263,6 +406,7 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { + dbg!("MAX AGE DEAD"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) @@ -272,17 +416,19 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { + dbg!("SWR DEAD"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) } fn get_age_ns(&mut self, handle: types::CacheHandle) -> Result { - // self.cache_state.get(key) + dbg!("AGE NS DEAD"); todo!() } fn get_hits(&mut self, handle: types::CacheHandle) -> Result { + dbg!("HITS DEAD"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) diff --git a/lib/src/wiggle_abi/req_impl.rs b/lib/src/wiggle_abi/req_impl.rs index 446085e7..f7ca2ff5 100644 --- a/lib/src/wiggle_abi/req_impl.rs +++ b/lib/src/wiggle_abi/req_impl.rs @@ -822,7 +822,7 @@ impl FastlyHttpReq for Session { fn close(&mut self, req_handle: RequestHandle) -> Result<(), Error> { // We don't do anything with the parts, but we do pass the error up if // the handle given doesn't exist - self.take_request_parts(req_handle)?; + dbg!(self.take_request_parts(req_handle))?; Ok(()) } From 530c8cf46023a352c7bf7e852e04d37e8cb2dd60 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Wed, 6 Mar 2024 20:21:32 +0100 Subject: [PATCH 08/20] Limping over the finish line to preserve cache state in between invocations. --- lib/src/session.rs | 42 ++++++++-- lib/src/wiggle_abi/body_impl.rs | 49 +++++++++++- lib/src/wiggle_abi/cache.rs | 136 ++++++++++++++++++++++---------- lib/src/wiggle_abi/req_impl.rs | 4 +- 4 files changed, 178 insertions(+), 53 deletions(-) diff --git a/lib/src/session.rs b/lib/src/session.rs index 0fcfadbc..64363214 100644 --- a/lib/src/session.rs +++ b/lib/src/session.rs @@ -135,9 +135,8 @@ pub struct Session { impl Drop for Session { fn drop(&mut self) { - // let mut bodies = self.cache_state.bodies.write().unwrap(); - - let get_bodies = self + // All body handles needed by cache entries. + let body_handles = self .cache_state .cache_entries .write() @@ -146,17 +145,32 @@ impl Drop for Session { .map(|e| e.1.body_handle) .collect::>(); - let mut handles_to_bodies = HashMap::new(); + dbg!("Body handles required by entries", &body_handles); + + // Handles already preserved (intercepted via .close()). 
+ let existing_body_handles = self + .cache_state + .bodies + .read() + .unwrap() + .iter() + .map(|(k, _)| *k) + .collect::>(); + + let mut body_handles_to_bodies = HashMap::new(); - for handle in get_bodies { - handles_to_bodies.insert(handle, dbg!(self.take_body(handle).ok())); + for handle in body_handles { + if !existing_body_handles.contains(&handle) { + dbg!("[Session Drop] Preserving handle", handle); + body_handles_to_bodies.insert(handle, dbg!(self.take_body(handle).ok())); + } } self.cache_state .bodies .write() .unwrap() - .extend(handles_to_bodies); + .extend(body_handles_to_bodies); } } @@ -247,10 +261,17 @@ impl Session { for (handle, body) in bodies { let body = body.unwrap_or_default(); let new_handle = self.insert_body(body); + dbg!(format!("Reloading handle {} as {}", handle, new_handle)); - for (_, entry) in self.cache_state.cache_entries.write().unwrap().iter_mut() { + for (entry_handle, entry) in self.cache_state.cache_entries.write().unwrap().iter_mut() + { if entry.body_handle == handle { + dbg!(format!( + "Entry {} now points to {}", + entry_handle, new_handle + )); entry.body_handle = new_handle; + dbg!(entry.body_handle); break; } } @@ -329,6 +350,7 @@ impl Session { /// [err]: ../error/enum.HandleError.html /// [handle]: ../wiggle_abi/types/struct.BodyHandle.html pub fn body(&self, handle: BodyHandle) -> Result<&Body, HandleError> { + dbg!("Getting body", handle); self.async_items .get(handle.into()) .and_then(Option::as_ref) @@ -344,6 +366,7 @@ impl Session { /// [err]: ../error/enum.HandleError.html /// [handle]: ../wiggle_abi/types/struct.BodyHandle.html pub fn body_mut(&mut self, handle: BodyHandle) -> Result<&mut Body, HandleError> { + dbg!("Getting body mut", handle); self.async_items .get_mut(handle.into()) .and_then(Option::as_mut) @@ -387,6 +410,7 @@ impl Session { /// [body]: ../body/struct.Body.html /// [err]: ../error/enum.HandleError.html pub fn begin_streaming(&mut self, handle: BodyHandle) -> Result { + dbg!("Begin streaming", handle); self.async_items .get_mut(handle.into()) .and_then(Option::as_mut) @@ -419,6 +443,7 @@ impl Session { &mut self, handle: BodyHandle, ) -> Result<&mut StreamingBody, HandleError> { + dbg!("Streaming mut", handle); self.async_items .get_mut(handle.into()) .and_then(Option::as_mut) @@ -439,6 +464,7 @@ impl Session { &mut self, handle: BodyHandle, ) -> Result { + dbg!("take streaming", handle); self.async_items .get_mut(handle.into()) .and_then(Option::take) diff --git a/lib/src/wiggle_abi/body_impl.rs b/lib/src/wiggle_abi/body_impl.rs index dca3cf79..b2d167bc 100644 --- a/lib/src/wiggle_abi/body_impl.rs +++ b/lib/src/wiggle_abi/body_impl.rs @@ -22,6 +22,8 @@ use { #[wiggle::async_trait] impl FastlyHttpBody for Session { async fn append(&mut self, dest: BodyHandle, src: BodyHandle) -> Result<(), Error> { + dbg!("Appending to body from other body", dest, src); + // Take the `src` body out of the session, and get a mutable reference // to the `dest` body we will append to. let mut src = self.take_body(src)?; @@ -83,6 +85,8 @@ impl FastlyHttpBody for Session { buf: &GuestPtr<'a, [u8]>, end: BodyWriteEnd, ) -> Result { + dbg!("Writing to body", body_handle); + // Validate the body handle and the buffer. 
let buf = &buf.as_slice()?.ok_or(Error::SharedMemory)?[..]; @@ -110,19 +114,58 @@ impl FastlyHttpBody for Session { } fn close(&mut self, body_handle: BodyHandle) -> Result<(), Error> { - dbg!("CLOSING", body_handle); + dbg!("Closing", body_handle); + + // TODO Giga haxx ahead + let cache_body_handle = self + .cache_state + .cache_entries + .read() + .unwrap() + .iter() + .find_map(|(_, e)| (dbg!(e.body_handle) == body_handle).then(|| body_handle.clone())); // Drop the body and pass up an error if the handle does not exist if self.is_streaming_body(body_handle) { + if cache_body_handle.is_some() { + return Err(Error::Unsupported { + msg: "Streaming cache body not supported", + }); + } + // Make sure a streaming body gets a `finish` message self.take_streaming_body(body_handle)?.finish() } else { - Ok(self.drop_body(body_handle)?) + if let Some(cache_body_handle) = dbg!(cache_body_handle) { + dbg!("Preserving cache body for", cache_body_handle); + + let body = self.take_body(body_handle).ok(); + self.cache_state + .bodies + .write() + .unwrap() + .insert(cache_body_handle, body); + + Ok(()) + } else { + Ok(self.drop_body(body_handle)?) + } } } fn abandon(&mut self, body_handle: BodyHandle) -> Result<(), Error> { - dbg!("ABANDONING", body_handle); + let entry_handle = self + .cache_state + .cache_entries + .read() + .unwrap() + .iter() + .find_map(|(_, e)| (e.body_handle == body_handle).then(|| body_handle.clone())); + + if entry_handle.is_some() { + panic!("Abandoning cache body handle not... handled (handle {body_handle}).") + } + // Drop the body without a `finish` message Ok(self.drop_body(body_handle)?) } diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index a3529b84..ce09d809 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -1,18 +1,18 @@ -use std::collections::btree_map::Entry; -use std::collections::BTreeMap; -use std::time::Instant; - -use tracing::{event, Level}; - -use crate::body::Body; -use crate::cache_state::{not_found_handle, CacheEntry}; -use crate::session::Session; - -use super::fastly_cache::FastlyCache; use super::{ + fastly_cache::FastlyCache, types::{self}, Error, }; +use crate::{ + body::Body, + cache_state::{not_found_handle, CacheEntry}, + session::Session, +}; +use std::{ + collections::{btree_map::Entry, BTreeMap}, + time::Instant, +}; +use tracing::{event, Level}; // pub struct CacheWriteOptions<'a> { // pub max_age_ns: CacheDurationNs, @@ -83,7 +83,6 @@ use super::{ // #[allow(deprecated, non_upper_case_globals)] // pub const TO: Self = Self::from_bits_retain(4); // } - #[allow(unused_variables)] impl FastlyCache for Session { fn lookup<'a>( @@ -103,7 +102,6 @@ impl FastlyCache for Session { let candidates = candidates_lock.get(&primary_key); if let Some(candidates) = candidates { - dbg!("CHECKING CANDIDATE"); // Eh, maybe a lock on the entire thing? // We don't need perf anyways, contenting locks doesn't matter. let entry_lock = self.cache_state.cache_entries.read().unwrap(); @@ -111,10 +109,9 @@ impl FastlyCache for Session { for candidate_handle in candidates { if let Some(candidate_entry) = entry_lock.get(*candidate_handle) { if candidate_entry.vary_matches(&req_parts.headers) { - dbg!("WOULD_MATCH"); - dbg!(candidate_entry); - // return Ok(*candidate_handle); - return Ok(not_found_handle()); + dbg!("MATCHED"); + return Ok(dbg!(*candidate_handle)); + // return Ok(not_found_handle()); } } } @@ -263,7 +260,8 @@ impl FastlyCache for Session { // TODO: Check cache entry overwrite - variants must be unique. 
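// A stripped-down model of the `close` interception above: when the guest closes a
// body handle that backs a cache entry, the bytes are moved into a shared store
// keyed by that handle instead of being dropped, so a later session can reload
// them. Toy u32 handles and Vec<u8> bodies only; this is a sketch of the idea, not
// the real Session/CacheState types.
use std::collections::{HashMap, HashSet};

struct ToyCache {
    // Handles referenced by cache entries.
    entry_body_handles: HashSet<u32>,
    // Bodies preserved across sessions.
    preserved: HashMap<u32, Vec<u8>>,
}

fn close(handle: u32, open_bodies: &mut HashMap<u32, Vec<u8>>, cache: &mut ToyCache) {
    if let Some(body) = open_bodies.remove(&handle) {
        if cache.entry_body_handles.contains(&handle) {
            // Backs a cache entry: stash it instead of dropping it.
            cache.preserved.insert(handle, body);
        }
        // Otherwise the body is simply dropped here.
    }
}

fn main() {
    let mut open = HashMap::from([(3u32, b"hello".to_vec()), (4, b"scratch".to_vec())]);
    let mut cache = ToyCache {
        entry_body_handles: HashSet::from([3]),
        preserved: HashMap::new(),
    };

    close(3, &mut open, &mut cache);
    close(4, &mut open, &mut cache);

    assert_eq!(cache.preserved.get(&3).map(|b| b.as_slice()), Some(&b"hello"[..]));
    assert!(cache.preserved.get(&4).is_none());
}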
let body_handle = dbg!(self.insert_body(Body::empty())); - let entry = CacheEntry { + // let body_handle = types::BodyHandle::from(123); + let mut entry = CacheEntry { body_handle, vary, max_age: Some(max_age_s), @@ -272,22 +270,54 @@ impl FastlyCache for Session { user_metadata, }; - dbg!("BASE ENTRY"); - - // Write entry. - let entry_handle = self.cache_state.cache_entries.write().unwrap().push(entry); + // begin almost-copy-paste from lookup + let req_parts = self.request_parts(options.request_headers)?; + dbg!("candidates read"); + let mut candidates_lock = self.cache_state.key_candidates.write().unwrap(); + let candidates = candidates_lock.get_mut(&key); + + let (entry_handle, overwrite) = candidates + .and_then(|candidates| { + dbg!("cache entries write"); + let entry_lock = self.cache_state.cache_entries.write().unwrap(); + + candidates.iter_mut().find_map(|candidate_handle| { + entry_lock + .get(*candidate_handle) + .and_then(|mut candidate_entry| { + candidate_entry.vary_matches(&req_parts.headers).then(|| { + dbg!("OVERWRITE ENTRY"); + + let _ = std::mem::replace(&mut candidate_entry, &mut entry); + (*candidate_handle, true) + }) + }) + }) + }) + .unwrap_or_else(|| { + dbg!("cache entries write"); + // Write new entry. + let entry_handle = self.cache_state.cache_entries.write().unwrap().push(entry); + (entry_handle, false) + }); + + drop(candidates_lock); // Write handle key candidate mapping. - match self.cache_state.key_candidates.write().unwrap().entry(key) { - Entry::Vacant(vacant) => { - vacant.insert(vec![entry_handle]); - } - Entry::Occupied(mut occupied) => { - occupied.get_mut().push(entry_handle); + if !overwrite { + dbg!("candidates write"); + match self.cache_state.key_candidates.write().unwrap().entry(key) { + Entry::Vacant(vacant) => { + vacant.insert(vec![entry_handle]); + } + Entry::Occupied(mut occupied) => { + occupied.get_mut().push(entry_handle); + } } } - // Write surrogates. + // Write surrogates (we don't really need to care about the overwrite case here for now). 
+ dbg!("surrogates write"); let mut surrogates_write_lock = self.cache_state.surrogates_to_handles.write().unwrap(); for surrogate_key in surrogate_keys { match surrogates_write_lock.entry(surrogate_key) { @@ -300,6 +330,11 @@ impl FastlyCache for Session { }; } + dbg!(format!( + "Wrote cache entry {} with body handle {}", + entry_handle, body_handle + )); + Ok(body_handle) } @@ -310,6 +345,8 @@ impl FastlyCache for Session { options_mask: types::CacheLookupOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { + event!(Level::ERROR, "TX lookup"); + dbg!("TX lookup"); self.lookup(cache_key, options_mask, options) } @@ -320,9 +357,10 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { - // self.insert(handle, options_mask, options) + event!(Level::ERROR, "INSERT tx {handle}"); + dbg!("INSERT tx {handle}"); - Ok(self.insert_body(Body::empty())) + Ok(dbg!(self.insert_body(Body::empty()))) } fn transaction_insert_and_stream_back<'a>( @@ -331,6 +369,8 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result<(types::BodyHandle, types::CacheHandle), Error> { + event!(Level::ERROR, "GET tx insert stream {handle}"); + dbg!("GET tx insert stream {handle}"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) @@ -342,19 +382,22 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result<(), Error> { + event!(Level::ERROR, "GET tx update {handle}"); + dbg!("GET tx update {handle}"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) } fn transaction_cancel(&mut self, handle: types::CacheHandle) -> Result<(), Error> { + event!(Level::ERROR, "GET tx cancel {handle}"); + dbg!("GET tx cancel {handle}"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) } fn close(&mut self, handle: types::CacheHandle) -> Result<(), Error> { - dbg!("CLOSING"); Ok(()) } @@ -364,7 +407,9 @@ impl FastlyCache for Session { // None => Ok(types::CacheLookupState::MUST_INSERT_OR_UPDATE), // } - Ok(types::CacheLookupState::MUST_INSERT_OR_UPDATE) + event!(Level::ERROR, "GET STATE {handle}"); + dbg!("GET STATE {handle}"); + Ok(dbg!(types::CacheLookupState::MUST_INSERT_OR_UPDATE)) } fn get_user_metadata<'a>( @@ -374,7 +419,8 @@ impl FastlyCache for Session { user_metadata_out_len: u32, nwritten_out: &wiggle::GuestPtr<'a, u32>, ) -> Result<(), Error> { - dbg!("USER META DEAD"); + event!(Level::ERROR, "GET meta {handle}"); + dbg!("GET meta {handle}"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) @@ -386,7 +432,8 @@ impl FastlyCache for Session { options_mask: types::CacheGetBodyOptionsMask, options: &types::CacheGetBodyOptions, ) -> Result { - dbg!("BODY DEAD"); + event!(Level::ERROR, "GET BODY handling {handle}"); + dbg!("GET BODY handling {handle}"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) @@ -396,7 +443,8 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - dbg!("LEN DEAD"); + event!(Level::ERROR, "GET KEN {handle}"); + dbg!("GET KEN {handle}"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) @@ -406,7 +454,8 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - dbg!("MAX AGE DEAD"); + 
event!(Level::ERROR, "GET maxage {handle}"); + dbg!("GET maxage {handle}"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) @@ -416,19 +465,24 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - dbg!("SWR DEAD"); + event!(Level::ERROR, "GET swr {handle}"); + dbg!("GET swr {handle}"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) } fn get_age_ns(&mut self, handle: types::CacheHandle) -> Result { - dbg!("AGE NS DEAD"); - todo!() + event!(Level::ERROR, "GET age ns {handle}"); + dbg!("GET age ns {handle}"); + Err(Error::Unsupported { + msg: "Cache API primitives not yet supported", + }) } fn get_hits(&mut self, handle: types::CacheHandle) -> Result { - dbg!("HITS DEAD"); + event!(Level::ERROR, "GET hits {handle}"); + dbg!("GET hits {handle}"); Err(Error::Unsupported { msg: "Cache API primitives not yet supported", }) diff --git a/lib/src/wiggle_abi/req_impl.rs b/lib/src/wiggle_abi/req_impl.rs index f7ca2ff5..70b20bc6 100644 --- a/lib/src/wiggle_abi/req_impl.rs +++ b/lib/src/wiggle_abi/req_impl.rs @@ -820,9 +820,11 @@ impl FastlyHttpReq for Session { } fn close(&mut self, req_handle: RequestHandle) -> Result<(), Error> { + dbg!("Closing request", req_handle); + // We don't do anything with the parts, but we do pass the error up if // the handle given doesn't exist - dbg!(self.take_request_parts(req_handle))?; + self.take_request_parts(req_handle)?; Ok(()) } From 2928cbece2c5ff850b70bfdc28fad6159d52453c Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Thu, 7 Mar 2024 16:15:49 +0100 Subject: [PATCH 09/20] Mostly working POC. --- lib/src/cache_state.rs | 1 - lib/src/error.rs | 4 ++ lib/src/execute.rs | 8 +-- lib/src/wiggle_abi/cache.rs | 126 +++++++++++++++++++++++++++++------- 4 files changed, 109 insertions(+), 30 deletions(-) diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs index 13867bd5..c1364bf4 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/cache_state.rs @@ -95,7 +95,6 @@ impl CacheState { if let Some(body) = body { let formatted = match body.read_into_vec().await { Ok(bytes) => { - dbg!(&bytes); new_bodies.insert(key, Some(Body::from(bytes.clone()))); String::from_utf8(bytes).unwrap_or("Invalid UTF-8".to_owned()) } diff --git a/lib/src/error.rs b/lib/src/error.rs index a7ed0f62..7b8fcb77 100644 --- a/lib/src/error.rs +++ b/lib/src/error.rs @@ -253,6 +253,10 @@ pub enum HandleError { #[error("Invalid body handle: {0}")] InvalidBodyHandle(crate::wiggle_abi::types::BodyHandle), + /// A cache handle was not valid. + #[error("Invalid cache handle: {0}")] + InvalidCacheHandle(crate::wiggle_abi::types::CacheHandle), + /// A logging endpoint handle was not valid. #[error("Invalid endpoint handle: {0}")] InvalidEndpointHandle(crate::wiggle_abi::types::EndpointHandle), diff --git a/lib/src/execute.rs b/lib/src/execute.rs index 0ee23a2a..089b3a1e 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -117,10 +117,10 @@ pub struct EndpointListener { } impl EndpointListener { - // Todo blocking for now. pub fn messages(&mut self) -> Vec> { let mut messages = vec![]; - while let Some(msg) = self.receiver.blocking_recv() { + + while let Ok(msg) = self.receiver.try_recv() { messages.push(msg); } @@ -282,8 +282,8 @@ impl ExecuteCtx { } /// Set the endpoints for this execution context. 
- pub fn with_endpoints(mut self, endpoints: Endpoints) -> Self { - self.endpoints = Arc::new(endpoints); + pub fn with_endpoints(mut self, endpoints: Arc) -> Self { + self.endpoints = endpoints; self } diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index ce09d809..0e1434fe 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -6,6 +6,7 @@ use super::{ use crate::{ body::Body, cache_state::{not_found_handle, CacheEntry}, + error::HandleError, session::Session, }; use std::{ @@ -402,28 +403,76 @@ impl FastlyCache for Session { } fn get_state(&mut self, handle: types::CacheHandle) -> Result { - // match self.cache_state.get(handle) { - // Some(_entry) => Ok(types::CacheLookupState::FOUND), - // None => Ok(types::CacheLookupState::MUST_INSERT_OR_UPDATE), - // } - - event!(Level::ERROR, "GET STATE {handle}"); dbg!("GET STATE {handle}"); - Ok(dbg!(types::CacheLookupState::MUST_INSERT_OR_UPDATE)) + event!(Level::ERROR, "GET STATE {handle}"); + if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + // Entry found. + let mut state = types::CacheLookupState::FOUND; + let mut ttl = 0; + let age = entry.created_at.elapsed().as_secs(); + + // Compute total ttl + if let Some(max_age) = entry.max_age { + ttl += max_age; + }; + + if let Some(swr) = entry.swr { + ttl += swr; + }; + + // Compute staleness + match (entry.max_age, entry.swr) { + (Some(max_age), Some(swr)) if age > max_age && age < ttl => { + state |= types::CacheLookupState::STALE + } + _ => (), + }; + + // TODO initial age etc not respected. + // Compute if usable. + if age < ttl { + // Entry is usable as max-age + swr define the period an entry is usable. + state |= types::CacheLookupState::USABLE; + } + + Ok(dbg!(state)) + } else { + Ok(dbg!(types::CacheLookupState::MUST_INSERT_OR_UPDATE)) + } } fn get_user_metadata<'a>( &mut self, handle: types::CacheHandle, user_metadata_out_ptr: &wiggle::GuestPtr<'a, u8>, - user_metadata_out_len: u32, + user_metadata_out_len: u32, // TODO: Is this the maximum allowed length? nwritten_out: &wiggle::GuestPtr<'a, u32>, ) -> Result<(), Error> { event!(Level::ERROR, "GET meta {handle}"); dbg!("GET meta {handle}"); - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if entry.user_metadata.len() > user_metadata_out_len as usize { + nwritten_out.write(entry.user_metadata.len().try_into().unwrap_or(0))?; + return Err(Error::BufferLengthError { + buf: "user_metadata_out", + len: "user_metadata_out_len", + }); + } + + let user_metadata_len = u32::try_from(entry.user_metadata.len()) + .expect("smaller than user_metadata_out_len means it must fit"); + + let mut metadata_out = user_metadata_out_ptr + .as_array(user_metadata_len) + .as_slice_mut()? 
+ .ok_or(Error::SharedMemory)?; + + metadata_out.copy_from_slice(&entry.user_metadata); + nwritten_out.write(user_metadata_len)?; + Ok(()) + } else { + Err(HandleError::InvalidCacheHandle(handle).into()) + } } fn get_body( @@ -432,11 +481,16 @@ impl FastlyCache for Session { options_mask: types::CacheGetBodyOptionsMask, options: &types::CacheGetBodyOptions, ) -> Result { - event!(Level::ERROR, "GET BODY handling {handle}"); - dbg!("GET BODY handling {handle}"); - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + dbg!("Cache GET BODY handle {handle}"); + if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + // We know that the body will be read and consumed, so we make a safety backup + // let body_ref = self.body(entry.body_handle)?; + // body_ref.into_iter() + + Ok(entry.body_handle) + } else { + Err(HandleError::InvalidCacheHandle(handle).into()) + } } fn get_length( @@ -456,9 +510,16 @@ impl FastlyCache for Session { ) -> Result { event!(Level::ERROR, "GET maxage {handle}"); dbg!("GET maxage {handle}"); - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + Ok(types::CacheDurationNs::from( + entry + .max_age + .map(|max_age| max_age * 1_000_000_000) + .unwrap_or(0), + )) + } else { + Err(HandleError::InvalidCacheHandle(handle).into()) + } } fn get_stale_while_revalidate_ns( @@ -467,17 +528,32 @@ impl FastlyCache for Session { ) -> Result { event!(Level::ERROR, "GET swr {handle}"); dbg!("GET swr {handle}"); - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + + if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + Ok(types::CacheDurationNs::from( + entry.swr.map(|swr| swr * 1_000_000_000).unwrap_or(0), + )) + } else { + Err(HandleError::InvalidCacheHandle(handle).into()) + } } fn get_age_ns(&mut self, handle: types::CacheHandle) -> Result { event!(Level::ERROR, "GET age ns {handle}"); dbg!("GET age ns {handle}"); - Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", - }) + + if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + Ok(types::CacheDurationNs::from( + entry + .created_at + .elapsed() + .as_nanos() + .try_into() + .unwrap_or(u64::MAX), + )) + } else { + Err(HandleError::InvalidCacheHandle(handle).into()) + } } fn get_hits(&mut self, handle: types::CacheHandle) -> Result { From 37ced9ba9aa4c567c302e4e9141ec9f3c9f4b70a Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Fri, 8 Mar 2024 11:39:09 +0100 Subject: [PATCH 10/20] Some cleanup --- lib/src/cache_state.rs | 200 +++++++++++++++++------------------- lib/src/wiggle_abi/cache.rs | 122 +++++++++++----------- 2 files changed, 155 insertions(+), 167 deletions(-) diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs index c1364bf4..83c40cfe 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/cache_state.rs @@ -9,41 +9,13 @@ use std::{ }; /// Handle used for a non-existing cache entry. +/// TODO: Is this needed? Seems to work without checking it. pub fn not_found_handle() -> types::CacheHandle { types::CacheHandle::from(u32::MAX) } type PrimaryCacheKey = Vec; -#[derive(Debug)] -pub struct CacheEntry { - /// The cached bytes. - pub body_handle: types::BodyHandle, - - /// Vary used to create this entry. - pub vary: BTreeMap>, - - /// Max-age this entry has been created with. 
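// The get_user_metadata implementation above follows a common out-buffer ABI
// contract: the caller passes a buffer and its capacity, and the host either copies
// the data and reports how many bytes it wrote, or reports the required size so the
// caller can retry with a larger buffer. A plain-slice sketch of that contract
// (toy code, not the wiggle GuestPtr API):
fn copy_metadata(stored: &[u8], out: &mut [u8]) -> Result<usize, usize> {
    if stored.len() > out.len() {
        // Too small: tell the caller how much room is needed.
        return Err(stored.len());
    }
    out[..stored.len()].copy_from_slice(stored);
    Ok(stored.len())
}

fn main() {
    let stored = b"hit-level=1";
    let mut small = [0u8; 4];
    assert_eq!(copy_metadata(stored, &mut small), Err(stored.len()));

    // Caller retries with the reported size.
    let mut big = vec![0u8; stored.len()];
    assert_eq!(copy_metadata(stored, &mut big), Ok(stored.len()));
    assert_eq!(&big[..], &stored[..]);
}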
- pub max_age: Option, - - /// Stale-while-revalidate this entry has been created with. - pub swr: Option, - - /// Instant this entry has been created. - pub created_at: Instant, - - /// The user metadata stored alongside the cache. - pub user_metadata: Vec, -} - -impl CacheEntry { - pub fn vary_matches(&self, headers: &HeaderMap) -> bool { - self.vary.iter().all(|(vary_key, vary_value)| { - headers.get(vary_key).and_then(|h| h.to_str().ok()) == vary_value.as_deref() - }) - } -} - #[derive(Clone, Default, Debug)] pub struct CacheState { /// Cache entries, indexable by handle. @@ -62,12 +34,6 @@ pub struct CacheState { pub bodies: Arc>>>, } -// Requires: -// - lookup: key to handle -// - insert: key to bodyhandle -// - state: handle to state -// - body: handle to body handle - impl CacheState { pub fn new() -> Self { Default::default() @@ -113,6 +79,78 @@ impl CacheState { } } +#[derive(Debug)] +pub struct CacheEntry { + /// Key the entry was created with. + /// Here for convenience. + pub key: Vec, + + /// The cached bytes. + pub body_handle: types::BodyHandle, + + /// Vary used to create this entry. + pub vary: BTreeMap>, + + /// Initial age of the cache entry. + pub initial_age_ns: Option, + + /// Max-age this entry has been created with. + pub max_age: Option, + + /// Stale-while-revalidate this entry has been created with. + pub swr: Option, + + /// Instant this entry has been created. + pub created_at: Instant, + + /// The user metadata stored alongside the cache. + pub user_metadata: Vec, +} + +impl CacheEntry { + pub fn vary_matches(&self, headers: &HeaderMap) -> bool { + self.vary.iter().all(|(vary_key, vary_value)| { + headers.get(vary_key).and_then(|h| h.to_str().ok()) == vary_value.as_deref() + }) + } + + pub fn age_ns(&self) -> u64 { + self.created_at + .elapsed() + .as_nanos() + .try_into() + .ok() + .and_then(|age_ns: u64| age_ns.checked_add(self.initial_age_ns.unwrap_or(0)).ok()) + .unwrap_or(u64::MAX) + } + + /// Stale: Is within + pub fn is_stale(&self) -> bool { + match (self.max_age, self.swr) { + (Some(max_age), Some(swr)) if age > max_age && age < ttl => { + state |= types::CacheLookupState::STALE + } + _ => (), + }; + } + + /// Usable: Age is smaller than max-age + swr. 
+ pub fn is_usable(&self) -> bool { + let mut total_ttl = 0; + let age = self.age_ns() / NS_TO_S_FACTOR; + + if let Some(max_age) = self.max_age { + total_ttl += max_age; + }; + + if let Some(swr) = self.swr { + total_ttl += swr; + }; + + age < total_ttl + } +} + struct CacheStateFormatter<'state> { state: &'state CacheState, formatted_bodies: HashMap, @@ -124,22 +162,35 @@ impl<'state> Display for CacheStateFormatter<'state> { " ".repeat(level) } - dbg!(&self.formatted_bodies); - writeln!(f, "Cache State:")?; writeln!(f, "{}Entries:", indent(1))?; for (handle, entry) in self.state.cache_entries.read().unwrap().iter() { writeln!( f, - "{}[{}]: {:?} | {:?}", + "{}[{}]: {}", indent(2), handle, - entry.max_age, - entry.swr + entry + .key + .iter() + .map(|x| format!("{x:X}")) + .collect::>() + .join("") )?; - writeln!(f, "{}Vary:", indent(3))?; + writeln!(f, "{}TTLs (sec):", indent(3),)?; + writeln!(f, "{}Age: {}", indent(4), entry.age_ns() / 1_000_000_000)?; + writeln!( + f, + "{}Inital age: {:?}", + indent(4), + entry.initial_age_ns.map(|ia| ia / 1_000_000_000) + )?; + writeln!(f, "{}Max-age: {:?}", indent(4), entry.max_age)?; + writeln!(f, "{}Swr: {:?}", indent(4), entry.swr)?; + + writeln!(f, "{}Vary:", indent(3))?; for (key, value) in entry.vary.iter() { writeln!(f, "{}{key}: {:?}", indent(4), value)?; } @@ -166,70 +217,3 @@ impl<'state> Display for CacheStateFormatter<'state> { Ok(()) } } - -// /// Get handle for the given key. -// /// Will return the empty handle (0) if no entry is found. -// pub(crate) fn get_handle(&self, key: &Vec) -> types::CacheHandle { -// self.handles -// .read() -// .unwrap() -// .get(key) -// .map(ToOwned::to_owned) -// .unwrap_or(none_handle()) -// } - -// pub(crate) fn get_state(&self, key: &types::CacheHandle) -> CacheLookupState { -// self.cache_entries.read().unwrap().get(key).map(|entry| { -// // check: -// // - expired (STALE) -// // - Not found (MUST_INSERT_OR_UPDATE) -// // - Found and usable (USABLE) -// // - ??? 
(FOUND) -// }) - -// // handle: types::CacheHandle - -// // self.handles -// // .read() -// // .expect("[Get] Handle lookup to succeed") -// // .get(&handle.inner()) -// // .and_then(|key| { -// // self.cache_entries -// // .read() -// // .expect("[Get] Entry lookup to succeed") -// // .get(key) -// // }) -// // todo!() -// } - -// pub(crate) fn insert( -// &self, -// key: Vec, -// max_age: u64, -// body_handle: types::BodyHandle, -// ) -> types::CacheHandle { -// let entry = CacheEntry { -// body: body_handle, -// vary: BTreeMap::new(), -// max_age: Some(max_age), -// swr: None, -// created_at: Instant::now(), -// user_metadata: vec![], -// }; - -// todo!() - -// // let handle_index = self.handle_sequence.fetch_add(1, Ordering::Relaxed); -// // self.handles -// // .write() -// // .expect("[Insert] Handle lookup to succeed") -// // .insert(handle_index, key.clone()); - -// // self.cache_entries -// // .write() -// // .expect("[Insert] Cache entry lock to succeed") -// // .insert(key, entry); - -// // types::CacheHandle::from(handle_index) -// } -// } diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index 0e1434fe..85bd7ec0 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -15,6 +15,8 @@ use std::{ }; use tracing::{event, Level}; +const NS_TO_S_FACTOR: u64 = 1_000_000_000; + // pub struct CacheWriteOptions<'a> { // pub max_age_ns: CacheDurationNs, // pub request_headers: RequestHandle, @@ -110,9 +112,7 @@ impl FastlyCache for Session { for candidate_handle in candidates { if let Some(candidate_entry) = entry_lock.get(*candidate_handle) { if candidate_entry.vary_matches(&req_parts.headers) { - dbg!("MATCHED"); return Ok(dbg!(*candidate_handle)); - // return Ok(not_found_handle()); } } } @@ -160,19 +160,18 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { - println!("WRITE MASK: {}", options_mask); // TODO: Skipped over all the sanity checks usually done by similar code (see `req_impl`). let options: types::CacheWriteOptions = options.read().unwrap(); let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); // Cache write must contain max-age. - let max_age_s = options.max_age_ns / 1000 / 1000 / 1000; + let max_age_s = options.max_age_ns / NS_TO_S_FACTOR; // Swr might not be set, check bitmask for. Else we'd always get Some(0). let swr_s = if options_mask.contains(types::CacheWriteOptionsMask::STALE_WHILE_REVALIDATE_NS) { - Some(options.stale_while_revalidate_ns / 1000 / 1000 / 1000) + Some(options.stale_while_revalidate_ns / NS_TO_S_FACTOR) } else { None }; @@ -258,28 +257,29 @@ impl FastlyCache for Session { BTreeMap::new() }; - // TODO: Check cache entry overwrite - variants must be unique. 
+ let initial_age_ns = options_mask + .contains(types::CacheWriteOptionsMask::INITIAL_AGE_NS) + .then(|| options.initial_age_ns); - let body_handle = dbg!(self.insert_body(Body::empty())); - // let body_handle = types::BodyHandle::from(123); + let body_handle = self.insert_body(Body::empty()); let mut entry = CacheEntry { + key: key.clone(), body_handle, vary, + initial_age_ns, max_age: Some(max_age_s), swr: swr_s, created_at: Instant::now(), user_metadata, }; - // begin almost-copy-paste from lookup + // Check for overwrites let req_parts = self.request_parts(options.request_headers)?; - dbg!("candidates read"); let mut candidates_lock = self.cache_state.key_candidates.write().unwrap(); let candidates = candidates_lock.get_mut(&key); let (entry_handle, overwrite) = candidates .and_then(|candidates| { - dbg!("cache entries write"); let entry_lock = self.cache_state.cache_entries.write().unwrap(); candidates.iter_mut().find_map(|candidate_handle| { @@ -287,7 +287,11 @@ impl FastlyCache for Session { .get(*candidate_handle) .and_then(|mut candidate_entry| { candidate_entry.vary_matches(&req_parts.headers).then(|| { - dbg!("OVERWRITE ENTRY"); + event!( + Level::TRACE, + "Overwriting cache entry {}", + candidate_handle + ); let _ = std::mem::replace(&mut candidate_entry, &mut entry); (*candidate_handle, true) @@ -296,17 +300,21 @@ impl FastlyCache for Session { }) }) .unwrap_or_else(|| { - dbg!("cache entries write"); // Write new entry. let entry_handle = self.cache_state.cache_entries.write().unwrap().push(entry); + event!( + Level::TRACE, + "Wrote new cache entry {} with body handle {}", + entry_handle, + body_handle + ); (entry_handle, false) }); drop(candidates_lock); - // Write handle key candidate mapping. if !overwrite { - dbg!("candidates write"); + // Write handle key candidate mapping. match self.cache_state.key_candidates.write().unwrap().entry(key) { Entry::Vacant(vacant) => { vacant.insert(vec![entry_handle]); @@ -318,7 +326,6 @@ impl FastlyCache for Session { } // Write surrogates (we don't really need to care about the overwrite case here for now). - dbg!("surrogates write"); let mut surrogates_write_lock = self.cache_state.surrogates_to_handles.write().unwrap(); for surrogate_key in surrogate_keys { match surrogates_write_lock.entry(surrogate_key) { @@ -331,14 +338,23 @@ impl FastlyCache for Session { }; } - dbg!(format!( - "Wrote cache entry {} with body handle {}", - entry_handle, body_handle - )); - Ok(body_handle) } + // pub struct CacheLookupOptionsMask( + // ::Internal, + // ); + // impl CacheLookupOptionsMask { + // #[allow(deprecated, non_upper_case_globals)] + // pub const RESERVED: Self = Self::from_bits_retain(1); + // #[allow(deprecated, non_upper_case_globals)] + // pub const REQUEST_HEADERS: Self = Self::from_bits_retain(2); + // } + + // pub struct CacheLookupOptions { + // pub request_headers: RequestHandle, + // } + /// Stub delegating to regular lookup. fn transaction_lookup<'a>( &mut self, @@ -346,9 +362,13 @@ impl FastlyCache for Session { options_mask: types::CacheLookupOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { - event!(Level::ERROR, "TX lookup"); - dbg!("TX lookup"); - self.lookup(cache_key, options_mask, options) + let cache_handle = if self.lookup(cache_key, options_mask, options)? != not_found_handle() { + todo!() + } else { + todo!() + }; + + todo!() } /// Stub delegating to regular insert. 
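// Note on the overwrite branch above: `candidate_entry` comes from `get`, i.e. it is
// a shared reference, so `std::mem::replace(&mut candidate_entry, &mut entry)` only
// swaps two local references; the entry stored in the map is not touched. A tiny
// self-contained demonstration of the difference, using a plain HashMap in place of
// cranelift's PrimaryMap (which exposes an equivalent `get_mut`):
use std::collections::HashMap;

fn main() {
    let mut entries = HashMap::from([(1u32, String::from("old"))]);
    let mut replacement = String::from("new");

    // Swapping references: the map still holds "old".
    if let Some(mut stored_ref) = entries.get(&1) {
        let _ = std::mem::replace(&mut stored_ref, &replacement);
    }
    assert_eq!(entries[&1], "old");

    // Overwriting through a mutable borrow of the slot actually updates the map.
    if let Some(slot) = entries.get_mut(&1) {
        *slot = std::mem::take(&mut replacement);
    }
    assert_eq!(entries[&1], "new");
}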
@@ -358,10 +378,15 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { - event!(Level::ERROR, "INSERT tx {handle}"); - dbg!("INSERT tx {handle}"); + // Ok(dbg!(self.insert_body(Body::empty()))) + + // &mut self, + // cache_key: &wiggle::GuestPtr<'a, [u8]>, + // options_mask: types::CacheWriteOptionsMask, + // options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, - Ok(dbg!(self.insert_body(Body::empty()))) + self.insert(); + todo!() } fn transaction_insert_and_stream_back<'a>( @@ -370,10 +395,9 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result<(types::BodyHandle, types::CacheHandle), Error> { - event!(Level::ERROR, "GET tx insert stream {handle}"); - dbg!("GET tx insert stream {handle}"); + event!(Level::ERROR, "Tx insert and stream back not implemented"); Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", + msg: "Tx insert and stream back not implemented", }) } @@ -383,18 +407,16 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result<(), Error> { - event!(Level::ERROR, "GET tx update {handle}"); - dbg!("GET tx update {handle}"); + event!(Level::ERROR, "Tx update not implemented"); Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", + msg: "Tx update not implemented", }) } fn transaction_cancel(&mut self, handle: types::CacheHandle) -> Result<(), Error> { - event!(Level::ERROR, "GET tx cancel {handle}"); - dbg!("GET tx cancel {handle}"); + event!(Level::ERROR, "Tx cancel not implemented"); Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", + msg: "Tx cancel not implemented", }) } @@ -403,13 +425,11 @@ impl FastlyCache for Session { } fn get_state(&mut self, handle: types::CacheHandle) -> Result { - dbg!("GET STATE {handle}"); - event!(Level::ERROR, "GET STATE {handle}"); if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { // Entry found. let mut state = types::CacheLookupState::FOUND; let mut ttl = 0; - let age = entry.created_at.elapsed().as_secs(); + let age = entry.age_ns() / NS_TO_S_FACTOR; // Compute total ttl if let Some(max_age) = entry.max_age { @@ -420,7 +440,7 @@ impl FastlyCache for Session { ttl += swr; }; - // Compute staleness + // Compute staleness. match (entry.max_age, entry.swr) { (Some(max_age), Some(swr)) if age > max_age && age < ttl => { state |= types::CacheLookupState::STALE @@ -428,7 +448,6 @@ impl FastlyCache for Session { _ => (), }; - // TODO initial age etc not respected. // Compute if usable. if age < ttl { // Entry is usable as max-age + swr define the period an entry is usable. 
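// The get_state logic above boils down, roughly, to two time windows derived from
// the entry's age: fresh while age < max-age, stale but still servable while
// max-age <= age < max-age + stale-while-revalidate, and in need of insert/update
// beyond that. A small numeric sketch of those windows (seconds as plain u64, toy
// logic mirroring the is_stale/is_usable helpers, not the crate's bitflag plumbing):
fn is_stale(age: u64, max_age: u64, swr: u64) -> bool {
    age > max_age && age < max_age + swr
}

fn is_usable(age: u64, max_age: u64, swr: u64) -> bool {
    age < max_age + swr
}

fn main() {
    let (max_age, swr) = (60, 30); // e.g. max-age 60s, stale-while-revalidate 30s

    // 40s old: usable and not stale (fresh).
    assert!(is_usable(40, max_age, swr) && !is_stale(40, max_age, swr));

    // 70s old: past max-age but inside the swr window, so stale yet still usable.
    assert!(is_usable(70, max_age, swr) && is_stale(70, max_age, swr));

    // 100s old: outside both windows; a caller would have to insert or update.
    assert!(!is_usable(100, max_age, swr) && !is_stale(100, max_age, swr));
}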
@@ -485,7 +504,7 @@ impl FastlyCache for Session { if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { // We know that the body will be read and consumed, so we make a safety backup // let body_ref = self.body(entry.body_handle)?; - // body_ref.into_iter() + // body_ref.read(); Ok(entry.body_handle) } else { @@ -526,9 +545,6 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - event!(Level::ERROR, "GET swr {handle}"); - dbg!("GET swr {handle}"); - if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { Ok(types::CacheDurationNs::from( entry.swr.map(|swr| swr * 1_000_000_000).unwrap_or(0), @@ -539,28 +555,16 @@ impl FastlyCache for Session { } fn get_age_ns(&mut self, handle: types::CacheHandle) -> Result { - event!(Level::ERROR, "GET age ns {handle}"); - dbg!("GET age ns {handle}"); - if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { - Ok(types::CacheDurationNs::from( - entry - .created_at - .elapsed() - .as_nanos() - .try_into() - .unwrap_or(u64::MAX), - )) + Ok(types::CacheDurationNs::from(entry.age_ns())) } else { Err(HandleError::InvalidCacheHandle(handle).into()) } } fn get_hits(&mut self, handle: types::CacheHandle) -> Result { - event!(Level::ERROR, "GET hits {handle}"); - dbg!("GET hits {handle}"); Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", + msg: "get_hits is not implemented", }) } } From 69bfa77faa6eed3421912ee5e0a418817eba3aa9 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Fri, 8 Mar 2024 11:52:13 +0100 Subject: [PATCH 11/20] More simplifications and cleanup refactoring --- lib/src/cache_state.rs | 37 +++++++++------- lib/src/wiggle_abi/cache.rs | 87 +++++++++++++------------------------ 2 files changed, 52 insertions(+), 72 deletions(-) diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs index 83c40cfe..fdd8fda5 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/cache_state.rs @@ -95,10 +95,10 @@ pub struct CacheEntry { pub initial_age_ns: Option, /// Max-age this entry has been created with. - pub max_age: Option, + pub max_age_ns: Option, /// Stale-while-revalidate this entry has been created with. - pub swr: Option, + pub swr_ns: Option, /// Instant this entry has been created. pub created_at: Instant, @@ -120,34 +120,41 @@ impl CacheEntry { .as_nanos() .try_into() .ok() - .and_then(|age_ns: u64| age_ns.checked_add(self.initial_age_ns.unwrap_or(0)).ok()) + .and_then(|age_ns: u64| age_ns.checked_add(self.initial_age_ns.unwrap_or(0))) .unwrap_or(u64::MAX) } - /// Stale: Is within + /// Stale: Is within max-age + ttl, but only if there's an swr given. pub fn is_stale(&self) -> bool { - match (self.max_age, self.swr) { - (Some(max_age), Some(swr)) if age > max_age && age < ttl => { - state |= types::CacheLookupState::STALE - } - _ => (), - }; + let age = self.age_ns(); + let total_ttl = self.total_ttl_ns(); + + match (self.max_age_ns, self.swr_ns) { + (Some(max_age), Some(swr)) => age > max_age && age < total_ttl, + _ => false, + } } /// Usable: Age is smaller than max-age + swr. pub fn is_usable(&self) -> bool { - let mut total_ttl = 0; - let age = self.age_ns() / NS_TO_S_FACTOR; + let mut total_ttl = self.total_ttl_ns(); + let age = self.age_ns(); - if let Some(max_age) = self.max_age { + age < total_ttl + } + + /// Max-age + swr of the cache entry. 
+ fn total_ttl_ns(&self) -> u64 { + let mut total_ttl = 0; + if let Some(max_age) = self.max_age_ns { total_ttl += max_age; }; - if let Some(swr) = self.swr { + if let Some(swr) = self.swr_ns { total_ttl += swr; }; - age < total_ttl + total_ttl } } diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index 85bd7ec0..180e3081 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -15,7 +15,7 @@ use std::{ }; use tracing::{event, Level}; -const NS_TO_S_FACTOR: u64 = 1_000_000_000; +// pub const NS_TO_S_FACTOR: u64 = 1_000_000_000; // pub struct CacheWriteOptions<'a> { // pub max_age_ns: CacheDurationNs, @@ -112,7 +112,8 @@ impl FastlyCache for Session { for candidate_handle in candidates { if let Some(candidate_entry) = entry_lock.get(*candidate_handle) { if candidate_entry.vary_matches(&req_parts.headers) { - return Ok(dbg!(*candidate_handle)); + event!(Level::TRACE, "Found matching cache entry"); + return Ok(*candidate_handle); } } } @@ -166,15 +167,12 @@ impl FastlyCache for Session { let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); // Cache write must contain max-age. - let max_age_s = options.max_age_ns / NS_TO_S_FACTOR; + let max_age_ns = options.max_age_ns; // Swr might not be set, check bitmask for. Else we'd always get Some(0). - let swr_s = - if options_mask.contains(types::CacheWriteOptionsMask::STALE_WHILE_REVALIDATE_NS) { - Some(options.stale_while_revalidate_ns / NS_TO_S_FACTOR) - } else { - None - }; + let swr_ns = options_mask + .contains(types::CacheWriteOptionsMask::STALE_WHILE_REVALIDATE_NS) + .then(|| options.stale_while_revalidate_ns); let surrogate_keys = if options_mask.contains(types::CacheWriteOptionsMask::SURROGATE_KEYS) { @@ -267,8 +265,8 @@ impl FastlyCache for Session { body_handle, vary, initial_age_ns, - max_age: Some(max_age_s), - swr: swr_s, + max_age_ns: Some(max_age_ns), + swr_ns: swr_ns, created_at: Instant::now(), user_metadata, }; @@ -385,7 +383,7 @@ impl FastlyCache for Session { // options_mask: types::CacheWriteOptionsMask, // options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, - self.insert(); + // self.insert(); todo!() } @@ -428,35 +426,25 @@ impl FastlyCache for Session { if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { // Entry found. let mut state = types::CacheLookupState::FOUND; - let mut ttl = 0; - let age = entry.age_ns() / NS_TO_S_FACTOR; - - // Compute total ttl - if let Some(max_age) = entry.max_age { - ttl += max_age; - }; - if let Some(swr) = entry.swr { - ttl += swr; - }; - - // Compute staleness. - match (entry.max_age, entry.swr) { - (Some(max_age), Some(swr)) if age > max_age && age < ttl => { - state |= types::CacheLookupState::STALE - } - _ => (), - }; + if entry.is_stale() { + state |= types::CacheLookupState::STALE + } else { + // If stale, entry must be updated. + state |= types::CacheLookupState::MUST_INSERT_OR_UPDATE + } - // Compute if usable. - if age < ttl { - // Entry is usable as max-age + swr define the period an entry is usable. + if entry.is_usable() { state |= types::CacheLookupState::USABLE; + } else { + // If not usable, caller must insert / refresh the cache entry. + state |= types::CacheLookupState::MUST_INSERT_OR_UPDATE } - Ok(dbg!(state)) + Ok(state) } else { - Ok(dbg!(types::CacheLookupState::MUST_INSERT_OR_UPDATE)) + // Entry not found, entry must be inserted. 
+ Ok(types::CacheLookupState::MUST_INSERT_OR_UPDATE) } } @@ -467,8 +455,6 @@ impl FastlyCache for Session { user_metadata_out_len: u32, // TODO: Is this the maximum allowed length? nwritten_out: &wiggle::GuestPtr<'a, u32>, ) -> Result<(), Error> { - event!(Level::ERROR, "GET meta {handle}"); - dbg!("GET meta {handle}"); if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { if entry.user_metadata.len() > user_metadata_out_len as usize { nwritten_out.write(entry.user_metadata.len().try_into().unwrap_or(0))?; @@ -488,6 +474,7 @@ impl FastlyCache for Session { metadata_out.copy_from_slice(&entry.user_metadata); nwritten_out.write(user_metadata_len)?; + Ok(()) } else { Err(HandleError::InvalidCacheHandle(handle).into()) @@ -500,12 +487,7 @@ impl FastlyCache for Session { options_mask: types::CacheGetBodyOptionsMask, options: &types::CacheGetBodyOptions, ) -> Result { - dbg!("Cache GET BODY handle {handle}"); if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { - // We know that the body will be read and consumed, so we make a safety backup - // let body_ref = self.body(entry.body_handle)?; - // body_ref.read(); - Ok(entry.body_handle) } else { Err(HandleError::InvalidCacheHandle(handle).into()) @@ -516,10 +498,9 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - event!(Level::ERROR, "GET KEN {handle}"); - dbg!("GET KEN {handle}"); + event!(Level::ERROR, "Cache entry length get not implemented."); Err(Error::Unsupported { - msg: "Cache API primitives not yet supported", + msg: "Cache entry length get not implemented.", }) } @@ -527,15 +508,8 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - event!(Level::ERROR, "GET maxage {handle}"); - dbg!("GET maxage {handle}"); if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { - Ok(types::CacheDurationNs::from( - entry - .max_age - .map(|max_age| max_age * 1_000_000_000) - .unwrap_or(0), - )) + Ok(types::CacheDurationNs::from(entry.max_age_ns.unwrap_or(0))) } else { Err(HandleError::InvalidCacheHandle(handle).into()) } @@ -546,9 +520,7 @@ impl FastlyCache for Session { handle: types::CacheHandle, ) -> Result { if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { - Ok(types::CacheDurationNs::from( - entry.swr.map(|swr| swr * 1_000_000_000).unwrap_or(0), - )) + Ok(types::CacheDurationNs::from(entry.swr_ns.unwrap_or(0))) } else { Err(HandleError::InvalidCacheHandle(handle).into()) } @@ -563,8 +535,9 @@ impl FastlyCache for Session { } fn get_hits(&mut self, handle: types::CacheHandle) -> Result { + event!(Level::ERROR, "Cache entry get_hits not implemented."); Err(Error::Unsupported { - msg: "get_hits is not implemented", + msg: "Cache entry get_hits not implemented.", }) } } From ca742af4c6935ab0cda2a601c6305871c1f41361 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Fri, 8 Mar 2024 13:57:21 +0100 Subject: [PATCH 12/20] Transaction stuff --- lib/src/cache_state.rs | 360 +++++++++++++++++++++++++++----- lib/src/wiggle_abi/body_impl.rs | 18 +- lib/src/wiggle_abi/cache.rs | 251 ++++------------------ 3 files changed, 359 insertions(+), 270 deletions(-) diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs index fdd8fda5..e8c534d0 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/cache_state.rs @@ -1,12 +1,13 @@ -use crate::{body::Body, wiggle_abi::types}; +use crate::{body::Body, wiggle_abi::types, Error}; use cranelift_entity::PrimaryMap; -use 
http::HeaderMap; +use http::{request::Parts, HeaderMap}; use std::{ - collections::{BTreeMap, HashMap}, + collections::{btree_map::Entry, BTreeMap, HashMap}, fmt::Display, sync::{Arc, RwLock}, time::Instant, }; +use tracing::{event, Level}; /// Handle used for a non-existing cache entry. /// TODO: Is this needed? Seems to work without checking it. @@ -16,6 +17,27 @@ pub fn not_found_handle() -> types::CacheHandle { type PrimaryCacheKey = Vec; +// #[derive(Debug)] +// pub enum CacheEntry { +// PendingTxInsert(PrimaryCacheKey), +// Entry(CacheEntry), +// } + +// impl CacheEntry { +// pub fn as_entry_panicking(&self) -> &CacheEntry { +// match self { +// CacheEntry::Entry(e) => e, +// CacheEntry::PendingTxInsert(_) => panic!("Entry is not a cache entry, but pending tx"), +// } +// } +// } + +// #[derive(Clone, Debug)] +// pub enum PendingTxState { +// Insert(PrimaryCacheKey), +// Overwrite(types::CacheHandle), +// } + #[derive(Clone, Default, Debug)] pub struct CacheState { /// Cache entries, indexable by handle. @@ -32,6 +54,13 @@ pub struct CacheState { /// We change the session to be aware of `CacheState`, which will write a copy of the body when /// execution ends. On load, these bodies will be written back into the next session. pub bodies: Arc>>>, + + /// CacheHandle markers for pending transactions. Handles received for TX operataions + /// always point into this map instead of the entries. + // + // TODO this is probably a crime, since two cache handle primary maps may lead to + // fun bugs, but as we're not in need to really testing tx, we can just hack it. + pub pending_tx: Arc>>, } impl CacheState { @@ -39,6 +68,207 @@ impl CacheState { Default::default() } + // TODO: Purge + + pub fn match_entry(&self, key: &Vec, headers: &HeaderMap) -> Option { + let candidates_lock = self.key_candidates.read().unwrap(); + + candidates_lock.get(key).and_then(|candidates| { + let entry_lock = self.cache_entries.write().unwrap(); + + candidates.iter().find_map(|candidate_handle| { + entry_lock + .get(*candidate_handle) + .and_then(|candidate_entry| { + candidate_entry + .vary_matches(headers) + .then(|| *candidate_handle) + }) + }) + }) + } + + pub fn insert<'a>( + &self, + key: Vec, + options_mask: types::CacheWriteOptionsMask, + options: types::CacheWriteOptions, + request_parts: Option<&Parts>, + body_to_use: types::BodyHandle, + ) -> Result<(), Error> { + // Cache write must contain max-age. + let max_age_ns = options.max_age_ns; + + // Swr might not be set, check bitmask for. Else we'd always get Some(0). 
+ let swr_ns = options_mask + .contains(types::CacheWriteOptionsMask::STALE_WHILE_REVALIDATE_NS) + .then(|| options.stale_while_revalidate_ns); + + let surrogate_keys = if options_mask.contains(types::CacheWriteOptionsMask::SURROGATE_KEYS) + { + if options.surrogate_keys_len == 0 { + return Err(Error::InvalidArgument); + } + + let byte_slice = options + .surrogate_keys_ptr + .as_array(options.surrogate_keys_len) + .to_vec()?; + + match String::from_utf8(byte_slice) { + Ok(s) => s + .split_whitespace() + .map(ToOwned::to_owned) + .collect::>(), + + Err(_) => return Err(Error::InvalidArgument), + } + } else { + vec![] + }; + + let user_metadata = if options_mask.contains(types::CacheWriteOptionsMask::USER_METADATA) { + if options.user_metadata_len == 0 { + return Err(Error::InvalidArgument); + } + + let byte_slice = options + .user_metadata_ptr + .as_array(options.user_metadata_len) + .to_vec()?; + + byte_slice + } else { + vec![] + }; + + let vary = if options_mask.contains(types::CacheWriteOptionsMask::VARY_RULE) { + if options.vary_rule_len == 0 { + return Err(Error::InvalidArgument); + } + + let byte_slice = options + .vary_rule_ptr + .as_array(options.vary_rule_len) + .to_vec()?; + + let vary_rules = match String::from_utf8(byte_slice) { + Ok(s) => s + .split_whitespace() + .map(ToOwned::to_owned) + .collect::>(), + + Err(_) => return Err(Error::InvalidArgument), + }; + + if let Some(req_parts) = request_parts { + let mut map = BTreeMap::new(); + + // Extract necessary vary headers. + for vary in vary_rules { + // If you think this sucks... then you'd be right. Just supposed to work right now. + let value = req_parts + .headers + .get(&vary) + .map(|h| h.to_str().unwrap().to_string()); + + map.insert(vary, value); + } + + map + } else { + // Or invalid argument? + BTreeMap::new() + } + } else { + BTreeMap::new() + }; + + let initial_age_ns = options_mask + .contains(types::CacheWriteOptionsMask::INITIAL_AGE_NS) + .then(|| options.initial_age_ns); + + let mut entry = CacheEntry { + key: key.clone(), + body_handle: body_to_use, + vary, + initial_age_ns, + max_age_ns: Some(max_age_ns), + swr_ns: swr_ns, + created_at: Instant::now(), + user_metadata, + }; + + // Check for and perform overwrites. + let mut candidates_lock = self.key_candidates.write().unwrap(); + let candidates = candidates_lock.get_mut(&key); + let headers = request_parts.map(|p| &p.headers); + + let (entry_handle, overwrite) = candidates + .and_then(|candidates| { + let entry_lock = self.cache_entries.write().unwrap(); + + candidates.iter_mut().find_map(|candidate_handle| { + entry_lock + .get(*candidate_handle) + .and_then(|mut candidate_entry| { + candidate_entry + .vary_matches(&headers.unwrap_or(&HeaderMap::new())) + .then(|| { + event!( + Level::TRACE, + "Overwriting cache entry {}", + candidate_handle + ); + + let _ = std::mem::replace(&mut candidate_entry, &mut entry); + (*candidate_handle, true) + }) + }) + }) + }) + .unwrap_or_else(|| { + // Write new entry. + let entry_handle = self.cache_entries.write().unwrap().push(entry); + event!( + Level::TRACE, + "Wrote new cache entry {} with body handle {}", + entry_handle, + body_to_use + ); + (entry_handle, false) + }); + + drop(candidates_lock); + + if !overwrite { + // Write handle key candidate mapping. 
+ match self.key_candidates.write().unwrap().entry(key) { + Entry::Vacant(vacant) => { + vacant.insert(vec![entry_handle]); + } + Entry::Occupied(mut occupied) => { + occupied.get_mut().push(entry_handle); + } + } + } + + // Write surrogates (we don't really need to care about the overwrite case here for now). + let mut surrogates_write_lock = self.surrogates_to_handles.write().unwrap(); + for surrogate_key in surrogate_keys { + match surrogates_write_lock.entry(surrogate_key) { + Entry::Vacant(vacant) => { + vacant.insert(vec![entry_handle]); + } + Entry::Occupied(mut occupied) => { + occupied.get_mut().push(entry_handle); + } + }; + } + + Ok(()) + } + // This handling is necessary due to incredibly painful body handling. pub async fn format_pretty(&self) -> String { let formatted_bodies = self.format_bodies().await; @@ -165,62 +395,90 @@ struct CacheStateFormatter<'state> { impl<'state> Display for CacheStateFormatter<'state> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - fn indent(level: usize) -> String { - " ".repeat(level) - } - writeln!(f, "Cache State:")?; - writeln!(f, "{}Entries:", indent(1))?; + writeln!(f, "{}Entries:", Self::indent(1))?; for (handle, entry) in self.state.cache_entries.read().unwrap().iter() { - writeln!( - f, - "{}[{}]: {}", - indent(2), - handle, - entry - .key - .iter() - .map(|x| format!("{x:X}")) - .collect::>() - .join("") - )?; - - writeln!(f, "{}TTLs (sec):", indent(3),)?; - writeln!(f, "{}Age: {}", indent(4), entry.age_ns() / 1_000_000_000)?; - writeln!( - f, - "{}Inital age: {:?}", - indent(4), - entry.initial_age_ns.map(|ia| ia / 1_000_000_000) - )?; - writeln!(f, "{}Max-age: {:?}", indent(4), entry.max_age)?; - writeln!(f, "{}Swr: {:?}", indent(4), entry.swr)?; - - writeln!(f, "{}Vary:", indent(3))?; - for (key, value) in entry.vary.iter() { - writeln!(f, "{}{key}: {:?}", indent(4), value)?; - } + self.fmt_entry(f, handle, entry)?; + } - writeln!(f, "{}User Metadata:", indent(3))?; - writeln!( - f, - "{}{}", - indent(4), - std::str::from_utf8(&entry.user_metadata).unwrap_or("Invalid UITF-8") - )?; + Ok(()) + } +} - writeln!(f, "{}Body:", indent(3))?; +pub const NS_TO_S_FACTOR: u64 = 1_000_000_000; - let body = self - .formatted_bodies - .get(&entry.body_handle) - .map(|s| s.as_str()) - .unwrap_or(""); +impl<'state> CacheStateFormatter<'state> { + fn indent(level: usize) -> String { + " ".repeat(level) + } - writeln!(f, "{}{}", indent(4), body)?; + fn fmt_entry( + &self, + f: &mut std::fmt::Formatter<'_>, + handle: types::CacheHandle, + entry: &CacheEntry, + ) -> std::fmt::Result { + writeln!( + f, + "{}[{}]: {}", + Self::indent(2), + handle, + entry + .key + .iter() + .map(|x| format!("{x:X}")) + .collect::>() + .join("") + )?; + + writeln!(f, "{}TTLs (in seconds):", Self::indent(3),)?; + writeln!( + f, + "{}Age: {}", + Self::indent(4), + entry.age_ns() / NS_TO_S_FACTOR + )?; + writeln!( + f, + "{}Inital age: {:?}", + Self::indent(4), + entry.initial_age_ns.map(|ia| ia / NS_TO_S_FACTOR) + )?; + writeln!( + f, + "{}Max-age: {:?}", + Self::indent(4), + entry.max_age_ns.map(|x| x / NS_TO_S_FACTOR) + )?; + writeln!( + f, + "{}Swr: {:?}", + Self::indent(4), + entry.swr_ns.map(|x| x / NS_TO_S_FACTOR) + )?; + + writeln!(f, "{}Vary:", Self::indent(3))?; + for (key, value) in entry.vary.iter() { + writeln!(f, "{}{key}: {:?}", Self::indent(4), value)?; } - Ok(()) + writeln!(f, "{}User Metadata:", Self::indent(3))?; + writeln!( + f, + "{}{}", + Self::indent(4), + std::str::from_utf8(&entry.user_metadata).unwrap_or("Invalid UITF-8") + 
)?; + + writeln!(f, "{}Body:", Self::indent(3))?; + + let body = self + .formatted_bodies + .get(&entry.body_handle) + .map(|s| s.as_str()) + .unwrap_or(""); + + writeln!(f, "{}{}", Self::indent(4), body) } } diff --git a/lib/src/wiggle_abi/body_impl.rs b/lib/src/wiggle_abi/body_impl.rs index b2d167bc..ee771b76 100644 --- a/lib/src/wiggle_abi/body_impl.rs +++ b/lib/src/wiggle_abi/body_impl.rs @@ -1,6 +1,7 @@ //! fastly_body` hostcall implementations. use http::{HeaderName, HeaderValue}; +use tracing::{event, Level}; use crate::wiggle_abi::headers::HttpHeaders; @@ -85,8 +86,6 @@ impl FastlyHttpBody for Session { buf: &GuestPtr<'a, [u8]>, end: BodyWriteEnd, ) -> Result { - dbg!("Writing to body", body_handle); - // Validate the body handle and the buffer. let buf = &buf.as_slice()?.ok_or(Error::SharedMemory)?[..]; @@ -114,8 +113,6 @@ impl FastlyHttpBody for Session { } fn close(&mut self, body_handle: BodyHandle) -> Result<(), Error> { - dbg!("Closing", body_handle); - // TODO Giga haxx ahead let cache_body_handle = self .cache_state @@ -123,7 +120,7 @@ impl FastlyHttpBody for Session { .read() .unwrap() .iter() - .find_map(|(_, e)| (dbg!(e.body_handle) == body_handle).then(|| body_handle.clone())); + .find_map(|(_, e)| (e.body_handle == body_handle).then(|| body_handle.clone())); // Drop the body and pass up an error if the handle does not exist if self.is_streaming_body(body_handle) { @@ -137,7 +134,11 @@ impl FastlyHttpBody for Session { self.take_streaming_body(body_handle)?.finish() } else { if let Some(cache_body_handle) = dbg!(cache_body_handle) { - dbg!("Preserving cache body for", cache_body_handle); + event!( + Level::TRACE, + "Preserving cache body for {}", + cache_body_handle + ); let body = self.take_body(body_handle).ok(); self.cache_state @@ -163,7 +164,10 @@ impl FastlyHttpBody for Session { .find_map(|(_, e)| (e.body_handle == body_handle).then(|| body_handle.clone())); if entry_handle.is_some() { - panic!("Abandoning cache body handle not... handled (handle {body_handle}).") + event!( + Level::ERROR, + "Abandoning cache body handle is not yet handled. Handle: {body_handle}." + ) } // Drop the body without a `finish` message diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index 180e3081..b30dc4d2 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -3,20 +3,9 @@ use super::{ types::{self}, Error, }; -use crate::{ - body::Body, - cache_state::{not_found_handle, CacheEntry}, - error::HandleError, - session::Session, -}; -use std::{ - collections::{btree_map::Entry, BTreeMap}, - time::Instant, -}; +use crate::{body::Body, cache_state::not_found_handle, error::HandleError, session::Session}; use tracing::{event, Level}; -// pub const NS_TO_S_FACTOR: u64 = 1_000_000_000; - // pub struct CacheWriteOptions<'a> { // pub max_age_ns: CacheDurationNs, // pub request_headers: RequestHandle, @@ -95,9 +84,6 @@ impl FastlyCache for Session { options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { let primary_key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); - - // Q: I guess the bitmask indicates if this is a safe operation? - // -> Doesn't matter for us anyways, we're just using headers all the time afaict. 
let options: types::CacheLookupOptions = options.read().unwrap(); let req_parts = self.request_parts(options.request_headers)?; @@ -162,197 +148,21 @@ impl FastlyCache for Session { options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { // TODO: Skipped over all the sanity checks usually done by similar code (see `req_impl`). - let options: types::CacheWriteOptions = options.read().unwrap(); let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); - - // Cache write must contain max-age. - let max_age_ns = options.max_age_ns; - - // Swr might not be set, check bitmask for. Else we'd always get Some(0). - let swr_ns = options_mask - .contains(types::CacheWriteOptionsMask::STALE_WHILE_REVALIDATE_NS) - .then(|| options.stale_while_revalidate_ns); - - let surrogate_keys = if options_mask.contains(types::CacheWriteOptionsMask::SURROGATE_KEYS) - { - if options.surrogate_keys_len == 0 { - return Err(Error::InvalidArgument); - } - - let byte_slice = options - .surrogate_keys_ptr - .as_array(options.surrogate_keys_len) - .to_vec()?; - - match String::from_utf8(byte_slice) { - Ok(s) => s - .split_whitespace() - .map(ToOwned::to_owned) - .collect::>(), - - Err(_) => return Err(Error::InvalidArgument), - } - } else { - vec![] - }; - - let user_metadata = if options_mask.contains(types::CacheWriteOptionsMask::USER_METADATA) { - if options.user_metadata_len == 0 { - return Err(Error::InvalidArgument); - } - - let byte_slice = options - .user_metadata_ptr - .as_array(options.user_metadata_len) - .to_vec()?; - - byte_slice - } else { - vec![] - }; - - let vary = if options_mask.contains(types::CacheWriteOptionsMask::VARY_RULE) { - if options.vary_rule_len == 0 { - return Err(Error::InvalidArgument); - } - - let byte_slice = options - .vary_rule_ptr - .as_array(options.vary_rule_len) - .to_vec()?; - - let vary_rules = match String::from_utf8(byte_slice) { - Ok(s) => s - .split_whitespace() - .map(ToOwned::to_owned) - .collect::>(), - - Err(_) => return Err(Error::InvalidArgument), - }; - - if options_mask.contains(types::CacheWriteOptionsMask::REQUEST_HEADERS) { - let req_parts = self.request_parts(options.request_headers)?; - let mut map = BTreeMap::new(); - - // Extract necessary vary headers. - for vary in vary_rules { - // If you think this sucks... then you'd be right. Just supposed to work right now. - let value = req_parts - .headers - .get(&vary) - .map(|h| h.to_str().unwrap().to_string()); - - map.insert(vary, value); - } - - map - } else { - // Or invalid argument? - BTreeMap::new() - } + let body_to_use = self.insert_body(Body::empty()); + let parts = if options_mask.contains(types::CacheWriteOptionsMask::REQUEST_HEADERS) { + Some(self.request_parts(options.request_headers)?) 
} else { - BTreeMap::new() - }; - - let initial_age_ns = options_mask - .contains(types::CacheWriteOptionsMask::INITIAL_AGE_NS) - .then(|| options.initial_age_ns); - - let body_handle = self.insert_body(Body::empty()); - let mut entry = CacheEntry { - key: key.clone(), - body_handle, - vary, - initial_age_ns, - max_age_ns: Some(max_age_ns), - swr_ns: swr_ns, - created_at: Instant::now(), - user_metadata, + None }; - // Check for overwrites - let req_parts = self.request_parts(options.request_headers)?; - let mut candidates_lock = self.cache_state.key_candidates.write().unwrap(); - let candidates = candidates_lock.get_mut(&key); - - let (entry_handle, overwrite) = candidates - .and_then(|candidates| { - let entry_lock = self.cache_state.cache_entries.write().unwrap(); - - candidates.iter_mut().find_map(|candidate_handle| { - entry_lock - .get(*candidate_handle) - .and_then(|mut candidate_entry| { - candidate_entry.vary_matches(&req_parts.headers).then(|| { - event!( - Level::TRACE, - "Overwriting cache entry {}", - candidate_handle - ); - - let _ = std::mem::replace(&mut candidate_entry, &mut entry); - (*candidate_handle, true) - }) - }) - }) - }) - .unwrap_or_else(|| { - // Write new entry. - let entry_handle = self.cache_state.cache_entries.write().unwrap().push(entry); - event!( - Level::TRACE, - "Wrote new cache entry {} with body handle {}", - entry_handle, - body_handle - ); - (entry_handle, false) - }); - - drop(candidates_lock); - - if !overwrite { - // Write handle key candidate mapping. - match self.cache_state.key_candidates.write().unwrap().entry(key) { - Entry::Vacant(vacant) => { - vacant.insert(vec![entry_handle]); - } - Entry::Occupied(mut occupied) => { - occupied.get_mut().push(entry_handle); - } - } - } - - // Write surrogates (we don't really need to care about the overwrite case here for now). - let mut surrogates_write_lock = self.cache_state.surrogates_to_handles.write().unwrap(); - for surrogate_key in surrogate_keys { - match surrogates_write_lock.entry(surrogate_key) { - Entry::Vacant(vacant) => { - vacant.insert(vec![entry_handle]); - } - Entry::Occupied(mut occupied) => { - occupied.get_mut().push(entry_handle); - } - }; - } + self.cache_state + .insert(key, options_mask, options, parts, body_to_use)?; - Ok(body_handle) + Ok(body_to_use) } - // pub struct CacheLookupOptionsMask( - // ::Internal, - // ); - // impl CacheLookupOptionsMask { - // #[allow(deprecated, non_upper_case_globals)] - // pub const RESERVED: Self = Self::from_bits_retain(1); - // #[allow(deprecated, non_upper_case_globals)] - // pub const REQUEST_HEADERS: Self = Self::from_bits_retain(2); - // } - - // pub struct CacheLookupOptions { - // pub request_headers: RequestHandle, - // } - /// Stub delegating to regular lookup. fn transaction_lookup<'a>( &mut self, @@ -360,13 +170,10 @@ impl FastlyCache for Session { options_mask: types::CacheLookupOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { - let cache_handle = if self.lookup(cache_key, options_mask, options)? != not_found_handle() { - todo!() - } else { - todo!() - }; - - todo!() + // TODO: This is a plain hack, not a working implementation: Parallel tx etc, are simply not required. + // -> This will fall apart immediately if tx are used as actual transactions. + let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); + Ok(self.cache_state.pending_tx.write().unwrap().push(key)) } /// Stub delegating to regular insert. 
@@ -376,15 +183,35 @@ impl FastlyCache for Session { options_mask: types::CacheWriteOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { - // Ok(dbg!(self.insert_body(Body::empty()))) + let key = self + .cache_state + .pending_tx + .read() + .unwrap() + .get(handle) + .map(ToOwned::to_owned); + + if let Some(pending_tx_key) = key { + let options: types::CacheWriteOptions = options.read().unwrap(); + let body_to_use = self.insert_body(Body::empty()); + let parts = if options_mask.contains(types::CacheWriteOptionsMask::REQUEST_HEADERS) { + Some(self.request_parts(options.request_headers)?) + } else { + None + }; - // &mut self, - // cache_key: &wiggle::GuestPtr<'a, [u8]>, - // options_mask: types::CacheWriteOptionsMask, - // options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, + self.cache_state.insert( + pending_tx_key.to_owned(), + options_mask, + options, + parts, + body_to_use, + )?; - // self.insert(); - todo!() + Ok(body_to_use) + } else { + Err(HandleError::InvalidCacheHandle(handle).into()) + } } fn transaction_insert_and_stream_back<'a>( From 8f2feaee071d23e075e437a1940fb125944a7e74 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Fri, 8 Mar 2024 23:34:17 +0100 Subject: [PATCH 13/20] Dramatically reduce haxx-o-meter by reimplementing cache body handling. --- lib/src/cache_state.rs | 99 +++++++----------------------- lib/src/session.rs | 103 +++++++++----------------------- lib/src/session/async_item.rs | 25 ++++++-- lib/src/wiggle_abi/body_impl.rs | 81 +++++++++---------------- lib/src/wiggle_abi/cache.rs | 67 ++++++--------------- 5 files changed, 116 insertions(+), 259 deletions(-) diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs index e8c534d0..8a950d65 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/cache_state.rs @@ -17,35 +17,17 @@ pub fn not_found_handle() -> types::CacheHandle { type PrimaryCacheKey = Vec; -// #[derive(Debug)] -// pub enum CacheEntry { -// PendingTxInsert(PrimaryCacheKey), -// Entry(CacheEntry), -// } - -// impl CacheEntry { -// pub fn as_entry_panicking(&self) -> &CacheEntry { -// match self { -// CacheEntry::Entry(e) => e, -// CacheEntry::PendingTxInsert(_) => panic!("Entry is not a cache entry, but pending tx"), -// } -// } -// } - -// #[derive(Clone, Debug)] -// pub enum PendingTxState { -// Insert(PrimaryCacheKey), -// Overwrite(types::CacheHandle), -// } - #[derive(Clone, Default, Debug)] pub struct CacheState { /// Cache entries, indexable by handle. + // TODO: Maybe an option for the entry to signify a deleted one, since the primarymap isn't + // really doing that? pub cache_entries: Arc>>, /// Primary cache key to a list of variants. pub key_candidates: Arc>>>, + /// Surrogates to cache entry mapping to support simplistic purging. pub surrogates_to_handles: Arc>>>, /// The way cache bodies are handled makes it super hard to move cache state between @@ -70,6 +52,7 @@ impl CacheState { // TODO: Purge + // Todo: Use in insert / lookup. pub fn match_entry(&self, key: &Vec, headers: &HeaderMap) -> Option { let candidates_lock = self.key_candidates.read().unwrap(); @@ -94,8 +77,8 @@ impl CacheState { options_mask: types::CacheWriteOptionsMask, options: types::CacheWriteOptions, request_parts: Option<&Parts>, - body_to_use: types::BodyHandle, - ) -> Result<(), Error> { + // body_to_use: types::BodyHandle, + ) -> Result { // Cache write must contain max-age. 
let max_age_ns = options.max_age_ns; @@ -190,7 +173,7 @@ impl CacheState { let mut entry = CacheEntry { key: key.clone(), - body_handle: body_to_use, + body_bytes: vec![], vary, initial_age_ns, max_age_ns: Some(max_age_ns), @@ -230,12 +213,7 @@ impl CacheState { .unwrap_or_else(|| { // Write new entry. let entry_handle = self.cache_entries.write().unwrap().push(entry); - event!( - Level::TRACE, - "Wrote new cache entry {} with body handle {}", - entry_handle, - body_to_use - ); + event!(Level::TRACE, "Wrote new cache entry {}", entry_handle,); (entry_handle, false) }); @@ -266,47 +244,14 @@ impl CacheState { }; } - Ok(()) + Ok(entry_handle) } // This handling is necessary due to incredibly painful body handling. pub async fn format_pretty(&self) -> String { - let formatted_bodies = self.format_bodies().await; - let formatter = CacheStateFormatter { - state: self, - formatted_bodies, - }; - + let formatter = CacheStateFormatter { state: self }; formatter.to_string() } - - async fn format_bodies(&self) -> HashMap { - let mut formatted_bodies = HashMap::new(); - let mut new_bodies = HashMap::new(); - - // We need to remove all bodies once, read them into byte vectors in order to format them, - // then recreate the bodies and write them back into the map. Excellent. - let bodies = self.bodies.write().unwrap().drain().collect::>(); - for (key, body) in bodies { - if let Some(body) = body { - let formatted = match body.read_into_vec().await { - Ok(bytes) => { - new_bodies.insert(key, Some(Body::from(bytes.clone()))); - String::from_utf8(bytes).unwrap_or("Invalid UTF-8".to_owned()) - } - Err(err) => format!("Invalid body: {err}"), - }; - - formatted_bodies.insert(key, formatted); - } else { - new_bodies.insert(key, None); - } - } - - // Write back the bodies. - self.bodies.write().unwrap().extend(new_bodies); - formatted_bodies - } } #[derive(Debug)] @@ -315,9 +260,11 @@ pub struct CacheEntry { /// Here for convenience. pub key: Vec, - /// The cached bytes. - pub body_handle: types::BodyHandle, + /// The raw bytes of the cached entry. + pub body_bytes: Vec, + // The cached bytes. + // pub body_handle: types::BodyHandle, /// Vary used to create this entry. pub vary: BTreeMap>, @@ -360,14 +307,14 @@ impl CacheEntry { let total_ttl = self.total_ttl_ns(); match (self.max_age_ns, self.swr_ns) { - (Some(max_age), Some(swr)) => age > max_age && age < total_ttl, + (Some(max_age), Some(_)) => age > max_age && age < total_ttl, _ => false, } } /// Usable: Age is smaller than max-age + swr. pub fn is_usable(&self) -> bool { - let mut total_ttl = self.total_ttl_ns(); + let total_ttl = self.total_ttl_ns(); let age = self.age_ns(); age < total_ttl @@ -390,7 +337,6 @@ impl CacheEntry { struct CacheStateFormatter<'state> { state: &'state CacheState, - formatted_bodies: HashMap, } impl<'state> Display for CacheStateFormatter<'state> { @@ -472,13 +418,12 @@ impl<'state> CacheStateFormatter<'state> { )?; writeln!(f, "{}Body:", Self::indent(3))?; + writeln!( + f, + "{}", + std::str::from_utf8(&entry.body_bytes).unwrap_or("") + )?; - let body = self - .formatted_bodies - .get(&entry.body_handle) - .map(|s| s.as_str()) - .unwrap_or(""); - - writeln!(f, "{}{}", Self::indent(4), body) + Ok(()) } } diff --git a/lib/src/session.rs b/lib/src/session.rs index 64363214..78d76698 100644 --- a/lib/src/session.rs +++ b/lib/src/session.rs @@ -133,47 +133,6 @@ pub struct Session { pub(crate) cache_state: Arc, } -impl Drop for Session { - fn drop(&mut self) { - // All body handles needed by cache entries. 
- let body_handles = self - .cache_state - .cache_entries - .write() - .unwrap() - .iter() - .map(|e| e.1.body_handle) - .collect::>(); - - dbg!("Body handles required by entries", &body_handles); - - // Handles already preserved (intercepted via .close()). - let existing_body_handles = self - .cache_state - .bodies - .read() - .unwrap() - .iter() - .map(|(k, _)| *k) - .collect::>(); - - let mut body_handles_to_bodies = HashMap::new(); - - for handle in body_handles { - if !existing_body_handles.contains(&handle) { - dbg!("[Session Drop] Preserving handle", handle); - body_handles_to_bodies.insert(handle, dbg!(self.take_body(handle).ok())); - } - } - - self.cache_state - .bodies - .write() - .unwrap() - .extend(body_handles_to_bodies); - } -} - impl Session { /// Create an empty session. #[allow(clippy::too_many_arguments)] @@ -231,7 +190,6 @@ impl Session { dynamic_backend_registrar, cache_state, } - .load_caches() .write_endpoints(endpoints) } @@ -246,40 +204,6 @@ impl Session { self } - // Restores cache state from the given cache state. The only thing required right now is to re-process the bodies. - fn load_caches(mut self) -> Self { - // First, extract the handle to body mapping. - let bodies = self - .cache_state - .bodies - .write() - .unwrap() - .drain() - .collect::>(); - - // Re-insert the bodies into the session. This requires replacing the handles used in the cache entries, unfortunately. - for (handle, body) in bodies { - let body = body.unwrap_or_default(); - let new_handle = self.insert_body(body); - dbg!(format!("Reloading handle {} as {}", handle, new_handle)); - - for (entry_handle, entry) in self.cache_state.cache_entries.write().unwrap().iter_mut() - { - if entry.body_handle == handle { - dbg!(format!( - "Entry {} now points to {}", - entry_handle, new_handle - )); - entry.body_handle = new_handle; - dbg!(entry.body_handle); - break; - } - } - } - - self - } - // ----- Downstream Request API ----- /// Retrieve the downstream client IP address associated with this session. @@ -342,6 +266,13 @@ impl Session { dbg!(self.async_items.push(Some(AsyncItem::Body(body))).into()) } + pub fn insert_cache_body(&mut self, cache_handle: types::CacheHandle) -> BodyHandle { + dbg!(self + .async_items + .push(Some(AsyncItem::CachingBody(cache_handle))) + .into()) + } + /// Get a reference to a [`Body`][body], given its [`BodyHandle`][handle]. /// /// Returns a [`HandleError`][err] if the handle is not associated with a body in the session. @@ -430,6 +361,26 @@ impl Session { } } + pub fn is_caching_body(&self, handle: BodyHandle) -> bool { + if let Some(Some(body)) = self.async_items.get(handle.into()) { + body.is_caching() + } else { + false + } + } + + pub fn caching_body_handle( + &mut self, + handle: BodyHandle, + ) -> Result { + dbg!("Getting body mut", handle); + self.async_items + .get_mut(handle.into()) + .and_then(|item| item.as_ref().map(|item| item.resolve_cache_handle())) + .flatten() + .ok_or(HandleError::InvalidBodyHandle(handle)) + } + /// Get a mutable reference to the streaming body `Sender`, if and only if the provided /// `BodyHandle` is the downstream body being sent. 
/// diff --git a/lib/src/session/async_item.rs b/lib/src/session/async_item.rs index c6600049..92e19e51 100644 --- a/lib/src/session/async_item.rs +++ b/lib/src/session/async_item.rs @@ -1,8 +1,9 @@ -use crate::object_store::ObjectStoreError; -use crate::{body::Body, error::Error, streaming_body::StreamingBody}; +use crate::{ + body::Body, error::Error, object_store::ObjectStoreError, streaming_body::StreamingBody, + wiggle_abi::types, +}; use anyhow::anyhow; -use futures::Future; -use futures::FutureExt; +use futures::{Future, FutureExt}; use http::Response; use tokio::sync::oneshot; @@ -45,6 +46,7 @@ impl PendingKvDeleteTask { /// body (writeable only). It is used within the body handle map in `Session`. #[derive(Debug)] pub enum AsyncItem { + CachingBody(types::CacheHandle), Body(Body), StreamingBody(StreamingBody), PendingReq(PeekableTask>), @@ -58,9 +60,21 @@ impl AsyncItem { matches!(self, Self::StreamingBody(_)) } + pub fn is_caching(&self) -> bool { + matches!(self, Self::CachingBody(_)) + } + + pub fn resolve_cache_handle(&self) -> Option { + match self { + Self::CachingBody(handle) => Some(*handle), + _ => None, + } + } + pub fn as_body(&self) -> Option<&Body> { match self { Self::Body(body) => Some(body), + Self::CachingBody(_) => panic!("as_body called"), _ => None, } } @@ -68,6 +82,7 @@ impl AsyncItem { pub fn as_body_mut(&mut self) -> Option<&mut Body> { match self { Self::Body(body) => Some(body), + Self::CachingBody(_) => panic!("as_body_mut called"), _ => None, } } @@ -75,6 +90,7 @@ impl AsyncItem { pub fn into_body(self) -> Option { match self { Self::Body(body) => Some(body), + Self::CachingBody(_) => panic!("into_body called"), _ => None, } } @@ -174,6 +190,7 @@ impl AsyncItem { match self { Self::StreamingBody(body) => body.await_ready().await, Self::Body(body) => body.await_ready().await, + Self::CachingBody(_) => (), Self::PendingReq(req) => req.await_ready().await, Self::PendingKvLookup(req) => req.0.await_ready().await, Self::PendingKvInsert(req) => req.0.await_ready().await, diff --git a/lib/src/wiggle_abi/body_impl.rs b/lib/src/wiggle_abi/body_impl.rs index ee771b76..3e10092b 100644 --- a/lib/src/wiggle_abi/body_impl.rs +++ b/lib/src/wiggle_abi/body_impl.rs @@ -1,10 +1,8 @@ //! fastly_body` hostcall implementations. +use crate::wiggle_abi::headers::HttpHeaders; use http::{HeaderName, HeaderValue}; use tracing::{event, Level}; - -use crate::wiggle_abi::headers::HttpHeaders; - use { crate::{ body::Body, @@ -23,8 +21,6 @@ use { #[wiggle::async_trait] impl FastlyHttpBody for Session { async fn append(&mut self, dest: BodyHandle, src: BodyHandle) -> Result<(), Error> { - dbg!("Appending to body from other body", dest, src); - // Take the `src` body out of the session, and get a mutable reference // to the `dest` body we will append to. let mut src = self.take_body(src)?; @@ -36,11 +32,23 @@ impl FastlyHttpBody for Session { dest.send_chunk(chunk).await?; } dest.trailers.extend(trailers); + } else if self.is_caching_body(dest) { + // If we're writing to a caching body, we directly append to the buffer. 
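            // (Sketch only: `append` here and `write` further down repeat the same lock-and-extend
            // dance on the cache entry, so a small helper could factor it out. The helper name and
            // its placement are illustrative, assuming `CacheState` and `types` are in scope.)
            fn extend_cached_body(state: &CacheState, handle: types::CacheHandle, bytes: &[u8]) {
                if let Some(entry) = state.cache_entries.write().unwrap().get_mut(handle) {
                    entry.body_bytes.extend_from_slice(bytes);
                }
            }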
+ let cache_handle = self.caching_body_handle(dest).unwrap(); + let source_bytes = src.read_into_vec().await?; + + self.cache_state + .cache_entries + .write() + .unwrap() + .get_mut(cache_handle) + .map(|entry| entry.body_bytes.extend(source_bytes)); } else { let dest = self.body_mut(dest)?; dest.trailers.extend(trailers); dest.append(src); } + Ok(()) } @@ -54,6 +62,7 @@ impl FastlyHttpBody for Session { buf: &GuestPtr<'a, u8>, buf_len: u32, ) -> Result { + dbg!("READING FROM BODY"); let mut buf_slice = buf .as_array(buf_len) .as_slice_mut()? @@ -93,6 +102,10 @@ impl FastlyHttpBody for Session { match end { BodyWriteEnd::Front => { // Only normal bodies can be front-written + if self.is_caching_body(body_handle) { + event!(Level::ERROR, "Attempted pushing front to a caching body"); + } + self.body_mut(body_handle)?.push_front(buf); } BodyWriteEnd::Back => { @@ -100,11 +113,20 @@ impl FastlyHttpBody for Session { self.streaming_body_mut(body_handle)? .send_chunk(buf) .await?; + } else if self.is_caching_body(body_handle) { + let cache_handle = self.caching_body_handle(body_handle).unwrap(); + self.cache_state + .cache_entries + .write() + .unwrap() + .get_mut(cache_handle) + .map(|entry| entry.body_bytes.extend(buf)); } else { self.body_mut(body_handle)?.push_back(buf); } } } + // Finally, return the number of bytes written, which is _always_ the full buffer Ok(buf .len() @@ -113,63 +135,16 @@ impl FastlyHttpBody for Session { } fn close(&mut self, body_handle: BodyHandle) -> Result<(), Error> { - // TODO Giga haxx ahead - let cache_body_handle = self - .cache_state - .cache_entries - .read() - .unwrap() - .iter() - .find_map(|(_, e)| (e.body_handle == body_handle).then(|| body_handle.clone())); - // Drop the body and pass up an error if the handle does not exist if self.is_streaming_body(body_handle) { - if cache_body_handle.is_some() { - return Err(Error::Unsupported { - msg: "Streaming cache body not supported", - }); - } - // Make sure a streaming body gets a `finish` message self.take_streaming_body(body_handle)?.finish() } else { - if let Some(cache_body_handle) = dbg!(cache_body_handle) { - event!( - Level::TRACE, - "Preserving cache body for {}", - cache_body_handle - ); - - let body = self.take_body(body_handle).ok(); - self.cache_state - .bodies - .write() - .unwrap() - .insert(cache_body_handle, body); - - Ok(()) - } else { - Ok(self.drop_body(body_handle)?) - } + Ok(self.drop_body(body_handle)?) } } fn abandon(&mut self, body_handle: BodyHandle) -> Result<(), Error> { - let entry_handle = self - .cache_state - .cache_entries - .read() - .unwrap() - .iter() - .find_map(|(_, e)| (e.body_handle == body_handle).then(|| body_handle.clone())); - - if entry_handle.is_some() { - event!( - Level::ERROR, - "Abandoning cache body handle is not yet handled. Handle: {body_handle}." - ) - } - // Drop the body without a `finish` message Ok(self.drop_body(body_handle)?) 
} diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index b30dc4d2..4acb3c30 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -108,39 +108,6 @@ impl FastlyCache for Session { Ok(not_found_handle()) } - // pub struct CacheWriteOptions<'a> { - // pub max_age_ns: CacheDurationNs, - // pub request_headers: RequestHandle, - // pub vary_rule_ptr: wiggle::GuestPtr<'a, u8>, - // pub vary_rule_len: u32, - // pub initial_age_ns: CacheDurationNs, - // pub stale_while_revalidate_ns: CacheDurationNs, - // pub surrogate_keys_ptr: wiggle::GuestPtr<'a, u8>, - // pub surrogate_keys_len: u32, - // pub length: CacheObjectLength, - // pub user_metadata_ptr: wiggle::GuestPtr<'a, u8>, - // pub user_metadata_len: u32, - // } - // impl CacheWriteOptionsMask { - // #[allow(deprecated, non_upper_case_globals)] - // pub const RESERVED: Self = Self::from_bits_retain(1); - // #[allow(deprecated, non_upper_case_globals)] - // pub const REQUEST_HEADERS: Self = Self::from_bits_retain(2); - // #[allow(deprecated, non_upper_case_globals)] - // pub const VARY_RULE: Self = Self::from_bits_retain(4); - // #[allow(deprecated, non_upper_case_globals)] - // pub const INITIAL_AGE_NS: Self = Self::from_bits_retain(8); - // #[allow(deprecated, non_upper_case_globals)] - // pub const STALE_WHILE_REVALIDATE_NS: Self = Self::from_bits_retain(16); - // #[allow(deprecated, non_upper_case_globals)] - // pub const SURROGATE_KEYS: Self = Self::from_bits_retain(32); - // #[allow(deprecated, non_upper_case_globals)] - // pub const LENGTH: Self = Self::from_bits_retain(64); - // #[allow(deprecated, non_upper_case_globals)] - // pub const USER_METADATA: Self = Self::from_bits_retain(128); - // #[allow(deprecated, non_upper_case_globals)] - // pub const SENSITIVE_DATA: Self = Self::from_bits_retain(256); - // } fn insert<'a>( &mut self, cache_key: &wiggle::GuestPtr<'a, [u8]>, @@ -150,17 +117,14 @@ impl FastlyCache for Session { // TODO: Skipped over all the sanity checks usually done by similar code (see `req_impl`). let options: types::CacheWriteOptions = options.read().unwrap(); let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); - let body_to_use = self.insert_body(Body::empty()); let parts = if options_mask.contains(types::CacheWriteOptionsMask::REQUEST_HEADERS) { Some(self.request_parts(options.request_headers)?) } else { None }; - self.cache_state - .insert(key, options_mask, options, parts, body_to_use)?; - - Ok(body_to_use) + let cache_handle = self.cache_state.insert(key, options_mask, options, parts)?; + Ok(self.insert_cache_body(cache_handle)) } /// Stub delegating to regular lookup. @@ -193,22 +157,17 @@ impl FastlyCache for Session { if let Some(pending_tx_key) = key { let options: types::CacheWriteOptions = options.read().unwrap(); - let body_to_use = self.insert_body(Body::empty()); let parts = if options_mask.contains(types::CacheWriteOptionsMask::REQUEST_HEADERS) { Some(self.request_parts(options.request_headers)?) 
} else { None }; - self.cache_state.insert( - pending_tx_key.to_owned(), - options_mask, - options, - parts, - body_to_use, - )?; + let cache_handle = + self.cache_state + .insert(pending_tx_key.to_owned(), options_mask, options, parts)?; - Ok(body_to_use) + Ok(self.insert_cache_body(cache_handle)) } else { Err(HandleError::InvalidCacheHandle(handle).into()) } @@ -314,8 +273,18 @@ impl FastlyCache for Session { options_mask: types::CacheGetBodyOptionsMask, options: &types::CacheGetBodyOptions, ) -> Result { - if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { - Ok(entry.body_handle) + let body = self + .cache_state + .cache_entries + .read() + .unwrap() + .get(handle) + .map(|entry| entry.body_bytes.clone()); + + if let Some(body) = body { + // Re-insert a body into the session to allow subsequent reads. + let body_handle = self.insert_body(Body::from(body)); + Ok(body_handle) } else { Err(HandleError::InvalidCacheHandle(handle).into()) } From bfe57521f3b65da473890d488cb03a96b8599807 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Sat, 9 Mar 2024 19:58:47 +0100 Subject: [PATCH 14/20] Further simplify caching code and reduce hacks. Implement purging. --- lib/src/cache_state.rs | 164 ++++++++++++++++---------------- lib/src/wiggle_abi/body_impl.rs | 8 +- lib/src/wiggle_abi/cache.rs | 35 ++----- 3 files changed, 100 insertions(+), 107 deletions(-) diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs index 8a950d65..80712ccc 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/cache_state.rs @@ -1,8 +1,8 @@ -use crate::{body::Body, wiggle_abi::types, Error}; +use crate::{wiggle_abi::types, Error}; use cranelift_entity::PrimaryMap; use http::{request::Parts, HeaderMap}; use std::{ - collections::{btree_map::Entry, BTreeMap, HashMap}, + collections::{btree_map::Entry, BTreeMap, HashSet}, fmt::Display, sync::{Arc, RwLock}, time::Instant, @@ -20,23 +20,14 @@ type PrimaryCacheKey = Vec; #[derive(Clone, Default, Debug)] pub struct CacheState { /// Cache entries, indexable by handle. - // TODO: Maybe an option for the entry to signify a deleted one, since the primarymap isn't - // really doing that? - pub cache_entries: Arc>>, + /// `None` indicates a deleted entry. + pub cache_entries: Arc>>>, /// Primary cache key to a list of variants. pub key_candidates: Arc>>>, - /// Surrogates to cache entry mapping to support simplistic purging. - pub surrogates_to_handles: Arc>>>, - - /// The way cache bodies are handled makes it super hard to move cache state between - /// executions. We can't fork the SDK, so we need to abide by the interface. - /// - /// We change the session to be aware of `CacheState`, which will write a copy of the body when - /// execution ends. On load, these bodies will be written back into the next session. - pub bodies: Arc>>>, - + // Surrogates to cache entry mapping to support simplistic purging. + // pub surrogates_to_handles: Arc>>>, /// CacheHandle markers for pending transactions. Handles received for TX operataions /// always point into this map instead of the entries. // @@ -50,7 +41,25 @@ impl CacheState { Default::default() } - // TODO: Purge + pub fn purge(&self, surrogates: Vec) { + let mut cache_entries = self.cache_entries.write().unwrap(); + let surrogates_to_purge: HashSet = surrogates.into_iter().collect(); + + // Simplistic implementation: Just go over all cache entries and kick out matching ones. 
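+        // (Aside, sketch only: the membership test in the loop below could use
+        // `HashSet::is_disjoint`, which short-circuits instead of counting the whole
+        // intersection. An equivalent condition would be:
+        //
+        //     if !entry.surrogate_keys.is_disjoint(&surrogates_to_purge) {
+        //         handle_entry.take();
+        //     }
+        // )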
+ for (_handle, handle_entry) in cache_entries.iter_mut() { + if let Some(entry) = handle_entry { + if entry + .surrogate_keys + .intersection(&surrogates_to_purge) + .count() + != 0 + { + // Drop the content of the cache entry. + handle_entry.take(); + } + } + } + } // Todo: Use in insert / lookup. pub fn match_entry(&self, key: &Vec, headers: &HeaderMap) -> Option { @@ -63,9 +72,9 @@ impl CacheState { entry_lock .get(*candidate_handle) .and_then(|candidate_entry| { - candidate_entry - .vary_matches(headers) - .then(|| *candidate_handle) + candidate_entry.as_ref().and_then(|entry| { + entry.vary_matches(headers).then(|| *candidate_handle) + }) }) }) }) @@ -171,7 +180,7 @@ impl CacheState { .contains(types::CacheWriteOptionsMask::INITIAL_AGE_NS) .then(|| options.initial_age_ns); - let mut entry = CacheEntry { + let entry = CacheEntry { key: key.clone(), body_bytes: vec![], vary, @@ -180,69 +189,49 @@ impl CacheState { swr_ns: swr_ns, created_at: Instant::now(), user_metadata, + surrogate_keys: surrogate_keys.into_iter().collect(), }; - // Check for and perform overwrites. - let mut candidates_lock = self.key_candidates.write().unwrap(); - let candidates = candidates_lock.get_mut(&key); - let headers = request_parts.map(|p| &p.headers); - - let (entry_handle, overwrite) = candidates - .and_then(|candidates| { - let entry_lock = self.cache_entries.write().unwrap(); - - candidates.iter_mut().find_map(|candidate_handle| { - entry_lock - .get(*candidate_handle) - .and_then(|mut candidate_entry| { - candidate_entry - .vary_matches(&headers.unwrap_or(&HeaderMap::new())) - .then(|| { - event!( - Level::TRACE, - "Overwriting cache entry {}", - candidate_handle - ); - - let _ = std::mem::replace(&mut candidate_entry, &mut entry); - (*candidate_handle, true) - }) - }) - }) - }) - .unwrap_or_else(|| { - // Write new entry. - let entry_handle = self.cache_entries.write().unwrap().push(entry); - event!(Level::TRACE, "Wrote new cache entry {}", entry_handle,); - (entry_handle, false) - }); - - drop(candidates_lock); - - if !overwrite { - // Write handle key candidate mapping. - match self.key_candidates.write().unwrap().entry(key) { - Entry::Vacant(vacant) => { - vacant.insert(vec![entry_handle]); - } - Entry::Occupied(mut occupied) => { - occupied.get_mut().push(entry_handle); - } + // Check for and perform overwrites or write a new entry. + let existing_entry_handle = self.match_entry( + &key, + request_parts + .map(|p| &p.headers) + .unwrap_or(&HeaderMap::new()), + ); + + let entry_handle = match existing_entry_handle { + Some(handle) => { + // Overwrite entry. + event!(Level::TRACE, "Overwriting cache entry {}", handle); + + self.cache_entries + .write() + .unwrap() + .get_mut(handle) + .map(|old_entry| old_entry.replace(entry)); + + handle } - } - // Write surrogates (we don't really need to care about the overwrite case here for now). - let mut surrogates_write_lock = self.surrogates_to_handles.write().unwrap(); - for surrogate_key in surrogate_keys { - match surrogates_write_lock.entry(surrogate_key) { - Entry::Vacant(vacant) => { - vacant.insert(vec![entry_handle]); - } - Entry::Occupied(mut occupied) => { - occupied.get_mut().push(entry_handle); + None => { + // Write new entry. + let new_entry_handle = self.cache_entries.write().unwrap().push(Some(entry)); + event!(Level::TRACE, "Wrote new cache entry {}", new_entry_handle); + + // Write handle key candidate mapping. 
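+                // (Aside, sketch only: the match below could equivalently use the Entry API's
+                // `or_default`, which avoids spelling out the Vacant/Occupied arms:
+                //
+                //     self.key_candidates
+                //         .write()
+                //         .unwrap()
+                //         .entry(key)
+                //         .or_default()
+                //         .push(new_entry_handle);
+                // )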
+ match self.key_candidates.write().unwrap().entry(key) { + Entry::Vacant(vacant) => { + vacant.insert(vec![new_entry_handle]); + } + Entry::Occupied(mut occupied) => { + occupied.get_mut().push(new_entry_handle); + } } - }; - } + + new_entry_handle + } + }; Ok(entry_handle) } @@ -268,6 +257,9 @@ pub struct CacheEntry { /// Vary used to create this entry. pub vary: BTreeMap>, + /// Surrogates attached to this cache entry. + pub surrogate_keys: HashSet, + /// Initial age of the cache entry. pub initial_age_ns: Option, @@ -345,7 +337,10 @@ impl<'state> Display for CacheStateFormatter<'state> { writeln!(f, "{}Entries:", Self::indent(1))?; for (handle, entry) in self.state.cache_entries.read().unwrap().iter() { - self.fmt_entry(f, handle, entry)?; + match entry { + Some(entry) => self.fmt_entry(f, handle, entry)?, + None => writeln!(f, "{}[{}]: Purged", Self::indent(2), handle,)?, + } } Ok(()) @@ -409,6 +404,14 @@ impl<'state> CacheStateFormatter<'state> { writeln!(f, "{}{key}: {:?}", Self::indent(4), value)?; } + writeln!(f, "{}Surrogate keys:", Self::indent(3))?; + let mut surrogates: Vec<&String> = entry.surrogate_keys.iter().collect(); + surrogates.sort(); + + for surrogate in surrogates { + writeln!(f, "{}{surrogate}", Self::indent(4))?; + } + writeln!(f, "{}User Metadata:", Self::indent(3))?; writeln!( f, @@ -420,7 +423,8 @@ impl<'state> CacheStateFormatter<'state> { writeln!(f, "{}Body:", Self::indent(3))?; writeln!( f, - "{}", + "{}{}", + Self::indent(4), std::str::from_utf8(&entry.body_bytes).unwrap_or("") )?; diff --git a/lib/src/wiggle_abi/body_impl.rs b/lib/src/wiggle_abi/body_impl.rs index 3e10092b..c46d6917 100644 --- a/lib/src/wiggle_abi/body_impl.rs +++ b/lib/src/wiggle_abi/body_impl.rs @@ -42,7 +42,11 @@ impl FastlyHttpBody for Session { .write() .unwrap() .get_mut(cache_handle) - .map(|entry| entry.body_bytes.extend(source_bytes)); + .map(|entry| { + entry + .as_mut() + .map(|entry| entry.body_bytes.extend(source_bytes)) + }); } else { let dest = self.body_mut(dest)?; dest.trailers.extend(trailers); @@ -120,7 +124,7 @@ impl FastlyHttpBody for Session { .write() .unwrap() .get_mut(cache_handle) - .map(|entry| entry.body_bytes.extend(buf)); + .map(|entry| entry.as_mut().map(|entry| entry.body_bytes.extend(buf))); } else { self.body_mut(body_handle)?.push_back(buf); } diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index 4acb3c30..9b18b533 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -87,25 +87,10 @@ impl FastlyCache for Session { let options: types::CacheLookupOptions = options.read().unwrap(); let req_parts = self.request_parts(options.request_headers)?; - let candidates_lock = self.cache_state.key_candidates.read().unwrap(); - let candidates = candidates_lock.get(&primary_key); - - if let Some(candidates) = candidates { - // Eh, maybe a lock on the entire thing? - // We don't need perf anyways, contenting locks doesn't matter. 
- let entry_lock = self.cache_state.cache_entries.read().unwrap(); - - for candidate_handle in candidates { - if let Some(candidate_entry) = entry_lock.get(*candidate_handle) { - if candidate_entry.vary_matches(&req_parts.headers) { - event!(Level::TRACE, "Found matching cache entry"); - return Ok(*candidate_handle); - } - } - } - } - - Ok(not_found_handle()) + Ok(self + .cache_state + .match_entry(&primary_key, &req_parts.headers) + .unwrap_or(not_found_handle())) } fn insert<'a>( @@ -209,7 +194,7 @@ impl FastlyCache for Session { } fn get_state(&mut self, handle: types::CacheHandle) -> Result { - if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { // Entry found. let mut state = types::CacheLookupState::FOUND; @@ -241,7 +226,7 @@ impl FastlyCache for Session { user_metadata_out_len: u32, // TODO: Is this the maximum allowed length? nwritten_out: &wiggle::GuestPtr<'a, u32>, ) -> Result<(), Error> { - if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { if entry.user_metadata.len() > user_metadata_out_len as usize { nwritten_out.write(entry.user_metadata.len().try_into().unwrap_or(0))?; return Err(Error::BufferLengthError { @@ -279,7 +264,7 @@ impl FastlyCache for Session { .read() .unwrap() .get(handle) - .map(|entry| entry.body_bytes.clone()); + .and_then(|entry| entry.as_ref().map(|entry| entry.body_bytes.clone())); if let Some(body) = body { // Re-insert a body into the session to allow subsequent reads. @@ -304,7 +289,7 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { Ok(types::CacheDurationNs::from(entry.max_age_ns.unwrap_or(0))) } else { Err(HandleError::InvalidCacheHandle(handle).into()) @@ -315,7 +300,7 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { Ok(types::CacheDurationNs::from(entry.swr_ns.unwrap_or(0))) } else { Err(HandleError::InvalidCacheHandle(handle).into()) @@ -323,7 +308,7 @@ impl FastlyCache for Session { } fn get_age_ns(&mut self, handle: types::CacheHandle) -> Result { - if let Some(entry) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { Ok(types::CacheDurationNs::from(entry.age_ns())) } else { Err(HandleError::InvalidCacheHandle(handle).into()) From 2081068a876a426b9aea1f67dc95ef6340d20c4a Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Mon, 11 Mar 2024 09:56:01 +0100 Subject: [PATCH 15/20] More cleanup --- lib/src/cache_state.rs | 6 +++--- lib/src/execute.rs | 9 +++++---- lib/src/session.rs | 19 +++++-------------- lib/src/wiggle_abi/body_impl.rs | 1 - lib/src/wiggle_abi/req_impl.rs | 2 -- 5 files changed, 13 insertions(+), 24 deletions(-) diff --git a/lib/src/cache_state.rs b/lib/src/cache_state.rs index 80712ccc..2e88a6f6 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/cache_state.rs @@ -237,7 +237,7 @@ impl CacheState { } // This handling is necessary due to incredibly painful body handling. 
- pub async fn format_pretty(&self) -> String { + pub fn format_pretty(&self) -> String { let formatter = CacheStateFormatter { state: self }; formatter.to_string() } @@ -246,8 +246,8 @@ impl CacheState { #[derive(Debug)] pub struct CacheEntry { /// Key the entry was created with. - /// Here for convenience. - pub key: Vec, + /// Here for convenience, entries are retrieved via handles. + pub key: PrimaryCacheKey, /// The raw bytes of the cached entry. pub body_bytes: Vec, diff --git a/lib/src/execute.rs b/lib/src/execute.rs index 089b3a1e..57efda63 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -83,12 +83,12 @@ pub struct ExecuteCtx { /// a file. guest_profile_path: Arc>, /// Endpoints that should be monitored. Allows reading logged message lines to that endpoint. - endpoints: Arc, + endpoints: Endpoints, /// Cache state to use. cache_state: Arc, } -#[derive(Clone)] +#[derive(Clone, Default)] pub struct Endpoints { // #[allow(clippy::type_complexity)] // todo iterator @@ -117,6 +117,7 @@ pub struct EndpointListener { } impl EndpointListener { + /// Drains and returns all (raw) messages from this listener. pub fn messages(&mut self) -> Vec> { let mut messages = vec![]; @@ -181,7 +182,7 @@ impl ExecuteCtx { epoch_increment_thread, epoch_increment_stop, guest_profile_path: Arc::new(guest_profile_path), - endpoints: Arc::new(Endpoints::new()), + endpoints: Endpoints::new(), dynamic_backend_registrar: None, cache_state: Arc::new(CacheState::new()), }) @@ -282,7 +283,7 @@ impl ExecuteCtx { } /// Set the endpoints for this execution context. - pub fn with_endpoints(mut self, endpoints: Arc) -> Self { + pub fn with_endpoints(mut self, endpoints: Endpoints) -> Self { self.endpoints = endpoints; self } diff --git a/lib/src/session.rs b/lib/src/session.rs index 78d76698..f63878de 100644 --- a/lib/src/session.rs +++ b/lib/src/session.rs @@ -149,7 +149,7 @@ impl Session { config_path: Arc>, object_store: Arc, secret_stores: Arc, - endpoints: Arc, + endpoints: Endpoints, dynamic_backend_registrar: Option>>, cache_state: Arc, ) -> Session { @@ -194,7 +194,7 @@ impl Session { } // Todo: Haxx, it's better to run this code in the constructor and initialize `log_endpoints_by_name` and `log_endpoints`. - fn write_endpoints(mut self, endpoints: Arc) -> Self { + fn write_endpoints(mut self, endpoints: Endpoints) -> Self { for (name, sender) in endpoints.endpoints.read().unwrap().iter() { let endpoint = LogEndpoint::new(name, Some(sender.clone())); let handle = self.log_endpoints.push(endpoint); @@ -263,14 +263,13 @@ impl Session { /// [handle]: ../wiggle_abi/types/struct.BodyHandle.html /// [body]: ../body/struct.Body.html pub fn insert_body(&mut self, body: Body) -> BodyHandle { - dbg!(self.async_items.push(Some(AsyncItem::Body(body))).into()) + self.async_items.push(Some(AsyncItem::Body(body))).into() } pub fn insert_cache_body(&mut self, cache_handle: types::CacheHandle) -> BodyHandle { - dbg!(self - .async_items + self.async_items .push(Some(AsyncItem::CachingBody(cache_handle))) - .into()) + .into() } /// Get a reference to a [`Body`][body], given its [`BodyHandle`][handle]. 
@@ -281,7 +280,6 @@ impl Session { /// [err]: ../error/enum.HandleError.html /// [handle]: ../wiggle_abi/types/struct.BodyHandle.html pub fn body(&self, handle: BodyHandle) -> Result<&Body, HandleError> { - dbg!("Getting body", handle); self.async_items .get(handle.into()) .and_then(Option::as_ref) @@ -297,7 +295,6 @@ impl Session { /// [err]: ../error/enum.HandleError.html /// [handle]: ../wiggle_abi/types/struct.BodyHandle.html pub fn body_mut(&mut self, handle: BodyHandle) -> Result<&mut Body, HandleError> { - dbg!("Getting body mut", handle); self.async_items .get_mut(handle.into()) .and_then(Option::as_mut) @@ -313,7 +310,6 @@ impl Session { /// [err]: ../error/enum.HandleError.html /// [handle]: ../wiggle_abi/types/struct.BodyHandle.html pub fn take_body(&mut self, handle: BodyHandle) -> Result { - dbg!("Taking body", handle); self.async_items .get_mut(handle.into()) .and_then(Option::take) @@ -325,7 +321,6 @@ impl Session { /// /// Returns a [`HandleError`][crate::error::HandleError] if the handle is not associated with a body in the session. pub fn drop_body(&mut self, handle: BodyHandle) -> Result<(), HandleError> { - dbg!("Dropping body", handle); self.async_items .get_mut(handle.into()) .and_then(Option::take) @@ -341,7 +336,6 @@ impl Session { /// [body]: ../body/struct.Body.html /// [err]: ../error/enum.HandleError.html pub fn begin_streaming(&mut self, handle: BodyHandle) -> Result { - dbg!("Begin streaming", handle); self.async_items .get_mut(handle.into()) .and_then(Option::as_mut) @@ -373,7 +367,6 @@ impl Session { &mut self, handle: BodyHandle, ) -> Result { - dbg!("Getting body mut", handle); self.async_items .get_mut(handle.into()) .and_then(|item| item.as_ref().map(|item| item.resolve_cache_handle())) @@ -394,7 +387,6 @@ impl Session { &mut self, handle: BodyHandle, ) -> Result<&mut StreamingBody, HandleError> { - dbg!("Streaming mut", handle); self.async_items .get_mut(handle.into()) .and_then(Option::as_mut) @@ -415,7 +407,6 @@ impl Session { &mut self, handle: BodyHandle, ) -> Result { - dbg!("take streaming", handle); self.async_items .get_mut(handle.into()) .and_then(Option::take) diff --git a/lib/src/wiggle_abi/body_impl.rs b/lib/src/wiggle_abi/body_impl.rs index c46d6917..c0363fd2 100644 --- a/lib/src/wiggle_abi/body_impl.rs +++ b/lib/src/wiggle_abi/body_impl.rs @@ -66,7 +66,6 @@ impl FastlyHttpBody for Session { buf: &GuestPtr<'a, u8>, buf_len: u32, ) -> Result { - dbg!("READING FROM BODY"); let mut buf_slice = buf .as_array(buf_len) .as_slice_mut()? 
diff --git a/lib/src/wiggle_abi/req_impl.rs b/lib/src/wiggle_abi/req_impl.rs index 70b20bc6..446085e7 100644 --- a/lib/src/wiggle_abi/req_impl.rs +++ b/lib/src/wiggle_abi/req_impl.rs @@ -820,8 +820,6 @@ impl FastlyHttpReq for Session { } fn close(&mut self, req_handle: RequestHandle) -> Result<(), Error> { - dbg!("Closing request", req_handle); - // We don't do anything with the parts, but we do pass the error up if // the handle given doesn't exist self.take_request_parts(req_handle)?; From 0157ae91e88c3277bbae61a481f39a99237cd8a8 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Mon, 11 Mar 2024 18:10:13 +0100 Subject: [PATCH 16/20] Rename spree --- lib/src/config.rs | 2 +- lib/src/config/backends.rs | 3 +- lib/src/execute.rs | 55 +++++------ .../{cache_state.rs => in_memory_cache.rs} | 46 ++++----- lib/src/lib.rs | 6 +- lib/src/session.rs | 26 +++-- lib/src/wiggle_abi/body_impl.rs | 4 +- lib/src/wiggle_abi/cache.rs | 95 +++---------------- lib/src/wiggle_abi/req_impl.rs | 4 +- 9 files changed, 87 insertions(+), 154 deletions(-) rename lib/src/{cache_state.rs => in_memory_cache.rs} (92%) diff --git a/lib/src/config.rs b/lib/src/config.rs index 7c912419..4e584692 100644 --- a/lib/src/config.rs +++ b/lib/src/config.rs @@ -30,7 +30,7 @@ pub type Dictionaries = HashMap; mod backends; pub use self::backends::{ - Backend, ClientCertError, ClientCertInfo, DynamicBackendRegistrar, Handler, + Backend, ClientCertError, ClientCertInfo, DynamicBackendRegistrationInterceptor, Handler, InMemoryBackendHandler, }; diff --git a/lib/src/config/backends.rs b/lib/src/config/backends.rs index a6d136a1..86c83fe4 100644 --- a/lib/src/config/backends.rs +++ b/lib/src/config/backends.rs @@ -58,8 +58,7 @@ pub trait InMemoryBackendHandler: Send + Sync + 'static { async fn handle(&self, req: Request) -> Response; } -// TODO: Should probably be more like an interceptor, registrar conveys the wrong idea. -pub trait DynamicBackendRegistrar: Send + Sync + 'static { +pub trait DynamicBackendRegistrationInterceptor: Send + Sync + 'static { fn register(&self, backend: Backend) -> Backend; } diff --git a/lib/src/execute.rs b/lib/src/execute.rs index 57efda63..b426f977 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -1,8 +1,8 @@ //! Guest code execution. use crate::{ - cache_state::CacheState, - config::{DynamicBackendRegistrar, UnknownImportBehavior}, + config::{DynamicBackendRegistrationInterceptor, UnknownImportBehavior}, + in_memory_cache::InMemoryCache, }; use std::{collections::BTreeMap, sync::RwLock, time::SystemTime}; use tokio::sync::mpsc::{Receiver, Sender as MspcSender}; @@ -54,7 +54,7 @@ pub struct ExecuteCtx { /// The backends for this execution. backends: Arc, /// If set, intercepts dynamic backend registrations. - dynamic_backend_registrar: Option>>, + dynamic_backend_interceptor: Option>>, /// The device detection mappings for this execution. device_detection: Arc, /// The geolocation mappings for this execution. @@ -82,20 +82,20 @@ pub struct ExecuteCtx { /// this must refer to a directory, while in run mode it names /// a file. guest_profile_path: Arc>, - /// Endpoints that should be monitored. Allows reading logged message lines to that endpoint. - endpoints: Endpoints, - /// Cache state to use. - cache_state: Arc, + /// Endpoints to monitor for messages. + endpoints_monitor: EndpointsMonitor, + /// Cache to use for this execution. + cache: InMemoryCache, } +/// A monitor for logging endpoints. 
Messages written to the contained endpoints will instead be +/// directed at the wrapped channel, allowing the logs to be accessed programmatically on the receiver side. #[derive(Clone, Default)] -pub struct Endpoints { - // #[allow(clippy::type_complexity)] - // todo iterator +pub struct EndpointsMonitor { pub endpoints: Arc, MspcSender>>>>, } -impl Endpoints { +impl EndpointsMonitor { pub fn new() -> Self { Self { endpoints: Arc::new(RwLock::new(BTreeMap::new())), } @@ -118,6 +118,7 @@ pub struct EndpointListener { impl EndpointListener { /// Drains and returns all (raw) messages from this listener. + /// Will _not_ block and wait for messages. pub fn messages(&mut self) -> Vec> { let mut messages = vec![]; @@ -182,9 +183,9 @@ impl ExecuteCtx { epoch_increment_thread, epoch_increment_stop, guest_profile_path: Arc::new(guest_profile_path), - endpoints: Endpoints::new(), - dynamic_backend_registrar: None, - cache_state: Arc::new(CacheState::new()), + endpoints_monitor: EndpointsMonitor::new(), + dynamic_backend_interceptor: None, + cache: InMemoryCache::new(), }) } @@ -283,21 +284,21 @@ impl ExecuteCtx { } /// Set the endpoints for this execution context. - pub fn with_endpoints(mut self, endpoints: Endpoints) -> Self { - self.endpoints = endpoints; + pub fn with_endpoints(mut self, endpoints: EndpointsMonitor) -> Self { + self.endpoints_monitor = endpoints; self } - pub fn with_dynamic_backend_registrar( + pub fn with_dynamic_backend_interceptor( mut self, - registrar: Box, + interceptor: Box, ) -> Self { - self.dynamic_backend_registrar = Some(Arc::new(registrar)); + self.dynamic_backend_interceptor = Some(Arc::new(interceptor)); self } - pub fn with_caches(mut self, cache_state: Arc) -> Self { - self.cache_state = cache_state; + pub fn with_cache(mut self, cache: InMemoryCache) -> Self { + self.cache = cache; self } @@ -414,9 +415,9 @@ impl ExecuteCtx { self.config_path.clone(), self.object_store.clone(), self.secret_stores.clone(), - self.endpoints.clone(), - self.dynamic_backend_registrar.clone(), - self.cache_state.clone(), + self.endpoints_monitor.clone(), + self.dynamic_backend_interceptor.clone(), + self.cache.clone(), ); let guest_profile_path = self.guest_profile_path.as_deref().map(|path| { @@ -512,9 +513,9 @@ impl ExecuteCtx { self.config_path.clone(), self.object_store.clone(), self.secret_stores.clone(), - self.endpoints.clone(), - self.dynamic_backend_registrar.clone(), - self.cache_state.clone(), + self.endpoints_monitor.clone(), + self.dynamic_backend_interceptor.clone(), + self.cache.clone(), ); let profiler = self.guest_profile_path.is_some().then(|| { diff --git a/lib/src/cache_state.rs b/lib/src/in_memory_cache.rs similarity index 92% rename from lib/src/cache_state.rs rename to lib/src/in_memory_cache.rs index 2e88a6f6..556ae263 100644 --- a/lib/src/cache_state.rs +++ b/lib/src/in_memory_cache.rs @@ -10,7 +10,6 @@ use std::{ use tracing::{event, Level}; /// Handle used for a non-existing cache entry. -/// TODO: Is this needed? Seems to work without checking it. pub fn not_found_handle() -> types::CacheHandle { types::CacheHandle::from(u32::MAX) } @@ -18,7 +17,7 @@ pub fn not_found_handle() -> types::CacheHandle { type PrimaryCacheKey = Vec; #[derive(Clone, Default, Debug)] -pub struct CacheState { +pub struct InMemoryCache { /// Cache entries, indexable by handle. /// `None` indicates a deleted entry.
pub cache_entries: Arc>>>, @@ -36,7 +35,7 @@ pub struct CacheState { pub pending_tx: Arc>>, } -impl CacheState { +impl InMemoryCache { pub fn new() -> Self { Default::default() } @@ -61,8 +60,13 @@ impl CacheState { } } - // Todo: Use in insert / lookup. - pub fn match_entry(&self, key: &Vec, headers: &HeaderMap) -> Option { + /// Attempts to retrieve a cache entry by primary key. + /// Matches vary rules against the given headers to retrieve the correct variant. + pub fn get_entry( + &self, + key: &PrimaryCacheKey, + headers: &HeaderMap, + ) -> Option { let candidates_lock = self.key_candidates.read().unwrap(); candidates_lock.get(key).and_then(|candidates| { @@ -82,16 +86,17 @@ impl CacheState { pub fn insert<'a>( &self, - key: Vec, + key: PrimaryCacheKey, options_mask: types::CacheWriteOptionsMask, options: types::CacheWriteOptions, request_parts: Option<&Parts>, - // body_to_use: types::BodyHandle, ) -> Result { - // Cache write must contain max-age. + // Cache writes must contain max-age. let max_age_ns = options.max_age_ns; - // Swr might not be set, check bitmask for. Else we'd always get Some(0). + // Example of how the bitmask works: the data the guest gives us via the ABI (here, `CacheWriteOptions`) + // doesn't have option semantics, so we can't distinguish between unset and zero values. + // We check the bitmask to see whether the field is set; otherwise we'd always get `Some(0)` here. let swr_ns = options_mask .contains(types::CacheWriteOptionsMask::STALE_WHILE_REVALIDATE_NS) .then(|| options.stale_while_revalidate_ns); @@ -158,7 +163,8 @@ impl CacheState { // Extract necessary vary headers. for vary in vary_rules { - // If you think this sucks... then you'd be right. Just supposed to work right now. + // Dear reader, if you think this sucks... then you'd be right. + // (Just supposed to work right now) let value = req_parts .headers .get(&vary) @@ -193,7 +199,7 @@ impl CacheState { }; // Check for and perform overwrites or write a new entry. - let existing_entry_handle = self.get_entry( &key, request_parts .map(|p| &p.headers) @@ -235,12 +241,6 @@ impl CacheState { Ok(entry_handle) } - - // This handling is necessary due to incredibly painful body handling.
- pub fn format_pretty(&self) -> String { - let formatter = CacheStateFormatter { state: self }; - formatter.to_string() - } } #[derive(Debug)] @@ -327,16 +327,14 @@ impl CacheEntry { } } -struct CacheStateFormatter<'state> { - state: &'state CacheState, -} +pub const NS_TO_S_FACTOR: u64 = 1_000_000_000; -impl<'state> Display for CacheStateFormatter<'state> { +impl Display for InMemoryCache { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "Cache State:")?; writeln!(f, "{}Entries:", Self::indent(1))?; - for (handle, entry) in self.state.cache_entries.read().unwrap().iter() { + for (handle, entry) in self.cache_entries.read().unwrap().iter() { match entry { Some(entry) => self.fmt_entry(f, handle, entry)?, None => writeln!(f, "{}[{}]: Purged", Self::indent(2), handle,)?, @@ -347,9 +345,7 @@ impl<'state> Display for CacheStateFormatter<'state> { } } -pub const NS_TO_S_FACTOR: u64 = 1_000_000_000; - -impl<'state> CacheStateFormatter<'state> { +impl InMemoryCache { fn indent(level: usize) -> String { " ".repeat(level) } diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 191e4f37..f9f3a0be 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -20,10 +20,10 @@ pub mod logging; pub mod session; mod async_io; -mod cache_state; mod downstream; mod execute; mod headers; +mod in_memory_cache; mod linking; mod object_store; mod secret_store; @@ -34,10 +34,10 @@ mod wiggle_abi; pub use { async_trait, - cache_state::CacheState, error::Error, - execute::{EndpointListener, Endpoints, ExecuteCtx}, + execute::{EndpointListener, EndpointsMonitor, ExecuteCtx}, http, hyper, + in_memory_cache::InMemoryCache, service::ViceroyService, upstream::BackendConnector, wasmtime::ProfilingStrategy, diff --git a/lib/src/session.rs b/lib/src/session.rs index f63878de..e626a468 100644 --- a/lib/src/session.rs +++ b/lib/src/session.rs @@ -7,7 +7,10 @@ pub use async_item::{ AsyncItem, PeekableTask, PendingKvDeleteTask, PendingKvInsertTask, PendingKvLookupTask, }; -use crate::{cache_state::CacheState, config::DynamicBackendRegistrar, execute::Endpoints}; +use crate::{ + config::DynamicBackendRegistrationInterceptor, execute::EndpointsMonitor, + in_memory_cache::InMemoryCache, +}; use { self::downstream::DownstreamResponse, crate::{ @@ -89,8 +92,11 @@ pub struct Session { /// `backends` because we do not want one session to affect the backends /// available to any other session. dynamic_backends: Backends, - /// If set, the registrar is used to register dynamic backends. - pub(crate) dynamic_backend_registrar: Option>>, + /// If set, intercepts dynamic backend registrations and allows them to be modified on the fly. + /// Useful in combination with in-memory backend handlers to allow handling dynamic backends + /// in memory as well. + pub(crate) dynamic_backend_interceptor: + Option>>, /// The TLS configuration for this execution. /// /// Populated prior to guest execution, and never modified. @@ -130,7 +136,7 @@ pub struct Session { /// The ID for the client request being processed. req_id: u64, /// The cache state to use.
- pub(crate) cache_state: Arc, + pub(crate) cache: InMemoryCache, } impl Session { @@ -149,9 +155,9 @@ impl Session { config_path: Arc>, object_store: Arc, secret_stores: Arc, - endpoints: Endpoints, - dynamic_backend_registrar: Option>>, - cache_state: Arc, + endpoints: EndpointsMonitor, + dynamic_backend_interceptor: Option>>, + cache: InMemoryCache, ) -> Session { let (parts, body) = req.into_parts(); let downstream_req_original_headers = parts.headers.clone(); @@ -187,14 +193,14 @@ impl Session { secrets_by_name: PrimaryMap::new(), config_path, req_id, - dynamic_backend_registrar, - cache_state, + dynamic_backend_interceptor, + cache, } .write_endpoints(endpoints) } // Todo: Haxx, it's better to run this code in the constructor and initialize `log_endpoints_by_name` and `log_endpoints`. - fn write_endpoints(mut self, endpoints: Endpoints) -> Self { + fn write_endpoints(mut self, endpoints: EndpointsMonitor) -> Self { for (name, sender) in endpoints.endpoints.read().unwrap().iter() { let endpoint = LogEndpoint::new(name, Some(sender.clone())); let handle = self.log_endpoints.push(endpoint); diff --git a/lib/src/wiggle_abi/body_impl.rs b/lib/src/wiggle_abi/body_impl.rs index c0363fd2..2ecc69a7 100644 --- a/lib/src/wiggle_abi/body_impl.rs +++ b/lib/src/wiggle_abi/body_impl.rs @@ -37,7 +37,7 @@ impl FastlyHttpBody for Session { let cache_handle = self.caching_body_handle(dest).unwrap(); let source_bytes = src.read_into_vec().await?; - self.cache_state + self.cache .cache_entries .write() .unwrap() @@ -118,7 +118,7 @@ impl FastlyHttpBody for Session { .await?; } else if self.is_caching_body(body_handle) { let cache_handle = self.caching_body_handle(body_handle).unwrap(); - self.cache_state + self.cache .cache_entries .write() .unwrap() diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index 9b18b533..fc3e27e9 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -3,78 +3,9 @@ use super::{ types::{self}, Error, }; -use crate::{body::Body, cache_state::not_found_handle, error::HandleError, session::Session}; +use crate::{body::Body, error::HandleError, in_memory_cache::not_found_handle, session::Session}; use tracing::{event, Level}; -// pub struct CacheWriteOptions<'a> { -// pub max_age_ns: CacheDurationNs, -// pub request_headers: RequestHandle, -// pub vary_rule_ptr: wiggle::GuestPtr<'a, u8>, -// pub vary_rule_len: u32, -// pub initial_age_ns: CacheDurationNs, -// pub stale_while_revalidate_ns: CacheDurationNs, -// pub surrogate_keys_ptr: wiggle::GuestPtr<'a, u8>, -// pub surrogate_keys_len: u32, -// pub length: CacheObjectLength, -// pub user_metadata_ptr: wiggle::GuestPtr<'a, u8>, -// pub user_metadata_len: u32, -// } - -// Q: What does this do? Indicate which fields are safe to read? 
-// pub struct CacheWriteOptionsMask( -// ::Internal, -// ); -// impl CacheWriteOptionsMask { -// #[allow(deprecated, non_upper_case_globals)] -// pub const RESERVED: Self = Self::from_bits_retain(1); -// #[allow(deprecated, non_upper_case_globals)] -// pub const REQUEST_HEADERS: Self = Self::from_bits_retain(2); -// #[allow(deprecated, non_upper_case_globals)] -// pub const VARY_RULE: Self = Self::from_bits_retain(4); -// #[allow(deprecated, non_upper_case_globals)] -// pub const INITIAL_AGE_NS: Self = Self::from_bits_retain(8); -// #[allow(deprecated, non_upper_case_globals)] -// pub const STALE_WHILE_REVALIDATE_NS: Self = Self::from_bits_retain(16); -// #[allow(deprecated, non_upper_case_globals)] -// pub const SURROGATE_KEYS: Self = Self::from_bits_retain(32); -// #[allow(deprecated, non_upper_case_globals)] -// pub const LENGTH: Self = Self::from_bits_retain(64); -// #[allow(deprecated, non_upper_case_globals)] -// pub const USER_METADATA: Self = Self::from_bits_retain(128); -// #[allow(deprecated, non_upper_case_globals)] -// pub const SENSITIVE_DATA: Self = Self::from_bits_retain(256); -// } - -// pub struct CacheLookupOptionsMask( -// ::Internal, -// ); -// impl CacheLookupOptionsMask { -// #[allow(deprecated, non_upper_case_globals)] -// pub const RESERVED: Self = Self::from_bits_retain(1); -// #[allow(deprecated, non_upper_case_globals)] -// pub const REQUEST_HEADERS: Self = Self::from_bits_retain(2); -// } - -// pub struct CacheLookupOptions { -// pub request_headers: RequestHandle, -// } - -// pub struct CacheGetBodyOptions { -// pub from: u64, -// pub to: u64, -// } - -// pub struct CacheGetBodyOptionsMask( -// ::Internal, -// ); -// impl CacheGetBodyOptionsMask { -// #[allow(deprecated, non_upper_case_globals)] -// pub const RESERVED: Self = Self::from_bits_retain(1); -// #[allow(deprecated, non_upper_case_globals)] -// pub const FROM: Self = Self::from_bits_retain(2); -// #[allow(deprecated, non_upper_case_globals)] -// pub const TO: Self = Self::from_bits_retain(4); -// } #[allow(unused_variables)] impl FastlyCache for Session { fn lookup<'a>( @@ -88,8 +19,8 @@ impl FastlyCache for Session { let req_parts = self.request_parts(options.request_headers)?; Ok(self - .cache_state - .match_entry(&primary_key, &req_parts.headers) + .cache + .get_entry(&primary_key, &req_parts.headers) .unwrap_or(not_found_handle())) } @@ -108,7 +39,7 @@ impl FastlyCache for Session { None }; - let cache_handle = self.cache_state.insert(key, options_mask, options, parts)?; + let cache_handle = self.cache.insert(key, options_mask, options, parts)?; Ok(self.insert_cache_body(cache_handle)) } @@ -122,7 +53,7 @@ impl FastlyCache for Session { // TODO: This is a plain hack, not a working implementation: Parallel tx etc, are simply not required. // -> This will fall apart immediately if tx are used as actual transactions. let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); - Ok(self.cache_state.pending_tx.write().unwrap().push(key)) + Ok(self.cache.pending_tx.write().unwrap().push(key)) } /// Stub delegating to regular insert. 
@@ -133,7 +64,7 @@ impl FastlyCache for Session { options: &wiggle::GuestPtr<'a, types::CacheWriteOptions<'a>>, ) -> Result { let key = self - .cache_state + .cache .pending_tx .read() .unwrap() @@ -149,7 +80,7 @@ impl FastlyCache for Session { }; let cache_handle = - self.cache_state + self.cache .insert(pending_tx_key.to_owned(), options_mask, options, parts)?; Ok(self.insert_cache_body(cache_handle)) @@ -194,7 +125,7 @@ impl FastlyCache for Session { } fn get_state(&mut self, handle: types::CacheHandle) -> Result { - if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache.cache_entries.read().unwrap().get(handle) { // Entry found. let mut state = types::CacheLookupState::FOUND; @@ -226,7 +157,7 @@ impl FastlyCache for Session { user_metadata_out_len: u32, // TODO: Is this the maximum allowed length? nwritten_out: &wiggle::GuestPtr<'a, u32>, ) -> Result<(), Error> { - if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache.cache_entries.read().unwrap().get(handle) { if entry.user_metadata.len() > user_metadata_out_len as usize { nwritten_out.write(entry.user_metadata.len().try_into().unwrap_or(0))?; return Err(Error::BufferLengthError { @@ -259,7 +190,7 @@ impl FastlyCache for Session { options: &types::CacheGetBodyOptions, ) -> Result { let body = self - .cache_state + .cache .cache_entries .read() .unwrap() @@ -289,7 +220,7 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache.cache_entries.read().unwrap().get(handle) { Ok(types::CacheDurationNs::from(entry.max_age_ns.unwrap_or(0))) } else { Err(HandleError::InvalidCacheHandle(handle).into()) @@ -300,7 +231,7 @@ impl FastlyCache for Session { &mut self, handle: types::CacheHandle, ) -> Result { - if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache.cache_entries.read().unwrap().get(handle) { Ok(types::CacheDurationNs::from(entry.swr_ns.unwrap_or(0))) } else { Err(HandleError::InvalidCacheHandle(handle).into()) @@ -308,7 +239,7 @@ impl FastlyCache for Session { } fn get_age_ns(&mut self, handle: types::CacheHandle) -> Result { - if let Some(Some(entry)) = self.cache_state.cache_entries.read().unwrap().get(handle) { + if let Some(Some(entry)) = self.cache.cache_entries.read().unwrap().get(handle) { Ok(types::CacheDurationNs::from(entry.age_ns())) } else { Err(HandleError::InvalidCacheHandle(handle).into()) diff --git a/lib/src/wiggle_abi/req_impl.rs b/lib/src/wiggle_abi/req_impl.rs index 446085e7..f71c4609 100644 --- a/lib/src/wiggle_abi/req_impl.rs +++ b/lib/src/wiggle_abi/req_impl.rs @@ -391,8 +391,8 @@ impl FastlyHttpReq for Session { handler: None, }; - let new_backend = match self.dynamic_backend_registrar { - Some(ref registrar) => registrar.register(new_backend), + let new_backend = match self.dynamic_backend_interceptor { + Some(ref interceptor) => interceptor.register(new_backend), None => new_backend, }; From 98a829450d11cb1a307d2cbc71e4fa1ce9745d9b Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Wed, 27 Mar 2024 12:14:43 +0100 Subject: [PATCH 17/20] Turns out it was a crime after all. 
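The gist of the new bookkeeping, as a standalone sketch (plain `u32`, `Vec<u8>`, `Vec<Option<..>>`, and `HashMap` stand-ins for `CacheHandle`, the key type, the entry map, and `pending_tx`; not the actual Viceroy types or locking): a transaction lookup that misses reserves a placeholder entry slot and records which primary key the returned handle belongs to, so a later transaction insert can recover the key from the handle alone.

    use std::collections::HashMap;

    fn main() {
        // Entries indexed by handle; `None` marks a deleted entry or a pending-tx placeholder.
        let mut cache_entries: Vec<Option<&str>> = Vec::new();
        // Pending transactions: placeholder handle -> primary cache key.
        let mut pending_tx: HashMap<u32, Vec<u8>> = HashMap::new();

        // transaction_lookup miss: reserve a placeholder handle and remember its key.
        cache_entries.push(None);
        let handle = (cache_entries.len() - 1) as u32;
        pending_tx.insert(handle, b"example-key".to_vec());

        // transaction_insert: look the key back up by handle and write the real entry.
        let key = pending_tx.get(&handle).cloned().expect("pending tx handle");
        cache_entries[handle as usize] = Some("cached body");
        println!("wrote entry for key {key:?} via handle {handle}");
    }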
--- lib/src/in_memory_cache.rs | 16 +++++----------- lib/src/wiggle_abi/cache.rs | 16 +++++++++++----- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/src/in_memory_cache.rs b/lib/src/in_memory_cache.rs index 556ae263..26eb63a5 100644 --- a/lib/src/in_memory_cache.rs +++ b/lib/src/in_memory_cache.rs @@ -2,7 +2,7 @@ use crate::{wiggle_abi::types, Error}; use cranelift_entity::PrimaryMap; use http::{request::Parts, HeaderMap}; use std::{ - collections::{btree_map::Entry, BTreeMap, HashSet}, + collections::{btree_map::Entry, BTreeMap, HashMap, HashSet}, fmt::Display, sync::{Arc, RwLock}, time::Instant, @@ -19,20 +19,15 @@ type PrimaryCacheKey = Vec; #[derive(Clone, Default, Debug)] pub struct InMemoryCache { /// Cache entries, indexable by handle. - /// `None` indicates a deleted entry. + /// `None` indicates a deleted entry OR a tx marker. pub cache_entries: Arc>>>, /// Primary cache key to a list of variants. pub key_candidates: Arc>>>, - // Surrogates to cache entry mapping to support simplistic purging. - // pub surrogates_to_handles: Arc>>>, - /// CacheHandle markers for pending transactions. Handles received for TX operataions - /// always point into this map instead of the entries. - // - // TODO this is probably a crime, since two cache handle primary maps may lead to - // fun bugs, but as we're not in need to really testing tx, we can just hack it. - pub pending_tx: Arc>>, + // CacheHandle markers for pending transactions. Since we need to retrieve the cache key for + // a tx insert, this map maps the pending handle to the key used to look it up. + pub pending_tx: Arc>>, } impl InMemoryCache { @@ -68,7 +63,6 @@ impl InMemoryCache { headers: &HeaderMap, ) -> Option { let candidates_lock = self.key_candidates.read().unwrap(); - candidates_lock.get(key).and_then(|candidates| { let entry_lock = self.cache_entries.write().unwrap(); diff --git a/lib/src/wiggle_abi/cache.rs b/lib/src/wiggle_abi/cache.rs index fc3e27e9..d9f5b482 100644 --- a/lib/src/wiggle_abi/cache.rs +++ b/lib/src/wiggle_abi/cache.rs @@ -50,10 +50,16 @@ impl FastlyCache for Session { options_mask: types::CacheLookupOptionsMask, options: &wiggle::GuestPtr<'a, types::CacheLookupOptions>, ) -> Result { - // TODO: This is a plain hack, not a working implementation: Parallel tx etc, are simply not required. - // -> This will fall apart immediately if tx are used as actual transactions. - let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); - Ok(self.cache.pending_tx.write().unwrap().push(key)) + // Check if the entry already exists. + let handle = self.lookup(cache_key, options_mask, options)?; + if handle == not_found_handle() { + let key: Vec = cache_key.as_slice().unwrap().unwrap().to_vec(); + let handle = self.cache.cache_entries.write().unwrap().push(None); + self.cache.pending_tx.write().unwrap().insert(handle, key); + Ok(handle) + } else { + Ok(handle) + } } /// Stub delegating to regular insert. @@ -68,7 +74,7 @@ impl FastlyCache for Session { let key = self .cache .pending_tx .read() .unwrap() - .get(handle) + .get(&handle) .map(ToOwned::to_owned); if let Some(pending_tx_key) = key { From f59abfda672ceb94994cf1e7741e6ce474488b95 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Tue, 2 Apr 2024 13:20:16 +0200 Subject: [PATCH 18/20] Add run_to_completion to executor.
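Rough sketch of the intended call pattern (hypothetical caller code; `ctx`, `req`, and `respond` are stand-ins for however the embedding application builds the `ExecuteCtx`, obtains the request, and delivers the response, and error handling is elided):

    // The response may come back before the guest finishes executing.
    let (resp, _trap, guest) = ctx.handle_request(req, "127.0.0.1".parse().unwrap()).await?;
    respond(resp);

    // Drive the guest to completion so errors raised after the early response still surface.
    if let Some(handle) = guest {
        if let Some(err) = ExecuteCtx::run_to_completion(handle).await {
            eprintln!("guest failed after responding: {err}");
        }
    }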
--- cli/tests/integration/common.rs | 4 +-- lib/src/execute.rs | 53 +++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/cli/tests/integration/common.rs b/cli/tests/integration/common.rs index b1a4b498..e5a4275e 100644 --- a/cli/tests/integration/common.rs +++ b/cli/tests/integration/common.rs @@ -358,8 +358,8 @@ impl Test { .await .map(|result| { match result { - (resp, None) => resp, - (_, Some(err)) => { + (resp, None, _) => resp, + (_, Some(err), _) => { // Splat the string representation of the runtime error into a synthetic // 500. This is a bit of a hack, but good enough to check for expected error // strings. diff --git a/lib/src/execute.rs b/lib/src/execute.rs index b426f977..eed265f1 100644 --- a/lib/src/execute.rs +++ b/lib/src/execute.rs @@ -5,7 +5,10 @@ use crate::{ in_memory_cache::InMemoryCache, }; use std::{collections::BTreeMap, sync::RwLock, time::SystemTime}; -use tokio::sync::mpsc::{Receiver, Sender as MspcSender}; +use tokio::{ + sync::mpsc::{Receiver, Sender as MspcSender}, + task::JoinHandle as TokioJoinHandle, +}; use wasmtime::GuestProfiler; use { crate::{ @@ -38,6 +41,10 @@ use { pub const EPOCH_INTERRUPTION_PERIOD: Duration = Duration::from_micros(50); +pub struct GuestHandle { + pub(crate) handle: TokioJoinHandle>, +} + /// Execution context used by a [`ViceroyService`](struct.ViceroyService.html). /// /// This is all of the state needed to instantiate a module, in order to respond to an HTTP @@ -330,7 +337,14 @@ impl ExecuteCtx { self, incoming_req: Request, remote: IpAddr, - ) -> Result<(Response, Option), Error> { + ) -> Result< + ( + Response, + Option, + Option, // TODO: Just hiding the error for now, need this to work. + ), + Error, + > { let req = prepare_request(incoming_req)?; let (sender, receiver) = oneshot::channel(); @@ -346,12 +360,18 @@ impl ExecuteCtx { ); let resp = match receiver.await { - Ok(resp) => (resp, None), + Ok(resp) => ( + resp, + None, + Some(GuestHandle { + handle: guest_handle, + }), + ), Err(_) => match guest_handle .await .expect("guest worker finished without panicking") { - Ok(_) => (Response::new(Body::empty()), None), + Ok(_) => (Response::new(Body::empty()), None, None), Err(ExecutionError::WasmTrap(_e)) => { event!( Level::ERROR, @@ -363,7 +383,7 @@ impl ExecuteCtx { .status(hyper::StatusCode::INTERNAL_SERVER_ERROR) .body(Body::empty()) .unwrap(); - (response, Some(_e)) + (response, Some(_e), None) } Err(e) => panic!("failed to run guest: {}", e), }, @@ -560,6 +580,29 @@ impl ExecuteCtx { result } + + /// Runs a guest handle to completion and returns any error that might have occurred. + /// This is useful when a guest returns a response early, but later processing errors, + /// which should be surfaced to the calling code. 
+ pub async fn run_to_completion(handle: GuestHandle) -> Option { + match handle + .handle + .await + .expect("guest worker finished without panicking") + { + Ok(_) => None, + Err(ExecutionError::WasmTrap(e)) => { + event!( + Level::ERROR, + "There was an error running the guest to completion {}", + e.to_string() + ); + + Some(e) + } + Err(e) => panic!("failed to run guest to completion: {}", e), + } + } } fn write_profile(store: &mut wasmtime::Store, guest_profile_path: Option<&PathBuf>) { From a0259a4b08a50dfb9286dbf6290991ff07e3ab83 Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Wed, 17 Apr 2024 15:49:53 +0200 Subject: [PATCH 19/20] Expose GuestHandle type --- lib/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/lib.rs b/lib/src/lib.rs index f9f3a0be..4e21d49d 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -35,7 +35,7 @@ mod wiggle_abi; pub use { async_trait, error::Error, - execute::{EndpointListener, EndpointsMonitor, ExecuteCtx}, + execute::{EndpointListener, EndpointsMonitor, ExecuteCtx, GuestHandle}, http, hyper, in_memory_cache::InMemoryCache, service::ViceroyService, From 98a9bbc09d315c47fda83dbf17a13846136054cb Mon Sep 17 00:00:00 2001 From: Dominic Petrick Date: Thu, 9 May 2024 19:00:18 +0200 Subject: [PATCH 20/20] Loosen potentially erroneous cache parameter assertion. --- lib/src/in_memory_cache.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/src/in_memory_cache.rs b/lib/src/in_memory_cache.rs index 26eb63a5..97778d55 100644 --- a/lib/src/in_memory_cache.rs +++ b/lib/src/in_memory_cache.rs @@ -97,9 +97,10 @@ impl InMemoryCache { let surrogate_keys = if options_mask.contains(types::CacheWriteOptionsMask::SURROGATE_KEYS) { - if options.surrogate_keys_len == 0 { - return Err(Error::InvalidArgument); - } + // Unclear if needed. + // if options.surrogate_keys_len == 0 { + // return Err(Error::InvalidArgument); + // } let byte_slice = options .surrogate_keys_ptr