From c78d910e2df66c9e2386d22d6f2e6b0a3665600f Mon Sep 17 00:00:00 2001 From: konstin Date: Mon, 6 Nov 2023 10:49:57 +0100 Subject: [PATCH] Review --- Cargo.toml | 2 +- crates/install-wheel-rs/src/lib.rs | 4 +- crates/puffin-client/src/client.rs | 21 ++++------- crates/puffin-client/src/error.rs | 5 +-- crates/puffin-client/src/remote_metadata.rs | 42 +++++++++++++++++++-- 5 files changed, 52 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 58b12bad20702..93fa35771f2e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] anyhow = { version = "1.0.75" } -async_http_range_reader = { version = "0.3.0", git = "https://github.com/baszalmstra/async_http_range_reader", ref = "4cafe5afda889d53060e0565c949d4ffd6ef3786" } +async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", ref = "4cafe5afda889d53060e0565c949d4ffd6ef3786" } async_zip = { version = "0.0.15", features = ["tokio", "deflate"] } bitflags = { version = "2.4.0" } cacache = { version = "11.7.1", default-features = false, features = ["tokio-runtime"] } diff --git a/crates/install-wheel-rs/src/lib.rs b/crates/install-wheel-rs/src/lib.rs index 9501ff7c78afc..6615d8341fea5 100644 --- a/crates/install-wheel-rs/src/lib.rs +++ b/crates/install-wheel-rs/src/lib.rs @@ -96,12 +96,12 @@ pub fn find_dist_info_metadata<'a, T: Copy>( .collect(); let (payload, path) = match metadatas[..] { [] => { - return Err("Missing .dist-info directory".to_string()); + return Err("no .dist-info directory".to_string()); } [(payload, path)] => (payload, path), _ => { return Err(format!( - "Multiple .dist-info directories: {}", + "multiple .dist-info directories: {}", metadatas .into_iter() .map(|(_, path)| path.to_string()) diff --git a/crates/puffin-client/src/client.rs b/crates/puffin-client/src/client.rs index aa7c6e4839220..1c1429b467a96 100644 --- a/crates/puffin-client/src/client.rs +++ b/crates/puffin-client/src/client.rs @@ -208,10 +208,7 @@ impl RegistryClient { return Err(Error::NoIndex(file.filename)); } - // Per PEP 658, if `data-dist-info-metadata` is available, we can request it directly; - // otherwise, try using HTTP range requests to read only the `.dist-info/METADATA` file - // from the zip, and if that also fails, download the whole wheel into the cache and read - // from there + // If the metadata file is available at its own url (PEP 658), download it from there let url = Url::parse(&file.url)?; if file.data_dist_info_metadata.is_available() { let url = Url::parse(&format!("{}.metadata", file.url))?; @@ -225,6 +222,9 @@ impl RegistryClient { })?; Ok(Metadata21::parse(text.as_bytes())?) + // If we lack PEP 658 support, try using HTTP range requests to read only the + // `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel + // into the cache and read from there } else { self.wheel_metadata_no_index(&filename, &url).await } @@ -245,11 +245,7 @@ impl RegistryClient { } else if let Some((mut reader, headers)) = self.range_reader(url.clone()).await? { debug!("Using remote zip reader for wheel metadata for {url}"); let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?; - let mut metadata = Metadata21::parse(text.as_bytes())?; - if metadata.description.is_some() { - // Perf (and cache size) improvement - metadata.description = Some("[omitted]".to_string()); - } + let metadata = Metadata21::parse(text.as_bytes())?; let is_immutable = headers .get(header::CACHE_CONTROL) .and_then(|header| header.to_str().ok()) @@ -289,7 +285,7 @@ impl RegistryClient { Some(((idx, e), e.entry().filename().as_str().ok()?)) }), ) - .map_err(|err| Error::InvalidWheel(filename.clone(), err))?; + .map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?; // Read the contents of the METADATA file let mut contents = Vec::new(); @@ -341,9 +337,8 @@ impl RegistryClient { )) } - /// Try using HTTP range requests to only read the METADATA file of a remote zip - /// - /// + /// An async for individual files inside a remote zip file, if the server supports it. Returns + /// the headers of the initial request for caching. async fn range_reader( &self, url: Url, diff --git a/crates/puffin-client/src/error.rs b/crates/puffin-client/src/error.rs index c25b4881306d8..7d97b6cbe6af0 100644 --- a/crates/puffin-client/src/error.rs +++ b/crates/puffin-client/src/error.rs @@ -50,9 +50,8 @@ pub enum Error { #[error(transparent)] AsyncHttpRangeReader(#[from] AsyncHttpRangeReaderError), - /// Invalid dist-info dir - #[error("Invalid wheel {0}: {0}")] - InvalidWheel(WheelFilename, String), + #[error("Expected a single .dist-info directory in {0}, found {1}")] + InvalidDistInfo(WheelFilename, String), #[error("The wheel {0} is not a valid zip file")] Zip(WheelFilename, #[source] ZipError), diff --git a/crates/puffin-client/src/remote_metadata.rs b/crates/puffin-client/src/remote_metadata.rs index 1177eaa5ae909..516558eb618e5 100644 --- a/crates/puffin-client/src/remote_metadata.rs +++ b/crates/puffin-client/src/remote_metadata.rs @@ -51,8 +51,44 @@ pub(crate) async fn wheel_metadata_write_cache( /// Read the `.dist-info/METADATA` file from a async remote zip reader, so we avoid downloading the /// entire wheel just for the one file. /// -/// This method is copied from and licensed under -/// BSD-3-Clause, see `LICENSE.BSD-3-Clause` +/// This method is derived from `prefix-div/rip`, which is available under the following BSD-3 +/// Clause license: +/// +/// ```text +/// BSD 3-Clause License +/// +/// Copyright (c) 2023, prefix.dev GmbH +/// +/// Redistribution and use in source and binary forms, with or without +/// modification, are permitted provided that the following conditions are met: +/// +/// 1. Redistributions of source code must retain the above copyright notice, this +/// list of conditions and the following disclaimer. +/// +/// 2. Redistributions in binary form must reproduce the above copyright notice, +/// this list of conditions and the following disclaimer in the documentation +/// and/or other materials provided with the distribution. +/// +/// 3. Neither the name of the copyright holder nor the names of its +/// contributors may be used to endorse or promote products derived from +/// this software without specific prior written permission. +/// +/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +/// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +/// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +/// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +/// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +/// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +/// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/// ``` +/// +/// Additional work and modifications to the originating source are available under the +/// Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or ) +/// or MIT license ([LICENSE-MIT](LICENSE-MIT) or ), as per the +/// rest of the crate. pub(crate) async fn wheel_metadata_from_remote_zip( filename: &WheelFilename, reader: &mut AsyncHttpRangeReader, @@ -79,7 +115,7 @@ pub(crate) async fn wheel_metadata_from_remote_zip( .enumerate() .filter_map(|(idx, e)| Some(((idx, e), e.entry().filename().as_str().ok()?))), ) - .map_err(|err| Error::InvalidWheel(filename.clone(), err))?; + .map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?; let offset = metadata_entry.header_offset(); let size = metadata_entry.entry().compressed_size()