Skip to content

Commit

Permalink
Reuse a temp dir per device
Browse files Browse the repository at this point in the history
Try to use the system tempdir first, then make a tempdir per device the
root directories reside on.

This has several benefits:
- Most of the time, we'll use the common system tempdir entirely
- We will now exclude the tempdir we use for each root: it used to be
  possible for the temp files to be picked up by the reader thread: this
  is now only possible if we cross into a different device than all root
  dirs.
  • Loading branch information
Dr-Emann committed Jan 9, 2024
1 parent b2f0a74 commit c413d8e
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 18 deletions.
1 change: 1 addition & 0 deletions crates/applesauce/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ mod rfork_storage;
mod scan;
mod seq_queue;
mod threads;
mod tmpdir_paths;
mod xattr;

use libc::c_char;
Expand Down
15 changes: 11 additions & 4 deletions crates/applesauce/src/scan.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use crate::progress::Progress;
use crate::tmpdir_paths::TmpdirPaths;
use ignore::WalkState;
use std::collections::HashSet;
use std::fs::FileType;
use std::path::{Path, PathBuf};

pub struct Walker<'a, P> {
paths: ignore::WalkParallel,
paths: ignore::WalkBuilder,
progress: &'a P,
}

Expand All @@ -21,13 +23,18 @@ impl<'a, P: Progress + Send + Sync> Walker<'a, P> {
});

Self {
paths: builder.build_parallel(),
paths: builder,
progress,
}
}

pub fn run(self, f: impl Fn(FileType, PathBuf) + Send + Sync) {
self.paths.run(|| {
pub fn run(self, tmpdirs: &TmpdirPaths, f: impl Fn(FileType, PathBuf) + Send + Sync) {
let ignored_dirs: HashSet<PathBuf> = tmpdirs.paths().map(PathBuf::from).collect();
let mut paths = self.paths;
let walker = paths
.filter_entry(move |entry| !ignored_dirs.contains(entry.path()))
.build_parallel();
walker.run(|| {
Box::new(|entry| {
handle_entry(entry, self.progress, &f);
WalkState::Continue
Expand Down
22 changes: 18 additions & 4 deletions crates/applesauce/src/threads/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use crate::info::FileCompressionState;
use crate::progress::{self, Progress, SkipReason};
use crate::tmpdir_paths::TmpdirPaths;
use crate::{info, scan, Stats};
use applesauce_core::compressor;
use std::num::NonZeroUsize;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::thread::{self, JoinHandle};
use std::{fmt, mem};
use std::{fmt, io, mem};

pub mod compressing;
pub mod reader;
Expand Down Expand Up @@ -41,6 +42,7 @@ pub struct OperationContext {
mode: Mode,
stats: Stats,
finished_stats: crossbeam_channel::Sender<Stats>,
tempdirs: TmpdirPaths,
verify: bool,
}

Expand All @@ -50,9 +52,15 @@ impl OperationContext {
mode,
stats: Stats::default(),
finished_stats,
tempdirs: TmpdirPaths::new(),
verify,
}
}

fn add_dst(&mut self, dst: &Path) -> io::Result<()> {
let metadata = dst.metadata()?;
self.tempdirs.add_dst(dst, &metadata)
}
}

impl Drop for OperationContext {
Expand Down Expand Up @@ -131,12 +139,18 @@ impl BackgroundThreads {
P::Task: Send + Sync + 'static,
{
let (finished_stats, finished_stats_rx) = crossbeam_channel::bounded(1);
let operation = Arc::new(OperationContext::new(mode, finished_stats, verify));
let mut operation = OperationContext::new(mode, finished_stats, verify);
let walker = scan::Walker::new(
paths
.into_iter()
.inspect(|path| operation.add_dst(path).unwrap()),
progress,
);
let operation = Arc::new(operation);
let stats = &operation.stats;
let chan = self.reader.chan();

let walker = scan::Walker::new(paths, progress);
walker.run(|file_type, path| {
walker.run(&operation.tempdirs, |file_type, path| {
// We really only want to deal with files, not symlinks to files, or fifos, etc.
#[allow(clippy::filetype_is_file)]
if !file_type.is_file() {
Expand Down
18 changes: 8 additions & 10 deletions crates/applesauce/src/threads/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use std::fs::{File, Metadata};
use std::io::{BufRead, BufReader, BufWriter, Seek, Write};
use std::os::fd::AsRawFd;
use std::os::macos::fs::MetadataExt;
use std::path::Path;
use std::sync::Arc;
use std::{cmp, io, ptr};
use tempfile::NamedTempFile;
Expand Down Expand Up @@ -104,7 +103,7 @@ impl Handler {
) -> io::Result<()> {
let uncompressed_file_size = item.metadata.len();

let mut tmp_file = tmp_file_for(&item.context.path)?;
let mut tmp_file = tmp_file_for(&item)?;
copy_xattrs(&item.file, tmp_file.as_file())?;

let mut writer =
Expand Down Expand Up @@ -165,7 +164,7 @@ impl Handler {
}

fn write_uncompressed_file(&mut self, item: WorkItem) -> io::Result<()> {
let mut tmp_file = tmp_file_for(&item.context.path)?;
let mut tmp_file = tmp_file_for(&item)?;
copy_xattrs(&item.file, tmp_file.as_file())?;

let chunks =
Expand Down Expand Up @@ -212,13 +211,12 @@ impl WorkHandler<WorkItem> for Handler {
}
}

#[tracing::instrument(level="debug", skip_all, err, fields(path=%path.display()))]
fn tmp_file_for(path: &Path) -> io::Result<NamedTempFile> {
let mut builder = tempfile::Builder::new();
if let Some(name) = path.file_name() {
builder.prefix(name);
}
builder.tempfile_in(path.parent().ok_or(io::ErrorKind::InvalidInput)?)
#[tracing::instrument(level="debug", skip_all, err, fields(path=%item.context.path.display()))]
fn tmp_file_for(item: &WorkItem) -> io::Result<NamedTempFile> {
item.context
.operation
.tempdirs
.tempfile_for(&item.context.path, &item.metadata)
}

#[tracing::instrument(level = "debug", skip_all, err)]
Expand Down
71 changes: 71 additions & 0 deletions crates/applesauce/src/tmpdir_paths.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fs::Metadata;
use std::io;
use std::os::macos::fs::MetadataExt;
use std::path::Path;
use tempfile::{NamedTempFile, TempDir};

const TEMPDIR_PREFIX: &str = "applesauce_tmp";
const TEMPFILE_PREFIX: &str = "applesauce_tmp";

#[derive(Debug)]
pub struct TmpdirPaths {
/// Map from device to temp dir
dirs: HashMap<u64, TempDir>,
}

impl TmpdirPaths {
pub fn new() -> Self {
let mut dirs = HashMap::new();
let system = TempDir::with_prefix(TEMPDIR_PREFIX);
match system {
Ok(system) => match system.path().metadata() {
Ok(system_metadata) => {
dirs.insert(system_metadata.st_dev(), system);
}
Err(e) => {
tracing::warn!("failed to get metadata for system temp dir: {e}");
}
},
Err(e) => {
tracing::warn!("failed to create temp dir in system temp dir: {e}");
}
}

Self { dirs }
}

pub fn paths(&self) -> impl Iterator<Item = &Path> + '_ {
self.dirs.values().map(|dir| dir.path())
}

pub fn add_dst(&mut self, dst: &Path, metadata: &Metadata) -> io::Result<()> {
let device = metadata.st_dev();
match self.dirs.entry(device) {
Entry::Occupied(_) => {}
Entry::Vacant(entry) => {
let dir = TempDir::with_prefix_in(TEMPDIR_PREFIX, dst)?;
entry.insert(dir);
}
}
Ok(())
}

pub fn tempfile_for(&self, path: &Path, metadata: &Metadata) -> io::Result<NamedTempFile> {
let device = metadata.st_dev();
match self.dirs.get(&device) {
Some(dir) => tempfile::Builder::new()
.prefix(TEMPFILE_PREFIX)
.tempfile_in(dir.path()),
None => {
let parent = path
.parent()
.ok_or_else(|| io::Error::other("expected path to have a parent"))?;
tempfile::Builder::new()
.prefix(TEMPFILE_PREFIX)
.tempfile_in(parent)
}
}
}
}

0 comments on commit c413d8e

Please sign in to comment.