From 9bcb8e2d7524d3e913b2755a709767fa1d9d2d0d Mon Sep 17 00:00:00 2001 From: purajit <7026198+purajit@users.noreply.github.com> Date: Fri, 27 Dec 2024 01:57:29 -0500 Subject: [PATCH] implement ruff analyze live --- crates/ruff/src/args.rs | 53 ++++++ crates/ruff/src/commands/analyze_graph.rs | 35 ++-- crates/ruff/src/commands/analyze_live.rs | 205 ++++++++++++++++++++++ crates/ruff/src/commands/mod.rs | 1 + crates/ruff/src/lib.rs | 10 +- crates/ruff_graph/src/lib.rs | 46 ++++- 6 files changed, 336 insertions(+), 14 deletions(-) create mode 100644 crates/ruff/src/commands/analyze_live.rs diff --git a/crates/ruff/src/args.rs b/crates/ruff/src/args.rs index 5a3d2f2874106..8c85b11fdcba3 100644 --- a/crates/ruff/src/args.rs +++ b/crates/ruff/src/args.rs @@ -146,6 +146,7 @@ pub enum Command { pub enum AnalyzeCommand { /// Generate a map of Python file dependencies or dependents. Graph(AnalyzeGraphCommand), + Live(AnalyzeLiveCommand), } #[derive(Clone, Debug, clap::Parser)] @@ -171,6 +172,20 @@ pub struct AnalyzeGraphCommand { target_version: Option, } +#[derive(Clone, Debug, clap::Parser)] +pub struct AnalyzeLiveCommand { + #[clap(help = "Command to run on changed files")] + pub cmd: Vec, + #[clap( + long, + help = "Comma-separated list of files or directories to trigger command on [default: .]" + )] + pub paths: String, + /// Args passed to `analyze graph` + #[clap(long)] + pub detect_string_imports: bool, +} + // The `Parser` derive is for ruff_dev, for ruff `Args` would be sufficient #[derive(Clone, Debug, clap::Parser)] #[allow(clippy::struct_excessive_bools)] @@ -801,6 +816,37 @@ impl AnalyzeGraphCommand { } } +impl AnalyzeLiveCommand { + /// Partition the CLI into command-line arguments and configuration + /// overrides. + pub fn partition( + self, + global_options: GlobalConfigArgs, + ) -> anyhow::Result<(AnalyzeLiveArgs, ConfigArguments)> { + let format_arguments = AnalyzeLiveArgs { + paths: self + .paths + .split(",") + .map(PathBuf::from) + .collect::>(), + cmd: self.cmd, + }; + + let cli_overrides = ExplicitConfigOverrides { + detect_string_imports: if self.detect_string_imports { + Some(true) + } else { + None + }, + preview: Some(PreviewMode::Enabled), + ..ExplicitConfigOverrides::default() + }; + + let config_args = ConfigArguments::from_cli_arguments(global_options, cli_overrides)?; + Ok((format_arguments, config_args)) + } +} + fn resolve_bool_arg(yes: bool, no: bool) -> Option { match (yes, no) { (true, false) => Some(true), @@ -1211,6 +1257,13 @@ pub struct AnalyzeGraphArgs { pub direction: Direction, } +/// CLI settings that are distinct from configuration (commands, lists of files, etc.). +#[derive(Clone, Debug)] +pub struct AnalyzeLiveArgs { + pub paths: Vec, + pub cmd: Vec, +} + /// Configuration overrides provided via dedicated CLI flags: /// `--line-length`, `--respect-gitignore`, etc. #[derive(Clone, Default)] diff --git a/crates/ruff/src/commands/analyze_graph.rs b/crates/ruff/src/commands/analyze_graph.rs index f8a0684d428a7..987e133d70a1c 100644 --- a/crates/ruff/src/commands/analyze_graph.rs +++ b/crates/ruff/src/commands/analyze_graph.rs @@ -15,11 +15,33 @@ use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; -/// Generate an import map. +/// Generate and print an import map. pub(crate) fn analyze_graph( args: AnalyzeGraphArgs, config_arguments: &ConfigArguments, ) -> Result { + let import_map = generate_import_map(args, config_arguments); + + match import_map { + Ok(import_map) => { + // Print to JSON. + writeln!( + std::io::stdout(), + "{}", + serde_json::to_string_pretty(&import_map)? + )?; + } + Err(err) => return Err(err), + }; + + Ok(ExitStatus::Success) +} + +/// Generate an import map. +pub(crate) fn generate_import_map( + args: AnalyzeGraphArgs, + config_arguments: &ConfigArguments, +) -> Result { // Construct the "default" settings. These are used when no `pyproject.toml` // files are present, or files are injected from outside the hierarchy. let pyproject_config = resolve(config_arguments, None)?; @@ -37,7 +59,7 @@ pub(crate) fn analyze_graph( if paths.is_empty() { warn_user_once!("No Python files found under the given path(s)"); - return Ok(ExitStatus::Success); + return Ok(ImportMap::default()); } // Resolve all package roots. @@ -180,16 +202,9 @@ pub(crate) fn analyze_graph( Direction::Dependents => ImportMap::dependents(imports), }; - // Print to JSON. - writeln!( - std::io::stdout(), - "{}", - serde_json::to_string_pretty(&import_map)? - )?; - std::mem::forget(db); - Ok(ExitStatus::Success) + Ok(import_map) } /// A resolver for glob sets. diff --git a/crates/ruff/src/commands/analyze_live.rs b/crates/ruff/src/commands/analyze_live.rs new file mode 100644 index 0000000000000..5956fff3c6e25 --- /dev/null +++ b/crates/ruff/src/commands/analyze_live.rs @@ -0,0 +1,205 @@ +use crate::args::{AnalyzeGraphArgs, AnalyzeLiveArgs, ConfigArguments}; +use crate::commands; +use crate::ExitStatus; +use anyhow::Result; +use log::warn; +use notify::event::{CreateKind, ModifyKind, RemoveKind}; +use notify::EventKind::{Create, Modify, Remove}; +use notify::{Event, RecursiveMode, Result as WatcherResult, Watcher}; +use ruff_db::system::SystemPathBuf; +use ruff_graph::{Direction, ImportMap}; +use std::collections::{HashSet, VecDeque}; +use std::env; +use std::ffi::OsStr; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::mpsc; + +pub(crate) fn analyze_live( + args: AnalyzeLiveArgs, + config_arguments: &ConfigArguments, +) -> Result { + let cmd_name = OsStr::new(args.cmd.first().expect("Command must be provided")); + let cmd_args = &args.cmd[1..]; + let cwd = env::current_dir()?; + + let (tx, rx) = mpsc::channel::>(); + let mut watcher = notify::recommended_watcher(tx)?; + + // maintaining both a dependents graph and dependency graph since: + // * the dependents graph directly powers the basic functionality + // * the dependency graph allows us to monitor which edges were removed in a + // file change without traversing the entire graph + let mut import_map_dependents = commands::analyze_graph::generate_import_map( + AnalyzeGraphArgs { + files: vec![PathBuf::from(".")], + direction: Direction::Dependents, + }, + &config_arguments, + )?; + + let mut import_map_dependencies = commands::analyze_graph::generate_import_map( + AnalyzeGraphArgs { + files: vec![PathBuf::from(".")], + direction: Direction::Dependencies, + }, + &config_arguments, + )?; + + watcher.watch(Path::new("."), RecursiveMode::Recursive)?; + for res in rx { + let _ = match res { + Ok(event) => match event.kind { + Modify(ModifyKind::Name(_)) + | Modify(ModifyKind::Data(_)) + | Create(CreateKind::File) + | Remove(RemoveKind::File) => { + // we only want to rerun analyze on files that changed, specifically either + // files already tracked by the import map, or if they're python files, or + // if it's a project/ruff configuration + let changed_paths = event + .paths + .into_iter() + .filter(|p| { + let sp = p.to_str().unwrap(); + // a non-python file might be a dependent explicitly declared + // `include-dependencies`; if so, we want to track its changes + import_map_dependents.contains_key(&SystemPathBuf::from(sp)) + // there might be a new python file + || sp.ends_with(".py") + // or a change to the config itself + || sp.ends_with("ruff.toml") + || sp.ends_with(".ruff.toml") + || sp.ends_with("pyproject.toml") + }) + .map(|p| p.strip_prefix(&cwd).map(PathBuf::from).unwrap()) + .collect::>(); + + if changed_paths.is_empty() { + continue; + } + + warn!("changed paths: {:?}", changed_paths); + + // if a file has been removed, first find the impacted files before changing the + // import map and losing that information; otherwise, we update the graph first - + // even if there are removed edges, we can still evaluate with the updated graph + // because for a file to be impacted by it, there must be some file in its path + // (possibly itself) that was modified, which will still trigger it + if event.kind != Remove(RemoveKind::Any) { + // TODO: if config file changed, reconstruct entire graph; this could be + // optimized by just adding new edges from include-dependencies, but + // in pathological cases, `src` and such might be modified as well + let import_map_dependencies_update = + match commands::analyze_graph::generate_import_map( + AnalyzeGraphArgs { + files: changed_paths.clone(), + // when a file is changed, only its dependencies might change + // so this is sufficient to update our view of the graph + direction: Direction::Dependencies, + }, + &config_arguments, + ) { + Ok(new_import_map) => new_import_map, + Err(_) => continue, + }; + + for (path, new_dependencies) in import_map_dependencies_update.iter() { + let old_dependencies = import_map_dependencies + .insert(path.clone(), new_dependencies.clone()); + // handle removed edges + if old_dependencies.is_some() { + for m in old_dependencies.unwrap().difference(new_dependencies) { + if import_map_dependents.contains_key(m) { + import_map_dependents + .entry(m.clone()) + .and_modify(|curr| curr.remove(&path)); + } + } + } + // add new edges + for m in new_dependencies.iter() { + let values = import_map_dependents.entry(m.clone()).or_default(); + values.insert(path.clone()); + } + } + } + + let affected_files = get_affected_files(&changed_paths, &import_map_dependents) + .into_iter() + .filter(|p| { + let sp = p.to_str(); + sp.is_some() + && import_map_dependents + .contains_key(&SystemPathBuf::from(sp.unwrap())) + && args.paths.iter().any(|args_path| p.starts_with(args_path)) + }) + .collect::>(); + + if event.kind == Remove(RemoveKind::File) { + // remove node and all edges to it in both graphs + for p in changed_paths.into_iter() { + let spb = SystemPathBuf::from_path_buf(p).unwrap(); + let _ = import_map_dependents.remove(&spb); + let old_dependencies = import_map_dependencies.remove(&spb); + if old_dependencies.is_some() { + for m in old_dependencies.unwrap().iter() { + import_map_dependents + .entry(m.clone()) + .and_modify(|curr| curr.remove(&spb)); + } + } + } + } + + if affected_files.is_empty() { + warn!("Nothing to do!"); + continue; + } + + warn!("transitively affected files: {:?}", affected_files); + Command::new(cmd_name) + .args(cmd_args) + .args(affected_files.into_iter().map(|p| p.into_os_string())) + .status() + .expect("failed to execute process"); + } + _ => continue, + }, + Err(_) => continue, + }; + } + + return Ok(ExitStatus::Success); +} + +fn get_affected_files( + modified_files: &Vec, + import_map_dependents: &ImportMap, +) -> HashSet { + // run a plain BFS of the dependents graph; all visited nodes are affected files + let mut visited: HashSet = HashSet::new(); + let mut queue: VecDeque = VecDeque::new(); + visited.extend(modified_files.clone()); + queue.extend(modified_files.clone()); + while let Some(file) = queue.pop_front() { + let Ok(module_imports) = + SystemPathBuf::from_path_buf(file).map(|p| import_map_dependents.get(&p)) + else { + warn!("Failed to convert to system path"); + continue; + }; + match module_imports { + Some(mi) => { + for dependent_file in mi.iter() { + if visited.insert(dependent_file.clone().into_std_path_buf()) { + queue.push_back(dependent_file.clone().into_std_path_buf()); + } + } + } + None => continue, + } + } + + visited +} diff --git a/crates/ruff/src/commands/mod.rs b/crates/ruff/src/commands/mod.rs index 4d463a4ef5d15..690149dfe4672 100644 --- a/crates/ruff/src/commands/mod.rs +++ b/crates/ruff/src/commands/mod.rs @@ -1,5 +1,6 @@ pub(crate) mod add_noqa; pub(crate) mod analyze_graph; +pub(crate) mod analyze_live; pub(crate) mod check; pub(crate) mod check_stdin; pub(crate) mod clean; diff --git a/crates/ruff/src/lib.rs b/crates/ruff/src/lib.rs index b88ae264e8e82..02ab974507fd7 100644 --- a/crates/ruff/src/lib.rs +++ b/crates/ruff/src/lib.rs @@ -21,7 +21,8 @@ use ruff_linter::{fs, warn_user, warn_user_once}; use ruff_workspace::Settings; use crate::args::{ - AnalyzeCommand, AnalyzeGraphCommand, Args, CheckCommand, Command, FormatCommand, + AnalyzeCommand, AnalyzeGraphCommand, AnalyzeLiveCommand, Args, CheckCommand, Command, + FormatCommand, }; use crate::printer::{Flags as PrinterFlags, Printer}; @@ -189,6 +190,7 @@ pub fn run( Command::Format(args) => format(args, global_options), Command::Server(args) => server(args), Command::Analyze(AnalyzeCommand::Graph(args)) => analyze_graph(args, global_options), + Command::Analyze(AnalyzeCommand::Live(args)) => analyze_live(args, global_options), } } @@ -211,6 +213,12 @@ fn analyze_graph( commands::analyze_graph::analyze_graph(cli, &config_arguments) } +fn analyze_live(args: AnalyzeLiveCommand, global_options: GlobalConfigArgs) -> Result { + let (cli, config_arguments) = args.partition(global_options)?; + + commands::analyze_live::analyze_live(cli, &config_arguments) +} + fn server(args: ServerCommand) -> Result { let four = NonZeroUsize::new(4).unwrap(); diff --git a/crates/ruff_graph/src/lib.rs b/crates/ruff_graph/src/lib.rs index 0d6a6669bafb6..d9c3a44a02b87 100644 --- a/crates/ruff_graph/src/lib.rs +++ b/crates/ruff_graph/src/lib.rs @@ -1,3 +1,6 @@ +use std::collections::btree_map; +use std::collections::btree_map::Entry; +use std::collections::btree_set; use std::collections::{BTreeMap, BTreeSet}; use anyhow::Result; @@ -10,13 +13,12 @@ use crate::collector::Collector; pub use crate::db::ModuleDb; use crate::resolver::Resolver; pub use crate::settings::{AnalyzeSettings, Direction}; - mod collector; mod db; mod resolver; mod settings; -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct ModuleImports(BTreeSet); @@ -54,11 +56,21 @@ impl ModuleImports { Ok(resolved_imports) } - /// Insert a file path into the module imports. pub fn insert(&mut self, path: SystemPathBuf) { self.0.insert(path); } + pub fn remove(&mut self, key: &SystemPathBuf) { + self.0.remove(key); + } + + pub fn difference<'a>(&'a self, other: &'a ModuleImports) -> BTreeSet<&'a SystemPathBuf> { + // TODO: don't collect; return iterator + self.0 + .difference(&other.0) + .collect::>() + } + /// Extend the module imports with additional file paths. pub fn extend(&mut self, paths: impl IntoIterator) { self.0.extend(paths); @@ -74,6 +86,10 @@ impl ModuleImports { self.0.len() } + pub fn iter(&self) -> btree_set::Iter<'_, SystemPathBuf> { + self.0.iter() + } + /// Convert the file paths to be relative to a given path. #[must_use] pub fn relative_to(self, path: &SystemPath) -> Self { @@ -120,4 +136,28 @@ impl ImportMap { } reverse } + + pub fn iter(&self) -> btree_map::Iter<'_, SystemPathBuf, ModuleImports> { + self.0.iter() + } + + pub fn contains_key(&self, key: &SystemPathBuf) -> bool { + self.0.contains_key(key) + } + + pub fn get(&self, key: &SystemPathBuf) -> Option<&ModuleImports> { + self.0.get(key) + } + + pub fn insert(&mut self, key: SystemPathBuf, value: ModuleImports) -> Option { + self.0.insert(key, value) + } + + pub fn remove(&mut self, key: &SystemPathBuf) -> Option { + self.0.remove(key) + } + + pub fn entry(&mut self, key: SystemPathBuf) -> Entry<'_, SystemPathBuf, ModuleImports> { + self.0.entry(key) + } }