bnjbvr · bnjbvr · Sep 25, 2024 · Apr 15, 2024 · Apr 15, 2024 · Apr 15, 2024
diff --git a/.rustfmt.toml b/.rustfmt.toml
@@ -0,0 +1,71 @@
+# Configuration policy:
+# - When we don't care about the config or specifically want to follow Rust default, don't set it
+# - When we specifically want a value for the config, indicate it, even if it's the default
+#
+# When adding a configuration, please add its doc and default value. Configs should be ordered
+# alphabetically except for `edition` that should be first. See
+# https://rust-lang.github.io/rustfmt/ for more information
+
+# The edition of the parser (RFC 2052)
+edition = "2021"
+# Maximum width of an array literal before falling back to vertical formatting. Default 60
+array_width = 72
+# Maximum width of the args of a function-like attributes before falling back to vertical formatting.
+# Default 70
+attr_fn_like_width = 84
+# Maximum length of a chain to fit on a single line. Default 60
+chain_width = 72
+# Don't reformat anything. Default: false
+disable_all_formatting = false
+# Maximum width of the args of a function call before falling back to vertical formatting.
+# Default 60
+fn_call_width = 72
+# Control the layout of arguments in a function. Default: Tall
+fn_params_layout = "Tall"
+# Use tab characters for indentation, spaces for alignment. Default: false
+hard_tabs = false
+# Determines whether leading pipes are emitted on match arms. Default: Never
+match_arm_leading_pipes = "Never"
+# Put a trailing comma after a block based match arm (non-block arms are not affected).
+# Default: false
+match_block_trailing_comma = false
+# Maximum width of each line. Default: 100
+max_width = 120
+# Merge multiple `#[derive(...)]` into a single one. Default true
+merge_derives = true
+# Unix or Windows line endings, values: [Auto|Windows|Unix|Native]. Default: Auto
+newline_style = "Unix"
+# Prints the names of mismatched files that were formatted. Prints the names of files that would be
+# formated when used with `--check` mode. Default false
+print_misformatted_file_names = false
+# Remove nested parens. Default true
+remove_nested_parens = true
+# Reorder import and extern crate statements alphabetically. Default: true
+reorder_imports = true
+# Reorder module statements alphabetically in group. Default: true
+reorder_modules = true
+# Width threshold for an array element to be considered short. Default: 10
+short_array_element_width_threshold = 12
+# Maximum line length for single line if-else expressions. A value of zero means always break
+# if-else expressions. Default 50
+single_line_if_else_max_width = 60
+# Maximum line length for single line let-else statements. See the let-else statement section of the
+# Rust Style Guide for more details on when a let-else statement may be written on a single line. A
+# value of 0 (zero) means the divergent else block will always be formatted over multiple lines.
+#
+# Note this occurs when use_small_heuristics is set to Off.
+#
+# By default this option is set as a percentage of max_width provided by use_small_heuristics, but a
+# value set directly for single_line_let_else_max_width will take precedence.
+#
+# Default value: 50
+single_line_let_else_max_width = 60
+# Maximum width in the body of a struct lit before falling back to vertical formatting. Default 18
+struct_lit_width = 22
+# Maximum width in the body of a struct variant before falling back to vertical formatting.
+# Default 35
+struct_variant_width = 42
+# Use field initialization shorthand if possible. Default: false
+use_field_init_shorthand = true
+# Replace uses of the try! macro by the ? shorthand. Default: false
+use_try_shorthand = true
diff --git a/src/search_unused.rs b/src/search_unused.rs
@@ -79,39 +79,48 @@ fn make_line_regexp(name: &str) -> String {
     // Breaking down this regular expression: given a line,
     // - `use (::)?(?i){name}(?-i)(::|;| as)`: matches `use foo;`, `use foo::bar`, `use foo as bar;`, with
     // an optional "::" in front of the crate's name.
-    // - `\b(?i){name}(?-i)::`: matches `foo::X`, but not `barfoo::X`. `\b` means word boundary, so
-    // putting it before the crate's name ensures there's no polluting prefix.
+    // - `(?:[^:]|^|\W::)\b(?i){name}(?-i)::`: matches `foo::X`, but not `barfoo::X`. To ensure there's no polluting
+    //   prefix we add `(?:[^:]|^|\W::)\b`, meaning that the crate name must be prefixed by either:
+    //    * Not a `:` (therefore not a sub module)
+    //    * The start of a line
+    //    * Not a word character followed by `::` (to allow ::my_crate)
     // - `extern crate (?i){name}(?-i)( |;)`: matches `extern crate foo`, or `extern crate foo as bar`.
     // - `(?i){name}(?-i)` makes the match against the crate's name case insensitive
     format!(
-        r#"use (::)?(?i){name}(?-i)(::|;| as)|\b(?i){name}(?-i)::|extern crate (?i){name}(?-i)( |;)"#
+        r#"use (::)?(?i){name}(?-i)(::|;| as)|(?:[^:]|^|\W::)\b(?i){name}(?-i)::|extern crate (?i){name}(?-i)( |;)"#
     )
 }
 
 fn make_multiline_regexp(name: &str) -> String {
     // Syntax documentation: https://docs.rs/regex/latest/regex/#syntax
     //
-    // Breaking down this Terrible regular expression: tries to match compound `use as` statements,
-    // as in `use { X as Y };`, with possibly multiple-lines in between. Will match the first `};`
-    // that it finds, which *should* be the end of the use statement, but oh well.
-    // `(?i){name}(?-i)` makes the match against the crate's name case insensitive.
-    format!(r#"use \{{\s[^;]*(?i){name}(?-i)\s*as\s*[^;]*\}};"#)
+    // Breaking down this Terrible regular expression: tries to match uses of the crate's name in
+    // compound `use` statement accross multiple lines.
+    //
+    // It's split into 3 parts:
+    //   1. Matches modules before the usage of the crate's name: `\s*(?:(::)?\w+{sub_modules_match}\s*,\s*)*`
+    //   2. Matches the crate's name with optional sub-modules: `(::)?{name}{sub_modules_match}\s*`
+    //   3. Matches modules after the usage of the crate's name: `(?:\s*,\s*(::)?\w+{sub_modules_match})*\s*,?\s*`
+    //
+    // In order to avoid false usage detection of `not_my_dep::my_dep` the regexp ensures that the
+    // crate's name is at the top level of the use statement. However, it's not possible with
+    // regexp to allow any number of matching `{` and `}` before the crate's usage (rust regexp
+    // engine doesn't support recursion). Therefore, sub modules are authorized up to 4 levels
+    // deep.
+
+    let sub_modules_match = r#"(?:::\w+)*(?:::\*|\s+as\s+\w+|::\{(?:[^{}]*(?:\{(?:[^{}]*(?:\{(?:[^{}]*(?:\{[^{}]*\})?[^{}]*)*\})?[^{}]*)*\})?[^{}]*)*\})?"#;
+
+    format!(
+        r#"use \{{\s*(?:(::)?\w+{sub_modules_match}\s*,\s*)*(::)?{name}{sub_modules_match}\s*(?:\s*,\s*(::)?\w+{sub_modules_match})*\s*,?\s*\}};"#
+    )
 }
 
 /// Returns all the paths to the Rust source files for a crate contained at the given path.
 fn collect_paths(dir_path: &Path, analysis: &PackageAnalysis) -> Vec<PathBuf> {
     let mut root_paths = HashSet::new();
 
-    if let Some(path) = analysis
-        .manifest
-        .lib
-        .as_ref()
-        .and_then(|lib| lib.path.as_ref())
-    {
-        assert!(
-            path.ends_with(".rs"),
-            "paths provided by cargo_toml are to Rust files"
-        );
+    if let Some(path) = analysis.manifest.lib.as_ref().and_then(|lib| lib.path.as_ref()) {
+        assert!(path.ends_with(".rs"), "paths provided by cargo_toml are to Rust files");
         let mut path_buf = PathBuf::from(path);
         // Remove .rs extension.
         path_buf.pop();
@@ -127,10 +136,7 @@ fn collect_paths(dir_path: &Path, analysis: &PackageAnalysis) -> Vec<PathBuf> {
         .chain(analysis.manifest.example.iter())
     {
         if let Some(ref path) = product.path {
-            assert!(
-                path.ends_with(".rs"),
-                "paths provided by cargo_toml are to Rust files"
-            );
+            assert!(path.ends_with(".rs"), "paths provided by cargo_toml are to Rust files");
             let mut path_buf = PathBuf::from(path);
             // Remove .rs extension.
             path_buf.pop();
@@ -237,22 +243,16 @@ impl Search {
                     return Ok(true);
                 }
                 // Single line matcher didn't work, try the multiline matcher now.
-                func(
-                    &mut self.multiline_searcher,
-                    &self.multiline_matcher,
-                    &mut self.sink,
-                )
-                .map_err(|err| anyhow::anyhow!("when searching with complex pattern: {err}"))
-                .map(|()| self.sink.found)
+                func(&mut self.multiline_searcher, &self.multiline_matcher, &mut self.sink)
+                    .map_err(|err| anyhow::anyhow!("when searching with complex pattern: {err}"))
+                    .map(|()| self.sink.found)
             }
             Err(err) => anyhow::bail!("when searching with line pattern: {err}"),
         }
     }
 
     fn search_path(&mut self, path: &Path) -> anyhow::Result<bool> {
-        self.try_singleline_then_multiline(|searcher, matcher, sink| {
-            searcher.search_path(matcher, path, sink)
-        })
+        self.try_singleline_then_multiline(|searcher, matcher, sink| searcher.search_path(matcher, path, sink))
     }
 
     #[cfg(test)]
@@ -277,8 +277,7 @@ fn get_full_manifest(
     // `inherit_workspace`). See https://gitlab.com/crates.rs/cargo_toml/-/issues/20 for details,
     // and a possible future fix.
     let cargo_toml_content = std::fs::read(manifest_path)?;
-    let mut manifest =
-        cargo_toml::Manifest::<PackageMetadata>::from_slice_with_metadata(&cargo_toml_content)?;
+    let mut manifest = cargo_toml::Manifest::<PackageMetadata>::from_slice_with_metadata(&cargo_toml_content)?;
 
     let mut ws_manifest_and_path = None;
     let mut workspace_ignored = vec![];
@@ -666,18 +665,89 @@ pub use futures::future;
     "#
     )?);
 
+    // multi-dep single use statements
+    assert!(test_one("futures", r#"pub use {async_trait, futures, reqwest};"#)?);
+
+    // multi-dep single use statements with ::
+    assert!(test_one("futures", r#"pub use {async_trait, ::futures, reqwest};"#)?);
+
+    // No false usage detection of `not_my_dep::my_dep` on compound imports
+    assert!(!test_one(
+        "futures",
+        r#"pub use {async_trait, not_futures::futures, reqwest};"#
+    )?);
+
+    // No false usage detection of `not_my_dep::my_dep` on multiple lines
+    assert!(!test_one(
+        "futures",
+        r#"
+pub use {
+    async_trait,
+    not_futures::futures,
+    reqwest,
+};"#
+    )?);
+
+    // No false usage detection on single line `not_my_dep::my_dep`
+    assert!(!test_one("futures", r#"use not_futures::futures::stuff_in_futures;"#)?);
+
+    // multi-dep single use statements with nesting
+    assert!(test_one(
+        "futures",
+        r#"pub use {
+            async_trait::{mod1, dep2},
+            futures::{futures_mod1, futures_mod2::{futures_mod21, futures_mod22}},
+            reqwest,
+        };"#
+    )?);
+
+    // multi-dep single use statements with star import and renaming
+    assert!(test_one(
+        "futures",
+        r#"pub use {
+            async_trait::sub_mod::*,
+            futures as futures_renamed,
+            reqwest,
+        };"#
+    )?);
+
+    // multi-dep single use statements with complex imports and renaming
+    assert!(test_one(
+        "futures",
+        r#"pub use {
+            other_dep::{
+                star_mod::*,
+                unnamed_import::{UnnamedTrait as _, other_mod},
+                renamed_import as new_name,
+                sub_import::{mod1, mod2},
+            },
+            futures as futures_renamed,
+            reqwest,
+        };"#
+    )?);
+
+    // No false usage detection of `not_my_dep::my_dep` with nesting
+    assert!(!test_one(
+        "futures",
+        r#"pub use {
+            async_trait::{mod1, dep2},
+            not_futures::futures::{futures_mod1, futures_mod2::{futures_mod21, futures_mod22}},
+            reqwest,
+        };"#
+    )?);
+
+    // Detects top level usage
+    assert!(test_one("futures", r#" ::futures::mod1"#)?);
+
     Ok(())
 }
 
 #[cfg(test)]
 fn check_analysis<F: Fn(PackageAnalysis)>(rel_path: &str, callback: F) {
     for use_cargo_metadata in UseCargoMetadata::all() {
-        let analysis = find_unused(
-            &PathBuf::from(TOP_LEVEL).join(rel_path),
-            *use_cargo_metadata,
-        )
-        .expect("find_unused must return an Ok result")
-        .expect("no error during processing");
+        let analysis = find_unused(&PathBuf::from(TOP_LEVEL).join(rel_path), *use_cargo_metadata)
+            .expect("find_unused must return an Ok result")
+            .expect("no error during processing");
         callback(analysis);
     }
 }
@@ -693,63 +763,45 @@ fn test_just_unused() {
 #[test]
 fn test_just_unused_with_manifest() {
     // a crate that does not use a dependency it refers to, and uses workspace properties
-    check_analysis(
-        "./integration-tests/workspace-package/program/Cargo.toml",
-        |analysis| {
-            assert_eq!(analysis.unused, &["log".to_string()]);
-        },
-    );
+    check_analysis("./integration-tests/workspace-package/program/Cargo.toml", |analysis| {
+        assert_eq!(analysis.unused, &["log".to_string()]);
+    });
 }
 
 #[test]
 fn test_unused_transitive() {
     // lib1 has zero dependencies
-    check_analysis(
-        "./integration-tests/unused-transitive/lib1/Cargo.toml",
-        |analysis| {
-            assert!(analysis.unused.is_empty());
-        },
-    );
+    check_analysis("./integration-tests/unused-transitive/lib1/Cargo.toml", |analysis| {
+        assert!(analysis.unused.is_empty());
+    });
 
     // lib2 effectively uses lib1
-    check_analysis(
-        "./integration-tests/unused-transitive/lib2/Cargo.toml",
-        |analysis| {
-            assert!(analysis.unused.is_empty());
-        },
-    );
+    check_analysis("./integration-tests/unused-transitive/lib2/Cargo.toml", |analysis| {
+        assert!(analysis.unused.is_empty());
+    });
 
     // but top level references both lib1 and lib2, and only uses lib2
-    check_analysis(
-        "./integration-tests/unused-transitive/Cargo.toml",
-        |analysis| {
-            assert_eq!(analysis.unused, &["lib1".to_string()]);
-        },
-    );
+    check_analysis("./integration-tests/unused-transitive/Cargo.toml", |analysis| {
+        assert_eq!(analysis.unused, &["lib1".to_string()]);
+    });
 }
 
 #[test]
 fn test_false_positive_macro_use() {
     // when a lib uses a dependency via a macro, there's no way we can find it by scanning the
     // source code.
-    check_analysis(
-        "./integration-tests/false-positive-log/Cargo.toml",
-        |analysis| {
-            assert_eq!(analysis.unused, &["log".to_string()]);
-        },
-    );
+    check_analysis("./integration-tests/false-positive-log/Cargo.toml", |analysis| {
+        assert_eq!(analysis.unused, &["log".to_string()]);
+    });
 }
 
 #[test]
 fn test_with_bench() {
     // when a package has a bench file designated by binary name, it seems that `cargo_toml`
     // doesn't fill in a default path to the source code.
-    check_analysis(
-        "./integration-tests/with-bench/bench/Cargo.toml",
-        |analysis| {
-            assert!(analysis.unused.is_empty());
-        },
-    );
+    check_analysis("./integration-tests/with-bench/bench/Cargo.toml", |analysis| {
+        assert!(analysis.unused.is_empty());
+    });
 }
 
 #[test]