Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix false positive for multi dep single use statements #120

Merged
71 changes: 71 additions & 0 deletions .rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Configuration policy:
# - When we don't care about the config or specifically want to follow Rust default, don't set it
# - When we specifically want a value for the config, indicate it, even if it's the default
#
# When adding a configuration, please add its doc and default value. Configs should be ordered
# alphabetically except for `edition` that should be first. See
# https://rust-lang.github.io/rustfmt/ for more information

# The edition of the parser (RFC 2052)
edition = "2021"
# Maximum width of an array literal before falling back to vertical formatting. Default 60
array_width = 72
# Maximum width of the args of a function-like attributes before falling back to vertical formatting.
# Default 70
attr_fn_like_width = 84
# Maximum length of a chain to fit on a single line. Default 60
chain_width = 72
# Don't reformat anything. Default: false
disable_all_formatting = false
# Maximum width of the args of a function call before falling back to vertical formatting.
# Default 60
fn_call_width = 72
# Control the layout of arguments in a function. Default: Tall
fn_params_layout = "Tall"
# Use tab characters for indentation, spaces for alignment. Default: false
hard_tabs = false
# Determines whether leading pipes are emitted on match arms. Default: Never
match_arm_leading_pipes = "Never"
# Put a trailing comma after a block based match arm (non-block arms are not affected).
# Default: false
match_block_trailing_comma = false
# Maximum width of each line. Default: 100
max_width = 120
# Merge multiple `#[derive(...)]` into a single one. Default true
merge_derives = true
# Unix or Windows line endings, values: [Auto|Windows|Unix|Native]. Default: Auto
newline_style = "Unix"
# Prints the names of mismatched files that were formatted. Prints the names of files that would be
# formated when used with `--check` mode. Default false
print_misformatted_file_names = false
# Remove nested parens. Default true
remove_nested_parens = true
# Reorder import and extern crate statements alphabetically. Default: true
reorder_imports = true
# Reorder module statements alphabetically in group. Default: true
reorder_modules = true
# Width threshold for an array element to be considered short. Default: 10
short_array_element_width_threshold = 12
# Maximum line length for single line if-else expressions. A value of zero means always break
# if-else expressions. Default 50
single_line_if_else_max_width = 60
# Maximum line length for single line let-else statements. See the let-else statement section of the
# Rust Style Guide for more details on when a let-else statement may be written on a single line. A
# value of 0 (zero) means the divergent else block will always be formatted over multiple lines.
#
# Note this occurs when use_small_heuristics is set to Off.
#
# By default this option is set as a percentage of max_width provided by use_small_heuristics, but a
# value set directly for single_line_let_else_max_width will take precedence.
#
# Default value: 50
single_line_let_else_max_width = 60
# Maximum width in the body of a struct lit before falling back to vertical formatting. Default 18
struct_lit_width = 22
# Maximum width in the body of a struct variant before falling back to vertical formatting.
# Default 35
struct_variant_width = 42
# Use field initialization shorthand if possible. Default: false
use_field_init_shorthand = true
# Replace uses of the try! macro by the ? shorthand. Default: false
use_try_shorthand = true
204 changes: 128 additions & 76 deletions src/search_unused.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,39 +79,48 @@ fn make_line_regexp(name: &str) -> String {
// Breaking down this regular expression: given a line,
// - `use (::)?(?i){name}(?-i)(::|;| as)`: matches `use foo;`, `use foo::bar`, `use foo as bar;`, with
// an optional "::" in front of the crate's name.
// - `\b(?i){name}(?-i)::`: matches `foo::X`, but not `barfoo::X`. `\b` means word boundary, so
// putting it before the crate's name ensures there's no polluting prefix.
// - `(?:[^:]|^|\W::)\b(?i){name}(?-i)::`: matches `foo::X`, but not `barfoo::X`. To ensure there's no polluting
// prefix we add `(?:[^:]|^|\W::)\b`, meaning that the crate name must be prefixed by either:
// * Not a `:` (therefore not a sub module)
// * The start of a line
// * Not a word character followed by `::` (to allow ::my_crate)
// - `extern crate (?i){name}(?-i)( |;)`: matches `extern crate foo`, or `extern crate foo as bar`.
// - `(?i){name}(?-i)` makes the match against the crate's name case insensitive
format!(
r#"use (::)?(?i){name}(?-i)(::|;| as)|\b(?i){name}(?-i)::|extern crate (?i){name}(?-i)( |;)"#
r#"use (::)?(?i){name}(?-i)(::|;| as)|(?:[^:]|^|\W::)\b(?i){name}(?-i)::|extern crate (?i){name}(?-i)( |;)"#
bnjbvr marked this conversation as resolved.
Show resolved Hide resolved
)
}

fn make_multiline_regexp(name: &str) -> String {
// Syntax documentation: https://docs.rs/regex/latest/regex/#syntax
//
// Breaking down this Terrible regular expression: tries to match compound `use as` statements,
// as in `use { X as Y };`, with possibly multiple-lines in between. Will match the first `};`
// that it finds, which *should* be the end of the use statement, but oh well.
// `(?i){name}(?-i)` makes the match against the crate's name case insensitive.
format!(r#"use \{{\s[^;]*(?i){name}(?-i)\s*as\s*[^;]*\}};"#)
// Breaking down this Terrible regular expression: tries to match uses of the crate's name in
// compound `use` statement accross multiple lines.
bnjbvr marked this conversation as resolved.
Show resolved Hide resolved
//
// It's split into 3 parts:
// 1. Matches modules before the usage of the crate's name: `\s*(?:(::)?\w+{sub_modules_match}\s*,\s*)*`
// 2. Matches the crate's name with optional sub-modules: `(::)?{name}{sub_modules_match}\s*`
// 3. Matches modules after the usage of the crate's name: `(?:\s*,\s*(::)?\w+{sub_modules_match})*\s*,?\s*`
//
// In order to avoid false usage detection of `not_my_dep::my_dep` the regexp ensures that the
// crate's name is at the top level of the use statement. However, it's not possible with
// regexp to allow any number of matching `{` and `}` before the crate's usage (rust regexp
// engine doesn't support recursion). Therefore, sub modules are authorized up to 4 levels
// deep.

let sub_modules_match = r#"(?:::\w+)*(?:::\*|\s+as\s+\w+|::\{(?:[^{}]*(?:\{(?:[^{}]*(?:\{(?:[^{}]*(?:\{[^{}]*\})?[^{}]*)*\})?[^{}]*)*\})?[^{}]*)*\})?"#;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's already a nice comment, but can I ask for a little bit more please: could you take each part of this regular expression, and tie it to the comment? (Which part is 1. in your list, 2. in your list, etc. and feel free to add more comment sections if necessary)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've linked the different parts 😊


format!(
r#"use \{{\s*(?:(::)?\w+{sub_modules_match}\s*,\s*)*(::)?{name}{sub_modules_match}\s*(?:\s*,\s*(::)?\w+{sub_modules_match})*\s*,?\s*\}};"#
)
}

/// Returns all the paths to the Rust source files for a crate contained at the given path.
fn collect_paths(dir_path: &Path, analysis: &PackageAnalysis) -> Vec<PathBuf> {
let mut root_paths = HashSet::new();

if let Some(path) = analysis
.manifest
.lib
.as_ref()
.and_then(|lib| lib.path.as_ref())
{
assert!(
path.ends_with(".rs"),
"paths provided by cargo_toml are to Rust files"
);
if let Some(path) = analysis.manifest.lib.as_ref().and_then(|lib| lib.path.as_ref()) {
assert!(path.ends_with(".rs"), "paths provided by cargo_toml are to Rust files");
let mut path_buf = PathBuf::from(path);
// Remove .rs extension.
path_buf.pop();
Expand All @@ -127,10 +136,7 @@ fn collect_paths(dir_path: &Path, analysis: &PackageAnalysis) -> Vec<PathBuf> {
.chain(analysis.manifest.example.iter())
{
if let Some(ref path) = product.path {
assert!(
path.ends_with(".rs"),
"paths provided by cargo_toml are to Rust files"
);
assert!(path.ends_with(".rs"), "paths provided by cargo_toml are to Rust files");
let mut path_buf = PathBuf::from(path);
// Remove .rs extension.
path_buf.pop();
Expand Down Expand Up @@ -237,22 +243,16 @@ impl Search {
return Ok(true);
}
// Single line matcher didn't work, try the multiline matcher now.
func(
&mut self.multiline_searcher,
&self.multiline_matcher,
&mut self.sink,
)
.map_err(|err| anyhow::anyhow!("when searching with complex pattern: {err}"))
.map(|()| self.sink.found)
func(&mut self.multiline_searcher, &self.multiline_matcher, &mut self.sink)
.map_err(|err| anyhow::anyhow!("when searching with complex pattern: {err}"))
.map(|()| self.sink.found)
}
Err(err) => anyhow::bail!("when searching with line pattern: {err}"),
}
}

fn search_path(&mut self, path: &Path) -> anyhow::Result<bool> {
self.try_singleline_then_multiline(|searcher, matcher, sink| {
searcher.search_path(matcher, path, sink)
})
self.try_singleline_then_multiline(|searcher, matcher, sink| searcher.search_path(matcher, path, sink))
}

#[cfg(test)]
Expand All @@ -277,8 +277,7 @@ fn get_full_manifest(
// `inherit_workspace`). See https://gitlab.com/crates.rs/cargo_toml/-/issues/20 for details,
// and a possible future fix.
let cargo_toml_content = std::fs::read(manifest_path)?;
let mut manifest =
cargo_toml::Manifest::<PackageMetadata>::from_slice_with_metadata(&cargo_toml_content)?;
let mut manifest = cargo_toml::Manifest::<PackageMetadata>::from_slice_with_metadata(&cargo_toml_content)?;

let mut ws_manifest_and_path = None;
let mut workspace_ignored = vec![];
Expand Down Expand Up @@ -666,18 +665,89 @@ pub use futures::future;
"#
)?);

// multi-dep single use statements
assert!(test_one("futures", r#"pub use {async_trait, futures, reqwest};"#)?);

// multi-dep single use statements with ::
assert!(test_one("futures", r#"pub use {async_trait, ::futures, reqwest};"#)?);

// No false usage detection of `not_my_dep::my_dep` on compound imports
assert!(!test_one(
"futures",
r#"pub use {async_trait, not_futures::futures, reqwest};"#
)?);

// No false usage detection of `not_my_dep::my_dep` on multiple lines
assert!(!test_one(
"futures",
r#"
pub use {
async_trait,
not_futures::futures,
reqwest,
};"#
)?);

// No false usage detection on single line `not_my_dep::my_dep`
assert!(!test_one("futures", r#"use not_futures::futures::stuff_in_futures;"#)?);

// multi-dep single use statements with nesting
assert!(test_one(
"futures",
r#"pub use {
async_trait::{mod1, dep2},
futures::{futures_mod1, futures_mod2::{futures_mod21, futures_mod22}},
reqwest,
};"#
)?);

// multi-dep single use statements with star import and renaming
assert!(test_one(
"futures",
r#"pub use {
async_trait::sub_mod::*,
futures as futures_renamed,
reqwest,
};"#
)?);

// multi-dep single use statements with complex imports and renaming
assert!(test_one(
"futures",
r#"pub use {
other_dep::{
star_mod::*,
unnamed_import::{UnnamedTrait as _, other_mod},
renamed_import as new_name,
sub_import::{mod1, mod2},
},
futures as futures_renamed,
reqwest,
};"#
)?);

// No false usage detection of `not_my_dep::my_dep` with nesting
assert!(!test_one(
"futures",
r#"pub use {
async_trait::{mod1, dep2},
not_futures::futures::{futures_mod1, futures_mod2::{futures_mod21, futures_mod22}},
reqwest,
};"#
)?);

// Detects top level usage
assert!(test_one("futures", r#" ::futures::mod1"#)?);

Ok(())
}

#[cfg(test)]
fn check_analysis<F: Fn(PackageAnalysis)>(rel_path: &str, callback: F) {
for use_cargo_metadata in UseCargoMetadata::all() {
let analysis = find_unused(
&PathBuf::from(TOP_LEVEL).join(rel_path),
*use_cargo_metadata,
)
.expect("find_unused must return an Ok result")
.expect("no error during processing");
let analysis = find_unused(&PathBuf::from(TOP_LEVEL).join(rel_path), *use_cargo_metadata)
.expect("find_unused must return an Ok result")
.expect("no error during processing");
callback(analysis);
}
}
Expand All @@ -693,63 +763,45 @@ fn test_just_unused() {
#[test]
fn test_just_unused_with_manifest() {
// a crate that does not use a dependency it refers to, and uses workspace properties
check_analysis(
"./integration-tests/workspace-package/program/Cargo.toml",
|analysis| {
assert_eq!(analysis.unused, &["log".to_string()]);
},
);
check_analysis("./integration-tests/workspace-package/program/Cargo.toml", |analysis| {
assert_eq!(analysis.unused, &["log".to_string()]);
});
}

#[test]
fn test_unused_transitive() {
// lib1 has zero dependencies
check_analysis(
"./integration-tests/unused-transitive/lib1/Cargo.toml",
|analysis| {
assert!(analysis.unused.is_empty());
},
);
check_analysis("./integration-tests/unused-transitive/lib1/Cargo.toml", |analysis| {
assert!(analysis.unused.is_empty());
});

// lib2 effectively uses lib1
check_analysis(
"./integration-tests/unused-transitive/lib2/Cargo.toml",
|analysis| {
assert!(analysis.unused.is_empty());
},
);
check_analysis("./integration-tests/unused-transitive/lib2/Cargo.toml", |analysis| {
assert!(analysis.unused.is_empty());
});

// but top level references both lib1 and lib2, and only uses lib2
check_analysis(
"./integration-tests/unused-transitive/Cargo.toml",
|analysis| {
assert_eq!(analysis.unused, &["lib1".to_string()]);
},
);
check_analysis("./integration-tests/unused-transitive/Cargo.toml", |analysis| {
assert_eq!(analysis.unused, &["lib1".to_string()]);
});
}

#[test]
fn test_false_positive_macro_use() {
// when a lib uses a dependency via a macro, there's no way we can find it by scanning the
// source code.
check_analysis(
"./integration-tests/false-positive-log/Cargo.toml",
|analysis| {
assert_eq!(analysis.unused, &["log".to_string()]);
},
);
check_analysis("./integration-tests/false-positive-log/Cargo.toml", |analysis| {
assert_eq!(analysis.unused, &["log".to_string()]);
});
}

#[test]
fn test_with_bench() {
// when a package has a bench file designated by binary name, it seems that `cargo_toml`
// doesn't fill in a default path to the source code.
check_analysis(
"./integration-tests/with-bench/bench/Cargo.toml",
|analysis| {
assert!(analysis.unused.is_empty());
},
);
check_analysis("./integration-tests/with-bench/bench/Cargo.toml", |analysis| {
assert!(analysis.unused.is_empty());
});
}

#[test]
Expand Down
Loading