Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BREAKING: Implements directory recursion #57

Merged
merged 3 commits into from
Feb 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/vhs/demo-photos-hdd.tape
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Enter
Type "# but we want to ignore sort, tmp & events folders in the collection"
Enter
Sleep 1s
Type "./smash /media/thushan/smash/photos/ --exclude-dir=sort,tmp,events -o report.json"
Type "./smash /media/thushan/smash/photos/ -r --exclude-dir=sort,tmp,events -o report.json"
Sleep 500ms
Enter
Sleep 30s
Expand Down
2 changes: 1 addition & 1 deletion docs/vhs/demo.tape
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Enter
Type "# exclude the git dir and saving to report.json!"
Enter
Sleep 1s
Type "./smash ~/linux/drivers --exclude-dir=git -o report.json"
Type "./smash ~/linux/drivers -r --exclude-dir=git -o report.json"
Sleep 500ms
Enter
Sleep 10s
Expand Down
2 changes: 1 addition & 1 deletion docs/vhs/install.tape
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Sleep 500ms
Enter

# smash Linux/drivers
Type "smash /linux/drivers
Type "smash /linux/drivers -r
Sleep 500ms
Enter
Sleep 60s
1 change: 1 addition & 0 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ func init() {
flags.BoolVarP(&af.IgnoreHidden, "ignore-hidden", "", true, "Ignore hidden files & folders Eg. files/folders starting with '.'")
flags.BoolVarP(&af.IgnoreSystem, "ignore-system", "", true, "Ignore system files & folders Eg. '$MFT', '.Trash'")
flags.BoolVarP(&af.Silent, "silent", "q", false, "Run in silent mode")
flags.BoolVarP(&af.Recurse, "recurse", "r", false, "Recursively search directories for files")
flags.BoolVarP(&af.Verbose, "verbose", "", false, "Run in verbose mode")
flags.BoolVarP(&af.Profile, "profile", "", false, "Enable Go Profiler - see localhost:1984/debug/pprof")
flags.BoolVarP(&af.HideProgress, "no-progress", "", false, "Disable progress updates")
Expand Down
3 changes: 2 additions & 1 deletion internal/smash/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ func (app *App) Exec() error {
files := app.Runtime.Files
locations := app.Locations
isVerbose := app.Flags.Verbose && !app.Flags.Silent
walkOptions := indexer.WalkConfig{Recurse: app.Flags.Recurse}
showProgress := (!app.Flags.HideProgress && !app.Flags.Silent) || isVerbose

pap := theme.MultiWriter()
Expand All @@ -121,7 +122,7 @@ func (app *App) Exec() error {
}()
for _, location := range locations {
psi.UpdateText("Indexing location: " + location)
err := wk.WalkDirectory(os.DirFS(location), location, files)
err := wk.WalkDirectory(os.DirFS(location), location, walkOptions, files)

if err != nil {
if isVerbose {
Expand Down
1 change: 1 addition & 0 deletions internal/smash/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func (app *App) printConfiguration() {
theme.Println(b.Sprint("Slicing: "), theme.ColourConfig(enabledOrDisabled(!f.DisableSlicing)), config)
theme.Println(b.Sprint("Algorithm: "), theme.ColourConfig(algorithms.Algorithm(f.Algorithm)))
theme.Println(b.Sprint("Locations: "), theme.ColourConfig(strings.Join(app.Locations, ", ")))
theme.Println(b.Sprint("Recursive: "), theme.ColourConfig(enabledOrDisabled(f.Recurse)))

if !f.HideOutput && f.OutputFile != "" {
theme.Println(b.Sprint("Output: "), theme.ColourConfig(f.OutputFile), "(json)")
Expand Down
1 change: 1 addition & 0 deletions internal/smash/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type Flags struct {
IgnoreSystem bool `yaml:"ignore-system"`
ShowVersion bool `yaml:"version"`
ShowNerdStats bool `yaml:"nerd-stats"`
Recurse bool `yaml:"recurse"`
ShowDuplicates bool `yaml:"show-duplicates"`
Silent bool `yaml:"silent"`
HideTopList bool `yaml:"no-top-list"`
Expand Down
2 changes: 1 addition & 1 deletion internal/smash/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
)

var (
Version = "v0.0.7"
Version = "v0.7.0"
Commit = "none"
Date = "unknown"
Time = "nowish"
Expand Down
11 changes: 8 additions & 3 deletions pkg/indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ type IndexerConfig struct {
IgnoreHiddenItems bool
IgnoreSystemItems bool
}
// WalkConfig carries the per-walk options that callers pass to WalkDirectory.
type WalkConfig struct {
// Recurse controls whether WalkDirectory descends into sub-directories.
// When false, every directory other than the walk root is skipped.
Recurse bool
}

func New() *IndexerConfig {
return &IndexerConfig{
Expand Down Expand Up @@ -63,8 +66,9 @@ func NewConfigured(excludeDirFilter []string, excludeFileFilter []string, ignore
return indexer
}

func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan *FileFS) error {
walkErr := fs.WalkDir(f, ".", func(path string, d fs.DirEntry, err error) error {
func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, options WalkConfig, files chan *FileFS) error {
const RootDir = "."
walkErr := fs.WalkDir(f, RootDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
if errors.Is(err, fs.ErrPermission) {
return fs.SkipDir
Expand All @@ -79,8 +83,9 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan *Fil

isIgnoreDir := config.IgnoreSystemItems && config.isIgnored(name, config.excludeSysDirFilter)
isExludeDir := len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)
dontRecurse := !options.Recurse && name != RootDir

if isHiddenObj || isIgnoreDir || isExludeDir {
if isHiddenObj || isIgnoreDir || isExludeDir || dontRecurse {
return fs.SkipDir
}

Expand Down
64 changes: 52 additions & 12 deletions pkg/indexer/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ func TestIndexDirectoryWithFilesInRoot(t *testing.T) {
"DSC19841.ARW",
"DSC19842.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, walkOptions, t)

expected := mockFiles
actual := walkedFiles
Expand All @@ -35,7 +35,8 @@ func TestIndexDirectoryWithFilesAcrossFolders(t *testing.T) {
"subfolder-2/DSC19848.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, walkOptions, t)

expected := mockFiles
actual := walkedFiles
Expand All @@ -49,6 +50,37 @@ func TestIndexDirectoryWithFilesAcrossFolders(t *testing.T) {
}
}

// TestIndexDirectoryWithDirExclusionsNoRecurse walks a mock tree containing
// files in the root and in two sub-folders with recursion switched off and no
// exclusion filters, and expects only the two root-level files to be reported.
func TestIndexDirectoryWithDirExclusionsNoRecurse(t *testing.T) {
	mockFiles := []string{
		"DSC19841.ARW",
		"DSC19842.ARW",
		"subfolder-1/DSC19845.ARW",
		"subfolder-1/DSC19846.ARW",
		"subfolder-2/DSC19847.ARW",
		"subfolder-2/DSC19848.ARW",
	}

	// No directory or file exclusions: anything missing from the result is
	// missing because recursion is disabled, not because of a filter.
	noDirFilter := []string{}
	noFileFilter := []string{}
	noRecurse := WalkConfig{Recurse: false}

	actual := walkDirectoryTestRunner(mockFiles, noDirFilter, noFileFilter, true, noRecurse, t)

	// Only the first two entries live directly in the root.
	expected := []string{mockFiles[0], mockFiles[1]}

	if len(expected) != len(actual) {
		t.Errorf("expected %d, got %d files", len(expected), len(actual))
	}

	if sameFiles := reflect.DeepEqual(actual, expected); !sameFiles {
		t.Errorf("expected %v, got %v files", expected, actual)
	}
}
func TestIndexDirectoryWithDirExclusions(t *testing.T) {
exclude_dir := []string{"subfolder-1", "subfolder-2", "subfolder-not-found"}
exclude_file := []string{}
Expand All @@ -62,7 +94,8 @@ func TestIndexDirectoryWithDirExclusions(t *testing.T) {
"subfolder-2/DSC19848.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -90,7 +123,8 @@ func TestIndexDirectoryWithFileExclusions(t *testing.T) {
"exclude.me",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -120,7 +154,8 @@ func TestIndexDirectoryWithFileAndDirExclusions(t *testing.T) {
"exclude-dir/random.file",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -149,7 +184,8 @@ func TestIndexDirectoryWithHiddenFilesThatShouldBeIndexed(t *testing.T) {
".config/smash/config.json",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, false, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, false, walkOptions, t)

expected := []string{
mockFiles[3],
Expand Down Expand Up @@ -181,7 +217,8 @@ func TestIndexDirectoryWithHiddenFiles(t *testing.T) {
".config/smash/config.json",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand All @@ -208,7 +245,8 @@ func TestIndexDirectoryWhichContainsSystemFiles(t *testing.T) {
"desktop.ini",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand All @@ -235,7 +273,8 @@ func TestIndexDirectoryWhichContainsWindowsSystemFiles(t *testing.T) {
"$MFT/random.file",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand All @@ -260,15 +299,16 @@ func channelFileToSliceOfFiles(ch <-chan *FileFS) []string {
return result
}

func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, ignoreHiddenItems bool, t *testing.T) []string {
func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, ignoreHiddenItems bool, options WalkConfig, t *testing.T) []string {
fr := "mock://"
fs := createMockFS(files)
ch := make(chan *FileFS)
wo := options

go func() {
defer close(ch)
indexer := NewConfigured(excludeDir, excludeFiles, ignoreHiddenItems, true)
err := indexer.WalkDirectory(fs, fr, ch)
err := indexer.WalkDirectory(fs, fr, wo, ch)
if err != nil {
t.Errorf("WalkDirectory returned an error: %v", err)
}
Expand Down
21 changes: 14 additions & 7 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ Flags:
-o, --output-file string Export analysis as JSON (generated automatically otherwise)
--profile Enable Go Profiler - see localhost:1984/debug/pprof
--progress-update int Update progress every x seconds (default 5)
-r, --recurse Recursively search directories for files
--show-duplicates Show full list of duplicates
--show-top int Show the top x duplicates (default 10)
-q, --silent Run in silent mode
Expand All @@ -85,12 +86,18 @@ See the [full list of algorithms](./docs/algorithms.md) supported.

Examples are given in Unix format, but apply to Windows as well.

> \[!TIP]
>
> To recursively smash through directories, use the `--recurse` or `-r` switch.
>
> By default, `smash` only looks at files directly inside the folder(s) you pass and does not descend into sub-folders (from v0.7+)

### Basic

To check for duplicates in a single path (Eg. `~/media/photos`) & output report to `report.json`

```bash
$ ./smash ~/media/photos -o report.json
$ ./smash ~/media/photos -r -o report.json
```

You can then look at `report.json` with [jq](https://github.com/jqlang/jq) to check duplicates:
Expand All @@ -104,7 +111,7 @@ $ jq '.analysis.dupes[]|[.location,.path,.filename]|join("/")' report.json | xar
By default, `smash` ignores empty files but can report on them with the `--ignore-empty=false` argument:

```bash
$ ./smash ~/media/photos --ignore-empty=false -o report.json
$ ./smash ~/media/photos -r --ignore-empty=false -o report.json
```

You can then look at `report.json` with [jq](https://github.com/jqlang/jq) to check empty files:
Expand All @@ -118,7 +125,7 @@ $ jq '.analysis.empty[]|[.location,.path,.filename]|join("/")' report.json | xar
By default, `smash` shows the top 10 duplicate files in the CLI and leaves the rest for the report, you can change that with the `--show-top=50` argument to show the top 50 instead.

```bash
$ ./smash ~/media/photos --show-top=50
$ ./smash ~/media/photos -r --show-top=50
```

### Multiple Directories
Expand All @@ -136,13 +143,13 @@ Smash will find and report all duplicates within any number of directories passe
You can exclude certain directories or files with the `--exclude-dir` and `--exclude-file` switches including wildcard characters:

```bash
$ ./smash --exclude-dir=.git,.svn --exclude-file=.gitignore,*.csv ~/media/photos
$ ./smash -r --exclude-dir=.git,.svn --exclude-file=.gitignore,*.csv ~/media/photos
```

For example, to ignore all hidden files on unix (those that start with `.` such as `.config` or `.gnome` folders):

```bash
$ ./smash --exclude-dir=.config,.gnome ~/media/photos
$ ./smash -r --exclude-dir=.config,.gnome ~/media/photos
```

### Disabling Slicing & Getting Full Hash
Expand All @@ -152,7 +159,7 @@ By default, `smash` uses slicing to efficiently slice a file into multiple segme
If you prefer not to use slicing for a run, you can disable slicing with:

```bash
$ ./smash --disable-slicing ~/media/photos
$ ./smash -r --disable-slicing ~/media/photos
```

### Changing Hashing Algorithms
Expand All @@ -163,7 +170,7 @@ of algorithms [as documented](./docs/algorithms.md).
To use another supported algorithm, use the `--algorithm` switch:

```bash
$ ./smash --algorithm:murmur3 ~/media/photos
$ ./smash -r --algorithm=murmur3 ~/media/photos
```

# Acknowledgements
Expand Down
Loading