Skip to content

Commit

Permalink
BREAKING: Implements directory recursion (#57)
Browse files Browse the repository at this point in the history
* BREAKING: smash now requires the use of `-r` or `--recurse` to recurse through folders.

* bump version

* refactor for other options later for walker, texas ranger!
  • Loading branch information
thushan authored Feb 3, 2024
1 parent ad4e11a commit 3865e98
Show file tree
Hide file tree
Showing 11 changed files with 83 additions and 27 deletions.
2 changes: 1 addition & 1 deletion docs/vhs/demo-photos-hdd.tape
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Enter
Type "# but we want to ignore sort, tmp & events folders in the collection"
Enter
Sleep 1s
Type "./smash /media/thushan/smash/photos/ --exclude-dir=sort,tmp,events -o report.json"
Type "./smash /media/thushan/smash/photos/ -r --exclude-dir=sort,tmp,events -o report.json"
Sleep 500ms
Enter
Sleep 30s
Expand Down
2 changes: 1 addition & 1 deletion docs/vhs/demo.tape
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Enter
Type "# exlude the git dir and saving to report.json!"
Enter
Sleep 1s
Type "./smash ~/linux/drivers --exclude-dir=git -o report.json"
Type "./smash ~/linux/drivers -r --exclude-dir=git -o report.json"
Sleep 500ms
Enter
Sleep 10s
Expand Down
2 changes: 1 addition & 1 deletion docs/vhs/install.tape
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Sleep 500ms
Enter

# smash Linux/drivers
Type "smash /linux/drivers
Type "smash /linux/drivers -r
Sleep 500ms
Enter
Sleep 60s
1 change: 1 addition & 0 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ func init() {
flags.BoolVarP(&af.IgnoreHidden, "ignore-hidden", "", true, "Ignore hidden files & folders Eg. files/folders starting with '.'")
flags.BoolVarP(&af.IgnoreSystem, "ignore-system", "", true, "Ignore system files & folders Eg. '$MFT', '.Trash'")
flags.BoolVarP(&af.Silent, "silent", "q", false, "Run in silent mode")
flags.BoolVarP(&af.Recurse, "recurse", "r", false, "Recursively search directories for files")
flags.BoolVarP(&af.Verbose, "verbose", "", false, "Run in verbose mode")
flags.BoolVarP(&af.Profile, "profile", "", false, "Enable Go Profiler - see localhost:1984/debug/pprof")
flags.BoolVarP(&af.HideProgress, "no-progress", "", false, "Disable progress updates")
Expand Down
3 changes: 2 additions & 1 deletion internal/smash/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ func (app *App) Exec() error {
files := app.Runtime.Files
locations := app.Locations
isVerbose := app.Flags.Verbose && !app.Flags.Silent
walkOptions := indexer.WalkConfig{Recurse: app.Flags.Recurse}
showProgress := (!app.Flags.HideProgress && !app.Flags.Silent) || isVerbose

pap := theme.MultiWriter()
Expand All @@ -121,7 +122,7 @@ func (app *App) Exec() error {
}()
for _, location := range locations {
psi.UpdateText("Indexing location: " + location)
err := wk.WalkDirectory(os.DirFS(location), location, files)
err := wk.WalkDirectory(os.DirFS(location), location, walkOptions, files)

if err != nil {
if isVerbose {
Expand Down
1 change: 1 addition & 0 deletions internal/smash/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func (app *App) printConfiguration() {
theme.Println(b.Sprint("Slicing: "), theme.ColourConfig(enabledOrDisabled(!f.DisableSlicing)), config)
theme.Println(b.Sprint("Algorithm: "), theme.ColourConfig(algorithms.Algorithm(f.Algorithm)))
theme.Println(b.Sprint("Locations: "), theme.ColourConfig(strings.Join(app.Locations, ", ")))
theme.Println(b.Sprint("Recursive: "), theme.ColourConfig(enabledOrDisabled(f.Recurse)))

if !f.HideOutput && f.OutputFile != "" {
theme.Println(b.Sprint("Output: "), theme.ColourConfig(f.OutputFile), "(json)")
Expand Down
1 change: 1 addition & 0 deletions internal/smash/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type Flags struct {
IgnoreSystem bool `yaml:"ignore-system"`
ShowVersion bool `yaml:"version"`
ShowNerdStats bool `yaml:"nerd-stats"`
Recurse bool `yaml:"recurse"`
ShowDuplicates bool `yaml:"show-duplicates"`
Silent bool `yaml:"silent"`
HideTopList bool `yaml:"no-top-list"`
Expand Down
2 changes: 1 addition & 1 deletion internal/smash/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
)

var (
Version = "v0.0.7"
Version = "v0.7.0"
Commit = "none"
Date = "unknown"
Time = "nowish"
Expand Down
11 changes: 8 additions & 3 deletions pkg/indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ type IndexerConfig struct {
IgnoreHiddenItems bool
IgnoreSystemItems bool
}
// WalkConfig holds the per-walk options passed to WalkDirectory.
type WalkConfig struct {
	// Recurse controls whether the walk descends into subdirectories.
	// When false, WalkDirectory skips every directory other than the
	// walk root.
	Recurse bool
}

func New() *IndexerConfig {
return &IndexerConfig{
Expand Down Expand Up @@ -63,8 +66,9 @@ func NewConfigured(excludeDirFilter []string, excludeFileFilter []string, ignore
return indexer
}

func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan *FileFS) error {
walkErr := fs.WalkDir(f, ".", func(path string, d fs.DirEntry, err error) error {
func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, options WalkConfig, files chan *FileFS) error {
const RootDir = "."
walkErr := fs.WalkDir(f, RootDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
if errors.Is(err, fs.ErrPermission) {
return fs.SkipDir
Expand All @@ -79,8 +83,9 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan *Fil

isIgnoreDir := config.IgnoreSystemItems && config.isIgnored(name, config.excludeSysDirFilter)
isExludeDir := len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)
dontRecurse := !options.Recurse && name != RootDir

if isHiddenObj || isIgnoreDir || isExludeDir {
if isHiddenObj || isIgnoreDir || isExludeDir || dontRecurse {
return fs.SkipDir
}

Expand Down
64 changes: 52 additions & 12 deletions pkg/indexer/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ func TestIndexDirectoryWithFilesInRoot(t *testing.T) {
"DSC19841.ARW",
"DSC19842.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, walkOptions, t)

expected := mockFiles
actual := walkedFiles
Expand All @@ -35,7 +35,8 @@ func TestIndexDirectoryWithFilesAcrossFolders(t *testing.T) {
"subfolder-2/DSC19848.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, walkOptions, t)

expected := mockFiles
actual := walkedFiles
Expand All @@ -49,6 +50,37 @@ func TestIndexDirectoryWithFilesAcrossFolders(t *testing.T) {
}
}

func TestIndexDirectoryWithDirExclusionsNoRecurse(t *testing.T) {
exclude_dir := []string{}
exclude_file := []string{}

mockFiles := []string{
"DSC19841.ARW",
"DSC19842.ARW",
"subfolder-1/DSC19845.ARW",
"subfolder-1/DSC19846.ARW",
"subfolder-2/DSC19847.ARW",
"subfolder-2/DSC19848.ARW",
}

walkOptions := WalkConfig{Recurse: false}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
mockFiles[1],
}

actual := walkedFiles

if len(actual) != len(expected) {
t.Errorf("expected %d, got %d files", len(expected), len(actual))
}

if !reflect.DeepEqual(actual, expected) {
t.Errorf("expected %v, got %v files", expected, actual)
}
}
func TestIndexDirectoryWithDirExclusions(t *testing.T) {
exclude_dir := []string{"subfolder-1", "subfolder-2", "subfolder-not-found"}
exclude_file := []string{}
Expand All @@ -62,7 +94,8 @@ func TestIndexDirectoryWithDirExclusions(t *testing.T) {
"subfolder-2/DSC19848.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -90,7 +123,8 @@ func TestIndexDirectoryWithFileExclusions(t *testing.T) {
"exclude.me",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -120,7 +154,8 @@ func TestIndexDirectoryWithFileAndDirExclusions(t *testing.T) {
"exclude-dir/random.file",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -149,7 +184,8 @@ func TestIndexDirectoryWithHiddenFilesThatShouldBeIndexed(t *testing.T) {
".config/smash/config.json",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, false, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, false, walkOptions, t)

expected := []string{
mockFiles[3],
Expand Down Expand Up @@ -181,7 +217,8 @@ func TestIndexDirectoryWithHiddenFiles(t *testing.T) {
".config/smash/config.json",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand All @@ -208,7 +245,8 @@ func TestIndexDirectoryWhichContainsSystemFiles(t *testing.T) {
"desktop.ini",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand All @@ -235,7 +273,8 @@ func TestIndexDirectoryWhichContainsWindowsSystemFiles(t *testing.T) {
"$MFT/random.file",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)
walkOptions := WalkConfig{Recurse: true}
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, walkOptions, t)

expected := []string{
mockFiles[0],
Expand All @@ -260,15 +299,16 @@ func channelFileToSliceOfFiles(ch <-chan *FileFS) []string {
return result
}

func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, ignoreHiddenItems bool, t *testing.T) []string {
func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, ignoreHiddenItems bool, options WalkConfig, t *testing.T) []string {
fr := "mock://"
fs := createMockFS(files)
ch := make(chan *FileFS)
wo := options

go func() {
defer close(ch)
indexer := NewConfigured(excludeDir, excludeFiles, ignoreHiddenItems, true)
err := indexer.WalkDirectory(fs, fr, ch)
err := indexer.WalkDirectory(fs, fr, wo, ch)
if err != nil {
t.Errorf("WalkDirectory returned an error: %v", err)
}
Expand Down
21 changes: 14 additions & 7 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ Flags:
-o, --output-file string Export analysis as JSON (generated automatically otherwise)
--profile Enable Go Profiler - see localhost:1984/debug/pprof
--progress-update int Update progress every x seconds (default 5)
-r, --recurse Recursively search directories for files
--show-duplicates Show full list of duplicates
--show-top int Show the top x duplicates (default 10)
-q, --silent Run in silent mode
Expand All @@ -85,12 +86,18 @@ See the [full list of algorithms](./docs/algorithms.md) supported.

Examples are given in Unix format, but apply to Windows as well.

> \[!TIP]
>
> To recursively smash through directories, use the `--recurse` or `-r` switch.
>
> By default, `smash` only looks at files directly inside the folder(s) you pass and does not descend into subfolders (from v0.7+).
### Basic

To check for duplicates in a single path (Eg. `~/media/photos`) & output report to `report.json`

```bash
$ ./smash ~/media/photos -o report.json
$ ./smash ~/media/photos -r -o report.json
```

You can then look at `report.json` with [jq](https://github.com/jqlang/jq) to check duplicates:
Expand All @@ -104,7 +111,7 @@ $ jq '.analysis.dupes[]|[.location,.path,.filename]|join("/")' report.json | xar
By default, `smash` ignores empty files but can report on them with the `--ignore-empty=false` argument:

```bash
$ ./smash ~/media/photos --ignore-empty=false -o report.json
$ ./smash ~/media/photos -r --ignore-empty=false -o report.json
```

You can then look at `report.json` with [jq](https://github.com/jqlang/jq) to check empty files:
Expand All @@ -118,7 +125,7 @@ $ jq '.analysis.empty[]|[.location,.path,.filename]|join("/")' report.json | xar
By default, `smash` shows the top 10 duplicate files in the CLI and leaves the rest for the report, you can change that with the `--show-top=50` argument to show the top 50 instead.

```bash
$ ./smash ~/media/photos --show-top=50
$ ./smash ~/media/photos -r --show-top=50
```

### Multiple Directories
Expand All @@ -136,13 +143,13 @@ Smash will find and report all duplicates within any number of directories passe
You can exclude certain directories or files with the `--exclude-dir` and `--exclude-file` switches including wildcard characters:

```bash
$ ./smash --exclude-dir=.git,.svn --exclude-file=.gitignore,*.csv ~/media/photos
$ ./smash -r --exclude-dir=.git,.svn --exclude-file=.gitignore,*.csv ~/media/photos
```

For example, to ignore all hidden files on unix (those that start with `.` such as `.config` or `.gnome` folders):

```bash
$ ./smash --exclude-dir=.config,.gnome ~/media/photos
$ ./smash -r --exclude-dir=.config,.gnome ~/media/photos
```

### Disabling Slicing & Getting Full Hash
Expand All @@ -152,7 +159,7 @@ By default, `smash` uses slicing to efficiently slice a file into multiple segme
If you prefer not to use slicing for a run, you can disable slicing with:

```bash
$ ./smash --disable-slicing ~/media/photos
$ ./smash -r --disable-slicing ~/media/photos
```

### Changing Hashing Algorithms
Expand All @@ -163,7 +170,7 @@ of algorithms [as documented](./docs/algorithms.md).
To use another supported algorithm, use the `--algorithm` switch:

```bash
$ ./smash --algorithm:murmur3 ~/media/photos
$ ./smash -r --algorithm:murmur3 ~/media/photos
```

# Acknowledgements
Expand Down

0 comments on commit 3865e98

Please sign in to comment.