diff --git a/internal/cli/cli.go b/internal/cli/cli.go index cfd41ea..b5a49e6 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -39,11 +39,13 @@ func init() { flags.StringSliceVarP(&af.ExcludeDir, "exclude-dir", "", nil, "Directories to exclude separated by comma. Eg. --exclude-dir=.git,.idea") flags.IntVarP(&af.MaxThreads, "max-threads", "p", runtime.NumCPU(), "Maximum threads to utilise.") flags.IntVarP(&af.MaxWorkers, "max-workers", "w", bestMaxWorkers(), "Maximum workers to utilise when smashing.") + flags.IntVarP(&af.UpdateSeconds, "update-seconds", "", 5, "Update progress every x seconds.") flags.BoolVarP(&af.DisableSlicing, "disable-slicing", "", false, "Disable slicing (hashes full file).") flags.BoolVarP(&af.IgnoreEmptyFiles, "ignore-emptyfiles", "", false, "Ignore & don't report on empty/zero byte files.") flags.StringVarP(&af.OutputFile, "output-file", "o", "", "Export as JSON") flags.BoolVarP(&af.Silent, "silent", "q", false, "Run in silent mode.") flags.BoolVarP(&af.Verbose, "verbose", "", false, "Run in verbose mode.") + flags.BoolVarP(&af.NoProgress, "no-progress", "", false, "Disable progress updates.") flags.BoolVarP(&af.ShowVersion, "version", "v", false, "Show version information.") } func bestMaxWorkers() int { diff --git a/internal/report/summary.go b/internal/report/summary.go index f921dcb..a91b0cf 100644 --- a/internal/report/summary.go +++ b/internal/report/summary.go @@ -2,6 +2,7 @@ package report import ( "fmt" + "time" "github.com/thushan/smash/internal/theme" ) @@ -20,7 +21,7 @@ type RunSummary struct { func PrintRunSummary(rs RunSummary, ignoreEmptyFiles bool) { theme.StyleHeading.Println("---| Analysis Summary") - theme.Println(writeCategory("Total Time:"), theme.ColourTime(fmt.Sprintf("%dms", rs.ElapsedTime))) + theme.Println(writeCategory("Total Time:"), theme.ColourTime(calcTotalTime(rs.ElapsedTime))) theme.Println(writeCategory("Total Analysed:"), theme.ColourNumber(rs.TotalFiles)) theme.Println(writeCategory("Total 
// calcTotalTime formats an elapsed time, given in nanoseconds, as a short
// human-readable string in time.Duration's String form.
//
// Precision shrinks as the duration grows so the summary stays compact:
//
//   - below 1ms: rounded to the microsecond
//   - up to 1s: rounded to the millisecond (e.g. "13ms")
//   - below 1h: rounded to the second (e.g. "23s", "6m1s")
//   - 1h and above: rounded to the minute (e.g. "2h29m0s")
//
// Zero and negative inputs are reported as "0s". The elapsed value is
// derived from wall-clock timestamps, so a clock step backwards could
// otherwise surface as a negative run time.
func calcTotalTime(elapsedNs int64) string {
	duration := time.Duration(elapsedNs)

	var precision time.Duration
	switch {
	case duration <= 0:
		return time.Duration(0).String()
	case duration < time.Millisecond:
		// Very short runs would otherwise round down to "0s".
		precision = time.Microsecond
	case duration <= time.Second:
		precision = time.Millisecond
	case duration < time.Hour:
		precision = time.Second
	default:
		precision = time.Minute
	}

	return duration.Round(precision).String()
}

// writeCategory right-aligns a summary label in a 20-character column.
func writeCategory(category string) string {
	return fmt.Sprintf("%20s", category)
}
*App) Exec() error { files := app.Runtime.Files locations := app.Locations isVerbose := app.Flags.Verbose && !app.Flags.Silent + showProgress := (!app.Flags.NoProgress && !app.Flags.Silent) || isVerbose pap := theme.MultiWriter() psi, _ := theme.IndexingSpinner().WithWriter(pap.NewWriter()).Start("Indexing locations...") @@ -117,11 +118,17 @@ func (app *App) Exec() error { }() totalFiles := int64(0) - updateTicker := int64(1000) pss, _ := theme.SmashingSpinner().WithWriter(pap.NewWriter()).Start("Finding duplicates...") var wg sync.WaitGroup + + updateProgressTicker := make(chan bool) + + if showProgress { + app.updateDupeCount(updateProgressTicker, pss, &totalFiles) + } + for i := 0; i < app.Flags.MaxWorkers; i++ { wg.Add(1) go func() { @@ -129,11 +136,7 @@ func (app *App) Exec() error { for file := range files { sf := resolveFilename(file) - currentFileCount := atomic.AddInt64(&totalFiles, 1) - - if currentFileCount%updateTicker == 0 { - pss.UpdateText(fmt.Sprintf("Finding duplicates... (%s files smash'd)", pterm.Gray(currentFileCount))) - } + atomic.AddInt64(&totalFiles, 1) startTime := time.Now().UnixMilli() stats, err := sl.SliceFS(file.FileSystem, file.Path, slo) @@ -152,6 +155,9 @@ func (app *App) Exec() error { } wg.Wait() + // Signal we're done + updateProgressTicker <- true + pss.Success("Finding duplicates...Done!") psr, _ := theme.FinaliseSpinner().WithWriter(pap.NewWriter()).Start("Finding smash hits...") @@ -166,6 +172,25 @@ func (app *App) Exec() error { return nil } +func (app *App) updateDupeCount(updateProgressTicker chan bool, pss *pterm.SpinnerPrinter, totalFiles *int64) { + if app.Flags.NoProgress { + return + } + go func() { + ticker := time.Tick(time.Duration(app.Flags.UpdateSeconds) * time.Second) + for { + select { + case <-ticker: + latestFileCount := atomic.LoadInt64(totalFiles) + pss.UpdateText(fmt.Sprintf("Finding duplicates... 
(%s files smash'd)", pterm.Gray(latestFileCount))) + case <-updateProgressTicker: + return + } + } + }() + +} + func (app *App) checkTerminal() { if !term.IsTerminal(int(os.Stdout.Fd())) { pterm.DisableColor() diff --git a/internal/smash/configuration.go b/internal/smash/configuration.go index fc3f92b..71566f1 100644 --- a/internal/smash/configuration.go +++ b/internal/smash/configuration.go @@ -20,8 +20,17 @@ func (app *App) printConfiguration() { theme.StyleHeading.Println("---| Configuration") if app.Flags.Verbose { - theme.Println(b.Sprint("Concurrency: "), theme.ColourConfig(f.MaxWorkers), "workers |", theme.ColourConfig(f.MaxThreads), "threads") - config = "(Slices: " + theme.ColourConfig(slicer.DefaultSlices) + " | Size: " + theme.ColourConfig(humanize.Bytes(slicer.DefaultSliceSize)) + " | Threshold: " + theme.ColourConfig(humanize.Bytes(slicer.DefaultThreshold)) + ")" + slices := theme.ColourConfig(slicer.DefaultSlices) + size := theme.ColourConfig(humanize.Bytes(slicer.DefaultSliceSize)) + threshold := theme.ColourConfig(humanize.Bytes(slicer.DefaultThreshold)) + + config = "(Slices: " + slices + " | Size: " + size + " | Threshold: " + threshold + ")" + + maxThreads := theme.ColourConfig(f.MaxThreads) + maxWorkers := theme.ColourConfig(f.MaxWorkers) + + theme.Println(b.Sprint("Concurrency: "), maxWorkers, "workers |", maxThreads, "threads") + } else { config = "" } diff --git a/internal/smash/structs.go b/internal/smash/flags.go similarity index 85% rename from internal/smash/structs.go rename to internal/smash/flags.go index d7e0c63..b95dc75 100644 --- a/internal/smash/structs.go +++ b/internal/smash/flags.go @@ -8,9 +8,11 @@ type Flags struct { Algorithm int `yaml:"algorithm"` MaxThreads int `yaml:"max-threads"` MaxWorkers int `yaml:"max-workers"` + UpdateSeconds int `yaml:"update-seconds"` DisableSlicing bool `yaml:"disable-slicing"` IgnoreEmptyFiles bool `yaml:"ignore-emptyfiles"` ShowVersion bool `yaml:"show-version"` Silent bool `yaml:"silent"` + 
NoProgress bool `yaml:"no-progress"` Verbose bool `yaml:"verbose"` } diff --git a/internal/smash/formatter.go b/internal/smash/formatter.go index 9559866..6f1f42b 100644 --- a/internal/smash/formatter.go +++ b/internal/smash/formatter.go @@ -99,7 +99,7 @@ func (app *App) generateRunSummary(totalFiles int64) { DuplicateFiles: int64(totalDuplicates), DuplicateFileSize: totalDuplicateSize, DuplicateFileSizeF: humanize.Bytes(totalDuplicateSize), - ElapsedTime: time.Now().UnixMilli() - app.Session.StartTime, + ElapsedTime: time.Now().UnixNano() - app.Session.StartTime, } app.Summary = &summary } diff --git a/readme.md b/readme.md index a6ba4f9..c38b089 100644 --- a/readme.md +++ b/readme.md @@ -51,15 +51,21 @@ Usage: Flags: --algorithm algorithm Algorithm to use to hash files. Supported: xxhash, murmur3, md5, sha512, sha256 (full list, see readme) (default xxhash) + --base strings Base directories to use for comparison. Eg. --base=/c/dos,/c/dos/run/,/run/dos/run --disable-slicing Disable slicing (hashes full file). --exclude-dir strings Directories to exclude separated by comma. Eg. --exclude-dir=.git,.idea --exclude-file strings Files to exclude separated by comma. Eg. --exclude-file=.gitignore,*.csv -h, --help help for smash + --ignore-emptyfiles Ignore & don't report on empty/zero byte files. -p, --max-threads int Maximum threads to utilise. (default 16) -w, --max-workers int Maximum workers to utilise when smashing. (default 8) + --no-progress Disable progress updates. + -o, --output-file string Export as JSON -q, --silent Run in silent mode. + --update-seconds int Update progress every x seconds. (default 5) --verbose Run in verbose mode. - -v, --version version for smash + -v, --version Show version information. + ``` See the [full list of algorithms](./docs/algorithms.md) supported. @@ -68,7 +74,7 @@ See the [full list of algorithms](./docs/algorithms.md) supported. Examples are given in Unix format, but apply to Windows as well. 
-### Simplest +### Basic To check for duplicates in a single path (Eg. `~/media/photos`) @@ -112,13 +118,13 @@ $ ./smash --disable-slicing ~/media/photos ### Changing Hashing Algorithms -By default, smash uses `xxhash`, an extremely fast non-cryptographic hash algorithm -(which you can [read about further](https://xxhash.com/)). +By default, smash uses `xxhash`, an extremely fast non-cryptographic hash algorithm. However, you can choose a variety +of algorithms [as documented](./docs/algorithms.md). To use another supported algorithm, use the `--algorithm` switch: ```bash -$ ./smash --algorithm:fnv128a ~/media/photos +$ ./smash --algorithm=murmur3 ~/media/photos ``` # Acknowledgements