Skip to content

Commit

Permalink
Implement minSize/maxSize thresholds. (#58)
Browse files Browse the repository at this point in the history
* initial minsize maxsize

* size checks,

* - shouldAnalyse based on thresholds

- Test cases.

* refactor tests.

* update readme.

* lint refactor.

* lint issue.
  • Loading branch information
thushan authored Feb 4, 2024
1 parent 3865e98 commit fe010e7
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 101 deletions.
2 changes: 2 additions & 0 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ func init() {
flags.StringSliceVarP(&af.ExcludeDir, "exclude-dir", "", nil, "Directories to exclude separated by comma Eg. --exclude-dir=.git,.idea")
flags.IntVarP(&af.MaxThreads, "max-threads", "p", runtime.NumCPU(), "Maximum threads to utilise")
flags.IntVarP(&af.MaxWorkers, "max-workers", "w", runtime.NumCPU(), "Maximum workers to utilise when smashing")
flags.Int64VarP(&af.MinSize, "min-size", "G", 0, "Minimum file size to consider for hashing (in bytes)")
flags.Int64VarP(&af.MaxSize, "max-size", "L", 0, "Maximum file size to consider for hashing (in bytes)")
flags.IntVarP(&af.ProgressUpdate, "progress-update", "", 5, "Update progress every x seconds")
flags.IntVarP(&af.ShowTop, "show-top", "", 10, "Show the top x duplicates")
flags.BoolVarP(&af.HideTopList, "no-top-list", "", false, "Hides top x duplicates list")
Expand Down
10 changes: 7 additions & 3 deletions internal/smash/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ func (app *App) Run() error {
DisableSlicing: af.DisableSlicing,
DisableMeta: af.DisableMeta,
DisableAutoText: af.DisableAutoText,
MinSize: uint64(af.MinSize),
MaxSize: uint64(af.MaxSize),
}

app.Runtime = &AppRuntime{
Expand Down Expand Up @@ -155,13 +157,15 @@ func (app *App) Exec() error {
startTime := time.Now().UnixMilli()
stats, err := sl.SliceFS(*file.FileSystem, file.Path, slo)
elapsedMs := time.Now().UnixMilli() - startTime

if err != nil {
switch {
case err != nil:
if isVerbose {
theme.WarnSkipWithContext(file.FullName, err)
}
_, _ = session.Fails.LoadOrStore(file.Path, err)
} else {
case stats.IgnoredFile:
// Ignored counter
default:
SummariseSmashedFile(stats, file, elapsedMs, session.Dupes, session.Empty)
}
}
Expand Down
13 changes: 12 additions & 1 deletion internal/smash/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ type Flags struct {
Base []string `yaml:"base"`
ExcludeDir []string `yaml:"exclude-dir"`
ExcludeFile []string `yaml:"exclude-file"`
MinSize int64 `yaml:"min-size"`
MaxSize int64 `yaml:"max-size"`
Algorithm int `yaml:"algorithm"`
MaxThreads int `yaml:"max-threads"`
MaxWorkers int `yaml:"max-workers"`
Expand Down Expand Up @@ -39,7 +41,16 @@ func (app *App) validateArgs() error {
return errors.New("maxthreads cannot be below zero")
}
if f.MaxWorkers < 0 {
return errors.New("naxworkers cannot be below zero")
return errors.New("maxworkers cannot be below zero")
}
if f.MinSize < 0 {
return errors.New("minSize cannot be below zero")
}
if f.MaxSize < 0 {
return errors.New("maxSize cannot be below zero")
}
if f.MaxSize != 0 && f.MinSize > f.MaxSize {
return errors.New("minSize cannot be greater than maxSize")
}
if f.ShowTop <= 1 {
return errors.New("showtop should be greater than 1")
Expand Down
67 changes: 67 additions & 0 deletions internal/smash/flags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,73 @@ func TestApp_ValidateArgs(t *testing.T) {
},
wantErr: false,
},
{
name: "Should succeed when valid arguments are provided for min and max size",
flags: &Flags{
MinSize: 100,
Verbose: true,
MaxThreads: 5,
MaxWorkers: 5,
ShowTop: 10,
ProgressUpdate: 2,
},
wantErr: false,
},
{
name: "Should succeed when valid arguments are provided for max size",
flags: &Flags{
MaxSize: 200,
Verbose: true,
MaxThreads: 5,
MaxWorkers: 5,
ShowTop: 10,
ProgressUpdate: 2,
},
wantErr: false,
},
{
name: "Should succeed when valid arguments are provided for min size",
flags: &Flags{
MinSize: 200,
MaxSize: 0,
Verbose: true,
MaxThreads: 5,
MaxWorkers: 5,
ShowTop: 10,
ProgressUpdate: 2,
},
wantErr: false,
},
{
name: "Should fail when minSize is below zero",
flags: &Flags{
MinSize: -100,
},
wantErr: true,
},
{
name: "Should fail when maxSize is below zero",
flags: &Flags{
MaxSize: -100,
},
wantErr: true,
},
{
name: "Should fail when maxSize is below minSize",
flags: &Flags{
MaxSize: 20,
MinSize: 200,
},
wantErr: true,
},
{
name: "Should fail when maxSize is below minSize",
flags: &Flags{
MaxSize: 20,
MinSize: 200,
},
wantErr: true,
},
}

for _, tt := range tests {
Expand Down
41 changes: 34 additions & 7 deletions pkg/slicer/slicer.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,17 @@ type SlicerStats struct {
SliceSize uint64
FileSize uint64
Slices int
HashedFullFile bool
EmptyFile bool
IgnoredFile bool
HashedFullFile bool
}

// MetaSlice is a record holding a single unsigned size value.
// NOTE(review): its consumers are outside this excerpt — presumably it is the
// file-size metadata mixed into the hash unless Options.DisableMeta is set;
// confirm against Slice.
type MetaSlice struct {
// Size is the recorded size; presumably in bytes — TODO confirm.
Size uint64
}
type Options struct {
MinSize uint64
MaxSize uint64
DisableSlicing bool
DisableMeta bool
DisableAutoText bool
Expand All @@ -44,6 +47,8 @@ const DefaultSlices = 4
// Package-level defaults for the slicer.
const (
	// DefaultSliceSize is the slice size handed to NewConfigured by New:
	// 8 * 1024 = 8192.
	DefaultSliceSize = 8 * 1024

	// DefaultThreshold is the threshold handed to NewConfigured by New:
	// 100 * 1024 = 102400.
	DefaultThreshold = 100 * 1024

	// DefaultMinimumSize is the combined size of DefaultSlices + 2 slices of
	// DefaultSliceSize each.
	// NOTE(review): presumably the smallest file worth slicing rather than
	// hashing whole — confirm against Slice.
	DefaultMinimumSize = (DefaultSlices + 2) * DefaultSliceSize

	// DefaultMinSize is the "unset" value for Options.MinSize; shouldAnalyse
	// applies no lower bound when MinSize equals it.
	DefaultMinSize = 0

	// DefaultMaxSize is the "unset" value for Options.MaxSize; shouldAnalyse
	// applies no upper bound when MaxSize equals it.
	DefaultMaxSize = 0
)

func New(algorithm algorithms.Algorithm) Slicer {
return NewConfigured(algorithm, DefaultSlices, DefaultSliceSize, DefaultThreshold)
Expand Down Expand Up @@ -84,20 +89,25 @@ func (slicer *Slicer) SliceFS(fs fs.FS, name string, options *Options) (SlicerSt
return stats, err
}

size := fi.Size()

stats.FileSize = uint64(size)
stats.Slices = slicer.slices
stats.SliceSize = slicer.sliceSize
size := uint64(fi.Size())

if size == 0 {
stats.EmptyFile = true
stats.Hash = nil
return stats, nil
}

if !shouldAnalyse(size, options.MinSize, options.MaxSize) {
stats.IgnoredFile = true
return stats, nil
}

stats.FileSize = size
stats.Slices = slicer.slices
stats.SliceSize = slicer.sliceSize

if fr, ok := f.(io.ReaderAt); ok {
sr := io.NewSectionReader(fr, 0, size)
sr := io.NewSectionReader(fr, 0, int64(size))
err := slicer.Slice(sr, options, &stats)
return stats, err
} else {
Expand Down Expand Up @@ -141,6 +151,11 @@ func (slicer *Slicer) Slice(sr *io.SectionReader, options *Options, stats *Slice
return nil
}

if !shouldAnalyse(size, options.MinSize, options.MaxSize) {
stats.IgnoredFile = true
return nil
}

algo := slicer.algorithm.New()
algo.Reset()

Expand Down Expand Up @@ -224,3 +239,15 @@ func (slicer *Slicer) Slice(sr *io.SectionReader, options *Options, stats *Slice
stats.Hash = algo.Sum(nil)
return nil
}
// shouldAnalyse reports whether a file of fileSize bytes lies within the
// configured size thresholds.
//
// A threshold equal to its default (DefaultMinSize / DefaultMaxSize) is
// treated as unset and imposes no limit. Both bounds are inclusive: a file
// exactly minSize or exactly maxSize bytes long is analysed.
func shouldAnalyse(fileSize, minSize, maxSize uint64) bool {
	tooSmall := minSize != DefaultMinSize && fileSize < minSize
	tooLarge := maxSize != DefaultMaxSize && fileSize > maxSize
	return !tooSmall && !tooLarge
}
Loading

0 comments on commit fe010e7

Please sign in to comment.