diff --git a/internal/app/version.go b/internal/app/version.go index 78a0d14..2a6a370 100644 --- a/internal/app/version.go +++ b/internal/app/version.go @@ -6,12 +6,12 @@ import ( "github.com/logrusorgru/aurora/v3" ) -var ( - Version = "v0.0.1" - Edition = "open-source" - Home = "github.com/thushan/smash" - Time string - User string +const ( + Version = "v0.0.1" + Edition = "open-source" + Home = "github.com/thushan/smash" + Time string = "nowish" + User string = "local" ) func PrintVersionInfo(extendedInfo bool) { diff --git a/internal/smash/app.go b/internal/smash/app.go index 19da35e..c681e45 100644 --- a/internal/smash/app.go +++ b/internal/smash/app.go @@ -1,6 +1,7 @@ package smash import ( + "log" "os" "github.com/logrusorgru/aurora/v3" @@ -8,8 +9,6 @@ import ( "github.com/thushan/smash/pkg/indexer" ) -var FileQueueSize = 1000 - type App struct { Flags *app.Flags Args []string @@ -20,6 +19,7 @@ func (app *App) Run() error { var locations = app.Locations var excludeDirs = app.Flags.ExcludeDir var excludeFiles = app.Flags.ExcludeFile + var walker = indexer.NewConfigured(excludeDirs, excludeFiles) if !app.Flags.Silent { @@ -28,22 +28,26 @@ func (app *App) Run() error { app.setMaxThreads() - fsq := make(chan string, FileQueueSize) + files := make(chan indexer.FileFS) go func() { + defer close(files) for _, location := range locations { app.printVerbose("Indexing location ", aurora.Cyan(location)) - walker.WalkDirectory(os.DirFS(location), fsq) + err := walker.WalkDirectory(os.DirFS(location), location, files) + + if err != nil { + log.Println("Failed to walk location ", aurora.Magenta(location), " because ", aurora.Red(err)) + } } - close(fsq) }() totalFiles := 0 - - for filename := range fsq { + for file := range files { totalFiles++ - app.printVerbose("Indexed file ", aurora.Blue(filename)) + app.printVerbose("Indexed file ", aurora.Blue(file.Path)) } + app.printVerbose("Total Files: ", aurora.Blue(totalFiles)) return nil } diff --git a/pkg/indexer/indexer.go b/pkg/indexer/indexer.go index d3f3ac3..9dd0af7 100644 --- a/pkg/indexer/indexer.go +++ b/pkg/indexer/indexer.go @@ -1,18 +1,23 @@ package indexer import ( - "fmt" "io/fs" - "os" "path/filepath" "regexp" "strings" ) +type FileFS struct { + FileSystem fs.FS + Path string + Name string + FullName string +} type IndexerConfig struct { dirMatcher *regexp.Regexp fileMatcher *regexp.Regexp + excludeSysFilter []string ExcludeDirFilter []string ExcludeFileFilter []string } @@ -23,40 +28,57 @@ func New() *IndexerConfig { ExcludeDirFilter: nil, dirMatcher: nil, fileMatcher: nil, + excludeSysFilter: []string{ + "System Volume Information", "$RECYCLE.BIN", "$MFT", /* Windows */ + ".Trash", ".Trash-1000", /* Linux */ + ".Trashes", /* macOS */ + }, } } - func NewConfigured(excludeDirFilter []string, excludeFileFilter []string) *IndexerConfig { - return &IndexerConfig{ - ExcludeDirFilter: excludeDirFilter, - ExcludeFileFilter: excludeFileFilter, - dirMatcher: regexp.MustCompile(strings.Join(excludeDirFilter, "|")), - fileMatcher: regexp.MustCompile(strings.Join(excludeFileFilter, "|")), + indexer := New() + if len(excludeFileFilter) > 0 { + indexer.ExcludeFileFilter = excludeFileFilter + indexer.fileMatcher = regexp.MustCompile(strings.Join(excludeFileFilter, "|")) } + if len(excludeDirFilter) > 0 { + indexer.ExcludeDirFilter = excludeDirFilter + indexer.dirMatcher = regexp.MustCompile(strings.Join(excludeDirFilter, "|")) + } + return indexer } -func (config *IndexerConfig) WalkDirectory(fsys fs.FS, files chan string) { - walkErr := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { +func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan FileFS) error { + walkErr := fs.WalkDir(f, ".", func(path string, d fs.DirEntry, err error) error { if err != nil { return err } - - // Index just the files + name := filepath.Clean(d.Name()) if d.IsDir() { - if len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path) { + if config.isSystemFolder(name) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { return filepath.SkipDir } } else { - filename := filepath.Base(path) - if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(filename) { + if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(name) { return nil } - files <- path + files <- FileFS{ + FileSystem: f, + Path: path, + Name: name, + FullName: filepath.Join(root, path), + } } - return nil }) - if walkErr != nil { - fmt.Fprintln(os.Stderr, "Walk Failed: ", walkErr) + return walkErr +} + +func (config *IndexerConfig) isSystemFolder(folder string) bool { + for _, v := range config.excludeSysFilter { + if folder == v { + return true + } } + return false } diff --git a/pkg/indexer/indexer_test.go b/pkg/indexer/indexer_test.go index 4165c03..11232b8 100644 --- a/pkg/indexer/indexer_test.go +++ b/pkg/indexer/indexer_test.go @@ -1,125 +1,199 @@ package indexer import ( - "crypto/rand" + "reflect" "testing" "testing/fstest" ) -func randomBytes(length int) []byte { - buffer := make([]byte, length) - _, _ = rand.Read(buffer) - return buffer -} - func TestIndexDirectoryWithFilesInRoot(t *testing.T) { - fsq := make(chan string, 10) - - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", } - indexer := New() - indexer.WalkDirectory(fs, fsq) + walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, t) - expected := len(fs) - actual := len(fsq) + expected := mockFiles + actual := walkedFiles - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) } } func TestIndexDirectoryWithFilesAcrossFolders(t *testing.T) { - fsq := make(chan string, 10) - - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, - "subfolder-1/DSC19845.ARW": {Data: randomBytes(1024)}, - "subfolder-1/DSC19846.ARW": {Data: randomBytes(1024)}, - "subfolder-2/DSC19847.ARW": {Data: randomBytes(1024)}, - "subfolder-2/DSC19848.ARW": {Data: randomBytes(1024)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "subfolder-1/DSC19845.ARW", + "subfolder-1/DSC19846.ARW", + "subfolder-2/DSC19847.ARW", + "subfolder-2/DSC19848.ARW", } - indexer := New() - indexer.WalkDirectory(fs, fsq) + walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, t) - expected := len(fs) - actual := len(fsq) + expected := mockFiles + actual := walkedFiles - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } + + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) } } func TestIndexDirectoryWithDirExclusions(t *testing.T) { - fsq := make(chan string, 10) exclude_dir := []string{"subfolder-1", "subfolder-2", "subfolder-not-found"} exclude_file := []string{} - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, - "subfolder-1/DSC19845.ARW": {Data: randomBytes(1024)}, - "subfolder-1/DSC19846.ARW": {Data: randomBytes(1024)}, - "subfolder-2/DSC19847.ARW": {Data: randomBytes(1024)}, - "subfolder-2/DSC19848.ARW": {Data: randomBytes(1024)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "subfolder-1/DSC19845.ARW", + "subfolder-1/DSC19846.ARW", + "subfolder-2/DSC19847.ARW", + "subfolder-2/DSC19848.ARW", + } + + walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t) + + expected := []string{ + mockFiles[0], + mockFiles[1], } - indexer := NewConfigured(exclude_dir, exclude_file) - indexer.WalkDirectory(fs, fsq) + actual := walkedFiles - expected := len(fs) - 4 - actual := len(fsq) + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) } } func TestIndexDirectoryWithFileExclusions(t *testing.T) { - fsq := make(chan string, 10) exclude_dir := []string{} exclude_file := []string{"exclude.me"} - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, - "exclude.me": {Data: randomBytes(1024)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "exclude.me", } - indexer := NewConfigured(exclude_dir, exclude_file) - indexer.WalkDirectory(fs, fsq) + walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t) + + expected := []string{ + mockFiles[0], + mockFiles[1], + } - expected := len(fs) - 1 - actual := len(fsq) + actual := walkedFiles + + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) } } func TestIndexDirectoryWithFileAndDirExclusions(t *testing.T) { - fsq := make(chan string, 10) + exclude_dir := []string{"exclude-dir"} exclude_file := []string{"exclude.me"} - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, - "exclude.me": {Data: randomBytes(1024)}, - "exclude-dir/random.file": {Data: randomBytes(1024)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "exclude.me", + "exclude-dir/random.file", + } + + walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t) + + expected := []string{ + mockFiles[0], + mockFiles[1], + } + + actual := walkedFiles + + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } + + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) + } +} + +func TestIndexDirectoryWhichContainsWindowsSystemFiles(t *testing.T) { + exclude_dir := []string{} + exclude_file := []string{} + + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "$RECYCLE.BIN/test.txt", + "$MFT/random.file", } - indexer := NewConfigured(exclude_dir, exclude_file) - indexer.WalkDirectory(fs, fsq) + walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t) - expected := len(fs) - 2 - actual := len(fsq) + expected := []string{ + mockFiles[0], + mockFiles[1], + } + + actual := walkedFiles + + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } + + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) + } +} +func channelFileToSliceOfFiles(ch <-chan FileFS) []string { + var result []string + for f := range ch { + result = append(result, f.Path) + } + return result +} - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) +func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, t *testing.T) []string { + fr := "mock://" + fs := createMockFS(files) + ch := make(chan FileFS) + + go func() { + defer close(ch) + indexer := NewConfigured(excludeDir, excludeFiles) + err := indexer.WalkDirectory(fs, fr, ch) + if err != nil { + t.Errorf("WalkDirectory returned an error: %v", err) + } + }() + + return channelFileToSliceOfFiles(ch) +} +func createMockFS(files []string) fstest.MapFS { + var fs fstest.MapFS = make(map[string]*fstest.MapFile) + for _, file := range files { + fs[file] = &fstest.MapFile{} } + return fs }