From 46a4ea2d803ba9cad9fc1a83732589af97c3707f Mon Sep 17 00:00:00 2001 From: Thushan Fernando Date: Mon, 6 Nov 2023 20:41:44 +1100 Subject: [PATCH 1/7] Ignore system files for Windows, Mac & Linux. --- pkg/indexer/indexer.go | 17 ++++++++++++++++- pkg/indexer/indexer_test.go | 24 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/pkg/indexer/indexer.go b/pkg/indexer/indexer.go index d3f3ac3..08950a0 100644 --- a/pkg/indexer/indexer.go +++ b/pkg/indexer/indexer.go @@ -43,7 +43,7 @@ func (config *IndexerConfig) WalkDirectory(fsys fs.FS, files chan string) { // Index just the files if d.IsDir() { - if len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path) { + if isSystemFolder(d.Name()) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { return filepath.SkipDir } } else { @@ -60,3 +60,18 @@ func (config *IndexerConfig) WalkDirectory(fsys fs.FS, files chan string) { fmt.Fprintln(os.Stderr, "Walk Failed: ", walkErr) } } + +func isSystemFolder(path string) bool { + folder := filepath.Clean(path) + skipDirs := []string{ + "System Volume Information", "$RECYCLE.BIN", "$MFT", /* Windows */ + ".Trash", ".Trash-1000", /* Linux */ + ".Trashes", /* macOS */ + } + for _, v := range skipDirs { + if folder == v { + return true + } + } + return false +} diff --git a/pkg/indexer/indexer_test.go b/pkg/indexer/indexer_test.go index 4165c03..67c0025 100644 --- a/pkg/indexer/indexer_test.go +++ b/pkg/indexer/indexer_test.go @@ -123,3 +123,27 @@ func TestIndexDirectoryWithFileAndDirExclusions(t *testing.T) { t.Errorf("expected %d, got %d files", expected, actual) } } + +func TestIndexDirectoryWhichContainsWindowsSystemFiles(t *testing.T) { + fsq := make(chan string, 10) + + exclude_dir := []string{} + exclude_file := []string{} + + fs := fstest.MapFS{ + "DSC19841.ARW": {Data: randomBytes(1024)}, + "DSC19842.ARW": {Data: randomBytes(2048)}, + "$RECYCLE.BIN/test.txt": {Data: randomBytes(1024)}, + "$MFT/random.file": {Data: randomBytes(1024)}, + } + + indexer := NewConfigured(exclude_dir, exclude_file) + indexer.WalkDirectory(fs, fsq) + + expected := 2 + actual := len(fsq) + + if actual != expected { + t.Errorf("expected %v, got %v files", expected, actual) + } +} From 82eadf138754c70eb8dec9d5da9c93e21cb56fc5 Mon Sep 17 00:00:00 2001 From: Thushan Fernando Date: Mon, 6 Nov 2023 21:27:50 +1100 Subject: [PATCH 2/7] WIP: Updated Indexer with channels & pipelines --- internal/smash/app.go | 32 +++++++++++++-------- pkg/indexer/indexer.go | 64 ++++++++++++++++++++++++++---------------- 2 files changed, 60 insertions(+), 36 deletions(-) diff --git a/internal/smash/app.go b/internal/smash/app.go index 19da35e..b7ba21f 100644 --- a/internal/smash/app.go +++ b/internal/smash/app.go @@ -1,6 +1,7 @@ package smash import ( + "io/fs" "os" "github.com/logrusorgru/aurora/v3" @@ -8,8 +9,6 @@ import ( "github.com/thushan/smash/pkg/indexer" ) -var FileQueueSize = 1000 - type App struct { Flags *app.Flags Args []string @@ -20,6 +19,7 @@ func (app *App) Run() error { var locations = app.Locations var excludeDirs = app.Flags.ExcludeDir var excludeFiles = app.Flags.ExcludeFile + var walker = indexer.NewConfigured(excludeDirs, excludeFiles) if !app.Flags.Silent { @@ -28,22 +28,30 @@ func (app *App) Run() error { app.setMaxThreads() - fsq := make(chan string, FileQueueSize) + list := make(chan indexer.FileFS) + done := make(chan struct{}) + defer close(done) - go func() { - for _, location := range locations { - app.printVerbose("Indexing location ", aurora.Cyan(location)) - walker.WalkDirectory(os.DirFS(location), fsq) - } - close(fsq) - }() + for _, location := range locations { + app.printVerbose("Indexing location ", aurora.Cyan(location)) + files, _ := walker.WalkDirectory(buildLocations, done) + } totalFiles := 0 - for filename := range fsq { + for file := range list { totalFiles++ - app.printVerbose("Indexed file ", aurora.Blue(filename)) + app.printVerbose("Indexed file ", aurora.Blue(file.Name)) } return nil } +func buildLocations(locations []string) []fs.FS { + paths := make([]fs.FS, len(locations)) + + for _, location := range locations { + // we support local for now + paths = append(paths, os.DirFS(location)) + } + return paths +} diff --git a/pkg/indexer/indexer.go b/pkg/indexer/indexer.go index 08950a0..20d31a6 100644 --- a/pkg/indexer/indexer.go +++ b/pkg/indexer/indexer.go @@ -1,14 +1,18 @@ package indexer import ( - "fmt" + "errors" "io/fs" - "os" "path/filepath" "regexp" "strings" ) +type FileFS struct { + FileSystem fs.FS + Path string + Name string +} type IndexerConfig struct { dirMatcher *regexp.Regexp fileMatcher *regexp.Regexp @@ -35,30 +39,42 @@ func NewConfigured(excludeDirFilter []string, excludeFileFilter []string) *Index } } -func (config *IndexerConfig) WalkDirectory(fsys fs.FS, files chan string) { - walkErr := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - - // Index just the files - if d.IsDir() { - if isSystemFolder(d.Name()) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { - return filepath.SkipDir - } - } else { - filename := filepath.Base(path) - if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(filename) { - return nil +func (config *IndexerConfig) WalkDirectory(f fs.FS, done <-chan struct{}) (<-chan FileFS, <-chan error) { + files := make(chan FileFS) + errrs := make(chan error, 1) + go func() { + // Clean up after we walk + defer close(files) + errrs <- fs.WalkDir(f, ".", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err } - files <- path - } - return nil - }) - if walkErr != nil { - fmt.Fprintln(os.Stderr, "Walk Failed: ", walkErr) - } + // Index just the files + if d.IsDir() { + if isSystemFolder(d.Name()) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { + return filepath.SkipDir + } + } else { + filename := filepath.Base(path) + if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(filename) { + return nil + } + + select { + case files <- FileFS{ + FileSystem: f, + Path: path, + Name: filename, + }: + case <-done: + return errors.New("operation cancelled") + } + } + return nil + }) + }() + return files, errrs } func isSystemFolder(path string) bool { From e8f48042786e78f4df9cf75d7885b63dc9582e35 Mon Sep 17 00:00:00 2001 From: Thushan Fernando Date: Mon, 6 Nov 2023 21:56:45 +1100 Subject: [PATCH 3/7] Walking async, need to fix tests. --- internal/smash/app.go | 29 ++++++++++++++--------------- pkg/indexer/indexer.go | 7 ++----- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/internal/smash/app.go b/internal/smash/app.go index b7ba21f..0cb6203 100644 --- a/internal/smash/app.go +++ b/internal/smash/app.go @@ -1,7 +1,7 @@ package smash import ( - "io/fs" + "log" "os" "github.com/logrusorgru/aurora/v3" @@ -30,28 +30,27 @@ func (app *App) Run() error { list := make(chan indexer.FileFS) done := make(chan struct{}) + defer close(done) - for _, location := range locations { - app.printVerbose("Indexing location ", aurora.Cyan(location)) - files, _ := walker.WalkDirectory(buildLocations, done) - } + go func() { + defer close(list) + for _, location := range locations { + app.printVerbose("Indexing location ", aurora.Cyan(location)) + errc := walker.WalkDirectory(os.DirFS(location), list, done) - totalFiles := 0 + if err := <-errc; err != nil { + log.Println("Failed to walk location ", aurora.Magenta(location), " because ", aurora.Red(errc)) + } + } + }() + totalFiles := 0 for file := range list { totalFiles++ app.printVerbose("Indexed file ", aurora.Blue(file.Name)) } + app.printVerbose("Total Files: ", aurora.Blue(totalFiles)) return nil } -func buildLocations(locations []string) []fs.FS { - paths := make([]fs.FS, len(locations)) - - for _, location := range locations { - // we support local for now - paths = append(paths, os.DirFS(location)) - } - return paths -} diff --git a/pkg/indexer/indexer.go b/pkg/indexer/indexer.go index 20d31a6..f5cb6bc 100644 --- a/pkg/indexer/indexer.go +++ b/pkg/indexer/indexer.go @@ -39,12 +39,9 @@ func NewConfigured(excludeDirFilter []string, excludeFileFilter []string) *Index } } -func (config *IndexerConfig) WalkDirectory(f fs.FS, done <-chan struct{}) (<-chan FileFS, <-chan error) { - files := make(chan FileFS) +func (config *IndexerConfig) WalkDirectory(f fs.FS, files chan FileFS, done <-chan struct{}) <-chan error { errrs := make(chan error, 1) go func() { - // Clean up after we walk - defer close(files) errrs <- fs.WalkDir(f, ".", func(path string, d fs.DirEntry, err error) error { if err != nil { return err @@ -74,7 +71,7 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, done <-chan struct{}) (<-cha return nil }) }() - return files, errrs + return errrs } func isSystemFolder(path string) bool { From 7d8aebb38dbec65abf89a41c0684f0965746406b Mon Sep 17 00:00:00 2001 From: Thushan Fernando Date: Thu, 9 Nov 2023 10:01:11 +1100 Subject: [PATCH 4/7] Indexer improvements. --- internal/smash/app.go | 15 +-- pkg/indexer/indexer.go | 55 ++++----- pkg/indexer/indexer_test.go | 216 +++++++++++++++++++++++------------- 3 files changed, 165 insertions(+), 121 deletions(-) diff --git a/internal/smash/app.go b/internal/smash/app.go index 0cb6203..586d5dc 100644 --- a/internal/smash/app.go +++ b/internal/smash/app.go @@ -28,25 +28,22 @@ func (app *App) Run() error { app.setMaxThreads() - list := make(chan indexer.FileFS) - done := make(chan struct{}) - - defer close(done) + files := make(chan indexer.FileFS) go func() { - defer close(list) for _, location := range locations { app.printVerbose("Indexing location ", aurora.Cyan(location)) - errc := walker.WalkDirectory(os.DirFS(location), list, done) + err := walker.WalkDirectory(os.DirFS(location), location, files) - if err := <-errc; err != nil { - log.Println("Failed to walk location ", aurora.Magenta(location), " because ", aurora.Red(errc)) + if err != nil { + log.Println("Failed to walk location ", aurora.Magenta(location), " because ", aurora.Red(err)) } } + defer close(files) }() totalFiles := 0 - for file := range list { + for file := range files { totalFiles++ app.printVerbose("Indexed file ", aurora.Blue(file.Name)) } diff --git a/pkg/indexer/indexer.go b/pkg/indexer/indexer.go index f5cb6bc..83801a7 100644 --- a/pkg/indexer/indexer.go +++ b/pkg/indexer/indexer.go @@ -1,7 +1,6 @@ package indexer import ( - "errors" "io/fs" "path/filepath" "regexp" @@ -12,6 +11,7 @@ type FileFS struct { FileSystem fs.FS Path string Name string + FullName string } type IndexerConfig struct { dirMatcher *regexp.Regexp @@ -39,39 +39,30 @@ func NewConfigured(excludeDirFilter []string, excludeFileFilter []string) *Index } } -func (config *IndexerConfig) WalkDirectory(f fs.FS, files chan FileFS, done <-chan struct{}) <-chan error { - errrs := make(chan error, 1) - go func() { - errrs <- fs.WalkDir(f, ".", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err +func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan FileFS) error { + walkErr := fs.WalkDir(f, ".", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + if isSystemFolder(d.Name()) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { + return filepath.SkipDir } - - // Index just the files - if d.IsDir() { - if isSystemFolder(d.Name()) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { - return filepath.SkipDir - } - } else { - filename := filepath.Base(path) - if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(filename) { - return nil - } - - select { - case files <- FileFS{ - FileSystem: f, - Path: path, - Name: filename, - }: - case <-done: - return errors.New("operation cancelled") - } + } else { + filename := filepath.Base(path) + if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(filename) { + return nil } - return nil - }) - }() - return errrs + files <- FileFS{ + FileSystem: f, + Path: path, + Name: filename, + FullName: filepath.Join(root, path), + } + } + return nil + }) + return walkErr } func isSystemFolder(path string) bool { diff --git a/pkg/indexer/indexer_test.go b/pkg/indexer/indexer_test.go index 67c0025..df0577f 100644 --- a/pkg/indexer/indexer_test.go +++ b/pkg/indexer/indexer_test.go @@ -2,148 +2,204 @@ package indexer import ( "crypto/rand" + "reflect" "testing" "testing/fstest" ) -func randomBytes(length int) []byte { - buffer := make([]byte, length) - _, _ = rand.Read(buffer) - return buffer +func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, t *testing.T) []string { + fr := "mock://" + fs := createMockFS(files) + ch := make(chan FileFS) + + go func() { + defer close(ch) + indexer := NewConfigured(excludeDir, excludeFiles) + err := indexer.WalkDirectory(fs, fr, ch) + if err != nil { + t.Errorf("WalkDirectory returned an error: %v", err) + } + }() + + return channelFileToSliceOfFiles(ch) } - func TestIndexDirectoryWithFilesInRoot(t *testing.T) { - fsq := make(chan string, 10) - - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", } - indexer := New() - indexer.WalkDirectory(fs, fsq) + walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, t) - expected := len(fs) - actual := len(fsq) + expected := mockFiles + actual := walkedFiles - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) } } func TestIndexDirectoryWithFilesAcrossFolders(t *testing.T) { - fsq := make(chan string, 10) - - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, - "subfolder-1/DSC19845.ARW": {Data: randomBytes(1024)}, - "subfolder-1/DSC19846.ARW": {Data: randomBytes(1024)}, - "subfolder-2/DSC19847.ARW": {Data: randomBytes(1024)}, - "subfolder-2/DSC19848.ARW": {Data: randomBytes(1024)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "subfolder-1/DSC19845.ARW", + "subfolder-1/DSC19846.ARW", + "subfolder-2/DSC19847.ARW", + "subfolder-2/DSC19848.ARW", } - indexer := New() - indexer.WalkDirectory(fs, fsq) + walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, t) - expected := len(fs) - actual := len(fsq) + expected := mockFiles + actual := walkedFiles + + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) } } func TestIndexDirectoryWithDirExclusions(t *testing.T) { - fsq := make(chan string, 10) exclude_dir := []string{"subfolder-1", "subfolder-2", "subfolder-not-found"} exclude_file := []string{} - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, - "subfolder-1/DSC19845.ARW": {Data: randomBytes(1024)}, - "subfolder-1/DSC19846.ARW": {Data: randomBytes(1024)}, - "subfolder-2/DSC19847.ARW": {Data: randomBytes(1024)}, - "subfolder-2/DSC19848.ARW": {Data: randomBytes(1024)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "subfolder-1/DSC19845.ARW", + "subfolder-1/DSC19846.ARW", + "subfolder-2/DSC19847.ARW", + "subfolder-2/DSC19848.ARW", + } + + walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t) + + expected := []string{ + mockFiles[0], + mockFiles[1], } - indexer := NewConfigured(exclude_dir, exclude_file) - indexer.WalkDirectory(fs, fsq) + actual := walkedFiles - expected := len(fs) - 4 - actual := len(fsq) + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) } } func TestIndexDirectoryWithFileExclusions(t *testing.T) { - fsq := make(chan string, 10) exclude_dir := []string{} exclude_file := []string{"exclude.me"} - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, - "exclude.me": {Data: randomBytes(1024)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "exclude.me", } - indexer := NewConfigured(exclude_dir, exclude_file) - indexer.WalkDirectory(fs, fsq) + walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t) - expected := len(fs) - 1 - actual := len(fsq) + expected := []string{ + mockFiles[0], + mockFiles[1], + } + + actual := walkedFiles - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } + + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) } } func TestIndexDirectoryWithFileAndDirExclusions(t *testing.T) { - fsq := make(chan string, 10) + exclude_dir := []string{"exclude-dir"} exclude_file := []string{"exclude.me"} - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, - "exclude.me": {Data: randomBytes(1024)}, - "exclude-dir/random.file": {Data: randomBytes(1024)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "exclude.me", + "exclude-dir/random.file", + } + + walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t) + + expected := []string{ + mockFiles[0], + mockFiles[1], } - indexer := NewConfigured(exclude_dir, exclude_file) - indexer.WalkDirectory(fs, fsq) + actual := walkedFiles - expected := len(fs) - 2 - actual := len(fsq) + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } - if actual != expected { - t.Errorf("expected %d, got %d files", expected, actual) + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %v, got %v files", expected, actual) } } func TestIndexDirectoryWhichContainsWindowsSystemFiles(t *testing.T) { - fsq := make(chan string, 10) - exclude_dir := []string{} exclude_file := []string{} - fs := fstest.MapFS{ - "DSC19841.ARW": {Data: randomBytes(1024)}, - "DSC19842.ARW": {Data: randomBytes(2048)}, - "$RECYCLE.BIN/test.txt": {Data: randomBytes(1024)}, - "$MFT/random.file": {Data: randomBytes(1024)}, + mockFiles := []string{ + "DSC19841.ARW", + "DSC19842.ARW", + "$RECYCLE.BIN/test.txt", + "$MFT/random.file", } - indexer := NewConfigured(exclude_dir, exclude_file) - indexer.WalkDirectory(fs, fsq) + walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t) + + expected := []string{ + mockFiles[0], + mockFiles[1], + } - expected := 2 - actual := len(fsq) + actual := walkedFiles + + if len(actual) != len(expected) { + t.Errorf("expected %d, got %d files", len(expected), len(actual)) + } - if actual != expected { + if !reflect.DeepEqual(actual, expected) { t.Errorf("expected %v, got %v files", expected, actual) } } +func channelFileToSliceOfFiles(ch <-chan FileFS) []string { + var result []string + for f := range ch { + result = append(result, f.Path) + } + return result +} + +func createMockFS(files []string) fstest.MapFS { + var fs fstest.MapFS = make(map[string]*fstest.MapFile) + for _, file := range files { + fs[file] = &fstest.MapFile{} + } + return fs +} +func randomBytes(length int) []byte { + buffer := make([]byte, length) + _, _ = rand.Read(buffer) + return buffer +} From a092e62f6742bef5f699baa3d3ab934aa50e9d00 Mon Sep 17 00:00:00 2001 From: Thushan Fernando Date: Thu, 9 Nov 2023 11:07:43 +1100 Subject: [PATCH 5/7] reshuffle. --- internal/smash/app.go | 2 +- pkg/indexer/indexer_test.go | 38 ++++++++++++++++--------------------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/internal/smash/app.go b/internal/smash/app.go index 586d5dc..c4a7803 100644 --- a/internal/smash/app.go +++ b/internal/smash/app.go @@ -31,6 +31,7 @@ func (app *App) Run() error { files := make(chan indexer.FileFS) go func() { + defer close(files) for _, location := range locations { app.printVerbose("Indexing location ", aurora.Cyan(location)) err := walker.WalkDirectory(os.DirFS(location), location, files) @@ -39,7 +40,6 @@ func (app *App) Run() error { log.Println("Failed to walk location ", aurora.Magenta(location), " because ", aurora.Red(err)) } } - defer close(files) }() totalFiles := 0 diff --git a/pkg/indexer/indexer_test.go b/pkg/indexer/indexer_test.go index df0577f..11232b8 100644 --- a/pkg/indexer/indexer_test.go +++ b/pkg/indexer/indexer_test.go @@ -1,28 +1,11 @@ package indexer import ( - "crypto/rand" "reflect" "testing" "testing/fstest" ) -func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, t *testing.T) []string { - fr := "mock://" - fs := createMockFS(files) - ch := make(chan FileFS) - - go func() { - defer close(ch) - indexer := NewConfigured(excludeDir, excludeFiles) - err := indexer.WalkDirectory(fs, fr, ch) - if err != nil { - t.Errorf("WalkDirectory returned an error: %v", err) - } - }() - - return channelFileToSliceOfFiles(ch) -} func TestIndexDirectoryWithFilesInRoot(t *testing.T) { mockFiles := []string{ "DSC19841.ARW", @@ -191,6 +174,22 @@ func channelFileToSliceOfFiles(ch <-chan FileFS) []string { return result } +func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, t *testing.T) []string { + fr := "mock://" + fs := createMockFS(files) + ch := make(chan FileFS) + + go func() { + defer close(ch) + indexer := NewConfigured(excludeDir, excludeFiles) + err := indexer.WalkDirectory(fs, fr, ch) + if err != nil { + t.Errorf("WalkDirectory returned an error: %v", err) + } + }() + + return channelFileToSliceOfFiles(ch) +} func createMockFS(files []string) fstest.MapFS { var fs fstest.MapFS = make(map[string]*fstest.MapFile) for _, file := range files { @@ -198,8 +197,3 @@ func createMockFS(files []string) fstest.MapFS { } return fs } -func randomBytes(length int) []byte { - buffer := make([]byte, length) - _, _ = rand.Read(buffer) - return buffer -} From 414e33800d4bf3ab54cb105657988243c847288c Mon Sep 17 00:00:00 2001 From: Thushan Fernando Date: Thu, 9 Nov 2023 11:27:44 +1100 Subject: [PATCH 6/7] reorganise system filter to be a constant init at construction refactor & reshuffle. --- internal/app/version.go | 12 ++++++------ internal/smash/app.go | 2 +- pkg/indexer/indexer.go | 32 ++++++++++++++++++-------------- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/internal/app/version.go b/internal/app/version.go index 78a0d14..2a6a370 100644 --- a/internal/app/version.go +++ b/internal/app/version.go @@ -6,12 +6,12 @@ import ( "github.com/logrusorgru/aurora/v3" ) -var ( - Version = "v0.0.1" - Edition = "open-source" - Home = "github.com/thushan/smash" - Time string - User string +const ( + Version = "v0.0.1" + Edition = "open-source" + Home = "github.com/thushan/smash" + Time string = "nowish" + User string = "local" ) func PrintVersionInfo(extendedInfo bool) { diff --git a/internal/smash/app.go b/internal/smash/app.go index c4a7803..c681e45 100644 --- a/internal/smash/app.go +++ b/internal/smash/app.go @@ -45,7 +45,7 @@ func (app *App) Run() error { totalFiles := 0 for file := range files { totalFiles++ - app.printVerbose("Indexed file ", aurora.Blue(file.Name)) + app.printVerbose("Indexed file ", aurora.Blue(file.Path)) } app.printVerbose("Total Files: ", aurora.Blue(totalFiles)) diff --git a/pkg/indexer/indexer.go b/pkg/indexer/indexer.go index 83801a7..be1f775 100644 --- a/pkg/indexer/indexer.go +++ b/pkg/indexer/indexer.go @@ -17,6 +17,7 @@ type IndexerConfig struct { dirMatcher *regexp.Regexp fileMatcher *regexp.Regexp + excludeSysFilter []string ExcludeDirFilter []string ExcludeFileFilter []string } @@ -27,16 +28,24 @@ func New() *IndexerConfig { ExcludeDirFilter: nil, dirMatcher: nil, fileMatcher: nil, + excludeSysFilter: []string{ + "System Volume Information", "$RECYCLE.BIN", "$MFT", /* Windows */ + ".Trash", ".Trash-1000", /* Linux */ + ".Trashes", /* macOS */ + }, } } - func NewConfigured(excludeDirFilter []string, excludeFileFilter []string) *IndexerConfig { - return &IndexerConfig{ - ExcludeDirFilter: excludeDirFilter, - ExcludeFileFilter: excludeFileFilter, - dirMatcher: regexp.MustCompile(strings.Join(excludeDirFilter, "|")), - fileMatcher: regexp.MustCompile(strings.Join(excludeFileFilter, "|")), + indexer := New() + if len(excludeFileFilter) > 0 { + indexer.ExcludeFileFilter = excludeFileFilter + indexer.fileMatcher = regexp.MustCompile(strings.Join(excludeFileFilter, "|")) } + if len(excludeDirFilter) > 0 { + indexer.ExcludeDirFilter = excludeDirFilter + indexer.dirMatcher = regexp.MustCompile(strings.Join(excludeDirFilter, "|")) + } + return indexer } func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan FileFS) error { @@ -45,7 +54,7 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan File return err } if d.IsDir() { - if isSystemFolder(d.Name()) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { + if config.isSystemFolder(d.Name()) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { return filepath.SkipDir } } else { @@ -65,14 +74,9 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan File return walkErr } -func isSystemFolder(path string) bool { +func (config *IndexerConfig) isSystemFolder(path string) bool { folder := filepath.Clean(path) - skipDirs := []string{ - "System Volume Information", "$RECYCLE.BIN", "$MFT", /* Windows */ - ".Trash", ".Trash-1000", /* Linux */ - ".Trashes", /* macOS */ - } - for _, v := range skipDirs { + for _, v := range config.excludeSysFilter { if folder == v { return true } From 7329383fa7490f18d5b2a9f4a4927a1ac1c74a1f Mon Sep 17 00:00:00 2001 From: Thushan Fernando Date: Thu, 9 Nov 2023 11:34:56 +1100 Subject: [PATCH 7/7] cleanup walking. --- pkg/indexer/indexer.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pkg/indexer/indexer.go b/pkg/indexer/indexer.go index be1f775..9dd0af7 100644 --- a/pkg/indexer/indexer.go +++ b/pkg/indexer/indexer.go @@ -53,19 +53,19 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan File if err != nil { return err } + name := filepath.Clean(d.Name()) if d.IsDir() { - if config.isSystemFolder(d.Name()) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { + if config.isSystemFolder(name) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) { return filepath.SkipDir } } else { - filename := filepath.Base(path) - if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(filename) { + if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(name) { return nil } files <- FileFS{ FileSystem: f, Path: path, - Name: filename, + Name: name, FullName: filepath.Join(root, path), } } @@ -74,8 +74,7 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan File return walkErr } -func (config *IndexerConfig) isSystemFolder(path string) bool { - folder := filepath.Clean(path) +func (config *IndexerConfig) isSystemFolder(folder string) bool { for _, v := range config.excludeSysFilter { if folder == v { return true