Skip to content

Commit

Permalink
Fixed bug that caused more false positives in duplicates reporting
Browse files — browse the repository at this point in the history
  • Loading branch information
m-manu committed Aug 31, 2022
1 parent 6b685f6 commit ced4deb
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 4 deletions.
17 changes: 14 additions & 3 deletions entity/digest_to_files.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,19 @@ func FileDigestComparator(a, b interface{}) int {
} else if fa.FileSize > fb.FileSize {
return -1
} else {
return 0
if fa.FileExtension < fb.FileExtension {
return 1
} else if fa.FileExtension > fb.FileExtension {
return -1
} else {
if fa.FileHash < fb.FileHash {
return 1
} else if fa.FileHash > fb.FileHash {
return -1
} else {
return 0
}
}
}
}

Expand All @@ -40,7 +52,6 @@ func (m *DigestToFiles) Set(key FileDigest, value string) {
if found {
values = valuesRaw.([]string)
values = append(values, value)
m.data.Put(key, values)
} else {
values = []string{value}
}
Expand All @@ -49,7 +60,7 @@ func (m *DigestToFiles) Set(key FileDigest, value string) {
}

// Remove deletes the entry for the given digest from the map.
func (m *DigestToFiles) Remove(fd *FileDigest) {
func (m *DigestToFiles) Remove(fd FileDigest) {
m.data.Remove(fd)
}

Expand Down
2 changes: 2 additions & 0 deletions report.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/m-manu/go-find-duplicates/entity"
"github.com/m-manu/go-find-duplicates/fmte"
"os"
"sort"
"strconv"
"time"
)
Expand Down Expand Up @@ -46,6 +47,7 @@ func getReportAsText(duplicates *entity.DigestToFiles) bytes.Buffer {
bb.Grow(duplicates.Size() * bytesPerLineGuess)
for iter := duplicates.Iterator(); iter.HasNext(); {
digest, paths := iter.Next()
sort.Strings(paths)
bb.WriteString(fmt.Sprintf("%s: %d duplicate(s)\n", digest, len(paths)-1))
for _, path := range paths {
bb.WriteString(fmt.Sprintf("\t%s\n", path))
Expand Down
6 changes: 5 additions & 1 deletion service/find_duplicates.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,16 @@ func computeDigestsAndGroupThem(shortlist entity.FileExtAndSizeToFiles, parallel
}
wg.Wait()
// Remove non-duplicates
var duplicateKeys []entity.FileDigest
for iter := duplicates.Iterator(); iter.HasNext(); {
digest, files := iter.Next()
if len(files) <= 1 {
duplicates.Remove(digest)
duplicateKeys = append(duplicateKeys, *digest)
}
}
for _, key := range duplicateKeys {
duplicates.Remove(key)
}
return
}

Expand Down

0 comments on commit ced4deb

Please sign in to comment.