Skip to content

Commit

Permalink
Merge pull request #25 from thushan/murmur3-impl
Browse files Browse the repository at this point in the history
Expands smash algorithms to support murmur3, sha256, sha512 & md5
  • Loading branch information
thushan authored Nov 21, 2023
2 parents c3bc034 + 3d9c7be commit 9945432
Show file tree
Hide file tree
Showing 8 changed files with 240 additions and 74 deletions.
98 changes: 98 additions & 0 deletions docs/algorithms.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Algorithms Supported

`smash` supports a variety of hashing algorithms.

<table>
<thead>
<tr>
<th>Algorithm</th>
<th>Default</th>
<th>Variations / Aliases</th>
</tr>
</thead>
<tbody>
<tr>
<td>
xxhash<br/>
<sub><sup><a href="https://xxhash.com/">learn more</a></sup></sub>
</td>
<td><code>xxhash</code></td>
<td>
<ul>
<li><code>xxhash</code></li>
</ul>
</td>
</tr>
<tr>
<td>
murmur3<br/>
<sub><sup><a href="https://en.wikipedia.org/wiki/MurmurHash">learn more</a></sup></sub>
</td>
<td><code>murmur3</code></td>
<td>
<ul>
<li><code>murmur3</code> (alias: <code>murmur3-128</code>)</li>
<li><code>murmur3-128</code></li>
<li><code>murmur3-64</code></li>
<li><code>murmur3-32</code></li>
</ul>
</td>
</tr>
<tr>
<td>SHA-256</td>
<td><code>sha256</code></td>
<td>
<ul>
<li><code>sha256</code></li>
<li><code>sha-256</code></li>
</ul>
</td>
</tr>
<tr>
<td>SHA-512</td>
<td><code>sha512</code></td>
<td>
<ul>
<li><code>sha512</code></li>
<li><code>sha-512</code></li>
</ul>
</td>
</tr>
<tr>
<td>MD5</td>
<td><code>md5</code></td>
<td>
<ul>
<li><code>md5</code></li>
</ul>
</td>
</tr>
<tr>
<td>FNV128<br/>
<sub><sup><a href="https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">learn more</a></sup></sub></td>
<td><code>fnv128</code></td>
<td>
<ul>
<li><code>fnv128</code></li>
<li><code>fnv-128</code></li>
</ul>
</td>
</tr>
<tr>
<td>FNV128a<br/>
<sub><sup><a href="https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">learn more</a></sup></sub></td>
<td><code>fnv128a</code></td>
<td>
<ul>
<li><code>fnv</code> (alias: <code>fnv128a</code>)</li>
<li><code>fnv128a</code></li>
<li><code>fnv-128a</code></li>
</ul>
</td>
</tr>
</tbody>
</table>

Generally, when slicing is enabled (default), we'd recommend `xxhash` or `murmur3`.

When you want a full hash (`--disable-slicing` option), we'd generally recommend `sha512` or `sha256`.
Binary file added docs/artefacts/smash-v0.0.3-long-demo.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/cespare/xxhash v1.1.0
github.com/dustin/go-humanize v1.0.1
github.com/pterm/pterm v0.12.70
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72
github.com/spf13/cobra v1.8.0
github.com/thediveo/enumflag/v2 v2.0.5
golang.org/x/term v0.14.0
Expand Down
33 changes: 31 additions & 2 deletions internal/algorithms/algorithm.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
package algorithms

import (
md5h "crypto/md5"
sha256h "crypto/sha256"
sha512h "crypto/sha512"
"hash"
fnvh "hash/fnv"

"github.com/spaolacci/murmur3"

cxHash "github.com/cespare/xxhash"
)

Expand All @@ -13,13 +18,25 @@ const (
Xxhash Algorithm = iota
Fnv128
Fnv128a
Murmur3_128
Murmur3_64
Murmur3_32
Md5
Sha256
Sha512
)

// HashAlgorithms Used by CLI for validating --algorithm flag
var HashAlgorithms = map[int][]string{
0: {"xxhash"},
1: {"fnv128"},
2: {"fnv128a"},
2: {"fnv128a", "fnv"},
3: {"murmur3-128", "murmur3"},
4: {"murmur3-64"},
5: {"murmur3-32"},
6: {"md5"},
7: {"sha-256", "sha256"},
8: {"sha-512", "sha512"},
}

// New Instantiates a new representation of the Hash Algorithm.
Expand All @@ -31,8 +48,20 @@ func (a Algorithm) New() hash.Hash {
return fnvh.New128()
case Fnv128a:
return fnvh.New128a()
case Murmur3_32:
return murmur3.New32()
case Murmur3_64:
return murmur3.New64()
case Murmur3_128:
return murmur3.New128()
case Md5:
return md5h.New()
case Sha256:
return sha256h.New()
case Sha512:
return sha512h.New()
}
return fnvh.New128a()
return cxHash.New()
}

// Index Returns the index for the Hash Algorithm
Expand Down
4 changes: 2 additions & 2 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ func init() {
rootCmd.PersistentFlags().Var(
enumflag.New(&af.Algorithm, "algorithm", algorithms.HashAlgorithms, enumflag.EnumCaseInsensitive),
"algorithm",
"Algorithm to use, can be 'xxhash', 'fnv128', 'fnv128a'")
"Algorithm to use to hash files. Supported: xxhash, murmur3, md5, sha512, sha256 (full list, see readme)")
flags := rootCmd.Flags()
flags.StringSliceVarP(&af.Base, "base", "", nil, "Base directories to use for comparison. Eg. --base=/c/dos,/c/run/dos/")
flags.StringSliceVarP(&af.Base, "base", "", nil, "Base directories to use for comparison. Eg. --base=/c/dos,/c/dos/run/,/run/dos/run")
flags.StringSliceVarP(&af.ExcludeFile, "exclude-file", "", nil, "Files to exclude separated by comma. Eg. --exclude-file=.gitignore,*.csv")
flags.StringSliceVarP(&af.ExcludeDir, "exclude-dir", "", nil, "Directories to exclude separated by comma. Eg. --exclude-dir=.git,.idea")
flags.IntVarP(&af.MaxThreads, "max-threads", "p", runtime.NumCPU(), "Maximum threads to utilise.")
Expand Down
102 changes: 102 additions & 0 deletions pkg/slicer/slicer_algorithm_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
//nolint
package slicer

import (
"bytes"
"encoding/hex"
"io"
"os"
"strings"
"testing"

"github.com/thushan/smash/internal/algorithms"
)

// fieldalignment: struct with 40 pointer bytes could be 24 (govet)
// but this is nicer to see / read :)
//
// algoData is the fixture table for the hashing-algorithm tests: each row
// pairs an algorithm, a slicing mode and an input file with the hex digest
// the slicer is expected to produce for that combination.
//
// Observation from the values below: every Murmur3_64 digest equals the
// leading 16 hex characters of the Murmur3_128 digest for the same file and
// slicing mode.
var algoData = []struct {
// algorithm is the hash algorithm the slicer is constructed with.
algorithm algorithms.Algorithm
// disableSlicing is copied into SlicerOptions.DisableSlicing for the row.
disableSlicing bool
// filename is the path of the input file read with os.ReadFile.
filename string
// expectHash is the expected digest as a hex string; it is compared
// case-insensitively against the hex encoding of the computed hash.
expectHash string
}{
{algorithms.Xxhash, false, "./artefacts/test-manipulated.1mb", "4f595576799edcd9"},
{algorithms.Xxhash, true, "./artefacts/test-manipulated.1mb", "4a1960f16a88960c"},
{algorithms.Xxhash, false, "./artefacts/test.1mb", "bb83f43630ee546f"},
{algorithms.Xxhash, true, "./artefacts/test.1mb", "6b6255ee515dcc04"},
{algorithms.Murmur3_128, false, "./artefacts/test-manipulated.1mb", "daa0b57d39ab077f56bcdf855753d8dd"},
{algorithms.Murmur3_128, true, "./artefacts/test-manipulated.1mb", "7b49601fb19613cfa36cc032910228b7"},
{algorithms.Murmur3_128, false, "./artefacts/test.1mb", "92d0c527266ec9151a6a9239c105df84"},
{algorithms.Murmur3_128, true, "./artefacts/test.1mb", "35ec8ac6041a7e9b70c61cc30d40b592"},
{algorithms.Murmur3_64, false, "./artefacts/test-manipulated.1mb", "daa0b57d39ab077f"},
{algorithms.Murmur3_64, true, "./artefacts/test-manipulated.1mb", "7b49601fb19613cf"},
{algorithms.Murmur3_64, false, "./artefacts/test.1mb", "92d0c527266ec915"},
{algorithms.Murmur3_64, true, "./artefacts/test.1mb", "35ec8ac6041a7e9b"},
{algorithms.Murmur3_32, false, "./artefacts/test-manipulated.1mb", "eb6482f3"},
{algorithms.Murmur3_32, true, "./artefacts/test-manipulated.1mb", "e0fa6869"},
{algorithms.Murmur3_32, false, "./artefacts/test.1mb", "5ca146ee"},
{algorithms.Murmur3_32, true, "./artefacts/test.1mb", "3a3133fa"},
{algorithms.Fnv128, false, "./artefacts/test-manipulated.1mb", "e91da5b6fb6c3df866d19794bcc031a2"},
{algorithms.Fnv128, true, "./artefacts/test-manipulated.1mb", "8808e2a6d269deb5bce97f110f60e8dc"},
{algorithms.Fnv128, false, "./artefacts/test.1mb", "af25513dbbfb8ebf847829a2cd6e76f2"},
{algorithms.Fnv128, true, "./artefacts/test.1mb", "e55b683eca015645afc7316f7df9993b"},
{algorithms.Fnv128a, false, "./artefacts/test-manipulated.1mb", "04721f877b7be5ad3e487b87ad486f30"},
{algorithms.Fnv128a, true, "./artefacts/test-manipulated.1mb", "998f1046fb1e726b7dedd1eecd453c1a"},
{algorithms.Fnv128a, false, "./artefacts/test.1mb", "f80ebc069329ec8a59e2c444c300f218"},
{algorithms.Fnv128a, true, "./artefacts/test.1mb", "ebc231b45eb5b9c7be1c936829047f1f"},
{algorithms.Md5, false, "./artefacts/test-manipulated.1mb", "040ca2ff5e59e6b0870b0f68a92a3968"},
{algorithms.Md5, true, "./artefacts/test-manipulated.1mb", "df221ae4955e4b77f50ade6ab70c5210"},
{algorithms.Md5, false, "./artefacts/test.1mb", "546b9508c9650e5d2e0c1c15f63c342c"},
{algorithms.Md5, true, "./artefacts/test.1mb", "4c18efb7e70ac81f341ce3f5ef3684a4"},
{algorithms.Sha512, false, "./artefacts/test-manipulated.1mb", "b8b783b66d20b280709522abd2478f0f7e599a31d62d9f876d8d91e7ad3874e75964f5bbb2e35ca1380e4d28d9135c40b12d3cee7c7b1f89c29b5d2ef38d0cc7"},
{algorithms.Sha512, true, "./artefacts/test-manipulated.1mb", "dd69b1afbcb92135421574297fa47f612a23b386721b8562cd7852a0eebe0f4d8436d02b6773b7c072c18c67027d53eeedc9d18cc6171dfc82a907bfa570ae03"},
{algorithms.Sha512, false, "./artefacts/test.1mb", "88402b9df2f2dd06597f0a1db9c6257645acb6ddb949d4daa00a7f28dfd681b5a46cef809774e9c0e5f0f581d8a240eac62bde89d99220055342dae8d6e680cf"},
{algorithms.Sha512, true, "./artefacts/test.1mb", "8cedef8fa8d1ab8bdee1a9441165fe2af8ee37c9672e06f15ca30f5a3f840096585e474c2b800760bd66db96239f3c67761303ec1d87553f27afc7d8c9e7ea9f"},
{algorithms.Sha256, false, "./artefacts/test-manipulated.1mb", "9539725bbdda1bfb410c51d9ebc0ba72391e7ba2145e74422028253a30672506"},
{algorithms.Sha256, true, "./artefacts/test-manipulated.1mb", "aae139d218d16eb32cd63dc6f842f77c89a773fc26a8e7ef3b9023600fad3f17"},
{algorithms.Sha256, false, "./artefacts/test.1mb", "11cfdec95e731151953ab8dbe24de8b3c1a029731740ca649bc82f95338e0540"},
{algorithms.Sha256, true, "./artefacts/test.1mb", "e9403adc74d6a890a0db579ab217e2c4b0490b43e5a87552d3a239f1bdde91b8"},
}

func TestSlice_New_HashingAlgorithms_WithFileSystemFiles(t *testing.T) {

options := SlicerOptions{
DisableSlicing: true,
DisableMeta: false,
DisableFileDetection: false,
}

for _, item := range algoData {
options.DisableSlicing = item.disableSlicing
runHashCheckTestsForFileSystemFile(item.filename, item.algorithm, &options, item.expectHash, t)
}
}

// runHashCheckTestsForFileSystemFile reads filename from disk, hashes its
// contents with a slicer built for algorithm, and checks that the hex-encoded
// hash stored in the slicer stats matches expected (case-insensitively).
//
// Failures are reported with t.Errorf; the helper stops early when the file
// cannot be read or slicing fails, because there is no meaningful hash to
// compare in either case.
func runHashCheckTestsForFileSystemFile(filename string, algorithm algorithms.Algorithm, options *SlicerOptions, expected string, t *testing.T) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	binary, err := os.ReadFile(filename)
	if err != nil {
		t.Errorf("Unexpected io error %v", err)
		return
	}

	// Expose the in-memory file as a section reader spanning the whole content.
	sr := io.NewSectionReader(bytes.NewReader(binary), 0, int64(len(binary)))

	stats := SlicerStats{}
	slicer := New(algorithm)

	if err = slicer.Slice(sr, options, &stats); err != nil {
		t.Errorf("Unexpected Slicer error %v", err)
		// Comparing the hash of a failed run would only add misleading noise.
		return
	}

	actual := hex.EncodeToString(stats.Hash)

	if len(expected) != len(actual) {
		t.Errorf("hash length expected %d, got %d", len(expected), len(actual))
	}

	if !strings.EqualFold(actual, expected) {
		t.Errorf("expected hash %s, got %s", expected, actual)
	}
}
69 changes: 1 addition & 68 deletions pkg/slicer/slicer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,46 +228,7 @@ func TestSliceFS_New_FileSystemTestFile_TestManipulated1mb_WithSlicing(t *testin
}
runHashCheckTestsForFileSystemFile_WithSliceFS(fsys, filename, algorithm, &options, expected, t)
}
func TestSlice_New_FileSystemTestFile_Test1mb_WithSlicing(t *testing.T) {
algorithm := algorithms.Xxhash
expected := "bb83f43630ee546f"
options := SlicerOptions{
DisableSlicing: false,
DisableMeta: false,
DisableFileDetection: false,
}
runHashCheckTestsForFileSystemFile("./artefacts/test.1mb", algorithm, &options, expected, t)
}
func TestSlice_New_FileSystemTestFile_TestManipulated1mb_WithSlicing(t *testing.T) {
algorithm := algorithms.Xxhash
expected := "4f595576799edcd9"
options := SlicerOptions{
DisableSlicing: false,
DisableMeta: false,
DisableFileDetection: false,
}
runHashCheckTestsForFileSystemFile("./artefacts/test-manipulated.1mb", algorithm, &options, expected, t)
}
func TestSlice_New_FileSystemTestFile_Test1mb_WithoutSlicing(t *testing.T) {
algorithm := algorithms.Xxhash
expected := "6b6255ee515dcc04"
options := SlicerOptions{
DisableSlicing: true,
DisableMeta: false,
DisableFileDetection: false,
}
runHashCheckTestsForFileSystemFile("./artefacts/test.1mb", algorithm, &options, expected, t)
}
func TestSlice_New_FileSystemTestFile_TestManipulated1mb_WithoutSlicing(t *testing.T) {
algorithm := algorithms.Xxhash
expected := "4a1960f16a88960c"
options := SlicerOptions{
DisableSlicing: true,
DisableMeta: false,
DisableFileDetection: false,
}
runHashCheckTestsForFileSystemFile("./artefacts/test-manipulated.1mb", algorithm, &options, expected, t)
}

func runHashCheckTestsForFileSystemFile_WithSliceFS(fs fs.FS, filename string, algorithm algorithms.Algorithm, options *SlicerOptions, expected string, t *testing.T) {

slicer := New(algorithm)
Expand All @@ -288,34 +249,6 @@ func runHashCheckTestsForFileSystemFile_WithSliceFS(fs fs.FS, filename string, a
}

}
// runHashCheckTestsForFileSystemFile reads filename from disk, hashes its
// contents with a slicer built for algorithm, and checks that the hex-encoded
// hash stored in the slicer stats matches expected (case-insensitively).
//
// Failures are reported with t.Errorf; the helper stops early when the file
// cannot be read or slicing fails, because there is no meaningful hash to
// compare in either case.
func runHashCheckTestsForFileSystemFile(filename string, algorithm algorithms.Algorithm, options *SlicerOptions, expected string, t *testing.T) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	binary, err := os.ReadFile(filename)
	if err != nil {
		t.Errorf("Unexpected io error %v", err)
		return
	}

	// Expose the in-memory file as a section reader spanning the whole content.
	sr := io.NewSectionReader(bytes.NewReader(binary), 0, int64(len(binary)))

	stats := SlicerStats{}
	slicer := New(algorithm)

	if err = slicer.Slice(sr, options, &stats); err != nil {
		t.Errorf("Unexpected Slicer error %v", err)
		// Comparing the hash of a failed run would only add misleading noise.
		return
	}

	actual := hex.EncodeToString(stats.Hash)

	if len(expected) != len(actual) {
		t.Errorf("hash length expected %d, got %d", len(expected), len(actual))
	}

	if !strings.EqualFold(actual, expected) {
		t.Errorf("expected hash %s, got %s", expected, actual)
	}
}
// TestSlice_New_Hash_xxHash_With1KbBlob delegates to runHashAlgorithmTest with
// the Xxhash algorithm.
// NOTE(review): runHashAlgorithmTest is not visible here; going by the test
// name it presumably hashes a 1Kb blob — confirm against the helper.
func TestSlice_New_Hash_xxHash_With1KbBlob(t *testing.T) {
runHashAlgorithmTest(algorithms.Xxhash, t)
}
Expand Down
Loading

0 comments on commit 9945432

Please sign in to comment.