Skip to content

Commit

Permalink
feat: run file hash algorithms in parallel
Browse files Browse the repository at this point in the history
Signed-off-by: Keith Zantow <[email protected]>
  • Loading branch information
kzantow committed Jan 31, 2025
1 parent a16e374 commit 3ba4421
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 9 deletions.
3 changes: 3 additions & 0 deletions cmd/syft/internal/commands/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/anchore/clio"
"github.com/anchore/fangs"
"github.com/anchore/go-collections"
"github.com/anchore/go-sync"
"github.com/anchore/stereoscope"
"github.com/anchore/stereoscope/pkg/image"
"github.com/anchore/syft/cmd/syft/internal/options"
Expand Down Expand Up @@ -184,6 +185,8 @@ func runScan(ctx context.Context, id clio.Identification, opts *scanOptions, use
}
}

ctx = sync.SetContextExecutor(ctx, sync.NewExecutor(opts.Parallelism))

src, err := getSource(ctx, &opts.Catalog, userInput, sources...)

if err != nil {
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ require (
github.com/OneOfOne/xxhash v1.2.8
github.com/adrg/xdg v0.5.3
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
github.com/anchore/go-sync v0.0.0-20241216143621-0b0fc28c752f
github.com/hashicorp/hcl/v2 v2.23.0
github.com/magiconair/properties v1.8.9
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ github.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb h1:iDMnx6LIj
github.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb/go.mod h1:DmTY2Mfcv38hsHbG78xMiTDdxFtkHpgYNVDPsF2TgHk=
github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 h1:aM1rlcoLz8y5B2r4tTLMiVTrMtpfY0O8EScKJxaSaEc=
github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA=
github.com/anchore/go-sync v0.0.0-20241216143621-0b0fc28c752f h1:0TPfHMmCRZedeu6qB33jxnbt0hX8waAzvVyPcClGiN8=
github.com/anchore/go-sync v0.0.0-20241216143621-0b0fc28c752f/go.mod h1:zrREDHPQOL+4BqMNkp529xdXenbTtafFhSclMdWSAzw=
github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0vW0nnNKJfJieyH/TZ9UYAnTZs5/gHTdAe8=
github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ=
github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods=
Expand Down
6 changes: 4 additions & 2 deletions internal/file/digest.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
package file

import (
"context"
"crypto"
"fmt"
"hash"
"io"
"strings"

"github.com/anchore/go-sync"
"github.com/anchore/syft/syft/file"
)

Expand All @@ -21,7 +23,7 @@ func supportedHashAlgorithms() []crypto.Hash {
}
}

func NewDigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]file.Digest, error) {
func NewDigestsFromFile(ctx context.Context, closer io.ReadCloser, hashes []crypto.Hash) ([]file.Digest, error) {
hashes = NormalizeHashes(hashes)
// create a set of hasher objects tied together with a single writer to feed content into
hashers := make([]hash.Hash, len(hashes))
Expand All @@ -31,7 +33,7 @@ func NewDigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]file.Dige
writers[idx] = hashers[idx]
}

size, err := io.Copy(io.MultiWriter(writers...), closer)
size, err := io.Copy(newParallelWriter(sync.ContextExecutor(ctx), writers...), closer)
if err != nil {
return nil, err
}
Expand Down
43 changes: 43 additions & 0 deletions internal/file/parallel_writer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package file

import (
"errors"
"io"
"sync"

gosync "github.com/anchore/go-sync"
)

// parallelWriter is an io.Writer that passes every buffer it receives to all of
// its underlying writers, running each underlying Write as a separate task on
// the configured executor.
type parallelWriter struct {
// executor is what Write submits the per-writer tasks to.
executor gosync.Executor
// writers are the destinations; each one is handed every buffer given to Write.
writers []io.Writer
}

// newParallelWriter builds a parallelWriter that fans each Write out to the
// given writers, using executor to run the individual writes.
func newParallelWriter(executor gosync.Executor, writers ...io.Writer) *parallelWriter {
	pw := new(parallelWriter)
	pw.executor = executor
	pw.writers = writers
	return pw
}

// Write hands p to every underlying writer, submitting one task per writer to
// the executor, and blocks until all of those tasks have completed.
//
// It returns len(p) and a nil error only when every writer accepted the whole
// buffer. If any writer returns an error, or consumes fewer than len(p) bytes
// without reporting one (treated as io.ErrShortWrite, as io.MultiWriter does),
// Write returns 0 and the failures combined with errors.Join, in the same order
// as the writers were supplied.
func (w *parallelWriter) Write(p []byte) (int, error) {
	// One result slot per writer: each task touches only its own index, so no
	// locking is required and the joined error order is deterministic.
	errs := make([]error, len(w.writers))

	var wg sync.WaitGroup
	wg.Add(len(w.writers))
	for i, writer := range w.writers {
		w.executor.Execute(func() {
			defer wg.Done()
			n, err := writer.Write(p)
			if err == nil && n != len(p) {
				// a well-behaved io.Writer reports an error on a short write;
				// do not let a misbehaving one be counted as a success
				err = io.ErrShortWrite
			}
			errs[i] = err
		})
	}
	// Wait before returning: the tasks read p, and the caller is free to reuse
	// the buffer as soon as Write returns.
	wg.Wait()

	// errors.Join drops nil entries and yields nil when nothing failed.
	if err := errors.Join(errs...); err != nil {
		return 0, err
	}
	return len(p), nil
}

// compile-time check that *parallelWriter implements io.Writer
var _ io.Writer = (*parallelWriter)(nil)
6 changes: 3 additions & 3 deletions syft/file/cataloger/filedigest/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func (i *Cataloger) Catalog(ctx context.Context, resolver file.Resolver, coordin

prog := catalogingProgress(int64(len(locations)))
for _, location := range locations {
result, err := i.catalogLocation(resolver, location)
result, err := i.catalogLocation(ctx, resolver, location)

if errors.Is(err, ErrUndigestableFile) {
continue
Expand Down Expand Up @@ -83,7 +83,7 @@ func (i *Cataloger) Catalog(ctx context.Context, resolver file.Resolver, coordin
return results, errs
}

func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Location) ([]file.Digest, error) {
func (i *Cataloger) catalogLocation(ctx context.Context, resolver file.Resolver, location file.Location) ([]file.Digest, error) {
meta, err := resolver.FileMetadataByLocation(location)
if err != nil {
return nil, err
Expand All @@ -100,7 +100,7 @@ func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Locati
}
defer internal.CloseAndLogError(contentReader, location.AccessPath)

digests, err := intFile.NewDigestsFromFile(contentReader, i.hashes)
digests, err := intFile.NewDigestsFromFile(ctx, contentReader, i.hashes)
if err != nil {
return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
}
Expand Down
6 changes: 3 additions & 3 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
}

// grab and assign digest for the entire archive
digests, err := getDigestsFromArchive(j.archivePath)
digests, err := getDigestsFromArchive(ctx, j.archivePath)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -472,15 +472,15 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
return pkgs, nil
}

func getDigestsFromArchive(archivePath string) ([]file.Digest, error) {
func getDigestsFromArchive(ctx context.Context, archivePath string) ([]file.Digest, error) {
archiveCloser, err := os.Open(archivePath)
if err != nil {
return nil, fmt.Errorf("unable to open archive path (%s): %w", archivePath, err)
}
defer internal.CloseAndLogError(archiveCloser, archivePath)

// grab and assign digest for the entire archive
digests, err := intFile.NewDigestsFromFile(archiveCloser, javaArchiveHashes)
digests, err := intFile.NewDigestsFromFile(ctx, archiveCloser, javaArchiveHashes)
if err != nil {
log.Warnf("failed to create digest for file=%q: %+v", archivePath, err)
}
Expand Down
3 changes: 2 additions & 1 deletion syft/source/filesource/file_source.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package filesource

import (
"context"
"crypto"
"fmt"
"os"
Expand Down Expand Up @@ -68,7 +69,7 @@ func New(cfg Config) (source.Source, error) {

defer fh.Close()

digests, err = intFile.NewDigestsFromFile(fh, cfg.DigestAlgorithms)
digests, err = intFile.NewDigestsFromFile(context.TODO(), fh, cfg.DigestAlgorithms)
if err != nil {
return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err)
}
Expand Down

0 comments on commit 3ba4421

Please sign in to comment.