From 9987bb507384e7b04ca958ee725f163e5f1e6343 Mon Sep 17 00:00:00 2001 From: Eric Cornelissen Date: Sat, 2 Mar 2024 17:25:44 +0100 Subject: [PATCH 1/2] Let `ghasum update` fail if the gha.sum file is corrupt Update the internals for parsing the version from a gha.sum files in order to make `ghasum update` fail if the file is corrupted. This change is enforced going forward through the added test case. --- SPECIFICATION.md | 2 +- internal/sumfile/sumfile.go | 78 +++++++++++++++++++------------------ testdata/update/error.txtar | 10 +++++ 3 files changed, 51 insertions(+), 39 deletions(-) diff --git a/SPECIFICATION.md b/SPECIFICATION.md index 8e9150e..9375f44 100644 --- a/SPECIFICATION.md +++ b/SPECIFICATION.md @@ -36,7 +36,7 @@ not possible to process shall exit immediately (it means the file may be edited by another process leading to an inconsistent state). If the file lock is obtained, the process shall first read it and parse it -partially to extract the sumfile version. If this fails the process shall exit +completely to extract the sumfile version. If this fails the process shall exit immediately. Else it shall recompute checksums for all actions used in the repository (see [Computing Checksums]) using the best available hashing algorithm. It shall then store them in a sumfile (see [Storing Checksums]) using diff --git a/internal/sumfile/sumfile.go b/internal/sumfile/sumfile.go index f2b82f9..be6b5b5 100644 --- a/internal/sumfile/sumfile.go +++ b/internal/sumfile/sumfile.go @@ -35,48 +35,13 @@ type Entry struct { // if there is a syntax error in the checksum file or if the checksum file is // otherwise corrupted (for example multiple checksum directives for one Entry). func Decode(stored string) ([]Entry, error) { - lines := strings.Split(stored, "\n") - headers, err := parseHeaders(lines) - if err != nil { - return nil, err - } - - version, err := extractVersion(headers) - if err != nil { - return nil, err - } - - if lines[len(lines)-1] != "" { - err = errors.New("missing final newline") - return nil, errors.Join(ErrSyntax, err) - } - - content := []string{} - if len(lines) > len(headers)+1 { - content = lines[len(headers)+1 : len(lines)-1] - } - - var checksums []Entry - switch version { - case Version1: - checksums, err = decodeV1(content) - default: - err = unknownVersion(version) - } - - if err != nil { - return nil, err - } - - return checksums, nil + _, entries, err := parseFile(stored) + return entries, err } // DecodeVersion parses the given checksum file content to extract the version. -// -// This function may succeed even if the checksum file is corrupted. func DecodeVersion(stored string) (Version, error) { - lines := strings.Split(stored, "\n") - headers, err := parseHeaders(lines) + headers, _, err := parseFile(stored) if err != nil { return 0, err } @@ -102,6 +67,43 @@ func Encode(version Version, checksums []Entry) (string, error) { return fmt.Sprintf("version %d\n\n%s", version, encoded), err } +func parseFile(stored string) (map[string]string, []Entry, error) { + lines := strings.Split(stored, "\n") + headers, err := parseHeaders(lines) + if err != nil { + return nil, nil, err + } + + version, err := extractVersion(headers) + if err != nil { + return headers, nil, err + } + + if lines[len(lines)-1] != "" { + err = errors.New("missing final newline") + return headers, nil, errors.Join(ErrSyntax, err) + } + + content := []string{} + if len(lines) > len(headers)+1 { + content = lines[len(headers)+1 : len(lines)-1] + } + + var entries []Entry + switch version { + case Version1: + entries, err = decodeV1(content) + default: + err = unknownVersion(version) + } + + if err != nil { + return headers, entries, err + } + + return headers, entries, nil +} + func parseHeaders(lines []string) (map[string]string, error) { headers := make(map[string]string, 0) for i, line := range lines { diff --git a/testdata/update/error.txtar b/testdata/update/error.txtar index c112e54..bb9e59e 100644 --- a/testdata/update/error.txtar +++ b/testdata/update/error.txtar @@ -1,3 +1,9 @@ +# Invalid sumfile +! exec ghasum update invalid/ +! stdout 'Ok' +stderr 'an unexpected error occurred' +stderr 'could not decode the checksum file' + # Repo without actions ! exec ghasum update no-actions/ ! stdout 'Ok' @@ -10,6 +16,10 @@ stderr 'ghasum has not yet been initialized' stderr 'an unexpected error occurred' stderr 'ghasum has not yet been initialized' +-- invalid/.github/workflows/gha.sum -- +version 1 + +this-action/is-missing@a-checksum -- no-actions/.keep -- This file exists to create a repo that does not use Github Actions. -- uninitialized/.github/workflows/workflow.yml -- From a2119d2a7304e0007db72ec114c23c604290dfea Mon Sep 17 00:00:00 2001 From: Eric Cornelissen Date: Sat, 2 Mar 2024 17:53:51 +0100 Subject: [PATCH 2/2] Add support for `ghasum update` to ignore errors in the sumfile Update the `ghasum update` CLI to accept a `-force` flag that can be used to force updating to ignore and fix any errors in the gha.sum file. --- SPECIFICATION.md | 17 ++++-- cmd/ghasum/main.go | 1 + cmd/ghasum/update.go | 6 +- internal/ghasum/operations.go | 10 ++- internal/sumfile/errors.go | 4 ++ internal/sumfile/sumfile.go | 18 +++--- testdata/update/force.txtar | 112 ++++++++++++++++++++++++++++++++++ 7 files changed, 152 insertions(+), 16 deletions(-) create mode 100644 testdata/update/force.txtar diff --git a/SPECIFICATION.md b/SPECIFICATION.md index 9375f44..1ef6ecf 100644 --- a/SPECIFICATION.md +++ b/SPECIFICATION.md @@ -37,11 +37,18 @@ by another process leading to an inconsistent state). If the file lock is obtained, the process shall first read it and parse it completely to extract the sumfile version. If this fails the process shall exit -immediately. Else it shall recompute checksums for all actions used in the -repository (see [Computing Checksums]) using the best available hashing -algorithm. It shall then store them in a sumfile (see [Storing Checksums]) using -the same sumfile version as before and releases the lock. As a consequence, this -adds missing and removes redundant checksums from the sumfile. +immediately unless the `-force` flag is used (see details below). Else it shall +recompute checksums for all actions used in the repository (see [Computing +Checksums]) using the best available hashing algorithm. It shall then store them +in a sumfile (see [Storing Checksums]) using the same sumfile version as before +and releases the lock. As a consequence, this adds missing and removes redundant +checksums from the sumfile. + +With the `-force` flag the process will ignore errors in the sumfile and fix +those while updating. If the sumfile version can still be determined from +sumfile it will be used, otherwise the latest available version is used instead. +This option is disabled by default to avoid unknowingly fixing syntax errors in +a sumfile, which is an important fact to know about from a security perspective. This process does not verify any of the checksums currently in the sumfile. diff --git a/cmd/ghasum/main.go b/cmd/ghasum/main.go index 1b67aa8..860ba97 100644 --- a/cmd/ghasum/main.go +++ b/cmd/ghasum/main.go @@ -46,6 +46,7 @@ const ( const ( flagNameCache = "cache" + flagNameForce = "force" flagNameNoCache = "no-cache" ) diff --git a/cmd/ghasum/update.go b/cmd/ghasum/update.go index 33c1b1b..5bca35a 100644 --- a/cmd/ghasum/update.go +++ b/cmd/ghasum/update.go @@ -27,6 +27,7 @@ import ( func cmdUpdate(argv []string) error { var ( flags = flag.NewFlagSet(cmdNameUpdate, flag.ContinueOnError) + flagForce = flags.Bool(flagNameForce, false, "") flagCache = flags.String(flagNameCache, "", "") flagNoCache = flags.Bool(flagNameNoCache, false, "") ) @@ -57,7 +58,7 @@ func cmdUpdate(argv []string) error { Cache: c, } - if err := ghasum.Update(&cfg); err != nil { + if err := ghasum.Update(&cfg, *flagForce); err != nil { return errors.Join(errUnexpected, err) } @@ -79,6 +80,9 @@ The available flags are: The location of the cache directory. This is where ghasum stores and looks up repositories it needs. Defaults to a directory named .ghasum in the user's home directory. + -force + Force updating the gha.sum file, ignoring errors and fixing them in the + process. -no-cache Disable the use of the cache. Makes the -cache flag ineffective.` } diff --git a/internal/ghasum/operations.go b/internal/ghasum/operations.go index 3414293..192c16a 100644 --- a/internal/ghasum/operations.go +++ b/internal/ghasum/operations.go @@ -94,7 +94,7 @@ func Initialize(cfg *Config) error { // Update will update the ghasum checksums for the repository specified in the // given configuration. -func Update(cfg *Config) error { +func Update(cfg *Config, force bool) error { file, err := open(cfg.Path) if err != nil { return err @@ -111,7 +111,13 @@ func Update(cfg *Config) error { version, err := version(raw) if err != nil { - return err + if !force { + return errors.Join(ErrSumfileRead, err) + } + + if errors.Is(err, sumfile.ErrHeaders) || errors.Is(err, sumfile.ErrVersion) { + version = sumfile.VersionLatest + } } actions, err := find(cfg) diff --git a/internal/sumfile/errors.go b/internal/sumfile/errors.go index 000314e..c6fa76d 100644 --- a/internal/sumfile/errors.go +++ b/internal/sumfile/errors.go @@ -28,4 +28,8 @@ var ( // ErrSyntax is the error when a checksum file has a syntax error. ErrSyntax = errors.New("syntax error") + + // ErrVersion is the error when the version is invalid or missing from the + // checksum file. + ErrVersion = errors.New("version error") ) diff --git a/internal/sumfile/sumfile.go b/internal/sumfile/sumfile.go index be6b5b5..a3e7def 100644 --- a/internal/sumfile/sumfile.go +++ b/internal/sumfile/sumfile.go @@ -41,12 +41,13 @@ func Decode(stored string) ([]Entry, error) { // DecodeVersion parses the given checksum file content to extract the version. func DecodeVersion(stored string) (Version, error) { - headers, _, err := parseFile(stored) - if err != nil { - return 0, err + headers, _, parseErr := parseFile(stored) + version, _ := extractVersion(headers) + if parseErr != nil { + return version, parseErr } - return extractVersion(headers) + return version, nil } // Encode encodes the given checksums according to the specification of the @@ -114,7 +115,7 @@ func parseHeaders(lines []string) (map[string]string, error) { j := strings.IndexRune(line, ' ') if j == -1 { err := fmt.Errorf("invalid header on line %d", i) - return nil, errors.Join(ErrSyntax, err) + return nil, errors.Join(ErrHeaders, err) } key := line[0:j] @@ -129,18 +130,19 @@ func extractVersion(headers map[string]string) (Version, error) { version, ok := headers["version"] if !ok { err := errors.New("version not found") - return 0, errors.Join(ErrSyntax, err) + return 0, errors.Join(ErrVersion, err) } rawVersion, err := strconv.Atoi(version) if err != nil { err := errors.New("version not a number") - return 0, errors.Join(ErrSyntax, err) + return 0, errors.Join(ErrVersion, err) } return Version(rawVersion), nil } func unknownVersion(version Version) error { - return fmt.Errorf("unknown version %d", version) + err := fmt.Errorf("unknown version %d", version) + return errors.Join(ErrVersion, err) } diff --git a/testdata/update/force.txtar b/testdata/update/force.txtar new file mode 100644 index 0000000..691ff5b --- /dev/null +++ b/testdata/update/force.txtar @@ -0,0 +1,112 @@ +# Error in entries +exec ghasum update -cache .cache/ -force entries/ +stdout 'Ok' +! stderr . +cmp entries/.github/workflows/gha.sum .want/gha.sum + +# Error in headers +exec ghasum update -cache .cache/ -force headers/ +stdout 'Ok' +! stderr . +cmp headers/.github/workflows/gha.sum .want/gha.sum + +# Error in version +exec ghasum update -cache .cache/ -force nan-version/ +stdout 'Ok' +! stderr . +cmp nan-version/.github/workflows/gha.sum .want/gha.sum + +# Invalid version +exec ghasum update -cache .cache/ -force invalid-version/ +stdout 'Ok' +! stderr . +cmp invalid-version/.github/workflows/gha.sum .want/gha.sum + +# Missing version +exec ghasum update -cache .cache/ -force no-version/ +stdout 'Ok' +! stderr . +cmp no-version/.github/workflows/gha.sum .want/gha.sum + +-- entries/.github/workflows/gha.sum -- +version 1 + +this-action/is-missing@a-checksum +-- entries/.github/workflows/workflow.yml -- +name: Example workflow +on: [push] + +jobs: + example: + name: example + runs-on: ubuntu-22.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4.1.1 +-- headers/.github/workflows/gha.sum -- +invalid-header + +actions/checkout@v4.1.0 GGAV+/JnlPt41B9iINyvcX5z6a4ue+NblmwiDNVORz0= +-- headers/.github/workflows/workflow.yml -- +name: Example workflow +on: [push] + +jobs: + example: + name: example + runs-on: ubuntu-22.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4.1.1 +-- invalid-version/.github/workflows/gha.sum -- +version 0 + +actions/checkout@v4.1.0 GGAV+/JnlPt41B9iINyvcX5z6a4ue+NblmwiDNVORz0= +-- invalid-version/.github/workflows/workflow.yml -- +name: Example workflow +on: [push] + +jobs: + example: + name: example + runs-on: ubuntu-22.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4.1.1 +-- nan-version/.github/workflows/gha.sum -- +version not-a-number + +actions/checkout@v4.1.0 GGAV+/JnlPt41B9iINyvcX5z6a4ue+NblmwiDNVORz0= +-- nan-version/.github/workflows/workflow.yml -- +name: Example workflow +on: [push] + +jobs: + example: + name: example + runs-on: ubuntu-22.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4.1.1 +-- no-version/.github/workflows/gha.sum -- +version-header-missing 1 + +actions/checkout@v4.1.0 GGAV+/JnlPt41B9iINyvcX5z6a4ue+NblmwiDNVORz0= +-- no-version/.github/workflows/workflow.yml -- +name: Example workflow +on: [push] + +jobs: + example: + name: example + runs-on: ubuntu-22.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4.1.1 +-- .cache/actions/checkout/v4.1.1/.keep -- +This file exist to avoid fetching "actions/checkout@v4.1.1" and give the Action +a unique checksum. +-- .want/gha.sum -- +version 1 + +actions/checkout@v4.1.1 KsR9XQGH7ydTl01vlD8pIZrXhkzXyjcnzhmP+/KaJZI=