diff --git a/go.mod b/go.mod index 2dad8f180a..bfa4552554 100644 --- a/go.mod +++ b/go.mod @@ -231,3 +231,5 @@ require ( gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect tags.cncf.io/container-device-interface/specs-go v0.8.0 // indirect ) + +replace github.com/containers/image/v5 => github.com/mtrmac/image/v5 v5.0.0-20250121152720-954c5dfe4c1e diff --git a/go.sum b/go.sum index 55c912c7a8..836c5da2e0 100644 --- a/go.sum +++ b/go.sum @@ -84,8 +84,6 @@ github.com/containers/conmon v2.0.20+incompatible h1:YbCVSFSCqFjjVwHTPINGdMX1F6J github.com/containers/conmon v2.0.20+incompatible/go.mod h1:hgwZ2mtuDrppv78a/cOBNiCm6O0UMWGx1mu7P00nu5I= github.com/containers/gvisor-tap-vsock v0.8.1 h1:88qkOjGMF9NmyoVG/orUw73mdwj3z4aOwEbRS01hF78= github.com/containers/gvisor-tap-vsock v0.8.1/go.mod h1:gjdY4JBWnynrXsxT8+OM7peEOd4FCZpoOWjSadHva0g= -github.com/containers/image/v5 v5.33.1-0.20250116221711-317a9885aed9 h1:oAYA8USA2AL4LS+SK9RGw3e6Lv/BEFI7+2Z7B9Bcjs0= -github.com/containers/image/v5 v5.33.1-0.20250116221711-317a9885aed9/go.mod h1:eWqddLtRxT+IYU/063W8rk2HzSS7LotGkROGNJ243CA= github.com/containers/libhvee v0.9.0 h1:5UxJMka1lDfxTeITA25Pd8QVVttJAG43eQS1Getw1tc= github.com/containers/libhvee v0.9.0/go.mod h1:p44VJd8jMIx3SRN1eM6PxfCEwXQE0lJ0dQppCAlzjPQ= github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01 h1:Qzk5C6cYglewc+UyGf6lc8Mj2UaPTHy/iF2De0/77CA= @@ -381,6 +379,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/mtrmac/image/v5 v5.0.0-20250121152720-954c5dfe4c1e h1:i1qdDW9pjf//TFHEXUtmGNvKIZSHfYP9J7Ucdltlj8Q= +github.com/mtrmac/image/v5 v5.0.0-20250121152720-954c5dfe4c1e/go.mod h1:5RRf6085TsUFX7pckP9O+Ey2LSObXVsETMD0CRag4CU= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY= diff --git a/vendor/github.com/containers/image/v5/storage/storage_dest.go b/vendor/github.com/containers/image/v5/storage/storage_dest.go index be906810fc..0af4523ffa 100644 --- a/vendor/github.com/containers/image/v5/storage/storage_dest.go +++ b/vendor/github.com/containers/image/v5/storage/storage_dest.go @@ -33,6 +33,7 @@ import ( graphdriver "github.com/containers/storage/drivers" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/chunked" + "github.com/containers/storage/pkg/chunked/toc" "github.com/containers/storage/pkg/ioutils" digest "github.com/opencontainers/go-digest" imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1" @@ -111,8 +112,10 @@ type storageImageDestinationLockProtected struct { // // Ideally we wouldn’t have blobDiffIDs, and we would just keep records by index, but the public API does not require the caller // to provide layer indices; and configs don’t have layer indices. blobDiffIDs needs to exist for those cases. - indexToDiffID map[int]digest.Digest // Mapping from layer index to DiffID - indexToTOCDigest map[int]digest.Digest // Mapping from layer index to a TOC Digest + indexToDiffID map[int]digest.Digest // Mapping from layer index to DiffID + // Mapping from layer index to a TOC Digest. + // If this is set, then either c/storage/pkg/chunked/toc.GetTOCDigest must have returned a value, or indexToDiffID must be set as well. + indexToTOCDigest map[int]digest.Digest blobDiffIDs map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs. CAREFUL: See the WARNING above. // Layer data: Before commitLayer is called, either at least one of (diffOutputs, indexToAdditionalLayer, filenames) @@ -343,6 +346,56 @@ func (f *zstdFetcher) GetBlobAt(chunks []chunked.ImageSourceChunk) (chan io.Read // If the call fails with ErrFallbackToOrdinaryLayerDownload, the caller can fall back to PutBlobWithOptions. // The fallback _must not_ be done otherwise. func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, options private.PutBlobPartialOptions) (_ private.UploadedBlob, retErr error) { + inputTOCDigest, err := toc.GetTOCDigest(srcInfo.Annotations) + if err != nil { + return private.UploadedBlob{}, err + } + + // The identity of partially-pulled layers is, as long as we keep compatibility with tar-like consumers, + // unfixably ambiguous: there are two possible “views” of the same file (same compressed digest), + // the traditional “view” that decompresses the primary stream and consumes a tar file, + // and the partial-pull “view” that starts with the TOC. + // The two “views” have two separate metadata sets and may refer to different parts of the blob for file contents; + // the direct way to ensure they are consistent would be to read the full primary stream (and authenticate it against + // the compressed digest), and ensure the metadata and layer contents exactly match the partially-pulled contents - + // making the partial pull completely pointless. + // + // Instead, for partial-pull-capable layers (with inputTOCDigest set), we require the image to “commit” + // to uncompressed layer digest values via the config's RootFS.DiffIDs array: + // they are already naturally computed for traditionally-pulled layers, and for partially-pulled layers we + // do the optimal partial pull, and then reconstruct the uncompressed tar stream just to (expensively) compute this digest. + // + // Layers which don’t support partial pulls (inputTOCDigest == "", incl. all schema1 layers) can be let through: + // the partial pull code will either not engage, or consume the full layer; and the rules of indexToTOCDigest / layerIdentifiedByTOC + // ensure the layer is identified by DiffID, i.e. using the traditional “view”. + // + // But if inputTOCDigest is set and the input image doesn't have RootFS.DiffIDs (the config is invalid for schema2/OCI), + // don't allow a partial pull, and fall back to PutBlobWithOptions. + // + // (The user can opt out of the DiffID commitment checking by a c/storage option, giving up security for performance, + // but we will still trigger the fall back here, and we will still enforce a DiffID match, so that the set of accepted images + // is the same in both cases, and so that users are not tempted to set the c/storage option to allow accepting some invalid images.) + var untrustedDiffID digest.Digest // "" if unknown + udid, err := s.untrustedLayerDiffID(options.LayerIndex) + if err != nil { + var diffIDUnknownErr untrustedLayerDiffIDUnknownError + switch { + case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable): + // PutBlobPartial is a private API, so all callers are within c/image, and should have called + // NoteOriginalOCIConfig first. + return private.UploadedBlob{}, fmt.Errorf("internal error: in PutBlobPartial, untrustedLayerDiffID returned errUntrustedLayerDiffIDNotYetAvailable") + case errors.As(err, &diffIDUnknownErr): + if inputTOCDigest != nil { + return private.UploadedBlob{}, private.NewErrFallbackToOrdinaryLayerDownload(err) + } + untrustedDiffID = "" // A schema1 image or a non-TOC layer with no ambiguity, let it through + default: + return private.UploadedBlob{}, err + } + } else { + untrustedDiffID = udid + } + fetcher := zstdFetcher{ chunkAccessor: chunkAccessor, ctx: ctx, @@ -382,7 +435,18 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces if err := func() error { // A scope for defer defer s.lock.Unlock() + // For true partial pulls, c/storage decides whether to compute the uncompressed digest based on an option in storage.conf + // (defaults to true, to avoid ambiguity.) + // c/storage can also be configured, to consume a layer not prepared for partial pulls (primarily to allow composefs conversion), + // and in that case it always consumes the full blob and always computes the uncompressed digest. if out.UncompressedDigest != "" { + // This is centrally enforced later, in commitLayer, but because we have the value available, + // we might just as well check immediately. + if untrustedDiffID != "" && out.UncompressedDigest != untrustedDiffID { + return fmt.Errorf("uncompressed digest of layer %q is %q, config claims %q", srcInfo.Digest.String(), + out.UncompressedDigest.String(), untrustedDiffID.String()) + } + s.lockProtected.indexToDiffID[options.LayerIndex] = out.UncompressedDigest if out.TOCDigest != "" { s.lockProtected.indexToTOCDigest[options.LayerIndex] = out.TOCDigest @@ -401,6 +465,11 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces options.Cache.RecordDigestUncompressedPair(out.CompressedDigest, out.UncompressedDigest) } } else { + // Sanity-check the defined rules for indexToTOCDigest. + if inputTOCDigest == nil { + return fmt.Errorf("internal error: PrepareStagedLayer returned a TOC-only identity for layer %q with no TOC digest", srcInfo.Digest.String()) + } + // Use diffID for layer identity if it is known. if uncompressedDigest := options.Cache.UncompressedDigestForTOC(out.TOCDigest); uncompressedDigest != "" { s.lockProtected.indexToDiffID[options.LayerIndex] = uncompressedDigest @@ -449,22 +518,43 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige if err := blobDigest.Validate(); err != nil { return false, private.ReusedBlob{}, fmt.Errorf("Can not check for a blob with invalid digest: %w", err) } - if options.TOCDigest != "" { + useTOCDigest := false // If set, (options.TOCDigest != "" && options.LayerIndex != nil) AND we can use options.TOCDigest safely. + if options.TOCDigest != "" && options.LayerIndex != nil { if err := options.TOCDigest.Validate(); err != nil { return false, private.ReusedBlob{}, fmt.Errorf("Can not check for a blob with invalid digest: %w", err) } + // Only consider using TOCDigest if we can avoid ambiguous image “views”, see the detailed comment in PutBlobPartial. + _, err := s.untrustedLayerDiffID(*options.LayerIndex) + if err != nil { + var diffIDUnknownErr untrustedLayerDiffIDUnknownError + switch { + case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable): + // options.TOCDigest is a private API, so all callers are within c/image, and should have called + // NoteOriginalOCIConfig first. + return false, private.ReusedBlob{}, fmt.Errorf("internal error: in TryReusingBlobWithOptions, untrustedLayerDiffID returned errUntrustedLayerDiffIDNotYetAvailable") + case errors.As(err, &diffIDUnknownErr): + logrus.Debugf("Not using TOC %q to look for layer reuse: %v", options.TOCDigest, err) + // But don’t abort entirely, keep useTOCDigest = false, try a blobDigest match. + default: + return false, private.ReusedBlob{}, err + } + } else { + useTOCDigest = true + } } // lock the entire method as it executes fairly quickly s.lock.Lock() defer s.lock.Unlock() - if options.SrcRef != nil && options.TOCDigest != "" && options.LayerIndex != nil { + if options.SrcRef != nil && useTOCDigest { // Check if we have the layer in the underlying additional layer store. aLayer, err := s.imageRef.transport.store.LookupAdditionalLayer(options.TOCDigest, options.SrcRef.String()) if err != nil && !errors.Is(err, storage.ErrLayerUnknown) { return false, private.ReusedBlob{}, fmt.Errorf(`looking for compressed layers with digest %q and labels: %w`, blobDigest, err) } else if err == nil { + // Compare the long comment in PutBlobPartial. We assume that the Additional Layer Store will, somehow, + // avoid layer “view” ambiguity. alsTOCDigest := aLayer.TOCDigest() if alsTOCDigest != options.TOCDigest { // FIXME: If alsTOCDigest is "", the Additional Layer Store FUSE server is probably just too old, and we could @@ -537,13 +627,13 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, uncompressedDigest, err) } if found, reused := reusedBlobFromLayerLookup(layers, blobDigest, size, options); found { - s.lockProtected.blobDiffIDs[blobDigest] = uncompressedDigest + s.lockProtected.blobDiffIDs[reused.Digest] = uncompressedDigest return true, reused, nil } } } - if options.TOCDigest != "" && options.LayerIndex != nil { + if useTOCDigest { // Check if we know which which UncompressedDigest the TOC digest resolves to, and we have a match for that. // Prefer this over LayersByTOCDigest because we can identify the layer using UncompressedDigest, maximizing reuse. uncompressedDigest := options.Cache.UncompressedDigestForTOC(options.TOCDigest) @@ -605,13 +695,22 @@ func reusedBlobFromLayerLookup(layers []storage.Layer, blobDigest digest.Digest, // trustedLayerIdentityData is a _consistent_ set of information known about a single layer. type trustedLayerIdentityData struct { - layerIdentifiedByTOC bool // true if we decided the layer should be identified by tocDigest, false if by diffID + // true if we decided the layer should be identified by tocDigest, false if by diffID + // This can only be true if c/storage/pkg/chunked/toc.GetTOCDigest returns a value. + layerIdentifiedByTOC bool diffID digest.Digest // A digest of the uncompressed full contents of the layer, or "" if unknown; must be set if !layerIdentifiedByTOC tocDigest digest.Digest // A digest of the TOC digest, or "" if unknown; must be set if layerIdentifiedByTOC blobDigest digest.Digest // A digest of the (possibly-compressed) layer as presented, or "" if unknown/untrusted. } +// logString() prints a representation of trusted suitable identifying a layer in logs and errors. +// The string is already quoted to expose malicious input and does not need to be quoted again. +// Note that it does not include _all_ of the contents. +func (trusted trustedLayerIdentityData) logString() string { + return fmt.Sprintf("%q/%q/%q", trusted.blobDigest, trusted.tocDigest, trusted.diffID) +} + // trustedLayerIdentityDataLocked returns a _consistent_ set of information for a layer with (layerIndex, blobDigest). // blobDigest is the (possibly-compressed) layer digest referenced in the manifest. // It returns (trusted, true) if the layer was found, or (_, false) if insufficient data is available. @@ -822,23 +921,6 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo) return nil } -// singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, -// and an indication whether the input already has the shape of a layer ID. -// It returns ("", false) if the layer is not found at all (which should never happen) -func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDigest digest.Digest) (string, bool) { - s.lock.Lock() - defer s.lock.Unlock() - - trusted, ok := s.trustedLayerIdentityDataLocked(layerIndex, blobDigest) - if !ok { - return "", false - } - if trusted.layerIdentifiedByTOC { - return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. - } - return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value. -} - // commitLayer commits the specified layer with the given index to the storage. // size can usually be -1; it can be provided if the layer is not known to be already present in blobDiffIDs. // @@ -850,16 +932,15 @@ func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDig // must guarantee that, at any given time, at most one goroutine may execute // `commitLayer()`. func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, size int64) (bool, error) { - // Already committed? Return early. if _, alreadyCommitted := s.indexToStorageID[index]; alreadyCommitted { return false, nil } - // Start with an empty string or the previous layer ID. Note that - // `s.indexToStorageID` can only be accessed by *one* goroutine at any - // given time. Hence, we don't need to lock accesses. - var parentLayer string + var parentLayer string // "" if no parent if index != 0 { + // s.indexToStorageID can only be written by this function, and our caller + // is responsible for ensuring it can be only be called by *one* goroutine at any + // given time. Hence, we don't need to lock accesses. prev, ok := s.indexToStorageID[index-1] if !ok { return false, fmt.Errorf("Internal error: commitLayer called with previous layer %d not committed yet", index-1) @@ -867,18 +948,17 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si parentLayer = prev } - // Carry over the previous ID for empty non-base layers. if info.emptyLayer { s.indexToStorageID[index] = parentLayer return false, nil } - // Check if there's already a layer with the ID that we'd give to the result of applying - // this layer blob to its parent, if it has one, or the blob's hex value otherwise. - // The layerID refers either to the DiffID or the digest of the TOC. - layerIDComponent, layerIDComponentStandalone := s.singleLayerIDComponent(index, info.digest) - if layerIDComponent == "" { - // Check if it's elsewhere and the caller just forgot to pass it to us in a PutBlob() / TryReusingBlob() / … + // Collect trusted parameters of the layer. + s.lock.Lock() + trusted, ok := s.trustedLayerIdentityDataLocked(index, info.digest) + s.lock.Unlock() + if !ok { + // Check if the layer exists already and the caller just (incorrectly) forgot to pass it to us in a PutBlob() / TryReusingBlob() / … // // Use none.NoCache to avoid a repeated DiffID lookup in the BlobInfoCache: a caller // that relies on using a blob digest that has never been seen by the store had better call @@ -902,23 +982,54 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, fmt.Errorf("error determining uncompressed digest for blob %q", info.digest.String()) } - layerIDComponent, layerIDComponentStandalone = s.singleLayerIDComponent(index, info.digest) - if layerIDComponent == "" { + s.lock.Lock() + trusted, ok = s.trustedLayerIdentityDataLocked(index, info.digest) + s.lock.Unlock() + if !ok { return false, fmt.Errorf("we have blob %q, but don't know its layer ID", info.digest.String()) } } - id := layerIDComponent - if !layerIDComponentStandalone || parentLayer != "" { - id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() + // Ensure that we always see the same “view” of a layer, as identified by the layer’s uncompressed digest, + // unless the user has explicitly opted out of this in storage.conf: see the more detailed explanation in PutBlobPartial. + if trusted.diffID != "" { + untrustedDiffID, err := s.untrustedLayerDiffID(index) + if err != nil { + var diffIDUnknownErr untrustedLayerDiffIDUnknownError + switch { + case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable): + logrus.Debugf("Skipping commit for layer %q, manifest not yet available for DiffID check", index) + return true, nil + case errors.As(err, &diffIDUnknownErr): + // If untrustedLayerDiffIDUnknownError, the input image is schema1, has no TOC annotations, + // so we could not have reused a TOC-identified layer nor have done a TOC-identified partial pull, + // i.e. there is no other “view” to worry about. Sanity-check that we really see the only expected view. + // + // Or, maybe, the input image is OCI, and has invalid/missing DiffID values in config. In that case + // we _must_ fail if we used a TOC-identified layer - but PutBlobPartial should have already + // refused to do a partial pull, so we are in an inconsistent state. + if trusted.layerIdentifiedByTOC { + return false, fmt.Errorf("internal error: layer %d for blob %s was identified by TOC, but we don't have a DiffID in config", + index, trusted.logString()) + } + // else a schema1 image or a non-TOC layer with no ambiguity, let it through + default: + return false, err + } + } else if trusted.diffID != untrustedDiffID { + return false, fmt.Errorf("layer %d (blob %s) does not match config's DiffID %q", index, trusted.logString(), untrustedDiffID) + } } + + id := layerID(parentLayer, trusted) + if layer, err2 := s.imageRef.transport.store.Layer(id); layer != nil && err2 == nil { // There's already a layer that should have the right contents, just reuse it. s.indexToStorageID[index] = layer.ID return false, nil } - layer, err := s.createNewLayer(index, info.digest, parentLayer, id) + layer, err := s.createNewLayer(index, trusted, parentLayer, id) if err != nil { return false, err } @@ -929,32 +1040,62 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } -// createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). +// layerID computes a layer (“chain”) ID for (a possibly-empty parentID, trusted) +func layerID(parentID string, trusted trustedLayerIdentityData) string { + var component string + mustHash := false + if trusted.layerIdentifiedByTOC { + // "@" is not a valid start of a digest.Digest.Encoded(), so this is unambiguous with the !layerIdentifiedByTOC case. + // But we _must_ hash this below to get a Digest.Encoded()-formatted value. + component = "@TOC=" + trusted.tocDigest.Encoded() + mustHash = true + } else { + component = trusted.diffID.Encoded() // This looks like chain IDs, and it uses the traditional value. + } + + if parentID == "" && !mustHash { + return component + } + return digest.Canonical.FromString(parentID + "+" + component).Encoded() +} + +// createNewLayer creates a new layer newLayerID for (index, trusted) on top of parentLayer (which may be ""). // If the layer cannot be committed yet, the function returns (nil, nil). -func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string) (*storage.Layer, error) { +func (s *storageImageDestination) createNewLayer(index int, trusted trustedLayerIdentityData, parentLayer, newLayerID string) (*storage.Layer, error) { s.lock.Lock() diffOutput, ok := s.lockProtected.diffOutputs[index] s.lock.Unlock() if ok { - // If we know a trusted DiffID value (e.g. from a BlobInfoCache), set it in diffOutput. + // Typically, we compute a trusted DiffID value to authenticate the layer contents, see the detailed explanation + // in PutBlobPartial. If the user has opted out of that, but we know a trusted DiffID value + // (e.g. from a BlobInfoCache), set it in diffOutput. // That way it will be persisted in storage even if the cache is deleted; also - // we can use the value below to avoid the untrustedUncompressedDigest logic (and notably - // the costly commit delay until a manifest is available). - s.lock.Lock() - if d, ok := s.lockProtected.indexToDiffID[index]; ok { - diffOutput.UncompressedDigest = d + // we can use the value below to avoid the untrustedUncompressedDigest logic. + if diffOutput.UncompressedDigest == "" && trusted.diffID != "" { + diffOutput.UncompressedDigest = trusted.diffID } - s.lock.Unlock() var untrustedUncompressedDigest digest.Digest if diffOutput.UncompressedDigest == "" { d, err := s.untrustedLayerDiffID(index) if err != nil { - return nil, err - } - if d == "" { - logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) - return nil, nil + var diffIDUnknownErr untrustedLayerDiffIDUnknownError + switch { + case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable): + logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) + return nil, nil + case errors.As(err, &diffIDUnknownErr): + // If untrustedLayerDiffIDUnknownError, the input image is schema1, has no TOC annotations, + // so we should have !trusted.layerIdentifiedByTOC, i.e. we should have set + // diffOutput.UncompressedDigest above in this function, at the very latest. + // + // Or, maybe, the input image is OCI, and has invalid/missing DiffID values in config. In that case + // commitLayer should have already refused this image when dealing with the “view” ambiguity. + return nil, fmt.Errorf("internal error: layer %d for blob %s was partially-pulled with unknown UncompressedDigest, but we don't have a DiffID in config", + index, trusted.logString()) + default: + return nil, err + } } untrustedUncompressedDigest = d @@ -1002,14 +1143,10 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // then we need to read the desired contents from a layer. var filename string var gotFilename bool - s.lock.Lock() - trusted, ok := s.trustedLayerIdentityDataLocked(index, layerDigest) - if ok && trusted.blobDigest != "" { + if trusted.blobDigest != "" { + s.lock.Lock() filename, gotFilename = s.lockProtected.filenames[trusted.blobDigest] - } - s.lock.Unlock() - if !ok { // We have already determined newLayerID, so the data must have been available. - return nil, fmt.Errorf("internal inconsistency: layer (%d, %q) not found", index, layerDigest) + s.lock.Unlock() } var trustedOriginalDigest digest.Digest // For storage.LayerOptions var trustedOriginalSize *int64 @@ -1036,7 +1173,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } } if layer == nil { - return nil, fmt.Errorf("layer for blob %q/%q/%q not found", trusted.blobDigest, trusted.tocDigest, trusted.diffID) + return nil, fmt.Errorf("layer for blob %s not found", trusted.logString()) } // Read the layer's contents. @@ -1046,7 +1183,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } diff, err2 := s.imageRef.transport.store.Diff("", layer.ID, diffOptions) if err2 != nil { - return nil, fmt.Errorf("reading layer %q for blob %q/%q/%q: %w", layer.ID, trusted.blobDigest, trusted.tocDigest, trusted.diffID, err2) + return nil, fmt.Errorf("reading layer %q for blob %s: %w", layer.ID, trusted.logString(), err2) } // Copy the layer diff to a file. Diff() takes a lock that it holds // until the ReadCloser that it returns is closed, and PutLayer() wants @@ -1125,7 +1262,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D UncompressedDigest: trusted.diffID, }, file) if err != nil && !errors.Is(err, storage.ErrDuplicateID) { - return nil, fmt.Errorf("adding layer with blob %q/%q/%q: %w", trusted.blobDigest, trusted.tocDigest, trusted.diffID, err) + return nil, fmt.Errorf("adding layer with blob %s: %w", trusted.logString(), err) } return layer, nil } @@ -1175,9 +1312,29 @@ func (u *uncommittedImageSource) GetBlob(ctx context.Context, info types.BlobInf return io.NopCloser(bytes.NewReader(blob)), int64(len(blob)), nil } +// errUntrustedLayerDiffIDNotYetAvailable is returned by untrustedLayerDiffID +// if the value is not yet available (but it can be available after s.manifests is set). +// This should only happen for external callers of the transport, not for c/image/copy. +// +// Callers of untrustedLayerDiffID before PutManifest must handle this error specially; +// callers after PutManifest can use the default, reporting an internal error. +var errUntrustedLayerDiffIDNotYetAvailable = errors.New("internal error: untrustedLayerDiffID has no value available and fallback was not implemented") + +// untrustedLayerDiffIDUnknownError is returned by untrustedLayerDiffID +// if the image’s format does not provide DiffIDs. +type untrustedLayerDiffIDUnknownError struct { + layerIndex int +} + +func (e untrustedLayerDiffIDUnknownError) Error() string { + return fmt.Sprintf("DiffID value for layer %d is unknown or explicitly empty", e.layerIndex) +} + // untrustedLayerDiffID returns a DiffID value for layerIndex from the image’s config. -// If the value is not yet available (but it can be available after s.manifets is set), it returns ("", nil). -// WARNING: We don’t validate the DiffID value against the layer contents; it must not be used for any deduplication. +// It may return two special errors, errUntrustedLayerDiffIDNotYetAvailable or untrustedLayerDiffIDUnknownError. +// +// WARNING: This function does not even validate that the returned digest has a valid format. +// WARNING: We don’t _always_ validate this DiffID value against the layer contents; it must not be used for any deduplication. func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.Digest, error) { // At this point, we are either inside the multi-threaded scope of HasThreadSafePutBlob, // nothing is writing to s.manifest yet, and s.untrustedDiffIDValues might have been set @@ -1190,7 +1347,7 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D // via NoteOriginalOCIConfig; this is a compatibility fallback for external callers // of the public types.ImageDestination. if s.manifest == nil { - return "", nil + return "", errUntrustedLayerDiffIDNotYetAvailable } ctx := context.Background() // This is all happening in memory, no need to worry about cancellation. @@ -1207,24 +1364,27 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D s.setUntrustedDiffIDValuesFromOCIConfig(configOCI) } + // Let entirely empty / missing diffIDs through; but if the array does exist, expect it to contain an entry for every layer, + // and fail hard on missing entries. This tries to account for completely naive image producers who just don’t fill DiffID, + // while still detecting incorrectly-built / confused images. + // + // schema1 images don’t have DiffID values in the config. + // Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values, so treat arrays of all-empty + // values as “DiffID unknown”. + // For schema 1, it is important to exit here, before the layerIndex >= len(s.untrustedDiffIDValues) + // check, because the format conversion from schema1 to OCI used to compute untrustedDiffIDValues + // changes the number of layres (drops items with Schema1V1Compatibility.ThrowAway). + if !slices.ContainsFunc(s.untrustedDiffIDValues, func(d digest.Digest) bool { + return d != "" + }) { + return "", untrustedLayerDiffIDUnknownError{ + layerIndex: layerIndex, + } + } if layerIndex >= len(s.untrustedDiffIDValues) { return "", fmt.Errorf("image config has only %d DiffID values, but a layer with index %d exists", len(s.untrustedDiffIDValues), layerIndex) } - res := s.untrustedDiffIDValues[layerIndex] - if res == "" { - // In practice, this should, right now, only matter for pulls of OCI images - // (this code path implies that we did a partial pull because a layer has an annotation), - // So, DiffIDs should always be set. - // - // It is, though, reachable by pulling an OCI image while converting to schema1, - // using a manual (skopeo copy) or something similar, not (podman pull). - // - // Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values. - // The current semantics of this function are that ("", nil) means "try again later", - // which is not what we want to happen; for now, turn that into an explicit error. - return "", fmt.Errorf("DiffID value for layer %d is unknown or explicitly empty", layerIndex) - } - return res, nil + return s.untrustedDiffIDValues[layerIndex], nil } // setUntrustedDiffIDValuesFromOCIConfig updates s.untrustedDiffIDvalues from config. diff --git a/vendor/modules.txt b/vendor/modules.txt index 40c1fc0813..1e9f288c3a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -251,7 +251,7 @@ github.com/containers/conmon/runner/config # github.com/containers/gvisor-tap-vsock v0.8.1 ## explicit; go 1.22.0 github.com/containers/gvisor-tap-vsock/pkg/types -# github.com/containers/image/v5 v5.33.1-0.20250116221711-317a9885aed9 +# github.com/containers/image/v5 v5.33.1-0.20250116221711-317a9885aed9 => github.com/mtrmac/image/v5 v5.0.0-20250121152720-954c5dfe4c1e ## explicit; go 1.22.8 github.com/containers/image/v5/copy github.com/containers/image/v5/directory @@ -1406,3 +1406,4 @@ tags.cncf.io/container-device-interface/pkg/parser # tags.cncf.io/container-device-interface/specs-go v0.8.0 ## explicit; go 1.19 tags.cncf.io/container-device-interface/specs-go +# github.com/containers/image/v5 => github.com/mtrmac/image/v5 v5.0.0-20250121152720-954c5dfe4c1e