diff --git a/ipld/car/.gitattributes b/ipld/car/.gitattributes
new file mode 100644
index 0000000000..6f95229927
--- /dev/null
+++ b/ipld/car/.gitattributes
@@ -0,0 +1,2 @@
+# To prevent CRLF breakages on Windows for fragile files, like testdata.
+* -text
diff --git a/ipld/car/.github/workflows/go-fuzz.yml b/ipld/car/.github/workflows/go-fuzz.yml
new file mode 100644
index 0000000000..830fc9ec29
--- /dev/null
+++ b/ipld/car/.github/workflows/go-fuzz.yml
@@ -0,0 +1,46 @@
+on: [ push, pull_request ] # run on every push and every pull request
+name: Go Fuzz
+
+jobs:
+ v1: # runs go test in the default working directory of the checkout
+ strategy:
+ fail-fast: true # cancel the remaining matrix jobs as soon as one fails
+ matrix:
+ target: [ "CarReader" ] # each entry is run as -fuzz=Fuzz<target>
+ runs-on: ubuntu-latest
+ name: Fuzz V1 ${{ matrix.target }}
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ submodules: recursive
+ - uses: actions/setup-go@v2
+ with:
+ go-version: 1.18.x
+ - name: Go information
+ run: |
+ go version
+ go env
+ - name: Run Fuzzing for 1m
+ run: go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m .
+ v2: # same steps as v1, but go test runs after changing into the v2 directory
+ strategy:
+ fail-fast: true # cancel the remaining matrix jobs as soon as one fails
+ matrix:
+ target: [ "BlockReader", "Reader", "Inspect" ] # each entry is run as -fuzz=Fuzz<target>
+ runs-on: ubuntu-latest
+ name: Fuzz V2 ${{ matrix.target }}
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ submodules: recursive
+ - uses: actions/setup-go@v2
+ with:
+ go-version: 1.18.x
+ - name: Go information
+ run: |
+ go version
+ go env
+ - name: Run Fuzzing for 1m
+ run: |
+ cd v2
+ go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m .
diff --git a/ipld/car/.gitignore b/ipld/car/.gitignore
new file mode 100644
index 0000000000..b3f7c18ae7
--- /dev/null
+++ b/ipld/car/.gitignore
@@ -0,0 +1,4 @@
+car/car
+main
+coverage.txt
+dist/
diff --git a/ipld/car/LICENSE.md b/ipld/car/LICENSE.md
new file mode 100644
index 0000000000..2fa16a1537
--- /dev/null
+++ b/ipld/car/LICENSE.md
@@ -0,0 +1,229 @@
+The contents of this repository are Copyright (c) corresponding authors and
+contributors, licensed under the `Permissive License Stack` meaning either of:
+
+- Apache-2.0 Software License: https://www.apache.org/licenses/LICENSE-2.0
+ ([...4tr2kfsq](https://dweb.link/ipfs/bafkreiankqxazcae4onkp436wag2lj3ccso4nawxqkkfckd6cg4tr2kfsq))
+
+- MIT Software License: https://opensource.org/licenses/MIT
+ ([...vljevcba](https://dweb.link/ipfs/bafkreiepofszg4gfe2gzuhojmksgemsub2h4uy2gewdnr35kswvljevcba))
+
+You may not use the contents of this repository except in compliance
+with one of the listed Licenses. For an extended clarification of the
+intent behind the choice of Licensing please refer to
+https://protocol.ai/blog/announcing-the-permissive-license-stack/
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the terms listed in this notice is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied. See each License for the specific language
+governing permissions and limitations under that License.
+
+
+`SPDX-License-Identifier: Apache-2.0 OR MIT`
+
+Verbatim copies of both licenses are included below:
+
+Apache-2.0 Software License
+
+```
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+```
+
+
+MIT Software License
+
+```
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+```
+
diff --git a/ipld/car/README.md b/ipld/car/README.md
new file mode 100644
index 0000000000..af75fa1d73
--- /dev/null
+++ b/ipld/car/README.md
@@ -0,0 +1,71 @@
+go-car (go!)
+==================
+
+[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](https://protocol.ai)
+[![](https://img.shields.io/badge/project-ipld-orange.svg?style=flat-square)](https://github.com/ipld/ipld)
+[![](https://img.shields.io/badge/matrix-%23ipld-blue.svg?style=flat-square)](https://matrix.to/#/#ipld:ipfs.io)
+[![Go Reference](https://pkg.go.dev/badge/github.com/ipld/go-car.svg)](https://pkg.go.dev/github.com/ipld/go-car)
+[![Coverage Status](https://codecov.io/gh/ipld/go-car/branch/master/graph/badge.svg)](https://codecov.io/gh/ipld/go-car/branch/master)
+
+> Work with car (Content addressed ARchive) files!
+
+This is a Golang implementation of the [CAR specifications](https://ipld.io/specs/transport/car/), both [CARv1](https://ipld.io/specs/transport/car/carv1/) and [CARv2](https://ipld.io/specs/transport/car/carv2/).
+
+As a format, there are two major module versions:
+
+* [`go-car/v2`](v2/) is geared towards reading and writing CARv2 files, and also
+ supports consuming CARv1 files and using CAR files as an IPFS blockstore.
+* `go-car`, in the root directory, only supports reading and writing CARv1 files.
+
+Most users should use v2, especially for new software, since the v2 API transparently supports both CAR formats.
+
+## Usage / Installation
+
+This repository provides a `car` binary that can be used for creating, extracting, and working with car files.
+
+To install the latest version of `car`, run:
+```shell script
+go install github.com/ipld/go-car/cmd/car@latest
+```
+
+More information about this binary is available in [`cmd/car`](cmd/car/).
+
+
+## Features
+
+[CARv2](v2) features:
+* [Generate index](https://pkg.go.dev/github.com/ipld/go-car/v2#GenerateIndex) from an existing CARv1 file
+* [Wrap](https://pkg.go.dev/github.com/ipld/go-car/v2#WrapV1) CARv1 files into a CARv2 with automatic index generation.
+* Random-access to blocks in a CAR file given their CID via [Read-Only blockstore](https://pkg.go.dev/github.com/ipld/go-car/v2/blockstore#NewReadOnly) API, with transparent support for both CARv1 and CARv2
+* Write CARv2 files via [Read-Write blockstore](https://pkg.go.dev/github.com/ipld/go-car/v2/blockstore#OpenReadWrite) API, with support for appending blocks to an existing CARv2 file, and resumption from a partially written CARv2 file.
+* Individual access to [inner CARv1 data payload](https://pkg.go.dev/github.com/ipld/go-car/v2#Reader.DataReader) and [index](https://pkg.go.dev/github.com/ipld/go-car/v2#Reader.IndexReader) of a CARv2 file via the `Reader` API.
+
+
+## API Documentation
+
+See docs on [pkg.go.dev](https://pkg.go.dev/github.com/ipld/go-car).
+
+## Examples
+
+Here is a shortlist of other examples from the documentation:
+
+* [Wrap an existing CARv1 file into an indexed CARv2 file](https://pkg.go.dev/github.com/ipld/go-car/v2#example-WrapV1File)
+* [Open read-only blockstore from a CAR file](https://pkg.go.dev/github.com/ipld/go-car/v2/blockstore#example-OpenReadOnly)
+* [Open read-write blockstore from a CAR file](https://pkg.go.dev/github.com/ipld/go-car/v2/blockstore#example-OpenReadWrite)
+* [Read the index from an existing CARv2 file](https://pkg.go.dev/github.com/ipld/go-car/v2/index#example-ReadFrom)
+* [Extract the index from a CARv2 file and store it as a separate file](https://pkg.go.dev/github.com/ipld/go-car/v2/index#example-WriteTo)
+
+## Maintainers
+
+* [Masih Derkani](https://github.com/masih)
+* [Will Scott](https://github.com/willscott)
+
+## Contribute
+
+PRs are welcome!
+
+When editing the Readme, please conform to the [standard-readme](https://github.com/RichardLitt/standard-readme) specification.
+
+## License
+
+Apache-2.0/MIT © Protocol Labs
diff --git a/ipld/car/car.go b/ipld/car/car.go
new file mode 100644
index 0000000000..026bbb7359
--- /dev/null
+++ b/ipld/car/car.go
@@ -0,0 +1,223 @@
+package car
+
+import (
+ "bufio"
+ "context"
+ "fmt"
+ "io"
+
+ cid "github.com/ipfs/go-cid"
+ cbor "github.com/ipfs/go-ipld-cbor"
+ format "github.com/ipfs/go-ipld-format"
+ blocks "github.com/ipfs/go-libipfs/blocks"
+ "github.com/ipfs/go-merkledag"
+
+ util "github.com/ipld/go-car/util"
+)
+
+// init registers CarHeader with the go-ipld-cbor package; this file later
+// encodes headers with cbor.DumpObject and decodes them with cbor.DecodeInto.
+func init() {
+ cbor.RegisterCborType(CarHeader{})
+}
+
+// Store is the write-side interface LoadCar needs from its destination: the
+// ability to store one block at a time.
+type Store interface {
+ Put(context.Context, blocks.Block) error
+}
+
+// ReadStore is a source of blocks that can be looked up by CID.
+type ReadStore interface {
+ Get(context.Context, cid.Cid) (blocks.Block, error)
+}
+
+// CarHeader is the CBOR-encoded header that WriteHeader emits and ReadHeader
+// parses at the start of a CAR stream.
+type CarHeader struct {
+ Roots []cid.Cid // root CIDs; NewCarReader rejects a header with none
+ Version uint64 // format version; NewCarReader accepts only 1
+}
+
+// carWriter bundles the state used while a DAG is being written out: where
+// nodes are fetched from, where their bytes go, and how links are chosen.
+type carWriter struct {
+ ds format.NodeGetter // nodes are fetched from here by CID
+ w io.Writer // destination for the encoded blocks
+ walk WalkFunc // selects which links of a written node are followed
+}
+
+// WalkFunc reports which links of a node the CAR writer should traverse after
+// writing that node. Returning an error aborts the traversal.
+type WalkFunc func(format.Node) ([]*format.Link, error)
+
+// WriteCar writes a version 1 CAR containing the DAGs reachable from roots to
+// w, following every link of every node (DefaultWalkFunc). It is shorthand
+// for WriteCarWithWalker with DefaultWalkFunc.
+func WriteCar(ctx context.Context, ds format.NodeGetter, roots []cid.Cid, w io.Writer, options ...merkledag.WalkOption) error {
+ return WriteCarWithWalker(ctx, ds, roots, w, DefaultWalkFunc, options...)
+}
+
+// WriteCarWithWalker writes a version 1 CAR to w. It first writes a header
+// listing roots, then walks the DAG under each root with merkledag.Walk,
+// fetching nodes from ds and writing each one as it is visited. walk decides
+// which links of a written node are followed; options are passed through to
+// merkledag.Walk.
+//
+// A failure to write the header is returned wrapped, so the underlying error
+// stays reachable through errors.Is and errors.As. Errors from the walk are
+// returned unchanged.
+func WriteCarWithWalker(ctx context.Context, ds format.NodeGetter, roots []cid.Cid, w io.Writer, walk WalkFunc, options ...merkledag.WalkOption) error {
+	h := &CarHeader{
+		Roots:   roots,
+		Version: 1,
+	}
+
+	if err := WriteHeader(h, w); err != nil {
+		return fmt.Errorf("failed to write car header: %w", err)
+	}
+
+	cw := &carWriter{ds: ds, w: w, walk: walk}
+	// A single visited set is shared by all roots, so a CID reachable from
+	// more than one root is only visited once.
+	seen := cid.NewSet()
+	for _, r := range roots {
+		if err := merkledag.Walk(ctx, cw.enumGetLinks, r, seen.Visit, options...); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// DefaultWalkFunc is the WalkFunc used by WriteCar: it follows every link the
+// node reports and never fails.
+func DefaultWalkFunc(nd format.Node) ([]*format.Link, error) {
+ return nd.Links(), nil
+}
+
+// ReadHeader reads one section from br with util.LdRead and decodes it as a
+// CBOR CarHeader.
+//
+// Read errors are returned unchanged. Decode errors are wrapped with the
+// prefix "invalid header: " and remain reachable through errors.Is and
+// errors.As. No validation of Version or Roots happens here; NewCarReader
+// performs those checks.
+func ReadHeader(br *bufio.Reader) (*CarHeader, error) {
+	hb, err := util.LdRead(br)
+	if err != nil {
+		return nil, err
+	}
+
+	var ch CarHeader
+	if err := cbor.DecodeInto(hb, &ch); err != nil {
+		return nil, fmt.Errorf("invalid header: %w", err)
+	}
+
+	return &ch, nil
+}
+
+// WriteHeader CBOR-encodes h and writes the result to w with util.LdWrite.
+// An encoding failure is returned before anything is written.
+func WriteHeader(h *CarHeader, w io.Writer) error {
+	encoded, err := cbor.DumpObject(h)
+	if err == nil {
+		err = util.LdWrite(w, encoded)
+	}
+	return err
+}
+
+// HeaderSize CBOR-encodes h and returns what util.LdSize reports for that
+// encoding (presumably the size WriteHeader would emit; confirm against
+// util.LdSize). If encoding fails it returns 0 and the error.
+func HeaderSize(h *CarHeader) (uint64, error) {
+	encoded, err := cbor.DumpObject(h)
+	if err != nil {
+		return 0, err
+	}
+
+	size := util.LdSize(encoded)
+	return size, nil
+}
+
+// enumGetLinks is the link getter handed to merkledag.Walk. For the given CID
+// it fetches the node, writes it to the output, and then returns the links
+// chosen by the writer's WalkFunc. A fetch or write failure stops before the
+// WalkFunc is consulted.
+func (cw *carWriter) enumGetLinks(ctx context.Context, c cid.Cid) ([]*format.Link, error) {
+	node, err := cw.ds.Get(ctx, c)
+	if err == nil {
+		err = cw.writeNode(ctx, node)
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	return cw.walk(node)
+}
+
+// writeNode writes nd to the output as a single util.LdWrite call carrying the
+// node's CID bytes followed by its raw data. ctx is currently unused.
+func (cw *carWriter) writeNode(ctx context.Context, nd format.Node) error {
+ return util.LdWrite(cw.w, nd.Cid().Bytes(), nd.RawData())
+}
+
+// CarReader reads blocks sequentially from a CAR stream whose header has
+// already been parsed and validated by NewCarReader.
+type CarReader struct {
+ br *bufio.Reader // buffered view of the underlying stream, positioned after the header
+ Header *CarHeader // the header read when the reader was created
+}
+
+// NewCarReader wraps r in a buffered reader, reads the CAR header, and returns
+// a CarReader ready to yield blocks via Next.
+//
+// It fails if the header cannot be read or decoded, if the header version is
+// anything other than 1, or if the header lists no roots. The version is
+// checked before the roots.
+func NewCarReader(r io.Reader) (*CarReader, error) {
+	buffered := bufio.NewReader(r)
+	header, err := ReadHeader(buffered)
+	if err != nil {
+		return nil, err
+	}
+
+	switch {
+	case header.Version != 1:
+		return nil, fmt.Errorf("invalid car version: %d", header.Version)
+	case len(header.Roots) == 0:
+		return nil, fmt.Errorf("empty car, no roots")
+	}
+
+	reader := &CarReader{
+		br:     buffered,
+		Header: header,
+	}
+	return reader, nil
+}
+
+// Next reads the next block from the stream and verifies its integrity: the
+// data is re-hashed using the prefix of the CID stored alongside it, and the
+// block is rejected if the resulting CID differs from the stored one.
+//
+// Errors from util.ReadNode are returned unchanged; the loaders in this file
+// compare the result against io.EOF to detect the end of the stream.
+func (cr *CarReader) Next() (blocks.Block, error) {
+	claimed, payload, err := util.ReadNode(cr.br)
+	if err != nil {
+		return nil, err
+	}
+
+	actual, err := claimed.Prefix().Sum(payload)
+	if err != nil {
+		return nil, err
+	}
+
+	if matches := actual.Equals(claimed); !matches {
+		return nil, fmt.Errorf("mismatch in content integrity, name: %s, data: %s", claimed, actual)
+	}
+
+	return blocks.NewBlockWithCid(payload, claimed)
+}
+
+// batchStore is an optional capability of a Store: writing many blocks in one
+// call. LoadCar uses it, when the given Store implements it, to load in batches.
+type batchStore interface {
+ PutMany(context.Context, []blocks.Block) error
+}
+
+// LoadCar reads a CAR from r and stores every block it contains in s,
+// returning the CAR's header on success.
+//
+// If s also implements PutMany, blocks are written in batches; otherwise they
+// are written one at a time with Put. Header validation errors from
+// NewCarReader and any read or store error abort the load.
+func LoadCar(ctx context.Context, s Store, r io.Reader) (*CarHeader, error) {
+	cr, err := NewCarReader(r)
+	if err != nil {
+		return nil, err
+	}
+
+	batcher, canBatch := s.(batchStore)
+	if !canBatch {
+		return loadCarSlow(ctx, s, cr)
+	}
+	return loadCarFast(ctx, batcher, cr)
+}
+
+// loadCarFast drains cr into s using PutMany. Blocks accumulate in a pending
+// slice that is flushed once it holds more than 1000 blocks, and once more at
+// io.EOF if anything is left. It returns the reader's header on a clean end
+// of stream, or nil and the first read or store error.
+//
+// NOTE(review): after a flush the pending slice is truncated and its backing
+// array reused, so this assumes PutMany does not retain the slice it is given.
+func loadCarFast(ctx context.Context, s batchStore, cr *CarReader) (*CarHeader, error) {
+	var pending []blocks.Block
+	for {
+		blk, err := cr.Next()
+		switch {
+		case err == io.EOF:
+			// End of stream: write out whatever is still queued.
+			if len(pending) > 0 {
+				if err := s.PutMany(ctx, pending); err != nil {
+					return nil, err
+				}
+			}
+			return cr.Header, nil
+		case err != nil:
+			return nil, err
+		}
+
+		pending = append(pending, blk)
+		if len(pending) <= 1000 {
+			continue
+		}
+
+		if err := s.PutMany(ctx, pending); err != nil {
+			return nil, err
+		}
+		pending = pending[:0]
+	}
+}
+
+// loadCarSlow drains cr into s one block at a time using Put. It returns the
+// reader's header when Next reports io.EOF, or nil and the first read or
+// store error.
+func loadCarSlow(ctx context.Context, s Store, cr *CarReader) (*CarHeader, error) {
+	for {
+		blk, err := cr.Next()
+		if err == io.EOF {
+			return cr.Header, nil
+		}
+		if err != nil {
+			return nil, err
+		}
+
+		if putErr := s.Put(ctx, blk); putErr != nil {
+			return nil, putErr
+		}
+	}
+}
diff --git a/ipld/car/car_test.go b/ipld/car/car_test.go
new file mode 100644
index 0000000000..3c6340be3e
--- /dev/null
+++ b/ipld/car/car_test.go
@@ -0,0 +1,229 @@
+package car_test
+
+import (
+ "bytes"
+ "context"
+ "encoding/hex"
+ "io"
+ "strings"
+ "testing"
+
+ "github.com/ipfs/go-cid"
+ format "github.com/ipfs/go-ipld-format"
+ "github.com/ipfs/go-merkledag"
+ dstest "github.com/ipfs/go-merkledag/test"
+ car "github.com/ipld/go-car"
+)
+
+// assertAddNodes adds every node in nds to ds and fails the test immediately
+// on the first error. It is marked as a test helper so a failure is reported
+// at the caller's line rather than inside this function.
+func assertAddNodes(t *testing.T, ds format.DAGService, nds ...format.Node) {
+	t.Helper()
+	ctx := context.Background()
+	for _, nd := range nds {
+		if err := ds.Add(ctx, nd); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// TestRoundtrip builds a small three-level DAG, writes it to a CAR with
+// WriteCar, loads that CAR into a fresh blockstore with LoadCar, and checks
+// that the header carries the single expected root and that every node of the
+// DAG ended up in the blockstore.
+func TestRoundtrip(t *testing.T) {
+	ctx := context.Background()
+	dserv := dstest.Mock()
+	a := merkledag.NewRawNode([]byte("aaaa"))
+	b := merkledag.NewRawNode([]byte("bbbb"))
+	c := merkledag.NewRawNode([]byte("cccc"))
+
+	// mustLink fails the test if a link cannot be added, instead of silently
+	// continuing with an incomplete DAG.
+	mustLink := func(parent *merkledag.ProtoNode, name string, child format.Node) {
+		t.Helper()
+		if err := parent.AddNodeLink(name, child); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// Children are fully linked before they are linked into their parent.
+	nd1 := &merkledag.ProtoNode{}
+	mustLink(nd1, "cat", a)
+
+	nd2 := &merkledag.ProtoNode{}
+	mustLink(nd2, "first", nd1)
+	mustLink(nd2, "dog", b)
+
+	nd3 := &merkledag.ProtoNode{}
+	mustLink(nd3, "second", nd2)
+	mustLink(nd3, "bear", c)
+
+	assertAddNodes(t, dserv, a, b, c, nd1, nd2, nd3)
+
+	buf := new(bytes.Buffer)
+	if err := car.WriteCar(ctx, dserv, []cid.Cid{nd3.Cid()}, buf); err != nil {
+		t.Fatal(err)
+	}
+
+	bserv := dstest.Bserv()
+	ch, err := car.LoadCar(ctx, bserv.Blockstore(), buf)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(ch.Roots) != 1 {
+		t.Fatal("should have one root")
+	}
+
+	if !ch.Roots[0].Equals(nd3.Cid()) {
+		t.Fatal("got wrong cid")
+	}
+
+	bs := bserv.Blockstore()
+	for _, nd := range []format.Node{a, b, c, nd1, nd2, nd3} {
+		has, err := bs.Has(ctx, nd.Cid())
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if !has {
+			t.Fatal("should have cid in blockstore")
+		}
+	}
+}
+
+// fixtureStr is the hex encoding of a clean single-block, single-root CAR.
+const fixtureStr = "3aa265726f6f747381d82a58250001711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80b6776657273696f6e012c01711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80ba165646f646779f5"
+
+// TestEOFHandling checks how CarReader.Next behaves at the end of a stream:
+// a clean end must yield io.EOF, while trailing garbage (a dangling varint
+// byte, or a length prefix followed by too few bytes) must yield
+// io.ErrUnexpectedEOF. In every case no block may be returned with the error.
+func TestEOFHandling(t *testing.T) {
+	fixture, err := hex.DecodeString(fixtureStr)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Clamp capacity to length so each append below allocates its own copy
+	// instead of writing into backing storage shared between subtests.
+	fixture = fixture[:len(fixture):len(fixture)]
+
+	// load opens a reader over byts and consumes the one valid block, leaving
+	// the reader positioned at whatever follows it.
+	load := func(t *testing.T, byts []byte) *car.CarReader {
+		t.Helper()
+		cr, err := car.NewCarReader(bytes.NewReader(byts))
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		blk, err := cr.Next()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if blk.Cid().String() != "bafyreiavd7u6opdcm6tqmddpnrgmvfb4enxuwglhenejmchnwqvixd5ibm" {
+			t.Fatal("unexpected CID")
+		}
+
+		return cr
+	}
+
+	t.Run("CleanEOF", func(t *testing.T) {
+		cr := load(t, fixture)
+
+		blk, err := cr.Next()
+		if err != io.EOF {
+			t.Fatal("Didn't get expected EOF")
+		}
+		if blk != nil {
+			t.Fatal("EOF returned unexpected block")
+		}
+	})
+
+	t.Run("BadVarint", func(t *testing.T) {
+		// 160 has its continuation bit set, so the varint never terminates.
+		fixtureBadVarint := append(fixture, 160)
+		cr := load(t, fixtureBadVarint)
+
+		blk, err := cr.Next()
+		if err != io.ErrUnexpectedEOF {
+			t.Fatal("Didn't get unexpected EOF")
+		}
+		if blk != nil {
+			t.Fatal("EOF returned unexpected block")
+		}
+	})
+
+	t.Run("TruncatedBlock", func(t *testing.T) {
+		// A length prefix of 100 followed by only two bytes of payload.
+		fixtureTruncatedBlock := append(fixture, 100, 0, 0)
+		cr := load(t, fixtureTruncatedBlock)
+
+		blk, err := cr.Next()
+		if err != io.ErrUnexpectedEOF {
+			t.Fatal("Didn't get unexpected EOF")
+		}
+		if blk != nil {
+			t.Fatal("EOF returned unexpected block")
+		}
+	})
+}
+
+// TestBadHeaders feeds NewCarReader a series of hex-encoded headers that must
+// each be rejected, and checks the resulting error message. Every case sets
+// exactly one of errStr (the full expected message) or errPfx (an expected
+// prefix). A sanity subtest first confirms that a well-formed header with the
+// same root is accepted.
+func TestBadHeaders(t *testing.T) {
+	testCases := []struct {
+		name   string
+		hex    string
+		errStr string // either the whole error string
+		errPfx string // or just the prefix
+	}{
+		{
+			"{version:2}",
+			"0aa16776657273696f6e02",
+			"invalid car version: 2",
+			"",
+		},
+		{
+			// an unfortunate error because we don't use a pointer
+			"{roots:[baeaaaa3bmjrq]}",
+			"13a165726f6f747381d82a480001000003616263",
+			"invalid car version: 0",
+			"",
+		}, {
+			"{version:\"1\",roots:[baeaaaa3bmjrq]}",
+			"1da265726f6f747381d82a4800010000036162636776657273696f6e6131",
+			"",
+			"invalid header: ",
+		}, {
+			"{version:1}",
+			"0aa16776657273696f6e01",
+			"empty car, no roots",
+			"",
+		}, {
+			"{version:1,roots:{cid:baeaaaa3bmjrq}}",
+			"20a265726f6f7473a163636964d82a4800010000036162636776657273696f6e01",
+			"",
+			"invalid header: ",
+		}, {
+			"{version:1,roots:[baeaaaa3bmjrq],blip:true}",
+			"22a364626c6970f565726f6f747381d82a4800010000036162636776657273696f6e01",
+			"",
+			"invalid header: ",
+		}, {
+			"[1,[]]",
+			"03820180",
+			"",
+			"invalid header: ",
+		}, {
+			// this is an unfortunate error, it'd be nice to catch it better but it's
+			// very unlikely we'd ever see this in practice
+			"null",
+			"01f6",
+			"",
+			"invalid car version: 0",
+		},
+	}
+
+	// makeCar decodes the hex fixture and returns whatever error NewCarReader
+	// reports for it; a malformed fixture fails the test at the caller's line.
+	makeCar := func(t *testing.T, byts string) error {
+		t.Helper()
+		fixture, err := hex.DecodeString(byts)
+		if err != nil {
+			t.Fatal(err)
+		}
+		_, err = car.NewCarReader(bytes.NewReader(fixture))
+		return err
+	}
+
+	t.Run("Sanity check {version:1,roots:[baeaaaa3bmjrq]}", func(t *testing.T) {
+		err := makeCar(t, "1ca265726f6f747381d82a4800010000036162636776657273696f6e01")
+		if err != nil {
+			t.Fatal(err)
+		}
+	})
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			err := makeCar(t, tc.hex)
+			if err == nil {
+				t.Fatal("expected error from bad header, didn't get one")
+			}
+			if tc.errStr != "" {
+				if err.Error() != tc.errStr {
+					t.Fatalf("bad error: got %q, want %q", err.Error(), tc.errStr)
+				}
+			} else {
+				if !strings.HasPrefix(err.Error(), tc.errPfx) {
+					t.Fatalf("bad error: got %q, want prefix %q", err.Error(), tc.errPfx)
+				}
+			}
+		})
+	}
+}
diff --git a/ipld/car/cmd/car/README.md b/ipld/car/cmd/car/README.md
new file mode 100644
index 0000000000..995850fd73
--- /dev/null
+++ b/ipld/car/cmd/car/README.md
@@ -0,0 +1,38 @@
+car - The CLI tool
+==================
+
+[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](https://protocol.ai)
+[![](https://img.shields.io/badge/project-ipld-orange.svg?style=flat-square)](https://github.com/ipld/ipld)
+[![](https://img.shields.io/badge/matrix-%23ipld-blue.svg?style=flat-square)](https://matrix.to/#/#ipld:ipfs.io)
+
+> A CLI for interacting with car files
+
+## Usage
+
+```
+USAGE:
+ car [global options] command [command options] [arguments...]
+
+COMMANDS:
+ compile compile a car file from a debug patch
+ create, c Create a car file
+ debug debug a car file
+ detach-index Detach an index to a detached file
+ extract, x Extract the contents of a car when the car encodes UnixFS data
+ filter, f Filter the CIDs in a car
+ get-block, gb Get a block out of a car
+ get-dag, gd Get a dag out of a car
+ index, i write out the car with an index
+ inspect verifies a car and prints a basic report about its contents
+ list, l, ls List the CIDs in a car
+ root Get the root CID of a car
+ verify, v Verify a CAR is wellformed
+ help, h Shows a list of commands or help for one command
+```
+
+## Install
+
+To install the latest version of `car`, run:
+```shell script
+go install github.com/ipld/go-car/cmd/car@latest
+```
diff --git a/ipld/car/cmd/car/car.go b/ipld/car/cmd/car/car.go
new file mode 100644
index 0000000000..d2356484cc
--- /dev/null
+++ b/ipld/car/cmd/car/car.go
@@ -0,0 +1,218 @@
+package main
+
+import (
+ "log"
+ "os"
+
+ "github.com/multiformats/go-multicodec"
+ "github.com/urfave/cli/v2"
+)
+
+// main exits the process with the status code returned by main1.
+func main() { os.Exit(main1()) }
+
+// main1 declares the "car" command-line application (one cli.Command per
+// subcommand, each bound to its Action handler and flags), runs it against
+// os.Args, and returns the process exit code: 1 if app.Run returned an error,
+// which is also logged, and 0 otherwise.
+func main1() int {
+ app := &cli.App{
+ Name: "car",
+ Usage: "Utility for working with car files",
+ Commands: []*cli.Command{
+ {
+ Name: "compile",
+ Usage: "compile a car file from a debug patch",
+ Action: CompileCar,
+ Flags: []cli.Flag{
+ &cli.StringFlag{
+ Name: "output",
+ Aliases: []string{"o", "f"},
+ Usage: "The file to write to",
+ TakesFile: true,
+ },
+ },
+ },
+ {
+ Name: "create",
+ Usage: "Create a car file",
+ Aliases: []string{"c"},
+ Action: CreateCar,
+ Flags: []cli.Flag{
+ &cli.StringFlag{
+ Name: "file",
+ Aliases: []string{"f", "output", "o"},
+ Usage: "The car file to write to",
+ TakesFile: true,
+ },
+ &cli.IntFlag{
+ Name: "version",
+ Value: 2,
+ Usage: "Write output as a v1 or v2 format car",
+ },
+ },
+ },
+ {
+ Name: "debug",
+ Usage: "debug a car file",
+ Action: DebugCar,
+ Flags: []cli.Flag{
+ &cli.StringFlag{
+ Name: "output",
+ Aliases: []string{"o", "f"},
+ Usage: "The file to write to",
+ TakesFile: true,
+ },
+ },
+ },
+ {
+ Name: "detach-index",
+ Usage: "Detach an index to a detached file",
+ Action: DetachCar,
+ Subcommands: []*cli.Command{{
+ Name: "list",
+ Usage: "List a detached index",
+ Action: DetachCarList,
+ }},
+ },
+ {
+ Name: "extract",
+ Aliases: []string{"x"},
+ Usage: "Extract the contents of a car when the car encodes UnixFS data",
+ Action: ExtractCar,
+ ArgsUsage: "[output directory|-]",
+ Flags: []cli.Flag{
+ &cli.StringFlag{
+ Name: "file",
+ Aliases: []string{"f"},
+ Usage: "The car file to extract from, or stdin if omitted",
+ Required: false,
+ TakesFile: true,
+ },
+ &cli.StringFlag{
+ Name: "path",
+ Aliases: []string{"p"},
+ Usage: "The unixfs path to extract",
+ Required: false,
+ },
+ &cli.BoolFlag{
+ Name: "verbose",
+ Aliases: []string{"v"},
+ Usage: "Include verbose information about extracted contents",
+ },
+ },
+ },
+ {
+ Name: "filter",
+ Aliases: []string{"f"},
+ Usage: "Filter the CIDs in a car",
+ Action: FilterCar,
+ Flags: []cli.Flag{
+ &cli.StringFlag{
+ Name: "cid-file",
+ Usage: "A file to read CIDs from",
+ TakesFile: true,
+ },
+ &cli.BoolFlag{
+ Name: "append",
+ Usage: "Append cids to an existing output file",
+ },
+ },
+ },
+ {
+ Name: "get-block",
+ Aliases: []string{"gb"},
+ Usage: "Get a block out of a car",
+ Action: GetCarBlock,
+ },
+ {
+ Name: "get-dag",
+ Aliases: []string{"gd"},
+ Usage: "Get a dag out of a car",
+ Action: GetCarDag,
+ Flags: []cli.Flag{
+ &cli.StringFlag{
+ Name: "selector",
+ Aliases: []string{"s"},
+ Usage: "A selector over the dag",
+ },
+ &cli.BoolFlag{
+ Name: "strict",
+ Usage: "Fail if the selector finds links to blocks not in the original car",
+ },
+ &cli.IntFlag{
+ Name: "version",
+ Value: 2,
+ Usage: "Write output as a v1 or v2 format car",
+ },
+ },
+ },
+ {
+ Name: "index",
+ Aliases: []string{"i"},
+ Usage: "write out the car with an index",
+ Action: IndexCar,
+ Flags: []cli.Flag{
+ &cli.StringFlag{
+ Name: "codec",
+ Aliases: []string{"c"},
+ Usage: "The type of index to write",
+ Value: multicodec.CarMultihashIndexSorted.String(),
+ },
+ &cli.IntFlag{
+ Name: "version",
+ Value: 2,
+ Usage: "Write output as a v1 or v2 format car",
+ },
+ },
+ Subcommands: []*cli.Command{{
+ Name: "create",
+ Usage: "Write out a detached index",
+ Action: CreateIndex,
+ }},
+ },
+ {
+ Name: "inspect",
+ Usage: "verifies a car and prints a basic report about its contents",
+ Action: InspectCar,
+ Flags: []cli.Flag{
+ &cli.BoolFlag{
+ Name: "full",
+ Value: false,
+ Usage: "Check that the block data hash digests match the CIDs",
+ },
+ },
+ },
+ {
+ Name: "list",
+ Aliases: []string{"l", "ls"},
+ Usage: "List the CIDs in a car",
+ Action: ListCar,
+ Flags: []cli.Flag{
+ &cli.BoolFlag{
+ Name: "verbose",
+ Aliases: []string{"v"},
+ Usage: "Include verbose information about contained blocks",
+ },
+ &cli.BoolFlag{
+ Name: "unixfs",
+ Usage: "List unixfs filesystem from the root of the car",
+ },
+ },
+ },
+ {
+ Name: "root",
+ Usage: "Get the root CID of a car",
+ Action: CarRoot,
+ },
+ {
+ Name: "verify",
+ Aliases: []string{"v"},
+ Usage: "Verify a CAR is wellformed",
+ Action: VerifyCar,
+ },
+ },
+ }
+
+ // Returning a code instead of calling os.Exit here leaves the exit itself to main.
+ err := app.Run(os.Args)
+ if err != nil {
+ log.Println(err)
+ return 1
+ }
+ return 0
+}
diff --git a/ipld/car/cmd/car/compile.go b/ipld/car/cmd/car/compile.go
new file mode 100644
index 0000000000..f6a1b49791
--- /dev/null
+++ b/ipld/car/cmd/car/compile.go
@@ -0,0 +1,463 @@
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "fmt"
+ "io"
+ "os"
+ "regexp"
+ "strings"
+ "unicode/utf8"
+
+ "github.com/ipfs/go-cid"
+ blocks "github.com/ipfs/go-libipfs/blocks"
+ carv1 "github.com/ipld/go-car"
+ "github.com/ipld/go-car/util"
+ carv2 "github.com/ipld/go-car/v2"
+ "github.com/ipld/go-car/v2/blockstore"
+ "github.com/ipld/go-ipld-prime"
+ "github.com/ipld/go-ipld-prime/codec"
+ "github.com/ipld/go-ipld-prime/codec/dagjson"
+ "github.com/ipld/go-ipld-prime/datamodel"
+ "github.com/ipld/go-ipld-prime/linking"
+ cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+ "github.com/ipld/go-ipld-prime/node/basicnode"
+ "github.com/ipld/go-ipld-prime/storage/memstore"
+ "github.com/polydawn/refmt/json"
+ "github.com/urfave/cli/v2"
+ "golang.org/x/exp/slices"
+)
+
+// plusLineRegex matches the "+++" line of a patch chunk. Capture group 1 is
+// the mode word, group 2 is an optional non-space token followed by a single
+// space, and group 3 is the trailing word (parsePatch compares it against the
+// CID read from the preceding "---" line).
+var plusLineRegex = regexp.MustCompile(`^\+\+\+ ([\w-]+) ([\S]+ )?([\w]+)$`)
+
+// CompileCar is a command to translate a human-debuggable patch-like format
+// (the format written by DebugCar) back into a car file.
+//
+// The patch is read from the first positional argument, or from stdin when no
+// argument is given. Its first line is "car compile [--v2 ]<name>", followed by
+// optional "root <cid>" lines and then one chunk per block. Every block is
+// re-encoded, so an edited chunk may end up with a new CID; references to the
+// old CID found in other chunks are rewritten to the new one. The result is
+// written as a CARv1 stream (stdout, or the file named by the "output" flag)
+// or, when the header carries "--v2", as a CARv2 file.
+func CompileCar(c *cli.Context) error {
+	var err error
+	inStream := os.Stdin
+	if c.Args().Len() >= 1 {
+		inStream, err = os.Open(c.Args().First())
+		if err != nil {
+			return err
+		}
+		defer inStream.Close()
+	}
+
+	// parse headers.
+	br := bufio.NewReader(inStream)
+	header, _, err := br.ReadLine()
+	if err != nil {
+		return err
+	}
+
+	v2 := strings.HasPrefix(string(header), "car compile --v2 ")
+	rest := strings.TrimPrefix(string(header), "car compile ")
+	if v2 {
+		rest = strings.TrimPrefix(rest, "--v2 ")
+	}
+	carName := strings.TrimSpace(rest)
+
+	roots := make([]cid.Cid, 0)
+	for {
+		peek, err := br.Peek(4)
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			return err
+		}
+		// The first "--- " line starts the block chunks.
+		if bytes.Equal(peek, []byte("--- ")) {
+			break
+		}
+		rootLine, _, err := br.ReadLine()
+		if err != nil {
+			return err
+		}
+		if strings.HasPrefix(string(rootLine), "root ") {
+			var rCidS string
+			if _, err := fmt.Sscanf(string(rootLine), "root %s", &rCidS); err != nil {
+				return fmt.Errorf("could not parse root line (%s): %w", rootLine, err)
+			}
+			rCid, err := cid.Parse(rCidS)
+			if err != nil {
+				return err
+			}
+			roots = append(roots, rCid)
+		}
+	}
+
+	// parse blocks.
+	cidList := make([]cid.Cid, 0)
+	rawBlocks := make(map[cid.Cid][]byte)
+	rawCodecs := make(map[cid.Cid]string)
+
+	for {
+		nextCid, mode, nextBlk, err := parsePatch(br)
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			return err
+		}
+		rawBlocks[nextCid] = nextBlk
+		rawCodecs[nextCid] = mode
+		cidList = append(cidList, nextCid)
+	}
+
+	// Re-create the original IPLD encoded blocks, but allowing for modifications of the
+	// patch data which may generate new CIDs; so we track the DAG relationships and
+	// rewrite CIDs in other referring blocks where they get updated.
+
+	// structure as a tree: childMap[parent] lists the blocks whose CID appears
+	// (in string or binary form) inside parent's patch data.
+	childMap := make(map[cid.Cid][]cid.Cid, len(rawBlocks))
+	for blkCid := range rawBlocks {
+		childMap[blkCid] = make([]cid.Cid, 0)
+	}
+	for child := range rawBlocks {
+		textRef := []byte(child.String())
+		binRef := child.Bytes()
+		for parent, blk := range rawBlocks {
+			if child.Equals(parent) {
+				continue
+			}
+			if bytes.Contains(blk, textRef) || bytes.Contains(blk, binRef) {
+				childMap[parent] = append(childMap[parent], child)
+			}
+		}
+	}
+
+	// re-parse/re-build CIDs, leaves first, so parents always see the final
+	// CIDs of their children before they are serialized themselves.
+	outBlocks := make(map[cid.Cid][]byte)
+	for len(childMap) > 0 {
+		progressed := false
+		for origCid, kids := range childMap {
+			if len(kids) > 0 {
+				continue
+			}
+			progressed = true
+
+			// compile to final cid
+			finalCid, finalBlk, err := serializeBlock(c.Context, origCid.Prefix(), rawCodecs[origCid], rawBlocks[origCid])
+			if err != nil {
+				return err
+			}
+			outBlocks[finalCid] = finalBlk
+			if idx := slices.Index(cidList, origCid); idx >= 0 {
+				cidList[idx] = finalCid
+			}
+
+			// update other remaining nodes of the new cid.
+			for otherCid, otherKids := range childMap {
+				for i, otherKid := range otherKids {
+					if !otherKid.Equals(origCid) {
+						continue
+					}
+					if !finalCid.Equals(origCid) {
+						// update block
+						rawBlocks[otherCid] = bytes.ReplaceAll(rawBlocks[otherCid], origCid.Bytes(), finalCid.Bytes())
+						rawBlocks[otherCid] = bytes.ReplaceAll(rawBlocks[otherCid], []byte(origCid.String()), []byte(finalCid.String()))
+					}
+					// remove from childMap
+					childMap[otherCid] = append(otherKids[0:i], otherKids[i+1:]...)
+					break // to next child map entry.
+				}
+			}
+
+			delete(childMap, origCid)
+		}
+		// Without a leaf to finalize, the remaining blocks reference each other
+		// and another pass would never terminate.
+		if !progressed {
+			return fmt.Errorf("cannot compile: remaining blocks reference each other cyclically")
+		}
+	}
+
+	if !v2 {
+		// write output
+		outStream := os.Stdout
+		if c.IsSet("output") {
+			outFileName := c.String("output")
+			if outFileName == "" {
+				outFileName = carName
+			}
+			outFile, err := os.Create(outFileName)
+			if err != nil {
+				return err
+			}
+			defer outFile.Close()
+			outStream = outFile
+		}
+
+		if err := carv1.WriteHeader(&carv1.CarHeader{
+			Roots:   roots,
+			Version: 1,
+		}, outStream); err != nil {
+			return err
+		}
+		// Write blocks in the order they appeared in the patch so the output is
+		// deterministic; each compiled block is written once.
+		written := make(map[cid.Cid]struct{}, len(outBlocks))
+		for _, bc := range cidList {
+			blk, ok := outBlocks[bc]
+			if !ok {
+				continue
+			}
+			if _, dup := written[bc]; dup {
+				continue
+			}
+			written[bc] = struct{}{}
+			if err := util.LdWrite(outStream, bc.Bytes(), blk); err != nil {
+				return err
+			}
+		}
+		// Any compiled block no longer referenced from cidList is still written,
+		// so the set of blocks in the output never shrinks.
+		for bc, blk := range outBlocks {
+			if _, done := written[bc]; done {
+				continue
+			}
+			if err := util.LdWrite(outStream, bc.Bytes(), blk); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	outFileName := c.String("output")
+	if outFileName == "" {
+		outFileName = carName
+	}
+
+	if outFileName == "-" && !c.IsSet("output") {
+		return fmt.Errorf("cannot stream carv2's to stdout")
+	}
+	bs, err := blockstore.OpenReadWrite(outFileName, roots)
+	if err != nil {
+		return err
+	}
+	for _, bc := range cidList {
+		ob, err := blocks.NewBlockWithCid(outBlocks[bc], bc)
+		if err != nil {
+			return err
+		}
+		if err := bs.Put(c.Context, ob); err != nil {
+			return err
+		}
+	}
+	return bs.Finalize()
+}
+
+// serializeBlock decodes raw according to encoding ("dag-json" or "raw"),
+// stores the resulting node through an in-memory link system using the CID
+// prefix passed as codec, and returns the CID produced by the store together
+// with the bytes that were stored under it. Any other encoding is an error.
+func serializeBlock(ctx context.Context, codec cid.Prefix, encoding string, raw []byte) (cid.Cid, []byte, error) {
+	mem := memstore.Store{Bag: map[string][]byte{}}
+	lsys := cidlink.DefaultLinkSystem()
+	lsys.SetReadStorage(&mem)
+	lsys.SetWriteStorage(&mem)
+
+	nb := basicnode.Prototype.Any.NewBuilder()
+	switch encoding {
+	case "dag-json":
+		if err := dagjson.Decode(nb, bytes.NewBuffer(raw)); err != nil {
+			return cid.Undef, nil, err
+		}
+	case "raw":
+		if err := nb.AssignBytes(raw); err != nil {
+			return cid.Undef, nil, err
+		}
+	default:
+		return cid.Undef, nil, fmt.Errorf("unknown encoding: %s", encoding)
+	}
+
+	lnk, err := lsys.Store(linking.LinkContext{Ctx: ctx}, cidlink.LinkPrototype{Prefix: codec}, nb.Build())
+	if err != nil {
+		return cid.Undef, nil, err
+	}
+	finalCid := lnk.(cidlink.Link).Cid
+	stored, getErr := mem.Get(ctx, finalCid.KeyString())
+	return finalCid, stored, getErr
+}
+
+// DebugCar is a command to translate between a car file, and a human-debuggable patch-like format.
+//
+// The car is read from the first positional argument, or from stdin (recorded
+// as "-" in the header) when no argument is given. Output goes to stdout or to
+// the file named by the "output" flag: a "car compile" header line, one
+// "root <cid>" line per root, then one patch chunk per block.
+func DebugCar(c *cli.Context) error {
+	var err error
+	inStream := os.Stdin
+	inFile := "-"
+	if c.Args().Len() >= 1 {
+		inFile = c.Args().First()
+		inStream, err = os.Open(inFile)
+		if err != nil {
+			return err
+		}
+		defer inStream.Close()
+	}
+
+	rd, err := carv2.NewBlockReader(inStream)
+	if err != nil {
+		return err
+	}
+
+	// patch the header.
+	outStream := os.Stdout
+	if c.IsSet("output") {
+		outFileName := c.String("output")
+		outFile, err := os.Create(outFileName)
+		if err != nil {
+			return err
+		}
+		defer outFile.Close()
+		outStream = outFile
+	}
+
+	header := "car compile "
+	if rd.Version == 2 {
+		header += "--v2 "
+	}
+	if _, err := outStream.WriteString(header + inFile + "\n"); err != nil {
+		return err
+	}
+	for _, rt := range rd.Roots {
+		if _, err := fmt.Fprintf(outStream, "root %s\n", rt.String()); err != nil {
+			return err
+		}
+	}
+
+	// patch each block. io.EOF is the normal end of the car (including a car
+	// with no blocks at all); every other read error is reported.
+	for {
+		nxt, err := rd.Next()
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		chunk, err := patch(c.Context, nxt.Cid(), nxt.RawData())
+		if err != nil {
+			return err
+		}
+		if _, err := outStream.Write(chunk); err != nil {
+			return err
+		}
+	}
+}
+
+// patch renders a single block as one chunk of the patch-like debug format.
+//
+// The block is loaded through a link system backed by an in-memory store. A
+// bytes node that isPrintable accepts is emitted verbatim in "raw" mode; every
+// other node is emitted as indented dag-json. The chunk consists of a
+// "--- <cid>" line, a "+++ <mode>[ (no-end-cr)] <cid>" line, an "@@" line
+// carrying the newline count, the content, and one extra trailing newline.
+// "(no-end-cr)" marks content that does not itself end in a newline, which
+// includes empty content.
+func patch(ctx context.Context, c cid.Cid, blk []byte) ([]byte, error) {
+	ls := cidlink.DefaultLinkSystem()
+	store := memstore.Store{Bag: map[string][]byte{}}
+	ls.SetReadStorage(&store)
+	ls.SetWriteStorage(&store)
+	if err := store.Put(ctx, c.KeyString(), blk); err != nil {
+		return nil, err
+	}
+	node, err := ls.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: c}, basicnode.Prototype.Any)
+	if err != nil {
+		return nil, fmt.Errorf("could not load block: %q", err)
+	}
+
+	outMode := "dag-json"
+	if node.Kind() == datamodel.Kind_Bytes && isPrintable(node) {
+		outMode = "raw"
+	}
+	finalBuf := bytes.NewBuffer(nil)
+
+	switch outMode {
+	case "dag-json":
+		opts := dagjson.EncodeOptions{
+			EncodeLinks: true,
+			EncodeBytes: true,
+			MapSortMode: codec.MapSortMode_Lexical,
+		}
+		if err := dagjson.Marshal(node, json.NewEncoder(finalBuf, json.EncodeOptions{Line: []byte{'\n'}, Indent: []byte{'\t'}}), opts); err != nil {
+			return nil, err
+		}
+	case "raw":
+		nb, err := node.AsBytes()
+		if err != nil {
+			return nil, err
+		}
+		finalBuf.Write(nb)
+	}
+
+	// figure out number of lines.
+	content := finalBuf.Bytes()
+	lcnt := bytes.Count(content, []byte("\n"))
+	// HasSuffix is safe on empty content (e.g. a zero-length raw block), where
+	// indexing the last byte would panic.
+	crStr := " (no-end-cr)"
+	if bytes.HasSuffix(content, []byte("\n")) {
+		crStr = ""
+	}
+
+	outBuf := bytes.NewBuffer(nil)
+	outBuf.WriteString("--- " + c.String() + "\n")
+	outBuf.WriteString("+++ " + outMode + crStr + " " + c.String() + "\n")
+	fmt.Fprintf(outBuf, "@@ -%d,%d +%d,%d @@\n", 0, lcnt, 0, lcnt)
+	outBuf.Write(content)
+	outBuf.WriteString("\n")
+	return outBuf.Bytes(), nil
+}
+
+// isPrintable reports whether n can be written verbatim into a patch chunk:
+// its bytes must be readable, be valid UTF-8, contain none of the listed
+// control characters, and must not contain "\n--- ", which the patch parser
+// would take for the start of the next chunk.
+func isPrintable(n ipld.Node) bool {
+	const controlChars = "\x00\x01\x02\x03\x04\x05\x06\x07\x08" +
+		"\x10\x11\x12\x13\x14" +
+		"\x16\x17\x18\x19" +
+		"\x1c\x1d\x1e\x1f"
+
+	data, err := n.AsBytes()
+	switch {
+	case err != nil:
+		return false
+	case !utf8.Valid(data):
+		return false
+	case bytes.ContainsAny(data, controlChars):
+		return false
+	}
+	// check if would confuse the 'end of patch' checker.
+	return !bytes.Contains(data, []byte("\n--- "))
+}
+
+// parsePatch reads one chunk of the patch-like format from br.
+//
+// A chunk is a "--- <cid>" line, a "+++ <mode> [flag ]<cid>" line whose CID
+// must equal the first one, an "@@" line, and then the block content, which
+// runs until the next line starting with "--- " or the end of input. It
+// returns the parsed CID, the mode word and the content with the chunk's final
+// line ending removed; when the "+++" line carries "(no-end-cr)" one further
+// trailing line ending is removed if present. io.EOF is returned unchanged
+// when no further chunk is available.
+func parsePatch(br *bufio.Reader) (cid.Cid, string, []byte, error) {
+	// read initial line to parse CID.
+	l1, isPrefix, err := br.ReadLine()
+	if err != nil {
+		return cid.Undef, "", nil, err
+	}
+	if isPrefix {
+		return cid.Undef, "", nil, fmt.Errorf("unexpected long header l1")
+	}
+	var cs string
+	if _, err := fmt.Sscanf(string(l1), "--- %s", &cs); err != nil {
+		return cid.Undef, "", nil, fmt.Errorf("could not parse patch cid line (%s): %q", l1, err)
+	}
+	l2, isPrefix, err := br.ReadLine()
+	if err != nil {
+		return cid.Undef, "", nil, err
+	}
+	if isPrefix {
+		return cid.Undef, "", nil, fmt.Errorf("unexpected long header l2")
+	}
+	// A successful match always yields the full match plus three groups.
+	matches := plusLineRegex.FindSubmatch(l2)
+	if len(matches) < 4 || string(matches[3]) != cs {
+		return cid.Undef, "", nil, fmt.Errorf("mismatched cid lines: %v", string(l2))
+	}
+	mode := string(matches[1])
+	noEndReturn := string(matches[2]) == "(no-end-cr) "
+	c, err := cid.Parse(cs)
+	if err != nil {
+		return cid.Undef, "", nil, err
+	}
+
+	// skip over @@ line.
+	l3, isPrefix, err := br.ReadLine()
+	if err != nil {
+		return cid.Undef, "", nil, err
+	}
+	if isPrefix {
+		return cid.Undef, "", nil, fmt.Errorf("unexpected long header l3")
+	}
+	if !strings.HasPrefix(string(l3), "@@") {
+		return cid.Undef, "", nil, fmt.Errorf("unexpected missing chunk prefix")
+	}
+
+	// keep going until next chunk or end.
+	outBuf := bytes.NewBuffer(nil)
+	for {
+		peek, err := br.Peek(4)
+		if err != nil && err != io.EOF {
+			return cid.Undef, "", nil, err
+		}
+		if bytes.Equal(peek, []byte("--- ")) {
+			break
+		}
+		// accumulate to buffer.
+		l, err := br.ReadBytes('\n')
+		if l != nil {
+			outBuf.Write(l)
+		}
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			return cid.Undef, "", nil, err
+		}
+	}
+
+	// remove the final line return
+	ob := trimLineEnding(outBuf.Bytes())
+	if noEndReturn {
+		ob = trimLineEnding(ob)
+	}
+
+	return c, mode, ob, nil
+}
+
+// trimLineEnding removes a single trailing "\r\n" or "\n" from b, including
+// when b consists of nothing but that line ending.
+func trimLineEnding(b []byte) []byte {
+	if bytes.HasSuffix(b, []byte("\r\n")) {
+		return b[:len(b)-2]
+	}
+	if bytes.HasSuffix(b, []byte("\n")) {
+		return b[:len(b)-1]
+	}
+	return b
+}
diff --git a/ipld/car/cmd/car/create.go b/ipld/car/cmd/car/create.go
new file mode 100644
index 0000000000..7b50b6458e
--- /dev/null
+++ b/ipld/car/cmd/car/create.go
@@ -0,0 +1,130 @@
+package main
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "io"
+ "path"
+
+ "github.com/ipfs/go-cid"
+ blocks "github.com/ipfs/go-libipfs/blocks"
+ "github.com/ipfs/go-unixfsnode/data/builder"
+ "github.com/ipld/go-car/v2"
+ "github.com/ipld/go-car/v2/blockstore"
+ dagpb "github.com/ipld/go-codec-dagpb"
+ "github.com/ipld/go-ipld-prime"
+ cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+ "github.com/multiformats/go-multicodec"
+ "github.com/multiformats/go-multihash"
+ "github.com/urfave/cli/v2"
+)
+
+// CreateCar creates a car from the source paths given as positional
+// arguments, writing it to the path in the "file" flag. The "version" flag
+// selects CARv1 (1) or CARv2 (2) output. The car is first written with a
+// placeholder root and the real root is patched in once it is known.
+func CreateCar(c *cli.Context) error {
+	if c.Args().Len() == 0 {
+		return fmt.Errorf("a source location to build the car from must be specified")
+	}
+	if !c.IsSet("file") {
+		return fmt.Errorf("a file destination must be specified")
+	}
+
+	// make a cid with the right length that we eventually will patch with the root.
+	hasher, err := multihash.GetHasher(multihash.SHA2_256)
+	if err != nil {
+		return err
+	}
+	placeholderHash, err := multihash.Encode(hasher.Sum([]byte{}), multihash.SHA2_256)
+	if err != nil {
+		return err
+	}
+	proxyRoot := cid.NewCidV1(uint64(multicodec.DagPb), placeholderHash)
+
+	var options []car.Option
+	switch version := c.Int("version"); version {
+	case 1:
+		options = append(options, blockstore.WriteAsCarV1(true))
+	case 2:
+		// already the default
+	default:
+		return fmt.Errorf("invalid CAR version %d", version)
+	}
+
+	destPath := c.String("file")
+	cdest, err := blockstore.OpenReadWrite(destPath, []cid.Cid{proxyRoot}, options...)
+	if err != nil {
+		return err
+	}
+
+	// Write the unixfs blocks into the store.
+	root, err := writeFiles(c.Context, cdest, c.Args().Slice()...)
+	if err != nil {
+		return err
+	}
+	if err := cdest.Finalize(); err != nil {
+		return err
+	}
+
+	// re-open/finalize with the final root.
+	return car.ReplaceRootsInFile(destPath, []cid.Cid{root})
+}
+
+// writeFiles builds a UnixFS DAG for every entry in paths, stores all produced
+// blocks in bs, wraps the entries in one top-level UnixFS directory, and
+// returns the CID of that directory. Each entry is named after the base name
+// of its path.
+func writeFiles(ctx context.Context, bs *blockstore.ReadWrite, paths ...string) (cid.Cid, error) {
+	ls := cidlink.DefaultLinkSystem()
+	ls.TrustedStorage = true
+	ls.StorageReadOpener = func(_ ipld.LinkContext, l ipld.Link) (io.Reader, error) {
+		cl, ok := l.(cidlink.Link)
+		if !ok {
+			return nil, fmt.Errorf("not a cidlink")
+		}
+		blk, err := bs.Get(ctx, cl.Cid)
+		if err != nil {
+			return nil, err
+		}
+		return bytes.NewBuffer(blk.RawData()), nil
+	}
+	ls.StorageWriteOpener = func(_ ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) {
+		buf := bytes.NewBuffer(nil)
+		return buf, func(l ipld.Link) error {
+			cl, ok := l.(cidlink.Link)
+			if !ok {
+				return fmt.Errorf("not a cidlink")
+			}
+			blk, err := blocks.NewBlockWithCid(buf.Bytes(), cl.Cid)
+			if err != nil {
+				return err
+			}
+			// Report a failed write so the builder does not continue as if the
+			// block had been stored.
+			return bs.Put(ctx, blk)
+		}, nil
+	}
+
+	topLevel := make([]dagpb.PBLink, 0, len(paths))
+	for _, p := range paths {
+		l, size, err := builder.BuildUnixFSRecursive(p, &ls)
+		if err != nil {
+			return cid.Undef, err
+		}
+		name := path.Base(p)
+		entry, err := builder.BuildUnixFSDirectoryEntry(name, int64(size), l)
+		if err != nil {
+			return cid.Undef, err
+		}
+		topLevel = append(topLevel, entry)
+	}
+
+	// make a directory for the file(s).
+	root, _, err := builder.BuildUnixFSDirectory(topLevel, &ls)
+	if err != nil {
+		// Propagate the failure; returning nil here would hand the caller
+		// cid.Undef as if it were a valid root.
+		return cid.Undef, err
+	}
+	rcl, ok := root.(cidlink.Link)
+	if !ok {
+		return cid.Undef, fmt.Errorf("could not interpret %s", root)
+	}
+
+	return rcl.Cid, nil
+}
diff --git a/ipld/car/cmd/car/detach.go b/ipld/car/cmd/car/detach.go
new file mode 100644
index 0000000000..e04eba9dd5
--- /dev/null
+++ b/ipld/car/cmd/car/detach.go
@@ -0,0 +1,73 @@
+package main
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ carv2 "github.com/ipld/go-car/v2"
+ "github.com/ipld/go-car/v2/index"
+ "github.com/multiformats/go-multihash"
+ "github.com/urfave/cli/v2"
+)
+
+// DetachCar is a command to output the index part of a car.
+//
+// The car is opened from the first positional argument. Its index is copied
+// to the file named by the second argument, or to stdout when only one
+// argument is given. A car without an index is an error.
+func DetachCar(c *cli.Context) error {
+	reader, err := carv2.OpenReader(c.Args().Get(0))
+	if err != nil {
+		return err
+	}
+	defer reader.Close()
+
+	if !reader.Header.HasIndex() {
+		return fmt.Errorf("no index present")
+	}
+
+	dest := os.Stdout
+	if c.Args().Len() >= 2 {
+		if dest, err = os.Create(c.Args().Get(1)); err != nil {
+			return err
+		}
+	}
+	defer dest.Close()
+
+	indexReader, err := reader.IndexReader()
+	if err != nil {
+		return err
+	}
+	_, err = io.Copy(dest, indexReader)
+	return err
+}
+
+// DetachCarList prints a list of what's found in a detached index.
+//
+// The index is read from the first positional argument, or from stdin when
+// none is given. One "<multihash> <offset>" line is printed per entry; an
+// index that does not implement index.IterableIndex is an error.
+func DetachCarList(c *cli.Context) error {
+	source := os.Stdin
+	if c.Args().Len() >= 1 {
+		opened, err := os.Open(c.Args().First())
+		if err != nil {
+			return err
+		}
+		defer opened.Close()
+		source = opened
+	}
+
+	idx, err := index.ReadFrom(source)
+	if err != nil {
+		return err
+	}
+
+	iterable, ok := idx.(index.IterableIndex)
+	if !ok {
+		return fmt.Errorf("index of codec %s is not iterable", idx.Codec())
+	}
+	return iterable.ForEach(func(mh multihash.Multihash, offset uint64) error {
+		fmt.Printf("%s %d\n", mh, offset)
+		return nil
+	})
+}
diff --git a/ipld/car/cmd/car/extract.go b/ipld/car/cmd/car/extract.go
new file mode 100644
index 0000000000..f9373fd37b
--- /dev/null
+++ b/ipld/car/cmd/car/extract.go
@@ -0,0 +1,443 @@
+package main
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "sync"
+
+ "github.com/ipfs/go-cid"
+ "github.com/ipfs/go-unixfsnode"
+ "github.com/ipfs/go-unixfsnode/data"
+ "github.com/ipfs/go-unixfsnode/file"
+ "github.com/ipld/go-car/v2"
+ carstorage "github.com/ipld/go-car/v2/storage"
+ dagpb "github.com/ipld/go-codec-dagpb"
+ "github.com/ipld/go-ipld-prime"
+ cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+ basicnode "github.com/ipld/go-ipld-prime/node/basic"
+ "github.com/ipld/go-ipld-prime/storage"
+ "github.com/urfave/cli/v2"
+)
+
+// ErrNotDir is returned by extractDir when the node it is given is not a map.
+var ErrNotDir = errors.New("not a directory")
+
+// ExtractCar pulls files and directories out of a car
+//
+// The car is read from the path in the "file" flag, or streamed from the
+// app's reader (stdin) when that flag is empty. Files are written below the
+// first positional argument, defaulting to the current working directory. The
+// "path" flag restricts extraction to one path inside each root. Exits with
+// status 1 when no file was extracted.
+func ExtractCar(c *cli.Context) error {
+	outputDir, err := os.Getwd()
+	if err != nil {
+		return err
+	}
+	if c.Args().Present() {
+		outputDir = c.Args().First()
+	}
+
+	var store storage.ReadableStorage
+	var roots []cid.Cid
+
+	if c.String("file") == "" {
+		if f, ok := c.App.Reader.(*os.File); ok {
+			stat, err := f.Stat()
+			if err != nil {
+				return err
+			}
+			if (stat.Mode() & os.ModeCharDevice) != 0 {
+				// Is a terminal. In reality the user is unlikely to actually paste
+				// CAR data into this terminal, but this message may serve to make
+				// them aware that they can/should pipe data into this command.
+				stopKeys := "Ctrl+D"
+				if runtime.GOOS == "windows" {
+					stopKeys = "Ctrl+Z, Enter"
+				}
+				fmt.Fprintf(c.App.ErrWriter, "Reading from stdin; use %s to end\n", stopKeys)
+			}
+		}
+		store, roots, err = NewStdinReadStorage(c.App.Reader)
+		if err != nil {
+			return err
+		}
+	} else {
+		carFile, err := os.Open(c.String("file"))
+		if err != nil {
+			return err
+		}
+		// The store is only used inside this function, so the file can be
+		// released when it returns.
+		defer carFile.Close()
+		store, err = carstorage.OpenReadable(carFile)
+		if err != nil {
+			return err
+		}
+		roots = store.(carstorage.ReadableCar).Roots()
+	}
+
+	ls := cidlink.DefaultLinkSystem()
+	ls.TrustedStorage = true
+	ls.SetReadStorage(store)
+
+	segments, err := pathSegments(c.String("path"))
+	if err != nil {
+		return err
+	}
+
+	var extractedFiles int
+	for _, root := range roots {
+		count, err := extractRoot(c, &ls, root, outputDir, segments)
+		if err != nil {
+			return err
+		}
+		extractedFiles += count
+	}
+	if extractedFiles == 0 {
+		return cli.Exit("no files extracted", 1)
+	}
+	fmt.Fprintf(c.App.ErrWriter, "extracted %d file(s)\n", extractedFiles)
+
+	return nil
+}
+
+// extractRoot extracts the UnixFS content below a single root CID and returns
+// the number of files written.
+//
+// Roots with the raw codec are skipped. outputDir is the destination
+// directory, created if it does not exist yet; "-" selects stdout instead. A
+// root that is not a directory is treated as a single file and written as
+// "unknown" inside outputDir (or to stdout).
+func extractRoot(c *cli.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir string, path []string) (int, error) {
+	if root.Prefix().Codec == cid.Raw {
+		if c.IsSet("verbose") {
+			fmt.Fprintf(c.App.ErrWriter, "skipping raw root %s\n", root)
+		}
+		return 0, nil
+	}
+
+	pbn, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: root}, dagpb.Type.PBNode)
+	if err != nil {
+		return 0, err
+	}
+	pbnode := pbn.(dagpb.PBNode)
+
+	ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
+	if err != nil {
+		return 0, err
+	}
+
+	var outputResolvedDir string
+	if outputDir != "-" {
+		// Create the directory before resolving it: EvalSymlinks fails on a
+		// path that does not exist.
+		if _, err := os.Stat(outputDir); os.IsNotExist(err) {
+			if err := os.Mkdir(outputDir, 0755); err != nil {
+				return 0, err
+			}
+		}
+		outputResolvedDir, err = filepath.EvalSymlinks(outputDir)
+		if err != nil {
+			return 0, err
+		}
+	}
+
+	count, err := extractDir(c, ls, ufn, outputResolvedDir, "/", path)
+	if err == nil {
+		return count, nil
+	}
+	if !errors.Is(err, ErrNotDir) {
+		return 0, fmt.Errorf("%s: %w", root, err)
+	}
+
+	// if it's not a directory, it's a file.
+	ufsData, err := pbnode.LookupByString("Data")
+	if err != nil {
+		return 0, err
+	}
+	ufsBytes, err := ufsData.AsBytes()
+	if err != nil {
+		return 0, err
+	}
+	ufsNode, err := data.DecodeUnixFSData(ufsBytes)
+	if err != nil {
+		return 0, err
+	}
+	if ufsNode.DataType.Int() != data.Data_File && ufsNode.DataType.Int() != data.Data_Raw {
+		// Nothing is written for other node types, so nothing is counted.
+		return 0, nil
+	}
+	var outputName string
+	if outputDir != "-" {
+		outputName = filepath.Join(outputResolvedDir, "unknown")
+	}
+	if err := extractFile(c, ls, pbnode, outputName); err != nil {
+		return 0, err
+	}
+	return 1, nil
+}
+
+// resolvePath joins pth (taken relative to "/") onto root and returns the
+// joined path. It fails when the parent directory of the result cannot be
+// resolved, or when resolving symlinks in that parent yields a different path
+// than the cleaned parent itself.
+func resolvePath(root, pth string) (string, error) {
+	rel, err := filepath.Rel("/", pth)
+	if err != nil {
+		return "", fmt.Errorf("couldn't check relative-ness of %s: %w", pth, err)
+	}
+
+	target := path.Join(root, rel)
+	parent := path.Dir(target)
+
+	resolved, err := filepath.EvalSymlinks(parent)
+	if err != nil {
+		return "", fmt.Errorf("couldn't eval symlinks in %s: %w", parent, err)
+	}
+	if resolved == path.Clean(parent) {
+		return target, nil
+	}
+	return "", fmt.Errorf("path attempts to redirect through symlinks")
+}
+
+// extractDir extracts the directory node n and returns the number of entries
+// written.
+//
+// outputRoot is the resolved destination directory; an empty outputRoot means
+// file content goes to stdout. outputPath is the slash-separated location of n
+// below outputRoot. When matchPath is non-empty only the entry named by its
+// first segment is extracted and the remaining segments are passed down to
+// nested directories; otherwise every entry is extracted. ErrNotDir is
+// returned when n is not a map. Entries whose blocks are missing from the
+// link system are reported on the error writer and skipped.
+func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, outputPath string, matchPath []string) (int, error) {
+	if outputRoot != "" {
+		dirPath, err := resolvePath(outputRoot, outputPath)
+		if err != nil {
+			return 0, err
+		}
+		// make the directory.
+		if err := os.MkdirAll(dirPath, 0755); err != nil {
+			return 0, err
+		}
+	}
+
+	if n.Kind() != ipld.Kind_Map {
+		return 0, ErrNotDir
+	}
+
+	subPath := matchPath
+	if len(matchPath) > 0 {
+		subPath = matchPath[1:]
+	}
+
+	extractElement := func(name string, n ipld.Node) (int, error) {
+		var nextRes string
+		if outputRoot != "" {
+			var err error
+			nextRes, err = resolvePath(outputRoot, path.Join(outputPath, name))
+			if err != nil {
+				return 0, err
+			}
+			if c.IsSet("verbose") {
+				fmt.Fprintf(c.App.Writer, "%s\n", nextRes)
+			}
+		}
+
+		if n.Kind() != ipld.Kind_Link {
+			return 0, fmt.Errorf("unexpected map value for %s at %s", name, outputPath)
+		}
+		// a directory may be represented as a map of name:<link> if unixADL is applied
+		vl, err := n.AsLink()
+		if err != nil {
+			return 0, err
+		}
+		dest, err := ls.Load(ipld.LinkContext{}, vl, basicnode.Prototype.Any)
+		if err != nil {
+			if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
+				fmt.Fprintf(c.App.ErrWriter, "data for entry not found: %s (skipping...)\n", path.Join(outputPath, name))
+				return 0, nil
+			}
+			return 0, err
+		}
+		// degenerate files are handled here.
+		if dest.Kind() == ipld.Kind_Bytes {
+			if err := extractFile(c, ls, dest, nextRes); err != nil {
+				return 0, err
+			}
+			return 1, nil
+		}
+
+		// dir / pbnode
+		pbb := dagpb.Type.PBNode.NewBuilder()
+		if err := pbb.AssignNode(dest); err != nil {
+			return 0, err
+		}
+		pbnode := pbb.Build().(dagpb.PBNode)
+
+		// interpret dagpb 'data' as unixfs data and look at type.
+		ufsData, err := pbnode.LookupByString("Data")
+		if err != nil {
+			return 0, err
+		}
+		ufsBytes, err := ufsData.AsBytes()
+		if err != nil {
+			return 0, err
+		}
+		ufsNode, err := data.DecodeUnixFSData(ufsBytes)
+		if err != nil {
+			return 0, err
+		}
+
+		switch ufsNode.DataType.Int() {
+		case data.Data_Directory, data.Data_HAMTShard:
+			ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
+			if err != nil {
+				return 0, err
+			}
+			return extractDir(c, ls, ufn, outputRoot, path.Join(outputPath, name), subPath)
+		case data.Data_File, data.Data_Raw:
+			if err := extractFile(c, ls, pbnode, nextRes); err != nil {
+				return 0, err
+			}
+			return 1, nil
+		case data.Data_Symlink:
+			if nextRes == "" {
+				return 0, fmt.Errorf("cannot extract a symlink to stdout")
+			}
+			target := ufsNode.Data.Must().Bytes()
+			if err := os.Symlink(string(target), nextRes); err != nil {
+				return 0, err
+			}
+			return 1, nil
+		default:
+			return 0, fmt.Errorf("unknown unixfs type: %d", ufsNode.DataType.Int())
+		}
+	}
+
+	// specific path segment
+	if len(matchPath) > 0 {
+		val, err := n.LookupByString(matchPath[0])
+		if err != nil {
+			return 0, err
+		}
+		return extractElement(matchPath[0], val)
+	}
+
+	// An empty outputRoot is how stdout mode reaches this function; a whole
+	// directory cannot be meaningfully written to a single stream.
+	if outputRoot == "" {
+		return 0, fmt.Errorf("cannot extract a directory to stdout, use a path to extract a specific file")
+	}
+
+	// everything
+	var count int
+	var shardSkip int
+	mi := n.MapIterator()
+	for !mi.Done() {
+		key, val, err := mi.Next()
+		if err != nil {
+			if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
+				shardSkip++
+				continue
+			}
+			return 0, err
+		}
+		ks, err := key.AsString()
+		if err != nil {
+			return 0, err
+		}
+		ecount, err := extractElement(ks, val)
+		if err != nil {
+			return 0, err
+		}
+		count += ecount
+	}
+	if shardSkip > 0 {
+		fmt.Fprintf(c.App.ErrWriter, "data for entry not found for %d unknown sharded entries (skipped...)\n", shardSkip)
+	}
+	return count, nil
+}
+
+// extractFile reads n as a UnixFS file through ls and copies its content to
+// the file at outputName, which is created or truncated. An empty outputName
+// sends the content to stdout instead.
+func extractFile(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputName string) error {
+	ufsFile, err := file.NewUnixFSFile(c.Context, n, ls)
+	if err != nil {
+		return err
+	}
+	content, err := ufsFile.AsLargeBytes()
+	if err != nil {
+		return err
+	}
+
+	out := os.Stdout
+	if outputName != "" {
+		created, err := os.Create(outputName)
+		if err != nil {
+			return err
+		}
+		defer created.Close()
+		out = created
+	}
+
+	_, err = io.Copy(out, content)
+	return err
+}
+
+// TODO: dedupe this with lassie, probably into go-unixfsnode
+
+// pathSegments splits path on "/" and returns its non-empty segments. One
+// leading and one trailing slash are tolerated; any other empty segment, and
+// any "." or ".." segment, is rejected with an error.
+func pathSegments(path string) ([]string, error) {
+	parts := strings.Split(path, "/")
+	last := len(parts) - 1
+	kept := make([]string, 0, len(parts))
+	for pos, seg := range parts {
+		switch seg {
+		case "":
+			// Allow one leading and one trailing '/' at most
+			if pos != 0 && pos != last {
+				return nil, fmt.Errorf("invalid empty path segment at position %d", pos)
+			}
+		case ".", "..":
+			return nil, fmt.Errorf("'%s' is unsupported in paths", seg)
+		default:
+			kept = append(kept, seg)
+		}
+	}
+	return kept, nil
+}
+
+// Compile-time check that *stdinReadStorage implements storage.ReadableStorage.
+var _ storage.ReadableStorage = (*stdinReadStorage)(nil)
+
+// stdinReadStorage is a readable block store that is filled in the background
+// from a streamed CAR (see NewStdinReadStorage). Get waits for a requested
+// block to arrive instead of failing while the stream is still being read.
+type stdinReadStorage struct {
+ // blocks holds the data of every block read so far, keyed by the string
+ // form of the block CID's multihash.
+ blocks map[string][]byte
+ // done is set once the reader has reached the end of the stream.
+ done bool
+ // lk is held while blocks and done are read or written.
+ lk *sync.RWMutex
+ // cond is built on lk; the reader broadcasts on it when a block is added
+ // and Get waits on it.
+ cond *sync.Cond
+}
+
+// NewStdinReadStorage starts reading a CAR from reader and returns a storage
+// that serves its blocks as they arrive, together with the roots declared in
+// the CAR header. The header is read synchronously; the blocks are read by a
+// background goroutine that runs until the end of the stream.
+//
+// NOTE(review): a read error other than io.EOF panics inside the background
+// goroutine, because the storage has no way to report it to callers of Get.
+func NewStdinReadStorage(reader io.Reader) (*stdinReadStorage, []cid.Cid, error) {
+	var lk sync.RWMutex
+	srs := &stdinReadStorage{
+		blocks: make(map[string][]byte),
+		lk:     &lk,
+		cond:   sync.NewCond(&lk),
+	}
+	rdr, err := car.NewBlockReader(reader)
+	if err != nil {
+		return nil, nil, err
+	}
+	go func() {
+		for {
+			blk, err := rdr.Next()
+			if err == io.EOF {
+				srs.lk.Lock()
+				srs.done = true
+				// Wake every Get that is still waiting for a block that will
+				// now never arrive; without this they would block forever.
+				srs.cond.Broadcast()
+				srs.lk.Unlock()
+				return
+			}
+			if err != nil {
+				panic(err)
+			}
+			srs.lk.Lock()
+			srs.blocks[string(blk.Cid().Hash())] = blk.RawData()
+			srs.cond.Broadcast()
+			srs.lk.Unlock()
+		}
+	}()
+	return srs, rdr.Roots, nil
+}
+
+// Has reports whether the block for key can be obtained. It delegates to Get,
+// so it waits like Get does and returns Get's error together with false when
+// the block is not available.
+func (srs *stdinReadStorage) Has(ctx context.Context, key string) (bool, error) {
+	_, err := srs.Get(ctx, key)
+	return err == nil, err
+}
+
+// Get returns the data of the block whose CID is encoded in key (the binary
+// CID as a string). Blocks are matched by multihash. If the block has not been
+// read yet, Get waits until it arrives; once the stream is done and the block
+// is still absent, it returns carstorage.ErrNotFound.
+func (srs *stdinReadStorage) Get(ctx context.Context, key string) ([]byte, error) {
+	want, err := cid.Cast([]byte(key))
+	if err != nil {
+		return nil, err
+	}
+
+	srs.lk.Lock()
+	defer srs.lk.Unlock()
+	for {
+		data, found := srs.blocks[string(want.Hash())]
+		switch {
+		case found:
+			return data, nil
+		case srs.done:
+			return nil, carstorage.ErrNotFound{Cid: want}
+		}
+		srs.cond.Wait()
+	}
+}
diff --git a/ipld/car/cmd/car/filter.go b/ipld/car/cmd/car/filter.go
new file mode 100644
index 0000000000..a76b6bd05a
--- /dev/null
+++ b/ipld/car/cmd/car/filter.go
@@ -0,0 +1,128 @@
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "os"
+ "strings"
+
+ "github.com/ipfs/go-cid"
+ carv2 "github.com/ipld/go-car/v2"
+ "github.com/ipld/go-car/v2/blockstore"
+ "github.com/urfave/cli/v2"
+)
+
+// FilterCar is a command to select a subset of a car by CID.
+//
+// The first positional argument is the input car and the second the output
+// car. The CIDs to keep are read one per line from the file named by the
+// "cidFile" flag, or from stdin when it is not set. Without the "append" flag
+// an existing output file is truncated and the output roots are the input
+// roots that survive the filter; with "append" the output must be an existing
+// CARv2 file and its roots are kept.
+func FilterCar(c *cli.Context) error {
+	if c.Args().Len() < 2 {
+		return fmt.Errorf("an output filename must be provided")
+	}
+
+	fd, err := os.Open(c.Args().First())
+	if err != nil {
+		return err
+	}
+	defer fd.Close()
+	rd, err := carv2.NewBlockReader(fd)
+	if err != nil {
+		return err
+	}
+
+	// Get the set of CIDs from stdin.
+	inStream := os.Stdin
+	if c.IsSet("cidFile") {
+		inStream, err = os.Open(c.String("cidFile"))
+		if err != nil {
+			return err
+		}
+		defer inStream.Close()
+	}
+	cidMap, err := parseCIDS(inStream)
+	if err != nil {
+		return err
+	}
+	fmt.Printf("filtering to %d cids\n", len(cidMap))
+
+	outRoots := make([]cid.Cid, 0)
+	for _, r := range rd.Roots {
+		if _, ok := cidMap[r]; ok {
+			outRoots = append(outRoots, r)
+		}
+	}
+
+	outPath := c.Args().Get(1)
+	if !c.Bool("append") {
+		if _, err := os.Stat(outPath); !os.IsNotExist(err) {
+			// output to an existing file.
+			if err := os.Truncate(outPath, 0); err != nil {
+				return err
+			}
+		}
+	} else {
+		// roots will need to be whatever is in the output already.
+		outRoots, err = appendTargetRoots(outPath)
+		if err != nil {
+			return err
+		}
+	}
+
+	if len(outRoots) == 0 {
+		fmt.Fprintf(os.Stderr, "warning: no roots defined after filtering\n")
+	}
+
+	bs, err := blockstore.OpenReadWrite(outPath, outRoots)
+	if err != nil {
+		return err
+	}
+
+	for {
+		blk, err := rd.Next()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return err
+		}
+		if _, ok := cidMap[blk.Cid()]; ok {
+			if err := bs.Put(c.Context, blk); err != nil {
+				return err
+			}
+		}
+	}
+	return bs.Finalize()
+}
+
+// appendTargetRoots returns the roots of the existing car at outPath, which
+// must be a version 2 car. The reader is closed before returning on every
+// path, so the file can be reopened for writing afterwards.
+func appendTargetRoots(outPath string) ([]cid.Cid, error) {
+	cv2r, err := carv2.OpenReader(outPath)
+	if err != nil {
+		return nil, err
+	}
+	defer cv2r.Close()
+	if cv2r.Version != 2 {
+		return nil, fmt.Errorf("can only append to version 2 car files")
+	}
+	return cv2r.Roots()
+}
+
+// parseCIDS reads one CID per line from r and returns them as a set. Lines
+// are trimmed and blank lines are skipped. A repeated CID is reported on
+// stderr and otherwise ignored; a line that does not parse as a CID aborts
+// with that parse error.
+func parseCIDS(r io.Reader) (map[cid.Cid]struct{}, error) {
+	seen := make(map[cid.Cid]struct{})
+	reader := bufio.NewReader(r)
+	for {
+		raw, _, err := reader.ReadLine()
+		switch {
+		case err == io.EOF:
+			return seen, nil
+		case err != nil:
+			return nil, err
+		}
+
+		text := strings.TrimSpace(string(raw))
+		if text == "" {
+			continue
+		}
+		parsed, err := cid.Parse(text)
+		if err != nil {
+			return nil, err
+		}
+		if _, dup := seen[parsed]; dup {
+			fmt.Fprintf(os.Stderr, "duplicate cid: %s\n", parsed)
+		}
+		seen[parsed] = struct{}{}
+	}
+}
diff --git a/ipld/car/cmd/car/get.go b/ipld/car/cmd/car/get.go
new file mode 100644
index 0000000000..f5d5b1cdfa
--- /dev/null
+++ b/ipld/car/cmd/car/get.go
@@ -0,0 +1,215 @@
+package main
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+
+ "io"
+ "os"
+
+ dagpb "github.com/ipld/go-codec-dagpb"
+ "github.com/ipld/go-ipld-prime"
+ _ "github.com/ipld/go-ipld-prime/codec/cbor"
+ _ "github.com/ipld/go-ipld-prime/codec/dagcbor"
+ _ "github.com/ipld/go-ipld-prime/codec/dagjson"
+ _ "github.com/ipld/go-ipld-prime/codec/json"
+ _ "github.com/ipld/go-ipld-prime/codec/raw"
+
+ "github.com/ipfs/go-cid"
+ ipldfmt "github.com/ipfs/go-ipld-format"
+ "github.com/ipfs/go-unixfsnode"
+ "github.com/ipld/go-car"
+ "github.com/ipld/go-car/v2/blockstore"
+ "github.com/ipld/go-ipld-prime/datamodel"
+ "github.com/ipld/go-ipld-prime/linking"
+ cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+ "github.com/ipld/go-ipld-prime/node/basicnode"
+ "github.com/ipld/go-ipld-prime/traversal"
+ "github.com/ipld/go-ipld-prime/traversal/selector"
+ selectorParser "github.com/ipld/go-ipld-prime/traversal/selector/parse"
+ "github.com/urfave/cli/v2"
+)
+
+// GetCarBlock is a command to get a block out of a car
+//
+// Positional arguments: the car file, the CID of the block, and optionally an
+// output file. The raw block data is written to the output file, or to stdout
+// when none is given.
+func GetCarBlock(c *cli.Context) error {
+	if c.Args().Len() < 2 {
+		return fmt.Errorf("usage: car get-block <file.car> <block cid> [output file]")
+	}
+
+	bs, err := blockstore.OpenReadOnly(c.Args().Get(0))
+	if err != nil {
+		return err
+	}
+	// Release the underlying car file once the block has been copied out.
+	defer bs.Close()
+
+	// string to CID
+	blkCid, err := cid.Parse(c.Args().Get(1))
+	if err != nil {
+		return err
+	}
+
+	blk, err := bs.Get(c.Context, blkCid)
+	if err != nil {
+		return err
+	}
+
+	outStream := os.Stdout
+	if c.Args().Len() >= 3 {
+		outStream, err = os.Create(c.Args().Get(2))
+		if err != nil {
+			return err
+		}
+		defer outStream.Close()
+	}
+
+	_, err = outStream.Write(blk.RawData())
+	return err
+}
+
+// GetCarDag is a command to get a dag out of a car
+func GetCarDag(c *cli.Context) error {
+ if c.Args().Len() < 2 {
+ return fmt.Errorf("usage: car get-dag [-s selector] [root cid]