Skip to content

Commit

Permalink
fix: update state to allow iter continuance on NotFound errors
Browse files Browse the repository at this point in the history
  • Loading branch information
rvagg committed Mar 9, 2023
1 parent aadb6fe commit ca00f89
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 7 deletions.
Binary file not shown.
14 changes: 7 additions & 7 deletions hamt/shardeddir.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,20 +197,19 @@ type _UnixFSShardedDir__ListItr struct {
}

// Next returns the index and link of the next entry in the sharded directory.
//
// The index is captured and the running counter is bumped before the
// underlying next call is made, so the counter advances on every call,
// including calls that fail. That keeps the iterator's state moving forward
// when a caller chooses to ignore an error and keep iterating.
//
// It returns (-1, nil, err) when the underlying next call fails and
// (-1, nil, nil) when that call yields no link.
func (itr *_UnixFSShardedDir__ListItr) Next() (int64, dagpb.PBLink, error) {
	// Capture-and-increment happens exactly once per call, up front.
	total := itr.total
	itr.total++
	next, err := itr.next()
	if err != nil {
		return -1, nil, err
	}
	if next == nil {
		return -1, nil, nil
	}
	return total, next, nil
}

func (itr *_UnixFSShardedDir__ListItr) next() (dagpb.PBLink, error) {

if itr.childIter == nil {
if itr._substrate.Done() {
return nil, nil
Expand All @@ -232,15 +231,16 @@ func (itr *_UnixFSShardedDir__ListItr) next() (dagpb.PBLink, error) {
nd: child,
maxPadLen: maxPadLength(child.data),
}

}
_, next, err := itr.childIter.Next()
if err != nil {
return nil, err
}
if itr.childIter.Done() {
// do this even on error to make sure we don't overrun a shard where the
// end is missing and the user is ignoring NotFound errors
itr.childIter = nil
}
if err != nil {
return nil, err
}
return next, nil
}

Expand Down
83 changes: 83 additions & 0 deletions hamt/shardeddir_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"io"
"math/rand"
"os"
"sort"
"testing"
"time"
Expand All @@ -16,10 +17,12 @@ import (
ft "github.com/ipfs/go-unixfs"
legacy "github.com/ipfs/go-unixfs/hamt"
"github.com/ipfs/go-unixfsnode/hamt"
"github.com/ipld/go-car/v2/storage"
dagpb "github.com/ipld/go-codec-dagpb"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/fluent/qp"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
basicnode "github.com/ipld/go-ipld-prime/node/basic"
"github.com/ipld/go-ipld-prime/schema"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -206,3 +209,83 @@ func TestFindNonExisting(t *testing.T) {
require.EqualError(t, err, schema.ErrNoSuchField{Field: ipld.PathSegmentOfString(key)}.Error())
}
}

// TestIncompleteShardedIteration recursively walks a HAMT-sharded directory
// loaded from a CAR fixture in which many blocks are absent, tolerating
// NotFound errors both when loading named links and while iterating shards.
// It then checks how many entries could be read and how many loads failed.
func TestIncompleteShardedIteration(t *testing.T) {
	ctx := context.Background()
	req := require.New(t)

	fixture := "./fixtures/wikipedia-cryptographic-hash-function.car"
	f, err := os.Open(fixture)
	req.NoError(err)
	defer f.Close()
	carstore, err := storage.OpenReadable(f)
	req.NoError(err)
	lsys := cidlink.DefaultLinkSystem()
	lsys.TrustedStorage = true
	lsys.SetReadStorage(carstore)

	// isNotFound reports whether err exposes a NotFound() method that
	// returns true. A nil error yields false.
	isNotFound := func(err error) bool {
		nf, ok := err.(interface{ NotFound() bool })
		return ok && nf.NotFound()
	}

	// classic recursive go-ipld-prime map iteration, being forgiving about
	// NotFound block loads to see what we end up with

	kvs := make(map[string]string)
	var iterNotFound int
	blockNotFound := make(map[string]struct{})

	var iter func(string, ipld.Link)
	iter = func(dir string, lnk ipld.Link) {
		nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, lnk, basicnode.Prototype.Any)
		if isNotFound(err) {
			// got a named link that we can't load
			blockNotFound[dir] = struct{}{}
			return
		}
		req.NoError(err)
		if nd.Kind() == ipld.Kind_Bytes {
			bv, err := nd.AsBytes()
			req.NoError(err)
			kvs[dir] = string(bv)
			return
		}

		nb := dagpb.Type.PBNode.NewBuilder()
		req.NoError(nb.AssignNode(nd))
		pbn := nb.Build()
		hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, pbn, &lsys)
		req.NoError(err)

		mi := hamtShard.MapIterator()
		for !mi.Done() {
			k, v, err := mi.Next()
			if isNotFound(err) {
				// internal shard link that won't load, we don't know what it might
				// point to
				iterNotFound++
				continue
			}
			req.NoError(err)
			ks, err := k.AsString()
			req.NoError(err)
			req.Equal(ipld.Kind_Link, v.Kind())
			lv, err := v.AsLink()
			req.NoError(err)
			iter(dir+"/"+ks, lv)
		}
	}

	// walk the tree; fail cleanly rather than panic if the CAR has no roots
	roots := carstore.Roots()
	req.NotEmpty(roots)
	iter("", cidlink.Link{Cid: roots[0]})

	req.Len(kvs, 1)
	req.Contains(kvs, "/wiki/Cryptographic_hash_function")
	req.Contains(kvs["/wiki/Cryptographic_hash_function"], "<title>Cryptographic hash function</title>\n")
	req.Equal(570, iterNotFound) // tried to load 570 blocks that were not in the CAR
	req.Len(blockNotFound, 110)  // 110 blocks, for named links, were not found in the CAR
	// some of the root block links
	req.Contains(blockNotFound, "/favicon.ico")
	req.Contains(blockNotFound, "/index.html")
	req.Contains(blockNotFound, "/zimdump_version")
	// some of the shard links
	req.Contains(blockNotFound, "/wiki/UK_railway_Signal")
	req.Contains(blockNotFound, "/wiki/Australian_House")
	req.Contains(blockNotFound, "/wiki/ICloud_Drive")
	req.Contains(blockNotFound, "/wiki/Édouard_Bamberger")
}

0 comments on commit ca00f89

Please sign in to comment.