diff --git a/signaling.go b/signaling.go index 056d2ab..b3442b5 100644 --- a/signaling.go +++ b/signaling.go @@ -1,15 +1,45 @@ package unixfsnode import ( - "strings" - "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/traversal/selector" "github.com/ipld/go-ipld-prime/traversal/selector/builder" ) +// ExploreAllRecursivelySelector is a selector that will explore all nodes. It +// is the same selector as selectorparse.CommonSelector_ExploreAllRecursively +// but it is precompiled for use with UnixFSPathSelectorBuilder(). +var ExploreAllRecursivelySelector = specBuilder(func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec { + return ssb.ExploreRecursive( + selector.RecursionLimitNone(), + ssb.ExploreAll(ssb.ExploreRecursiveEdge()), + ) +}) + +// MatchUnixFSPreloadSelector is a selector that will match a single node, +// similar to selectorparse.CommonSelector_MatchPoint, but uses the +// "unixfs-preload" ADL to load sharded files and directories as a single node. +// Can be used to shallow load an entire UnixFS directory listing, sharded or +// not, but not its contents. +// MatchUnixFSPreloadSelector is precompiled for use with +// UnixFSPathSelectorBuilder(). +var MatchUnixFSPreloadSelector = specBuilder(func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec { + return ssb.ExploreInterpretAs("unixfs-preload", ssb.Matcher()) +}) + +// MatchUnixFSSelector is a selector that will match a single node, similar to +// selectorparse.CommonSelector_MatchPoint, but uses the "unixfs" ADL to load +// as UnixFS data. Unlike MatchUnixFSPreloadSelector, this selector will not +// preload all blocks in sharded directories or files. Use +// MatchUnixFSPreloadSelector where the blocks that constitute the full UnixFS +// resource being selected are important to load. 
+var MatchUnixFSSelector = specBuilder(func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec { + return ssb.ExploreInterpretAs("unixfs", ssb.Matcher()) +}) + func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) { if lsys.KnownReifiers == nil { lsys.KnownReifiers = make(map[string]linking.NodeReifier) @@ -18,18 +48,67 @@ func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) { lsys.KnownReifiers["unixfs-preload"] = nonLazyReify } -// UnixFSPathSelector creates a selector for a file/path inside of a UnixFS directory -// if reification is setup on a link system +// UnixFSPathSelector creates a selector for an IPLD path to a UnixFS resource if +// UnixFS reification is set up on a LinkSystem being used for traversal. +// +// Use UnixFSPathSelectorBuilder for more control over the selector; this +// function is the same as calling +// +// UnixFSPathSelectorBuilder(path, MatchUnixFSSelector, false) func UnixFSPathSelector(path string) datamodel.Node { - segments := strings.Split(path, "/") + return UnixFSPathSelectorBuilder(path, MatchUnixFSSelector, false) +} + +// UnixFSPathSelectorBuilder creates a selector for an IPLD path to a UnixFS +// resource if UnixFS reification is set up on a LinkSystem being used for +// traversal. +// +// The path is interpreted according to +// github.com/ipld/go-ipld-prime/datamodel/Path rules, +// i.e. +// - leading and trailing slashes are ignored +// - redundant slashes are ignored +// - the segment `..` is a field named `..`, same with `.` +// +// targetSelector is the selector to apply to the final node in the path. +// Use ExploreAllRecursivelySelector to explore (i.e. load the blocks) all of +// the content from the terminus of the path. Use MatchUnixFSPreloadSelector to +// match the terminus of the path, but preload all blocks in sharded files and +// directories. Use MatchUnixFSSelector to match the terminus of the path, but +// not preload any blocks if the terminus is sharded. 
Or any other custom +// SelectorSpec can be supplied. +// +// If matchPath is false, the selector will explore, not match, so it's useful +// for traversals where block loads are important, not where the matcher visitor +// callback is important. If matchPath is true, the selector will match the +// nodes along the path while exploring them. +func UnixFSPathSelectorBuilder(path string, targetSelector builder.SelectorSpec, matchPath bool) ipld.Node { + segments := ipld.ParsePath(path) + + ss := targetSelector ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any) - selectorSoFar := ssb.ExploreInterpretAs("unixfs", ssb.Matcher()) - for i := len(segments) - 1; i >= 0; i-- { - selectorSoFar = ssb.ExploreInterpretAs("unixfs", - ssb.ExploreFields(func(efsb builder.ExploreFieldsSpecBuilder) { - efsb.Insert(segments[i], selectorSoFar) - }), - ) + + for segments.Len() > 0 { + // Wrap selector in ExploreFields as we walk back up through the path. + // We can assume each segment to be a unixfs path section, so we + // InterpretAs to make sure the node is reified through go-unixfsnode + // (if possible) and we can traverse through according to unixfs pathing + // rather than bare IPLD pathing - which also gives us the ability to + // traverse through HAMT shards. 
+ ss = ssb.ExploreInterpretAs("unixfs", ssb.ExploreFields( + func(efsb builder.ExploreFieldsSpecBuilder) { + efsb.Insert(segments.Last().String(), ss) + }, + )) + if matchPath { + ss = ssb.ExploreUnion(ssb.Matcher(), ss) + } + segments = segments.Pop() } - return selectorSoFar.Node() + + return ss.Node() +} + +func specBuilder(b func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec) builder.SelectorSpec { + return b(builder.NewSelectorSpecBuilder(basicnode.Prototype.Any)) } diff --git a/signalling_test.go b/signalling_test.go new file mode 100644 index 0000000..f8a1329 --- /dev/null +++ b/signalling_test.go @@ -0,0 +1,203 @@ +package unixfsnode_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/ipfs/go-unixfsnode" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec/dagjson" + "github.com/ipld/go-ipld-prime/traversal/selector/builder" + selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" + "github.com/stretchr/testify/require" +) + +// Selectors are tested against JSON expected forms; this doesn't necessarily +// validate that they work as advertised. It's just a sanity check that the +// selectors are being built as expected. 
+ +var exploreAllJson = mustDagJson(selectorparse.CommonSelector_ExploreAllRecursively) + +// explore interpret-as (~), next (>), match (.), interpreted as unixfs-preload +var matchUnixfsPreloadJson = `{"~":{">":{".":{}},"as":"unixfs-preload"}}` + +// explore interpret-as (~), next (>), match (.), interpreted as unixfs +var matchUnixfsJson = `{"~":{">":{".":{}},"as":"unixfs"}}` + +func TestUnixFSPathSelector(t *testing.T) { + testCases := []struct { + name string + path string + expextedSelector string + }{ + { + name: "empty path", + path: "", + expextedSelector: matchUnixfsJson, + }, + { + name: "single field", + path: "/foo", + expextedSelector: jsonFields(matchUnixfsJson, "foo"), + }, + { + name: "multiple fields", + path: "/foo/bar", + expextedSelector: jsonFields(matchUnixfsJson, "foo", "bar"), + }, + { + name: "leading slash optional", + path: "foo/bar", + expextedSelector: jsonFields(matchUnixfsJson, "foo", "bar"), + }, + { + name: "trailing slash optional", + path: "/foo/bar/", + expextedSelector: jsonFields(matchUnixfsJson, "foo", "bar"), + }, + // a go-ipld-prime specific thing, not clearly specified by path spec (?) + { + name: ".. 
is a field named ..", + path: "/foo/../bar/", + expextedSelector: jsonFields(matchUnixfsJson, "foo", "..", "bar"), + }, + { + // a go-ipld-prime specific thing, not clearly specified by path spec + name: "redundant slashes ignored", + path: "foo///bar", + expextedSelector: jsonFields(matchUnixfsJson, "foo", "bar"), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + sel := unixfsnode.UnixFSPathSelector(tc.path) + require.Equal(t, tc.expextedSelector, mustDagJson(sel)) + }) + } +} + +func TestUnixFSPathSelectorBuilder(t *testing.T) { + testCases := []struct { + name string + path string + target builder.SelectorSpec + matchPath bool + expextedSelector string + }{ + { + name: "empty path", + path: "", + target: unixfsnode.ExploreAllRecursivelySelector, + expextedSelector: exploreAllJson, + }, + { + name: "empty path shallow", + path: "", + target: unixfsnode.MatchUnixFSPreloadSelector, + expextedSelector: matchUnixfsPreloadJson, + }, + { + name: "single field", + path: "/foo", + expextedSelector: jsonFields(exploreAllJson, "foo"), + target: unixfsnode.ExploreAllRecursivelySelector, + }, + { + name: "single field, match path", + path: "/foo", + expextedSelector: jsonFieldsMatchPoint(exploreAllJson, "foo"), + target: unixfsnode.ExploreAllRecursivelySelector, + matchPath: true, + }, + { + name: "single field shallow", + path: "/foo", + expextedSelector: jsonFields(matchUnixfsPreloadJson, "foo"), + target: unixfsnode.MatchUnixFSPreloadSelector, + }, + { + name: "multiple fields", + path: "/foo/bar", + expextedSelector: jsonFields(exploreAllJson, "foo", "bar"), + target: unixfsnode.ExploreAllRecursivelySelector, + }, + { + name: "multiple fields, match path", + path: "/foo/bar", + expextedSelector: jsonFieldsMatchPoint(exploreAllJson, "foo", "bar"), + target: unixfsnode.ExploreAllRecursivelySelector, + matchPath: true, + }, + { + name: "multiple fields shallow", + path: "/foo/bar", + expextedSelector: jsonFields(matchUnixfsPreloadJson, 
"foo", "bar"), + target: unixfsnode.MatchUnixFSPreloadSelector, + }, + { + name: "leading slash optional", + path: "foo/bar", + expextedSelector: jsonFields(exploreAllJson, "foo", "bar"), + target: unixfsnode.ExploreAllRecursivelySelector, + }, + { + name: "trailing slash optional", + path: "/foo/bar/", + expextedSelector: jsonFields(exploreAllJson, "foo", "bar"), + target: unixfsnode.ExploreAllRecursivelySelector, + }, + // a go-ipld-prime specific thing, not clearly specified by path spec (?) + { + name: ".. is a field named ..", + path: "/foo/../bar/", + expextedSelector: jsonFields(exploreAllJson, "foo", "..", "bar"), + target: unixfsnode.ExploreAllRecursivelySelector, + }, + { + // a go-ipld-prime specific thing, not clearly specified by path spec + name: "redundant slashes ignored", + path: "foo///bar", + expextedSelector: jsonFields(exploreAllJson, "foo", "bar"), + target: unixfsnode.ExploreAllRecursivelySelector, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + sel := unixfsnode.UnixFSPathSelectorBuilder(tc.path, tc.target, tc.matchPath) + require.Equal(t, tc.expextedSelector, mustDagJson(sel)) + }) + } +} + +func jsonFields(target string, fields ...string) string { + var sb strings.Builder + for _, n := range fields { + // explore interpret-as (~) next (>), explore field (f) + specific field (f>), with field name + sb.WriteString(fmt.Sprintf(`{"~":{">":{"f":{"f>":{"%s":`, n)) + } + sb.WriteString(target) + sb.WriteString(strings.Repeat(`}}},"as":"unixfs"}}`, len(fields))) + return sb.String() +} + +func jsonFieldsMatchPoint(target string, fields ...string) string { + var sb strings.Builder + for _, n := range fields { + // union (|) of match (.) 
and explore interpret-as (~) next (>), explore field (f) + specific field (f>), with field name + sb.WriteString(fmt.Sprintf(`{"|":[{".":{}},{"~":{">":{"f":{"f>":{"%s":`, n)) + } + sb.WriteString(target) + sb.WriteString(strings.Repeat(`}}},"as":"unixfs"}}]}`, len(fields))) + return sb.String() +} + +func mustDagJson(n ipld.Node) string { + byts, err := ipld.Encode(n, dagjson.Encode) + if err != nil { + panic(err) + } + return string(byts) +}