Skip to content

Commit

Permalink
xhtml: Use rangefuncs
Browse files Browse the repository at this point in the history
  • Loading branch information
earthboundkid committed May 17, 2024
1 parent c74e95d commit 7abc00c
Show file tree
Hide file tree
Showing 14 changed files with 137 additions and 103 deletions.
22 changes: 11 additions & 11 deletions internal/blocko/clean.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func Clean(root *html.Node) {

func MergeSiblings(root *html.Node) {
// find all matches first
inlineSiblings := xhtml.FindAll(root, func(n *html.Node) bool {
inlineSiblings := xhtml.SelectSlice(root, func(n *html.Node) bool {

Check failure on line 22 in internal/blocko/clean.go

View workflow job for this annotation

GitHub Actions / Vuln (1.22.x)

undefined: xhtml.SelectSlice
brother := n.NextSibling
return brother != nil &&
inlineElements[n.DataAtom] &&
Expand All @@ -38,7 +38,7 @@ func MergeSiblings(root *html.Node) {
}

func RemoveEmptyP(root *html.Node) {
emptyP := xhtml.FindAll(root, func(n *html.Node) bool {
emptyP := xhtml.SelectSlice(root, func(n *html.Node) bool {
return n.DataAtom == atom.P && isEmpty(n)
})
for _, n := range emptyP {
Expand All @@ -47,7 +47,7 @@ func RemoveEmptyP(root *html.Node) {
}

func RemoveMarks(root *html.Node) {
marks := xhtml.FindAll(root, xhtml.WithAtom(atom.Mark))
marks := xhtml.SelectSlice(root, xhtml.WithAtom(atom.Mark))
for _, mark := range marks {
xhtml.UnnestChildren(mark)
}
Expand All @@ -62,9 +62,9 @@ var whitespaceReplacer = strings.NewReplacer(
)

func replaceWhitespace(root *html.Node) {
xhtml.VisitAll(root, func(n *html.Node) {
for n := range xhtml.All(root) {
if n.Type != html.TextNode {
return
continue
}
// Ignore children of pre/code
codeblock := xhtml.Closest(n, func(n *html.Node) bool {
Expand All @@ -75,7 +75,7 @@ func replaceWhitespace(root *html.Node) {
if codeblock == nil {
n.Data = whitespaceReplacer.Replace(n.Data)
}
})
}
}

var specialReplacer = strings.NewReplacer(
Expand All @@ -92,21 +92,21 @@ var specialReplacer = strings.NewReplacer(
)

func replaceSpecials(root *html.Node) {
xhtml.VisitAll(root, func(n *html.Node) {
for n := range xhtml.All(root) {
if n.Type != html.TextNode {
return
continue
}
// Ignore children not of p
codeblock := xhtml.Closest(n, xhtml.WithAtom(atom.P))
if codeblock == nil {
return
continue
}
n.Data = specialReplacer.Replace(n.Data)
})
}
}

func fixBareLI(root *html.Node) {
bareLIs := xhtml.FindAll(root, func(n *html.Node) bool {
bareLIs := xhtml.SelectSlice(root, func(n *html.Node) bool {
child := n.FirstChild
return n.DataAtom == atom.Li && child != nil &&
(child.Type == html.TextNode ||
Expand Down
14 changes: 7 additions & 7 deletions internal/blocko/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,23 @@ var inlineElements = map[atom.Atom]bool{

func isEmpty(n *html.Node) bool {
root := n
n = xhtml.Find(n, func(n *html.Node) bool {
for n := range xhtml.All(n) {
if n == root {
return false
continue
}
switch n.Type {
case html.TextNode:
s := strings.ReplaceAll(n.Data, "\n", " ")
s = strings.TrimSpace(s)
if s == "" {
return false
continue
}
case html.ElementNode:
if inlineElements[n.DataAtom] {
return false
continue
}
}
return true
})
return n == nil
return false
}
return true
}
2 changes: 1 addition & 1 deletion internal/blocko/minify.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Minify(r io.Reader) (*nethtml.Node, error) {
if err != nil {
return nil, err
}
body := xhtml.Find(doc, xhtml.WithBody)
body := xhtml.Select(doc, xhtml.WithBody)
if body == nil {
return nil, fmt.Errorf("could not find body")
}
Expand Down
32 changes: 32 additions & 0 deletions internal/iterx/iterx.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Package iterx has iteration utilities.
package iterx

import "iter"

Check failure on line 4 in internal/iterx/iterx.go

View workflow job for this annotation

GitHub Actions / Vuln (1.22.x)

could not import iter (invalid package name: "")

// Filter returns a sequence of matching items.
func Filter[T any](seq iter.Seq[T], match func(T) bool) iter.Seq[T] {
return func(yield func(T) bool) {
for v := range seq {
if match(v) && !yield(v) {
return
}
}
}
}

// Collect returns a slice collected from a sequence.
func Collect[T any](seq iter.Seq[T]) []T {
var s []T
for v := range seq {
s = append(s, v)
}
return s
}

// First returns the first item in a sequence or the zero value.
func First[T any](seq iter.Seq[T]) (v T) {
for v := range seq {
return v
}
return
}
4 changes: 2 additions & 2 deletions internal/mailchimp/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ func ImportPage(ctx context.Context, cl *http.Client, page string) (body string,
}

func PageContent(doc *html.Node) (body string, err error) {
bNode := xhtml.Find(doc, xhtml.WithBody)
bNode := xhtml.Select(doc, xhtml.WithBody)
if bNode == nil {
err = fmt.Errorf("could not find body element")
return
}

remove := xhtml.FindAll(bNode, func(n *html.Node) bool {
remove := xhtml.SelectSlice(bNode, func(n *html.Node) bool {
return n.Type == html.CommentNode ||
n.DataAtom == atom.Style ||
n.DataAtom == atom.Script ||
Expand Down
28 changes: 14 additions & 14 deletions internal/xhtml/children.go
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
package xhtml

import (
"iter"

Check failure on line 4 in internal/xhtml/children.go

View workflow job for this annotation

GitHub Actions / Vuln (1.22.x)

could not import iter (invalid package name: "")
"strings"

"github.com/spotlightpa/almanack/internal/iterx"
"golang.org/x/net/html"
)

// Children returns a slice containing the children of n.
func Children(n *html.Node) []*html.Node {
if n == nil {
return nil
}
count := 0
for c := n.FirstChild; c != nil; c = c.NextSibling {
count++
}
s := make([]*html.Node, 0, count)
for c := n.FirstChild; c != nil; c = c.NextSibling {
s = append(s, c)
// Children returns a seq of the children of n.
func Children(n *html.Node) iter.Seq[*html.Node] {
return func(yield func(*html.Node) bool) {
if n == nil {
return
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
if !yield(c) {
return
}
}
}
return s
}

func ReplaceWith(old, new *html.Node) {
Expand Down Expand Up @@ -79,7 +79,7 @@ func UnnestChildren(n *html.Node) {
if n.Parent == nil {
return
}
children := Children(n)
children := iterx.Collect(Children(n))

Check failure on line 82 in internal/xhtml/children.go

View workflow job for this annotation

GitHub Actions / Vuln (1.22.x)

cannot infer T (/home/runner/work/almanack/almanack/internal/iterx/iterx.go:18:14)
RemoveAll(children)
for _, c := range children {
n.Parent.InsertBefore(c, n)
Expand Down
13 changes: 6 additions & 7 deletions internal/xhtml/children_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,37 +32,36 @@ func TestUnnestChildren(t *testing.T) {

{
clone := xhtml.Clone(n)
i := xhtml.Find(clone, xhtml.WithAtom(atom.I))
i := xhtml.Select(clone, xhtml.WithAtom(atom.I))
xhtml.UnnestChildren(i)
be.Equal(t, `<a><b>test <i>one</i> <em><i>two</i></em> </b></a>`,
xhtml.InnerHTML(clone))
}
{
clone := xhtml.Clone(n)
em := xhtml.Find(clone, xhtml.WithAtom(atom.Em))
em := xhtml.Select(clone, xhtml.WithAtom(atom.Em))
xhtml.UnnestChildren(em)
be.Equal(t, `<a><b><i>test</i> <i>one</i> <i>two</i> </b></a>`,
xhtml.InnerHTML(clone))
}
{
clone := xhtml.Clone(n)
a := xhtml.Find(clone, xhtml.WithAtom(atom.A))
a := xhtml.Select(clone, xhtml.WithAtom(atom.A))
xhtml.UnnestChildren(a)
be.Equal(t, `<b><i>test</i> <i>one</i> <em><i>two</i></em> </b>`,
xhtml.InnerHTML(clone))
}
{
clone := xhtml.Clone(n)
b := xhtml.Find(clone, xhtml.WithAtom(atom.B))
b := xhtml.Select(clone, xhtml.WithAtom(atom.B))
xhtml.UnnestChildren(b)
be.Equal(t, `<a><i>test</i> <i>one</i> <em><i>two</i></em> </a>`,
xhtml.InnerHTML(clone))
}
{
clone := xhtml.Clone(n)
is := xhtml.FindAll(clone, xhtml.WithAtom(atom.I))
for _, n := range is {
xhtml.UnnestChildren(n)
for _, c := range xhtml.SelectSlice(clone, xhtml.WithAtom(atom.I)) {
xhtml.UnnestChildren(c)
}
be.Equal(t, `<a><b>test one <em>two</em> </b></a>`,
xhtml.InnerHTML(clone))
Expand Down
64 changes: 32 additions & 32 deletions internal/xhtml/iter.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
//go:build goexperiment.rangefunc

// Package xhtml makes x/net/html easier
package xhtml

import (
"iter"

"github.com/spotlightpa/almanack/internal/iterx"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
Expand All @@ -27,47 +32,42 @@ func all(n *html.Node, yield func(*html.Node) bool) bool {
return _continue
}

// Find returns the first matching child node or nil.
func Find(n *html.Node, match func(*html.Node) bool) *html.Node {
var found *html.Node
all(n, func(n *html.Node) bool {
if match(n) {
found = n
return _break
}
return _continue
})
return found
// All returns an iterator that visits all child nodes of n in depth-first
// pre-order. The traversal itself is delegated to all; iteration ends when
// the consumer's yield reports false.
// NOTE(review): whether n itself is yielded depends on all — confirm there.
func All(n *html.Node) iter.Seq[*html.Node] {
	var walk iter.Seq[*html.Node] = func(yield func(*html.Node) bool) {
		// The boolean result of all is not needed by the iterator.
		_ = all(n, yield)
	}
	return walk
}

// VisitAll vists child nodes in depth-first pre-order.
func VisitAll(n *html.Node, callback func(*html.Node)) {
all(n, func(n *html.Node) bool {
callback(n)
return _continue
})
// SelectAll returns an iterator over the nodes visited by All(n) for which
// match reports true, in the order All visits them.
func SelectAll(n *html.Node, match func(*html.Node) bool) iter.Seq[*html.Node] {
	nodes := All(n)
	return iterx.Filter(nodes, match)
}

// FindAll returns a slice of matching nodes.
func FindAll(root *html.Node, match func(*html.Node) bool) []*html.Node {
var found []*html.Node
VisitAll(root, func(n *html.Node) {
if match(n) {
found = append(found, n)
// SelectSlice returns a slice of the nodes that SelectAll(n, match) yields,
// in iteration order. All matches are gathered before returning, so the
// result is a snapshot rather than a live view of the tree.
func SelectSlice(n *html.Node, match func(*html.Node) bool) []*html.Node {
	matches := SelectAll(n, match)
	return iterx.Collect(matches)
}

// Select returns the first node that SelectAll(n, match) yields, or nil
// when no node matches.
func Select(n *html.Node, match func(*html.Node) bool) *html.Node {
	matches := SelectAll(n, match)
	return iterx.First(matches)
}

// Parents returns an iterator traversing the node and its parents.
func Parents(n *html.Node) iter.Seq[*html.Node] {
return func(yield func(*html.Node) bool) {
for p := n; p != nil; p = p.Parent {
if !yield(p) {
return
}
}
})
return found
}
}

// Closest traverses the node and its parents until it finds a node that matches.
func Closest(n *html.Node, match func(*html.Node) bool) *html.Node {
for n != nil {
if match(n) {
return n
}
n = n.Parent
}
return nil
return iterx.First(iterx.Filter(Parents(n), match))
}

func WithAtom(a atom.Atom) func(n *html.Node) bool {
Expand Down
16 changes: 8 additions & 8 deletions internal/xhtml/new_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,28 @@ func TestClone(t *testing.T) {
n, err := html.Parse(strings.NewReader(tc))
be.NilErr(t, err)
body := n.FirstChild.FirstChild.NextSibling
be.Equal(t, xhtml.Find(n, xhtml.WithAtom(atom.Body)), body)
be.Equal(t, xhtml.Select(n, xhtml.WithAtom(atom.Body)), body)

s := xhtml.InnerHTML(body)
be.Equal(be.Relaxed(t), tc, s)

n2 := xhtml.Clone(n)
body2 := n2.FirstChild.FirstChild.NextSibling
be.Equal(t, xhtml.Find(n2, xhtml.WithAtom(atom.Body)), body2)
be.Equal(t, xhtml.Select(n2, xhtml.WithAtom(atom.Body)), body2)
be.Unequal(t, body, body2)

s = xhtml.InnerHTML(body2)
be.Equal(be.Relaxed(t), tc, s)

m := map[*html.Node]bool{}
xhtml.VisitAll(n, func(n *html.Node) {
m[n] = true
})
for c := range xhtml.All(n) {
m[c] = true
}

xhtml.VisitAll(n2, func(n *html.Node) {
if m[n] {
for c := range xhtml.All(n2) {
if m[c] {
t.Error("duplicate node:", n)
}
})
}
}
}
5 changes: 3 additions & 2 deletions internal/xhtml/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ func InnerText(n *html.Node) string {
var buf strings.Builder
buf.Grow(256)

VisitAll(n, func(n *html.Node) {
for n := range All(n) {

Check failure on line 63 in internal/xhtml/string.go

View workflow job for this annotation

GitHub Actions / Vuln (1.22.x)

undefined: All
if n.Type == html.TextNode {
buf.WriteString(n.Data)
}
})
}

return strings.TrimSpace(buf.String())
}
Loading

0 comments on commit 7abc00c

Please sign in to comment.