Skip to content

Commit

Permalink
Fix GlobPrefix. (#220)
Browse files Browse the repository at this point in the history
  • Loading branch information
ncruces authored Jan 28, 2025
1 parent c9135b9 commit b2f7ab8
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 17 deletions.
65 changes: 57 additions & 8 deletions ext/regexp/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ package regexp
import (
"errors"
"regexp"
"regexp/syntax"
"strings"
"unicode/utf8"

"github.com/ncruces/go-sqlite3"
)
Expand Down Expand Up @@ -50,16 +52,63 @@ func Register(db *sqlite3.Conn) error {
// SELECT column WHERE column GLOB :glob_prefix AND column REGEXP :regexp
//
// [LIKE optimization]: https://sqlite.org/optoverview.html#the_like_optimization
func GlobPrefix(re *regexp.Regexp) string {
prefix, complete := re.LiteralPrefix()
i := strings.IndexAny(prefix, "*?[")
if i < 0 {
if complete {
return prefix
func GlobPrefix(expr string) string {
re, err := syntax.Parse(expr, syntax.Perl)
if err != nil {
return "" // no match possible
}
prog, err := syntax.Compile(re.Simplify())
if err != nil {
return "" // notest
}

i := &prog.Inst[prog.Start]

var empty syntax.EmptyOp
loop1:
for {
switch i.Op {
case syntax.InstFail:
return "" // notest
case syntax.InstCapture, syntax.InstNop:
// skip
case syntax.InstEmptyWidth:
empty |= syntax.EmptyOp(i.Arg)
default:
break loop1
}
i = len(prefix)
i = &prog.Inst[i.Out]
}
if empty&syntax.EmptyBeginText == 0 {
return "*" // not anchored
}
return prefix[:i] + "*"

var glob strings.Builder
loop2:
for {
switch i.Op {
case syntax.InstFail:
return "" // notest
case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop:
// skip
case syntax.InstRune, syntax.InstRune1:
if len(i.Rune) != 1 || syntax.Flags(i.Arg)&syntax.FoldCase != 0 {
break loop2
}
switch r := i.Rune[0]; r {
case '*', '?', '[', utf8.RuneError:
break loop2
default:
glob.WriteRune(r)
}
default:
break loop2
}
i = &prog.Inst[i.Out]
}

glob.WriteByte('*')
return glob.String()
}

func load(ctx sqlite3.Context, i int, expr string) (*regexp.Regexp, error) {
Expand Down
55 changes: 46 additions & 9 deletions ext/regexp/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package regexp
import (
"database/sql"
"regexp"
"strings"
"testing"

"github.com/ncruces/go-sqlite3/driver"
Expand Down Expand Up @@ -108,19 +109,55 @@ func TestGlobPrefix(t *testing.T) {
re string
want string
}{
{``, ""},
{`a`, "a"},
{`a*`, "*"},
{`a+`, "a*"},
{`ab*`, "a*"},
{`ab+`, "ab*"},
{`a\?b`, "a*"},
{`[`, ""},
{``, "*"},
{`^`, "*"},
{`a`, "*"},
{`ab`, "*"},
{`^a`, "a*"},
{`^a*`, "*"},
{`^a+`, "a*"},
{`^ab*`, "a*"},
{`^ab+`, "ab*"},
{`^a\?b`, "a*"},
{`^[a-z]`, "*"},
}
for _, tt := range tests {
t.Run(tt.re, func(t *testing.T) {
if got := GlobPrefix(regexp.MustCompile(tt.re)); got != tt.want {
t.Errorf("GlobPrefix() = %v, want %v", got, tt.want)
if got := GlobPrefix(tt.re); got != tt.want {
t.Errorf("GlobPrefix(%v) = %v, want %v", tt.re, got, tt.want)
}
})
}
}

func FuzzGlobPrefix(f *testing.F) {
f.Add(``, ``)
f.Add(`[`, ``)
f.Add(`^`, ``)
f.Add(`a`, `a`)
f.Add(`ab`, `b`)
f.Add(`^a`, `a`)
f.Add(`^a*`, `ab`)
f.Add(`^a+`, `ab`)
f.Add(`^ab*`, `ab`)
f.Add(`^ab+`, `ab`)
f.Add(`^a\?b`, `ab`)
f.Add(`^[a-z]`, `ab`)

f.Fuzz(func(t *testing.T, lit, str string) {
re, err := regexp.Compile(lit)
if err != nil {
t.SkipNow()
}
if re.MatchString(str) {
prefix, ok := strings.CutSuffix(GlobPrefix(lit), "*")
if !ok {
t.Fatalf("missing * after %q for %q with %q", prefix, lit, str)
}
if !strings.HasPrefix(str, prefix) {
t.Fatalf("missing prefix %q for %q with %q", prefix, lit, str)
}
}
})
}

0 comments on commit b2f7ab8

Please sign in to comment.