// GlobPrefix derives a GLOB pattern from the regular expression expr.
// The pattern consists of the literal text that the compiled program
// requires at the very start of the input, followed by "*".
// It is meant to be paired with the regular expression in a query such as:
//
//	SELECT column WHERE column GLOB :glob_prefix AND column REGEXP :regexp
//
// The result is "" when expr cannot be parsed or compiled, or when the
// compiled program hits a fail instruction while being scanned.
// The result is "*" when the expression is not anchored to the beginning
// of the text, or when no literal prefix can be extracted.
func GlobPrefix(expr string) string {
	parsed, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return "" // no match possible
	}
	prog, err := syntax.Compile(parsed.Simplify())
	if err != nil {
		return "" // notest
	}

	pc := prog.Start

	// Phase 1: step over the leading zero-width instructions, remembering
	// every empty-width assertion that is checked before any input is consumed.
	var asserted syntax.EmptyOp
	for scanning := true; scanning; {
		inst := &prog.Inst[pc]
		switch inst.Op {
		case syntax.InstFail:
			return "" // notest
		case syntax.InstEmptyWidth:
			asserted |= syntax.EmptyOp(inst.Arg)
			pc = int(inst.Out)
		case syntax.InstCapture, syntax.InstNop:
			pc = int(inst.Out)
		default:
			scanning = false
		}
	}
	if asserted&syntax.EmptyBeginText == 0 {
		return "*" // not anchored
	}

	// Phase 2: collect single-rune, case-sensitive instructions until
	// something that is not a plain literal shows up.
	var prefix strings.Builder
collect:
	for {
		inst := &prog.Inst[pc]
		switch inst.Op {
		case syntax.InstFail:
			return "" // notest
		case syntax.InstRune, syntax.InstRune1:
			folded := syntax.Flags(inst.Arg)&syntax.FoldCase != 0
			if folded || len(inst.Rune) != 1 {
				break collect
			}
			r := inst.Rune[0]
			// GLOB metacharacters cannot be copied verbatim into the
			// pattern, and neither can the Unicode replacement character.
			if r == '*' || r == '?' || r == '[' || r == utf8.RuneError {
				break collect
			}
			prefix.WriteRune(r)
		case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop:
			// zero-width: contributes nothing to the prefix
		default:
			break collect
		}
		pc = int(inst.Out)
	}

	return prefix.String() + "*"
}
b/ext/regexp/regexp_test.go index d5852ef2..f40dd537 100644 --- a/ext/regexp/regexp_test.go +++ b/ext/regexp/regexp_test.go @@ -3,6 +3,7 @@ package regexp import ( "database/sql" "regexp" + "strings" "testing" "github.com/ncruces/go-sqlite3/driver" @@ -108,19 +109,55 @@ func TestGlobPrefix(t *testing.T) { re string want string }{ - {``, ""}, - {`a`, "a"}, - {`a*`, "*"}, - {`a+`, "a*"}, - {`ab*`, "a*"}, - {`ab+`, "ab*"}, - {`a\?b`, "a*"}, + {`[`, ""}, + {``, "*"}, + {`^`, "*"}, + {`a`, "*"}, + {`ab`, "*"}, + {`^a`, "a*"}, + {`^a*`, "*"}, + {`^a+`, "a*"}, + {`^ab*`, "a*"}, + {`^ab+`, "ab*"}, + {`^a\?b`, "a*"}, + {`^[a-z]`, "*"}, } for _, tt := range tests { t.Run(tt.re, func(t *testing.T) { - if got := GlobPrefix(regexp.MustCompile(tt.re)); got != tt.want { - t.Errorf("GlobPrefix() = %v, want %v", got, tt.want) + if got := GlobPrefix(tt.re); got != tt.want { + t.Errorf("GlobPrefix(%v) = %v, want %v", tt.re, got, tt.want) } }) } } + +func FuzzGlobPrefix(f *testing.F) { + f.Add(``, ``) + f.Add(`[`, ``) + f.Add(`^`, ``) + f.Add(`a`, `a`) + f.Add(`ab`, `b`) + f.Add(`^a`, `a`) + f.Add(`^a*`, `ab`) + f.Add(`^a+`, `ab`) + f.Add(`^ab*`, `ab`) + f.Add(`^ab+`, `ab`) + f.Add(`^a\?b`, `ab`) + f.Add(`^[a-z]`, `ab`) + + f.Fuzz(func(t *testing.T, lit, str string) { + re, err := regexp.Compile(lit) + if err != nil { + t.SkipNow() + } + if re.MatchString(str) { + prefix, ok := strings.CutSuffix(GlobPrefix(lit), "*") + if !ok { + t.Fatalf("missing * after %q for %q with %q", prefix, lit, str) + } + if !strings.HasPrefix(str, prefix) { + t.Fatalf("missing prefix %q for %q with %q", prefix, lit, str) + } + } + }) +}