Skip to content

Commit

Permalink
Split fulltext search terms for both correctness and performance
Browse files Browse the repository at this point in the history
Previously for searches like:

    Content LIKE "foo" AND Content LIKE "bar"

If a ticket has "foo" saved in one attachment and "bar" in another, it
wouldn't match because these fulltext criteria needed to match on a single
attachment. What we want is to match on ticket level, i.e. as long as the
ticket has "foo" and "bar" saved in some attachments of the given ticket, it
should match.

Thus we split the query into 2 and intersect them:

    (Content LIKE "foo") INTERSECT (Content LIKE "bar")

For searches like:

    Content LIKE "foo" OR Subject LIKE "bar"

We split the query and unite them accordingly:

    (Content LIKE "foo") UNION (Subject LIKE "bar")

The latter version has much better performance as it makes use of fulltext
indexes, unfortunately the former version can not right now.

See also 8450f0a
  • Loading branch information
sunnavy committed Jan 23, 2024
1 parent 6f3cb67 commit 8788a99
Show file tree
Hide file tree
Showing 5 changed files with 293 additions and 1 deletion.
141 changes: 141 additions & 0 deletions lib/RT/Interface/Web/QueryBuilder/Tree.pm
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,147 @@ sub ParseSQL {
return @results;
}


=head2 Split Type => intersect|union, Fields => [FIELD1, FIELD2, ...]
E.g. to split "AND" Content terms: Type => 'insersect', Fields => ['Content']
Status = "open" AND Content LIKE "foo" AND Content LIKE "bar"
will be split into 2 subqueries:
Status = "open" AND Content LIKE "foo"
Status = "open" AND Content LIKE "bar"
then they can be joined via "INTERSECT".
To split "OR" Content terms: Type => 'union', Fields => ['Content']
Content LIKE "foo" OR Subject LIKE "foo"
will be split into 2 subqueries:
Content LIKE "foo"
Subject LIKE "foo"
then they can be joined via "UNION". Unlike the original version, the new SQL
can make use of fulltext indexes.
Note that queries like:
Content LIKE "foo" AND Subject LIKE "foo"
will not be split as there are no benifits, unlike the C<OR> example above.
=cut

sub Split {
my $self = shift;
my %args = (
Type => undef,
Fields => undef,
@_,
);

if ( !$args{Type} ) {
RT->Logger->warning("Missing Type, skipping");
return $self;
}

if ( $args{Type} !~ /^(?:intersect|union)$/i ) {
RT->Logger->warning("Unsupported type $args{Type}, should be 'intersect' or 'union', skipping");
return $self;
}

if ( !$args{Fields} || @{ $args{Fields} } == 0 ) {
RT->Logger->warning("Missing Fields, skipping");
return $self;
}

my @items;

my $relation = lc $args{Type} eq 'intersect' ? 'and' : 'or';

$self->traverse(
sub {
my $node = shift;
return unless $node->isLeaf;

if ( grep { lc $node->getNodeValue->{Key} eq lc $_ } @{ $args{Fields} } ) {
$node = $node->getParent;
if ( lc( $node->getNodeValue // '' ) eq $relation ) {
my @children = $node->getAllChildren;

my @splits;
my @others;
for my $child (@children) {
if ( $child->isLeaf && grep { lc $child->getNodeValue->{Key} eq lc $_ } @{ $args{Fields} } )
{
push @splits, $child;
}
else {
push @others, $child;
}
}

# Split others from split fields only if it's "OR" like "Content LIKE 'foo' OR Subject LIKE 'foo'"
return unless @splits > 1 || ( $relation eq 'or' && @splits + @others > 1 );

my $parent = $node->getParent;

my @list;

if ( $relation eq 'and' ) {
if ( @others ) {
for my $item ( @splits ) {
my $new = RT::Interface::Web::QueryBuilder::Tree->new( $relation, 'root');
$new->addChild($item);
$new->addChild($_->clone) for @others;
push @list, $new;
}
}
else {
@list = @splits;
}
}
else {
@list = @splits;
if (@others) {
my $others = RT::Interface::Web::QueryBuilder::Tree->new( $relation, 'root' );
$others->addChild( $_->clone ) for @others;
push @list, $others;
}
}

if ( $parent eq 'root' ) {
for my $item ( @list ) {
my $new = RT::Interface::Web::QueryBuilder::Tree->new( $relation, 'root');
$new->addChild($item->clone);
push @items, $new->clone->Split(%args);
}
}
else {
my $index = $node->getIndex;
$parent->removeChild($node);

for my $item ( @list ) {
$parent->insertChild( $index, $item );
push @items, $self->clone->Split(%args);
$parent->removeChild($item);
}
}

return 'ABORT';
}
}
}
);

return @items ? @items : $self;
}



RT::Base->_ImportOverlays();

1;
2 changes: 1 addition & 1 deletion lib/RT/Report.pm
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ sub SetupGroupings {
# within the matching tickets grouped by what is wanted.
$self->Columns( 'id' );
if ( RT->Config->Get('UseSQLForACLChecks') ) {
my $query = $self->BuildSelectQuery( PreferBind => 0 );
my $query = $self->{_split_query} || $self->BuildSelectQuery( PreferBind => 0 );
$self->CleanSlate;
$self->Limit( FIELD => 'Id', OPERATOR => 'IN', VALUE => "($query)", QUOTEVALUE => 0 );
}
Expand Down
102 changes: 102 additions & 0 deletions lib/RT/SearchBuilder.pm
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ sub CleanSlate {
$self->{'_sql_aliases'} = {};
delete $self->{'handled_disabled_column'};
delete $self->{'find_disabled_rows'};
delete $self->{'_split_query'};
delete $self->{'_untamed_order_by'};
return $self->SUPER::CleanSlate(@_);
}

Expand Down Expand Up @@ -1190,6 +1192,106 @@ sub CurrentUserCanSeeAll {
return $self->CurrentUser->HasRight( Right => 'SuperUser', Object => RT->System ) ? 1 : 0;
}

sub BuildSelectQuery {
my $self = shift;
return $self->_BuildQuery('BuildSelectQuery', @_);
}

sub BuildSelectCountQuery {
my $self = shift;
return $self->_BuildQuery('BuildSelectCountQuery', @_);
}

sub BuildSelectAndCountQuery {
my $self = shift;
return $self->_BuildQuery('BuildSelectAndCountQuery', @_);
}

sub _BuildQuery {
my $self = shift;
my $method = shift;
if ( my $query = $self->{_split_query} ) {
my $objects = $self->new( $self->CurrentUser );
$objects->Limit( FIELD => 'id', VALUE => "($query)", OPERATOR => 'IN', QUOTEVALUE => 0 );

# Sync page and columns related info
$objects->{$_} = $self->{$_} for qw/first_row show_rows columns/;

# Sync order by
if ( $self->{_untamed_order_by} ) {
$objects->OrderByCols( @{ $self->{_untamed_order_by} } );
}
elsif ( $self->{_order_by} ) {
$objects->OrderByCols( @{ $self->{_order_by} } );
}

my $query = $objects->$method(@_);
$self->{_bind_values} = $objects->{_bind_values};
return $query;
}
my $super = "SUPER::$method";
return $self->$super(@_);
}

sub _SplitQuery {
my $self = shift;

# SQLite doesn't support user defined precedences via parens for INTERSECT/UNION operations.
return if RT->Config->Get('DatabaseType') eq 'SQLite';

my $query = shift;
return unless $query;

return unless $self->can('SplitFields');
my @fields = $self->SplitFields;
return unless @fields;

my $tree = RT::Interface::Web::QueryBuilder::Tree->new;
$tree->ParseSQL(
Query => $query,
CurrentUser => $self->CurrentUser,
Class => ref $self,
);

my $split;
my %intersect_exists;
my @union_queries;
for my $intersect ( $tree->Split( Type => 'intersect', Fields => \@fields ) ) {
my $query = join ' ', map { $_->{TEXT} } @{ $intersect->__LinearizeTree };
next if $intersect_exists{$query}++;

my @union_selects;
my %union_exists;
for my $union ( $intersect->Split( Type => 'union', Fields => \@fields ) ) {
my $query = join ' ', map { $_->{TEXT} } @{ $union->__LinearizeTree };
next if $union_exists{$query}++;

my @intersects = $union->Split( Type => 'intersect', Fields => \@fields );
if ( @intersects > 1 ) {
push @union_selects, $self->_SplitQuery($query);
}
else {
my $collection = $self->new( $self->CurrentUser );
$collection->{_no_split} = 1;

$collection->FromSQL($query);
$collection->OrderByCols();
$collection->CurrentUserCanSee
if RT->Config->Get('UseSQLForACLChecks') && $collection->can('CurrentUserCanSee');
$collection->Columns('id');
push @union_selects, $collection->BuildSelectQuery( PreferBind => 0 );
}
}
push @union_queries, '( ' . join( ' UNION ', @union_selects ) . ' )';
$split = 1 if @union_selects > 1;
}

$split = 1 if @union_queries > 1;
return unless $split; # Return empty if the query is not split.

return '(' . join( ' INTERSECT ', @union_queries ) . ')';
}

RT::Base->_ImportOverlays();

1;
26 changes: 26 additions & 0 deletions lib/RT/Tickets.pm
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,23 @@ sub SortFields {
return (@SORTFIELDS);
}

=head2 SplitFields
Returns the list of fields that are supposed to be split into individual
subqueries and then combined later.
If fulltext search is enabled, it returns C<Content>, otherwise it returns
an empty list.
=cut

sub SplitFields {
my $self = shift;
my $config = RT->Config->Get('FullTextSearch') || {};
return 'Content' if $config->{Enable};
return;
}


# BEGIN SQL STUFF *********************************

Expand Down Expand Up @@ -1617,6 +1634,9 @@ sub OrderByCols {
my @res = ();
my $order = 0;

# Save original args so we can redo OrderByCols later for split queries, especially to joins tables if needed.
$self->{_untamed_order_by} = \@_;

foreach my $row (@args) {
if ( $row->{ALIAS} ) {
push @res, $row;
Expand Down Expand Up @@ -3719,6 +3739,12 @@ sub FromSQL {
);
}

if ( !$self->{_no_split} ) {
if ( my $split_query = $self->_SplitQuery($query) ) {
$self->{_split_query} = $split_query;
}
}

# set SB's dirty flag
$self->{'must_redo_search'} = 1;
$self->{'RecalcTicketLimits'} = 0;
Expand Down
23 changes: 23 additions & 0 deletions lib/RT/Transactions.pm
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,23 @@ sub SortFields {
return (@SORTFIELDS);
}

=head2 SplitFields
Returns the list of fields that are supposed to be split into individual
subqueries and then combined later.
If fulltext search is enabled, it returns C<Content>, otherwise it returns
an empty list.
=cut

sub SplitFields {
my $self = shift;
my $config = RT->Config->Get('FullTextSearch') || {};
return 'Content' if $config->{Enable};
return;
}

=head1 Limit Helper Routines
These routines are the targets of a dispatch table depending on the
Expand Down Expand Up @@ -1179,6 +1196,12 @@ sub FromSQL {
return (0, $error);
}

if ( !$self->{_no_split} ) {
if ( my $split_query = $self->_SplitQuery($query) ) {
$self->{_split_query} = $split_query;
}
}

# set SB's dirty flag
$self->{'must_redo_search'} = 1;

Expand Down

0 comments on commit 8788a99

Please sign in to comment.