From bd1e54b59b1b5e44cf300e77a1044b8f74edf3ff Mon Sep 17 00:00:00 2001 From: Andy Gallagher Date: Sun, 14 Jul 2024 16:04:17 +0100 Subject: [PATCH] Add fuzzy-matching option for tag names --- .../gu/contentapi/porter/graphql}/Edge.scala | 4 +-- .../contentapi/porter/graphql/RootQuery.scala | 1 + .../porter/graphql/TagQueryParameters.scala | 20 ++++++++++-- src/main/scala/datastore/DocumentRepo.scala | 1 + .../scala/datastore/ElasticsearchRepo.scala | 32 ++++++++++++------- 5 files changed, 43 insertions(+), 15 deletions(-) rename src/main/scala/{deprecated/anotherschema => com/gu/contentapi/porter/graphql}/Edge.scala (95%) diff --git a/src/main/scala/deprecated/anotherschema/Edge.scala b/src/main/scala/com/gu/contentapi/porter/graphql/Edge.scala similarity index 95% rename from src/main/scala/deprecated/anotherschema/Edge.scala rename to src/main/scala/com/gu/contentapi/porter/graphql/Edge.scala index 48ec3ec..b9f6ca5 100644 --- a/src/main/scala/deprecated/anotherschema/Edge.scala +++ b/src/main/scala/com/gu/contentapi/porter/graphql/Edge.scala @@ -9,11 +9,11 @@ import java.nio.charset.StandardCharsets import java.util.Base64 import scala.util.Try import io.circe.syntax._ -@deprecated("you should be using com.gu.contentapi.porter.graphql") + case class Edge[T:io.circe.Decoder](totalCount:Long, endCursor:Option[String], hasNextPage:Boolean, nodes:Seq[T]) { def map[V:io.circe.Decoder](mapper:(T)=>V) = Edge[V](totalCount, endCursor, hasNextPage, nodes.map(mapper)) } -@deprecated("you should be using com.gu.contentapi.porter.graphql") + object Edge { private val logger = LoggerFactory.getLogger(getClass) private val encoder = Base64.getEncoder diff --git a/src/main/scala/com/gu/contentapi/porter/graphql/RootQuery.scala b/src/main/scala/com/gu/contentapi/porter/graphql/RootQuery.scala index 1358325..84f3860 100644 --- a/src/main/scala/com/gu/contentapi/porter/graphql/RootQuery.scala +++ b/src/main/scala/com/gu/contentapi/porter/graphql/RootQuery.scala @@ -74,6 +74,7 @@ 
object RootQuery { arguments = TagQueryParameters.AllTagQueryParameters, resolve = ctx => ctx.ctx.repo.marshalledTags(ctx arg TagQueryParameters.QueryString, + ctx arg TagQueryParameters.Fuzziness, ctx arg TagQueryParameters.tagId, ctx arg TagQueryParameters.Section, ctx arg TagQueryParameters.TagType, diff --git a/src/main/scala/com/gu/contentapi/porter/graphql/TagQueryParameters.scala b/src/main/scala/com/gu/contentapi/porter/graphql/TagQueryParameters.scala index cd17ec8..2c3d86d 100644 --- a/src/main/scala/com/gu/contentapi/porter/graphql/TagQueryParameters.scala +++ b/src/main/scala/com/gu/contentapi/porter/graphql/TagQueryParameters.scala @@ -96,13 +96,29 @@ object TagQueryParameters { ) ) + val FuzzinessOptions = EnumType( + "FuzzinessOptions", + Some("Valid options for making a fuzzy-match query"), + List( + EnumValue("AUTO", + value="AUTO", + description=Some("Generates an edit distance based on the length of the term. If the term is >5 chars, then 2 edits allowed; if <3 chars then no edits allowed") + ), + EnumValue("OFF", + value="OFF", + description=Some("Disable fuzzy-matching") + ) + ) + ) + val tagId = Argument("tagId", OptionInputType(StringType), description = "Retrieve this specific tag") val Section = Argument("section", OptionInputType(StringType), description = "Only return tags from this section") val TagType = Argument("type", OptionInputType(TagTypes), description = "Type of the tag to return") - val QueryString = Argument("q", OptionInputType(StringType), description = "Generic Lucene query string for finding tags") + val QueryString = Argument("q", OptionInputType(StringType), description = "Search for tags that match this public-facing name") + val Fuzziness = Argument("fuzzy", OptionInputType(FuzzinessOptions), description = "Perform a fuzzy-matching query (default). 
Set to `OFF` to disable fuzzy-matching.") val Category = Argument("category", OptionInputType(StringType), description = "A category to match against tags") val Reference = Argument("reference", OptionInputType(StringType), description = "A reference to match against tags") - val AllTagQueryParameters = QueryString :: tagId :: Section :: TagType :: Category :: + val AllTagQueryParameters = QueryString :: tagId :: Section :: TagType :: Fuzziness :: Category :: Reference :: Cursor :: OrderBy :: Limit :: Nil val NonPaginatedTagQueryParameters = Section :: TagType :: Nil diff --git a/src/main/scala/datastore/DocumentRepo.scala b/src/main/scala/datastore/DocumentRepo.scala index 14d545c..01325b7 100644 --- a/src/main/scala/datastore/DocumentRepo.scala +++ b/src/main/scala/datastore/DocumentRepo.scala @@ -21,6 +21,7 @@ trait DocumentRepo { limit: Option[Int], cursor: Option[String]): Future[Edge[Content]] def marshalledTags(maybeQuery:Option[String], + maybeFuzziness:Option[String], maybeTagId:Option[String], maybeSection: Option[String], tagType:Option[String], diff --git a/src/main/scala/datastore/ElasticsearchRepo.scala b/src/main/scala/datastore/ElasticsearchRepo.scala index beeb488..a23d3cd 100644 --- a/src/main/scala/datastore/ElasticsearchRepo.scala +++ b/src/main/scala/datastore/ElasticsearchRepo.scala @@ -8,7 +8,7 @@ import com.sksamuel.elastic4s.ElasticDsl._ import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future import com.sksamuel.elastic4s.requests.searches.SearchResponse -import com.sksamuel.elastic4s.requests.searches.queries.{ExistsQuery, NestedQuery, Query, RangeQuery} +import com.sksamuel.elastic4s.requests.searches.queries.{DisMaxQuery, ExistsQuery, Fuzzy, FuzzyQuery, NestedQuery, Query, RangeQuery} import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery import com.sksamuel.elastic4s.requests.searches.queries.matches.{FieldWithOptionalBoost, MatchAllQuery, MatchQuery, MultiMatchQuery} import 
com.sksamuel.elastic4s.requests.searches.sort.{FieldSort, ScoreSort, Sort, SortOrder} @@ -185,8 +185,19 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20 private def tagQueryParams(maybeTagId:Option[String], maybeSection:Option[String], tagType:Option[String], maybeCategory:Option[String], - maybeReferences: Option[String]):Seq[Query] = { + maybeReferences: Option[String], queryString:Option[String], fuzziness:Option[String]):Seq[Query] = { Seq( + queryString.map(qs=>{ + if(fuzziness.getOrElse("AUTO") != "OFF") { + //Why DisMax here? Because we want to include exact-matches as well, if they are relevant. E.g. FuzzyQuery on "politics" returns no results! + DisMaxQuery(Seq( + FuzzyQuery("webTitle", qs, fuzziness), + MatchQuery("webTitle", qs) + )) + } else { + MatchQuery("webTitle", qs) + } + }), maybeTagId.map(MatchQuery("id", _)), maybeSection.map(MatchQuery("sectionId", _)), tagType.map({ @@ -204,18 +215,16 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20 private def buildTagQuery(maybeTagId:Option[String], maybeSection:Option[String], tagType:Option[String], maybeQuery:Option[String], + maybeFuzziness:Option[String], maybeCategory:Option[String], maybeReferences:Option[String]) = { val baseSearch = search("tag") - val searchWithQuery = maybeQuery match { - case Some(q)=>baseSearch.query(q) - case None=>baseSearch - } - val params = tagQueryParams(maybeTagId, maybeSection, tagType, maybeCategory, maybeReferences) + + val params = tagQueryParams(maybeTagId, maybeSection, tagType, maybeCategory, maybeReferences, maybeQuery, maybeFuzziness) if(params.isEmpty) { - searchWithQuery + baseSearch } else { - searchWithQuery.query(BoolQuery(must=params)) + baseSearch.query(BoolQuery(must=params)) } } @@ -230,6 +239,7 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20 //FIXME: tagsForList / marshalledTags could be DRY'd out a bit override def 
marshalledTags(maybeQuery:Option[String], + maybeFuzziness:Option[String], maybeTagId:Option[String], maybeSection: Option[String], tagType:Option[String], @@ -249,7 +259,7 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20 Edge.decodeCursor(cursor) match { case Right(maybeCursor)=> client.execute { - buildTagQuery(maybeTagId, maybeSection, tagType, maybeQuery, maybeCategory, maybeReferences) + buildTagQuery(maybeTagId, maybeSection, tagType, maybeQuery, maybeFuzziness, maybeCategory, maybeReferences) .sortBy(sortParam) .limit(pageSize) .searchAfter(maybeCursor) @@ -281,7 +291,7 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20 val tagIdMatches = tagIdList.map(MatchQuery("id", _)) val response = client.execute { - val restrictions = tagQueryParams(None, maybeSection, tagType, maybeCategory, maybeReferences) + val restrictions = tagQueryParams(None, maybeSection, tagType, maybeCategory, maybeReferences, None, None) if(restrictions.nonEmpty) { search("tag").query(