Skip to content

Commit

Permalink
Merge pull request #232 from SwissDataScienceCenter/filter-invalid-se…
Browse files Browse the repository at this point in the history
…arch-results

Filter invalid search results
  • Loading branch information
eikek authored Nov 27, 2024
2 parents 86ab943 + 8538e1a commit 50a0101
Show file tree
Hide file tree
Showing 9 changed files with 169 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@ import io.renku.search.model.*
import io.renku.search.query.Query
import io.renku.search.solr.client.SearchSolrSuite
import io.renku.search.solr.client.SolrDocumentGenerators.*
import io.renku.search.solr.documents.{EntityDocument, User as SolrUser}
import io.renku.search.solr.documents.EntityDocument
import io.renku.solr.client.DocVersion
import io.renku.solr.client.ResponseBody
import munit.CatsEffectSuite
import org.scalacheck.Gen
import scribe.Scribe

class SearchApiSpec extends CatsEffectSuite with SearchSolrSuite:
Expand All @@ -41,29 +40,43 @@ class SearchApiSpec extends CatsEffectSuite with SearchSolrSuite:
private given Scribe[IO] = scribe.cats[IO]

test("do a lookup in Solr to find entities matching the given phrase"):
val project1 = projectDocumentGen(
"matching",
"matching description",
Gen.const(None),
Gen.const(None),
Gen.const(Visibility.Public)
).generateOne
val project2 = projectDocumentGen(
"disparate",
"disparate description",
Gen.const(None),
Gen.const(None),
Gen.const(Visibility.Public)
).generateOne
val user = userDocumentGen.generateOne
val project1 = projectDocumentGenForInsert
.map(p =>
p.copy(
name = Name("matching"),
description = Description("matching description").some,
createdBy = user.id,
namespace = user.namespace,
visibility = Visibility.Public
)
)
.generateOne
val project2 = projectDocumentGenForInsert
.map(p =>
p.copy(
name = Name("disparate"),
description = Description("disparate description").some,
createdBy = user.id,
namespace = user.namespace,
visibility = Visibility.Public
)
)
.generateOne
for {
client <- IO(searchSolrClient())
searchApi = new SearchApiImpl[IO](client)
_ <- client.upsert((project1 :: project2 :: Nil).map(_.widen))
_ <- client.upsert((project1 :: project2 :: user :: Nil).map(_.widen))
results <- searchApi
.query(AuthContext.anonymous)(mkQuery("matching"))
.query(AuthContext.anonymous)(mkQuery("matching type:Project"))
.map(_.fold(err => fail(s"Calling Search API failed with $err"), identity))

expected = toApiEntities(project1).toSet
expected = toApiEntities(
project1.copy(
creatorDetails = ResponseBody.single(user).some,
namespaceDetails = ResponseBody.single(user).some
)
).toSet
obtained = results.items.map(scoreToNone).toSet
} yield assert(
expected.diff(obtained).isEmpty,
Expand All @@ -72,14 +85,21 @@ class SearchApiSpec extends CatsEffectSuite with SearchSolrSuite:

test("return Project and User entities"):
val userId = ModelGenerators.idGen.generateOne
val user = SolrUser(userId, DocVersion.NotExists, FirstName("exclusive").some)
val project = projectDocumentGen(
"exclusive",
"exclusive description",
Gen.const(None),
Gen.const(None),
Gen.const(Visibility.Public)
).generateOne.copy(createdBy = userId)
val user = userDocumentGen
.map(u => u.copy(id = userId, firstName = FirstName("exclusive").some))
.generateOne
val project = projectDocumentGenForInsert
.map(p =>
p.copy(
name = Name("exclusive"),
description = Description("exclusive description").some,
createdBy = userId,
namespace = user.namespace,
visibility = Visibility.Public
)
)
.generateOne
.copy(createdBy = userId)
for {
client <- IO(searchSolrClient())
searchApi = new SearchApiImpl[IO](client)
Expand All @@ -89,7 +109,10 @@ class SearchApiSpec extends CatsEffectSuite with SearchSolrSuite:
.map(_.fold(err => fail(s"Calling Search API failed with $err"), identity))

expected = toApiEntities(
project.copy(creatorDetails = ResponseBody.single(user).some),
project.copy(
creatorDetails = ResponseBody.single(user).some,
namespaceDetails = ResponseBody.single(user).some
),
user
).toSet
obtained = results.items.map(scoreToNone).toSet
Expand All @@ -104,6 +127,6 @@ class SearchApiSpec extends CatsEffectSuite with SearchSolrSuite:
case e: SearchEntity.Group => e.copy(score = None)

private def mkQuery(phrase: String): QueryInput =
QueryInput.pageOne(Query.parse(s"Fields $phrase").fold(sys.error, identity))
QueryInput.pageOne(Query.parse(phrase).fold(sys.error, identity))

private def toApiEntities(e: EntityDocument*) = e.map(EntityConverter.apply)
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ object RenkuEntityQuery:
def apply(role: SearchRole, sq: SolrQuery, limit: Int, offset: Int): QueryData =
QueryData(QueryString(sq.query.value, limit, offset))
.addFilter(
SolrToken.kindIs(DocumentKind.FullEntity).value
SolrToken.kindIs(DocumentKind.FullEntity).value,
SolrToken.namespaceExists.value,
SolrToken.createdByExists.value,
"{!join from=namespace to=namespace}(_type:User OR _type:Group)"
)
.addFilter(constrainRole(role).map(_.value)*)
.withSort(sq.sort)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package io.renku.search.solr.query

import cats.Monad
import cats.effect.Sync
import cats.syntax.all.*

import io.renku.search.query.Query
import io.renku.search.solr.SearchRole
Expand All @@ -33,7 +34,7 @@ final class LuceneQueryInterpreter[F[_]: Monad]
private val encoder = SolrTokenEncoder[F, Query]

def run(ctx: Context[F], query: Query): F[SolrQuery] =
encoder.encode(ctx, query)
encoder.encode(ctx, query).map(_.emptyToAll)

object LuceneQueryInterpreter:
def forSync[F[_]: Sync](role: SearchRole): QueryInterpreter.WithContext[F] =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ final case class SolrQuery(
def ++(next: SolrQuery): SolrQuery =
SolrQuery(query && next.query, sort ++ next.sort)

def emptyToAll: SolrQuery =
if (query.isEmpty) SolrQuery(SolrToken.all, sort)
else this

object SolrQuery:
val empty: SolrQuery = SolrQuery(SolrToken.empty, SolrSort.empty)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ object SolrToken:
def fromVisibility(v: Visibility): SolrToken = v.name
private def fromEntityType(et: EntityType): SolrToken = et.name

val all: SolrToken = "*:*"

def fromKeyword(kw: Keyword): SolrToken =
StringEscape.queryChars(kw.value)

Expand Down Expand Up @@ -79,6 +81,11 @@ object SolrToken:
def namespaceIs(ns: Namespace): SolrToken =
fieldIs(SolrField.namespace, fromNamespace(ns))

def namespaceExists: SolrToken = fieldExists(SolrField.namespace)

def createdByExists: SolrToken =
"(createdBy:[* TO *] OR (*:* AND -_type:Project))"

def createdDateIs(date: Instant): SolrToken =
fieldIs(SolrField.creationDate, fromInstant(date))
def createdDateGt(date: Instant): SolrToken =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,15 @@ class RenkuEntityQuerySpec extends FunSuite:
query("bla", adminRole),
SolrToken.kindIs(DocumentKind.FullEntity).value
)

test("only entities with existing namespace property"):
assertFilter(
query("bla", adminRole),
SolrToken.namespaceExists.value
)

test("only projects with createdBy property"):
assertFilter(
query("bla", adminRole),
SolrToken.createdByExists.value
)
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,71 @@ class SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSuite:
override def munitFixtures: Seq[munit.AnyFixture[?]] =
List(solrServer, searchSolrClient)

test("ignore entities with non-resolvable namespace"):
val user = userDocumentGen.generateOne
val group = groupDocumentGen.generateOne
val randomNs = ModelGenerators.namespaceGen.generateOne
val project0 = projectDocumentGenForInsert.generateOne.copy(
createdBy = user.id,
namespace = group.namespace.some
)
val project1 = projectDocumentGenForInsert.generateOne.copy(
createdBy = user.id,
namespace = randomNs.some
)

for
client <- IO(searchSolrClient())
_ <- client.upsert(Seq(project0.widen, project1.widen, user.widen, group.widen))

qr <- client.queryEntity(
SearchRole.admin(Id("admin")),
Query.empty,
10,
0
)
_ = assert(
!qr.responseBody.docs.map(_.id).contains(project1.id),
"project with non-existing namespace was in the result set"
)
_ = assertEquals(qr.responseBody.docs.size, 3)
yield ()

test("ignore entities with non-existing namespace"):
val user = userDocumentGen.generateOne
val group = groupDocumentGen.generateOne
val project0 = projectDocumentGen(
"project-test0uae",
"project-test0uae description",
Gen.const(None),
Gen.const(None)
).generateOne.copy(createdBy = user.id, namespace = group.namespace.some)
val project1 = projectDocumentGen(
"project-test1",
"project-test1 description",
Gen.const(None),
Gen.const(None)
).generateOne.copy(createdBy = user.id, namespace = None)

for
client <- IO(searchSolrClient())
_ <- client.upsert(Seq(project0.widen, project1.widen, user.widen, group.widen))

qr <- client.queryEntity(
SearchRole.admin(Id("admin")),
Query.parse("test0uae").toOption.get,
10,
0
)
_ = assertEquals(qr.responseBody.docs.size, 1)
yield ()

test("load project with resolved namespace and creator"):
val user = userDocumentGen.generateOne
val group = groupDocumentGen.generateOne
val project = projectDocumentGen(
"project-test0",
"project-test0 description",
"project-test1trfg",
"project-test1trfg description",
Gen.const(None),
Gen.const(None)
).generateOne.copy(createdBy = user.id, namespace = group.namespace.some)
Expand All @@ -59,7 +118,7 @@ class SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSuite:

qr <- client.queryEntity(
SearchRole.admin(Id("admin")),
Query.parse("test0").toOption.get,
Query.parse("test1trfg").toOption.get,
10,
0
)
Expand All @@ -77,16 +136,18 @@ class SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSuite:
yield ()

test("be able to insert and fetch a Project document"):
val project =
projectDocumentGen(
"solr-project",
"solr project description",
Gen.const(None),
Gen.const(None)
).generateOne
for {
client <- IO(searchSolrClient())
_ <- client.upsert(Seq(project.widen))
user <- IO(userDocumentGen.generateOne)
project <- IO(
projectDocumentGenForInsert.generateOne
.copy(
createdBy = user.id,
namespace = user.namespace,
name = Name("solr project")
)
)
_ <- client.upsert(Seq(project.widen, user.widen))
qr <- client.queryEntity(
SearchRole.admin(Id("admin")),
Query.parse("solr").toOption.get,
Expand Down Expand Up @@ -160,12 +221,14 @@ class SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSuite:
for
client <- IO(searchSolrClient())
entityMembers <- IO(entityMembersGen.suchThat(_.nonEmpty).generateOne)
user <- IO(userDocumentGen.generateOne)
project <- IO(
projectDocumentGenForInsert
.map(p => p.setMembers(entityMembers).copy(visibility = Visibility.Private))
.generateOne
.copy(createdBy = user.id, namespace = user.namespace)
)
_ <- client.upsertSuccess(Seq(project))
_ <- client.upsertSuccess(Seq(project, user))
member = entityMembers.allIds.head
nonMember <- IO(ModelGenerators.idGen.generateOne)
query = Query(Query.Segment.idIs(project.id.value))
Expand All @@ -185,19 +248,20 @@ class SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSuite:
test("search partial words"):
for
client <- IO(searchSolrClient())
user <- IO(userDocumentGen.generateOne)
project <- IO(
projectDocumentGen(
"NeuroDesk",
"This is a Neurodesk project",
Gen.const(None),
Gen.const(None),
Gen.const(Visibility.Public)
).generateOne
).generateOne.copy(createdBy = user.id, namespace = user.namespace)
)
_ <- client.upsertSuccess(Seq(project))
_ <- client.upsertSuccess(Seq(project, user))
result1 <- client.queryEntity(
SearchRole.anonymous,
Query(Query.Segment.text("neuro")),
Query(Query.Segment.text("neuro"), Query.Segment.idIs(project.id.value)),
1,
0
)
Expand All @@ -206,7 +270,7 @@ class SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSuite:

result2 <- client.queryEntity(
SearchRole.anonymous,
Query(Query.Segment.nameIs("neuro")),
Query(Query.Segment.nameIs("neuro"), Query.Segment.idIs(project.id.value)),
1,
0
)
Expand All @@ -215,15 +279,12 @@ class SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSuite:
yield ()

test("delete all entities"):
val project =
projectDocumentGen(
"solr-project",
"solr project description",
Gen.const(None),
Gen.const(None)
).generateOne
val user = userDocumentGen.generateOne
val group = groupDocumentGen.generateOne
val project = projectDocumentGenForInsert.generateOne.copy(
createdBy = user.id,
namespace = group.namespace.some
)
val role = SearchRole.admin(Id("admin"))
val query = Query(Query.Segment.idIs(user.id.value, group.id.value, project.id.value))
for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ trait SolrDocumentGenerators:
idGen,
Gen.option(userFirstNameGen),
Gen.option(userLastNameGen),
Gen.option(ModelGenerators.namespaceGen)
Gen.some(ModelGenerators.namespaceGen)
)
.flatMapN { case (id, f, l, ns) =>
User.of(id, ns, f, l)
Expand Down
4 changes: 3 additions & 1 deletion nix/services.nix
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@

services.dev-redis = {
enable = true;
instance = "search";
instances = {
search = { port = 6379; };
};
};

services.openapi-docs = {
Expand Down

0 comments on commit 50a0101

Please sign in to comment.