diff --git a/src/engine/sparqlExpressions/NaryExpression.h b/src/engine/sparqlExpressions/NaryExpression.h index 9f3b2c9f18..a746478866 100644 --- a/src/engine/sparqlExpressions/NaryExpression.h +++ b/src/engine/sparqlExpressions/NaryExpression.h @@ -65,7 +65,8 @@ SparqlExpression::Ptr makeStrLangTagExpression(SparqlExpression::Ptr child1, SparqlExpression::Ptr child2); SparqlExpression::Ptr makeStrExpression(SparqlExpression::Ptr child); -SparqlExpression::Ptr makeIriOrUriExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeIriOrUriExpression(SparqlExpression::Ptr child, + SparqlExpression::Ptr baseIri); SparqlExpression::Ptr makeStrlenExpression(SparqlExpression::Ptr child); SparqlExpression::Ptr makeSubstrExpression(SparqlExpression::Ptr string, SparqlExpression::Ptr start, diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index cc39fbca9f..234bab18c8 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -115,14 +115,36 @@ struct LiftStringFunction { // IRI or URI // -// 1. Check for `BASE` URL and if it exists, prepend it. -// 2. What's the correct behavior for non-strings, like `1` or `true`? +// 1. What's the correct behavior for non-strings, like `1` or `true`? // -// @1: TODO implement `BASE` -// @2: Only a `LiteralOrIri` or an `Id` from `Vocab`/`LocalVocab` is in +// @1: Only a `LiteralOrIri` or an `Id` from `Vocab`/`LocalVocab` is in // consideration within the `IriOrUriValueGetter`, hence automatically // ignores values like `1`, `true`, `Date` etc. 
-using IriOrUriExpression = NARY<1, FV>; + +const Iri& extractIri(const IdOrLiteralOrIri& litOrIri) { + AD_CORRECTNESS_CHECK(std::holds_alternative(litOrIri)); + const auto& baseIriOrUri = std::get(litOrIri); + AD_CORRECTNESS_CHECK(baseIriOrUri.isIri()); + return baseIriOrUri.getIri(); +} + +[[maybe_unused]] auto applyBaseIfPresent = + [](IdOrLiteralOrIri iri, const IdOrLiteralOrIri& base) -> IdOrLiteralOrIri { + if (std::holds_alternative(iri)) { + AD_CORRECTNESS_CHECK(std::get(iri).isUndefined()); + return iri; + } + const auto& baseIri = extractIri(base); + if (baseIri.empty()) { + return iri; + } + // TODO Avoid unnecessary string copies because of conversion. + return LiteralOrIri{Iri::fromIrirefConsiderBase( + extractIri(iri).toStringRepresentation(), baseIri.getBaseIri(false), + baseIri.getBaseIri(true))}; +}; +using IriOrUriExpression = + NARY<2, FV>; // STRLEN [[maybe_unused]] auto strlen = [](std::string_view s) { @@ -551,8 +573,8 @@ Expr make(std::same_as auto&... children) { } Expr makeStrExpression(Expr child) { return make(child); } -Expr makeIriOrUriExpression(Expr child) { - return make(child); +Expr makeIriOrUriExpression(Expr child, SparqlExpression::Ptr baseIri) { + return make(child, baseIri); } Expr makeStrlenExpression(Expr child) { return make(child); } diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 85ced1ed73..f1355c1b04 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -1162,8 +1162,16 @@ TripleComponent::Iri Visitor::visit(Parser::IriContext* ctx) { } // ____________________________________________________________________________________ -string Visitor::visit(Parser::IrirefContext* ctx) { - return RdfEscaping::unescapeIriref(ctx->getText()); +string Visitor::visit(Parser::IrirefContext* ctx) const { + if (baseIri_.empty()) { + return ctx->getText(); + } + // TODO Avoid unnecessary string copies because 
of conversion. + // Handle IRIs with base IRI. + return std::move( + ad_utility::triple_component::Iri::fromIrirefConsiderBase( + ctx->getText(), baseIri_.getBaseIri(false), baseIri_.getBaseIri(true)) + .toStringRepresentation()); } // ____________________________________________________________________________________ @@ -1211,8 +1219,17 @@ DatasetClause SparqlQleverVisitor::visit(Parser::UsingClauseContext* ctx) { // ____________________________________________________________________________________ void Visitor::visit(Parser::PrologueContext* ctx) { - visitVector(ctx->baseDecl()); - visitVector(ctx->prefixDecl()); + // Process in an interleaved way, so PREFIX statements are processed correctly + // to only use the BASE IRIs defined before them, not after them. + for (auto* child : ctx->children) { + if (auto* baseDecl = dynamic_cast(child)) { + visit(baseDecl); + } else { + auto* prefixDecl = dynamic_cast(child); + AD_CORRECTNESS_CHECK(prefixDecl != nullptr); + visit(prefixDecl); + } + } // Remember the whole prologue (we need this when we encounter a SERVICE // clause, see `visit(ServiceGraphPatternContext*)` below. 
if (ctx->getStart() && ctx->getStop()) { @@ -1221,8 +1238,16 @@ void Visitor::visit(Parser::PrologueContext* ctx) { } // ____________________________________________________________________________________ -void Visitor::visit(const Parser::BaseDeclContext* ctx) { - reportNotSupported(ctx, "BASE declarations are"); +void Visitor::visit(Parser::BaseDeclContext* ctx) { + auto rawIri = ctx->iriref()->getText(); + // A scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ), see RFC 3986. + bool hasScheme = ctre::starts_with<"<[A-Za-z][A-Za-z0-9+\\-.]*:">(rawIri); + if (!hasScheme) { + reportError( + ctx, + "The base IRI must be an absolute IRI with a scheme, was: " + rawIri); + } + baseIri_ = TripleComponent::Iri::fromIriref(visit(ctx->iriref())); } // ____________________________________________________________________________________ @@ -2330,7 +2355,9 @@ ExpressionPtr Visitor::visit([[maybe_unused]] Parser::BuiltInCallContext* ctx) { if (functionName == "str") { return createUnary(&makeStrExpression); } else if (functionName == "iri" || functionName == "uri") { - return createUnary(&makeIriOrUriExpression); + AD_CORRECTNESS_CHECK(argList.size() == 1, argList.size()); + return makeIriOrUriExpression(std::move(argList[0]), + std::make_unique(baseIri_)); } else if (functionName == "strlang") { return createBinary(&makeStrLangTagExpression); } else if (functionName == "strdt") { diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index c1d891aa8e..b9a29a6fbc 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -80,6 +80,7 @@ class SparqlQleverVisitor { // `addVisibleVariable`. std::vector visibleVariables_{}; PrefixMap prefixMap_{}; + ad_utility::triple_component::Iri baseIri_{}; // We need to remember the prologue (prefix declarations) when we encounter it // because we need it when we encounter a SERVICE query. When there is no // prologue, this string simply remains empty. 
@@ -122,7 +123,7 @@ class SparqlQleverVisitor { void visit(Parser::PrologueContext* ctx); // ___________________________________________________________________________ - [[noreturn]] static void visit(const Parser::BaseDeclContext* ctx); + void visit(Parser::BaseDeclContext* ctx); // ___________________________________________________________________________ void visit(Parser::PrefixDeclContext* ctx); @@ -469,7 +470,7 @@ class SparqlQleverVisitor { TripleComponent::Iri visit(Parser::IriContext* ctx); - static string visit(Parser::IrirefContext* ctx); + string visit(Parser::IrirefContext* ctx) const; string visit(Parser::PrefixedNameContext* ctx); diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp index 8d87b3c2dd..5ee06749e1 100644 --- a/test/SparqlAntlrParserTest.cpp +++ b/test/SparqlAntlrParserTest.cpp @@ -1587,8 +1587,16 @@ TEST(SparqlParser, builtInCall) { expectBuiltInCall("ucaSe(?x)", matchUnary(&makeUppercaseExpression)); expectBuiltInCall("lCase(?x)", matchUnary(&makeLowercaseExpression)); expectBuiltInCall("StR(?x)", matchUnary(&makeStrExpression)); - expectBuiltInCall("iRI(?x)", matchUnary(&makeIriOrUriExpression)); - expectBuiltInCall("uRI(?x)", matchUnary(&makeIriOrUriExpression)); + expectBuiltInCall( + "iRI(?x)", + matchNaryWithChildrenMatchers( + &makeIriOrUriExpression, variableExpressionMatcher(Variable{"?x"}), + matchLiteralExpression(ad_utility::triple_component::Iri{}))); + expectBuiltInCall( + "uRI(?x)", + matchNaryWithChildrenMatchers( + &makeIriOrUriExpression, variableExpressionMatcher(Variable{"?x"}), + matchLiteralExpression(ad_utility::triple_component::Iri{}))); expectBuiltInCall("year(?x)", matchUnary(&makeYearExpression)); expectBuiltInCall("month(?x)", matchUnary(&makeMonthExpression)); expectBuiltInCall("tz(?x)", matchUnary(&makeTimezoneStrExpression)); diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index 891b990e92..3957383441 100644 --- a/test/SparqlExpressionTest.cpp +++ 
b/test/SparqlExpressionTest.cpp @@ -570,13 +570,15 @@ TEST(SparqlExpression, dateOperators) { } // _____________________________________________________________________________________ +namespace { auto checkStrlen = testUnaryExpression<&makeStrlenExpression>; auto checkStr = testUnaryExpression<&makeStrExpression>; -auto checkIriOrUri = testUnaryExpression<&makeIriOrUriExpression>; -static auto makeStrlenWithStr = [](auto arg) { +auto checkIriOrUri = testNaryExpressionVec<&makeIriOrUriExpression>; +auto makeStrlenWithStr = [](auto arg) { return makeStrlenExpression(makeStrExpression(std::move(arg))); }; auto checkStrlenWithStrChild = testUnaryExpression; +} // namespace TEST(SparqlExpression, stringOperators) { // Test `StrlenExpression` and `StrExpression`. checkStrlen( @@ -611,20 +613,13 @@ TEST(SparqlExpression, stringOperators) { DateYearOrDuration(11853, DateYearOrDuration::Type::Year)); // Test `iriOrUriExpression`. // test invalid - checkIriOrUri(IdOrLiteralOrIriVec{U, IntId(2), DoubleId(12.99), dateDate, - dateLYear, T, F}, - IdOrLiteralOrIriVec{U, U, U, U, U, U, U}); + checkIriOrUri(IdOrLiteralOrIriVec{U, U, U, U, U, U, U}, + std::tuple{IdOrLiteralOrIriVec{U, IntId(2), DoubleId(12.99), + dateDate, dateLYear, T, F}, + IdOrLiteralOrIri{LocalVocabEntry{ + ad_utility::triple_component::Iri{}}}}); // test valid checkIriOrUri( - IdOrLiteralOrIriVec{ - lit("bimbim"), iriref(""), - lit("https://www.bimbimbam/2001/bamString"), - lit("http://www.w3.\torg/2001/\nXMLSchema#\runsignedShort"), - lit("http://www.w3.org/2001/XMLSchema#string"), - iriref(""), - testContext().notInVocabIri, testContext().notInVocabIriLit, - lit("http://example/"), iriref(""), - lit("\t\n\r")}, IdOrLiteralOrIriVec{ iriref(""), iriref(""), iriref(""), @@ -634,7 +629,35 @@ TEST(SparqlExpression, stringOperators) { iriref(""), iriref(""), iriref(""), iriref(""), - iriref("<\t\n\r>")}); + iriref("<\t\n\r>")}, + std::tuple{ + IdOrLiteralOrIriVec{ + lit("bimbim"), iriref(""), + 
lit("https://www.bimbimbam/2001/bamString"), + lit("http://www.w3.\torg/2001/\nXMLSchema#\runsignedShort"), + lit("http://www.w3.org/2001/XMLSchema#string"), + iriref(""), + testContext().notInVocabIri, testContext().notInVocabIriLit, + lit("http://example/"), iriref(""), + lit("\t\n\r")}, + IdOrLiteralOrIri{ + LocalVocabEntry{ad_utility::triple_component::Iri{}}}}); + + // test with base iri + checkIriOrUri( + IdOrLiteralOrIriVec{ + U, + iriref(""), + iriref(""), + iriref(""), + iriref(""), + iriref(""), + }, + std::tuple{ + IdOrLiteralOrIriVec{U, lit("bimbim"), iriref(""), + lit("https://www.bimbimbam/2001/bamString"), + lit("/hello"), iriref("")}, + IdOrLiteralOrIri{iriref("")}}); // A simple test for uniqueness of the cache key. auto c1a = makeStrlenExpression(std::make_unique(iri(""))) diff --git a/test/SparqlParserTest.cpp b/test/SparqlParserTest.cpp index c232f2186a..828da03f0e 100644 --- a/test/SparqlParserTest.cpp +++ b/test/SparqlParserTest.cpp @@ -1349,3 +1349,38 @@ TEST(ParserTest, HandlesSurrogatesCorrectly) { // So writing unit tests for these cases is not possible without creating // semi-invalid UTF-8 strings. } + +// _____________________________________________________________________________ +TEST(ParserTest, BaseDeclaration) { + // Simple case + auto query1 = SparqlParser::parseQuery( + "BASE SELECT * WHERE { ?s

}"); + EXPECT_EQ(getFirstTriple(query1), + "{s: ?s, p: , o: }"); + // Relative and absolute IRIs mixed + auto query2 = SparqlParser::parseQuery( + "BASE " + "SELECT * WHERE {

}"); + EXPECT_EQ(getFirstTriple(query2), + "{s: ," + " p: ," + " o: }"); + + // Cascading prefixes + auto query3 = SparqlParser::parseQuery( + "BASE " + "PREFIX ex1: " + "BASE " + "PREFIX ex2: " + "BASE " + "SELECT * WHERE { ex2:hello ex1:test }"); + EXPECT_EQ(getFirstTriple(query3), + "{s: ," + " p: ," + " o: }"); + + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + SparqlParser::parseQuery("BASE BASE " + "SELECT * WHERE { ?s ?p ?o }"), + ::testing::HasSubstr("absolute IRI"), InvalidSparqlQueryException); +}