Skip to content

Commit

Permalink
Support BASE declarations in SPARQL queries (#1786)
Browse files Browse the repository at this point in the history
Previously, the `BASE` keyword was supported only for `Turtle` input, not inside `SPARQL` queries. It is now supported in both cases.
  • Loading branch information
RobinTF authored Feb 14, 2025
1 parent 98caf37 commit 6349abd
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 34 deletions.
3 changes: 2 additions & 1 deletion src/engine/sparqlExpressions/NaryExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ SparqlExpression::Ptr makeStrLangTagExpression(SparqlExpression::Ptr child1,
SparqlExpression::Ptr child2);

SparqlExpression::Ptr makeStrExpression(SparqlExpression::Ptr child);
SparqlExpression::Ptr makeIriOrUriExpression(SparqlExpression::Ptr child);
SparqlExpression::Ptr makeIriOrUriExpression(SparqlExpression::Ptr child,
SparqlExpression::Ptr baseIri);
SparqlExpression::Ptr makeStrlenExpression(SparqlExpression::Ptr child);
SparqlExpression::Ptr makeSubstrExpression(SparqlExpression::Ptr string,
SparqlExpression::Ptr start,
Expand Down
36 changes: 29 additions & 7 deletions src/engine/sparqlExpressions/StringExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,36 @@ struct LiftStringFunction {

// IRI or URI
//
// 1. Check for `BASE` URL and if it exists, prepend it.
// 2. What's the correct behavior for non-strings, like `1` or `true`?
// 1. What's the correct behavior for non-strings, like `1` or `true`?
//
// @1: TODO implement `BASE`
// @2: Only a `LiteralOrIri` or an `Id` from `Vocab`/`LocalVocab` is in
// @1: Only a `LiteralOrIri` or an `Id` from `Vocab`/`LocalVocab` is in
// consideration within the `IriOrUriValueGetter`, hence automatically
// ignores values like `1`, `true`, `Date` etc.
using IriOrUriExpression = NARY<1, FV<std::identity, IriOrUriValueGetter>>;

// Return the `Iri` held by `litOrIri`.
//
// The argument must hold a `LocalVocabEntry`, and that entry must be an IRI;
// both conditions are enforced via `AD_CORRECTNESS_CHECK`. `Id`s and literals
// are therefore not accepted here.
//
// NOTE(review): the result is returned by reference and presumably refers to
// data owned by `litOrIri`, so the argument has to outlive the returned
// reference -- confirm against `LocalVocabEntry::getIri`.
const Iri& extractIri(const IdOrLiteralOrIri& litOrIri) {
AD_CORRECTNESS_CHECK(std::holds_alternative<LocalVocabEntry>(litOrIri));
// Despite the name, this helper is used for both the base IRI and the IRI
// operand (see `applyBaseIfPresent`).
const auto& baseIriOrUri = std::get<LocalVocabEntry>(litOrIri);
AD_CORRECTNESS_CHECK(baseIriOrUri.isIri());
return baseIriOrUri.getIri();
}

// Resolve `iri` against `base` for the `IRI()`/`URI()` expression.
//
// * If `iri` holds an `Id`, it must be the undefined `Id` (checked) and it is
//   passed through untouched.
// * If the IRI stored in `base` is empty, no base is in effect and `iri` is
//   passed through untouched.
// * Otherwise the string representation of `iri` is re-parsed via
//   `Iri::fromIrirefConsiderBase` using the two variants of the base IRI
//   obtained from `getBaseIri(false)` and `getBaseIri(true)`.
//
// `base` must hold an IRI (enforced inside `extractIri`).
[[maybe_unused]] auto applyBaseIfPresent =
    [](IdOrLiteralOrIri iri, const IdOrLiteralOrIri& base) -> IdOrLiteralOrIri {
  const bool operandIsId = std::holds_alternative<Id>(iri);
  if (operandIsId) {
    AD_CORRECTNESS_CHECK(std::get<Id>(iri).isUndefined());
  }
  // The short-circuit keeps `extractIri(base)` from being evaluated for `Id`
  // operands.
  if (operandIsId || extractIri(base).empty()) {
    return iri;
  }
  const auto& activeBase = extractIri(base);
  // TODO<RobinTF> Avoid unnecessary string copies because of conversion.
  const auto& operandAsString = extractIri(iri).toStringRepresentation();
  return LiteralOrIri{Iri::fromIrirefConsiderBase(
      operandAsString, activeBase.getBaseIri(false),
      activeBase.getBaseIri(true))};
};
// Expression type behind the SPARQL `IRI()`/`URI()` built-ins. It has two
// children: the operand to convert and the base IRI (see
// `makeIriOrUriExpression`), which are combined by `applyBaseIfPresent`.
// NOTE(review): presumably `IriOrUriValueGetter` is applied to both children
// before `applyBaseIfPresent` is invoked -- confirm against `NARY`/`FV`.
using IriOrUriExpression =
NARY<2, FV<decltype(applyBaseIfPresent), IriOrUriValueGetter>>;

// STRLEN
[[maybe_unused]] auto strlen = [](std::string_view s) {
Expand Down Expand Up @@ -554,8 +576,8 @@ CPP_template(typename T,
}
Expr makeStrExpression(Expr child) { return make<StrExpression>(child); }

Expr makeIriOrUriExpression(Expr child) {
return make<IriOrUriExpression>(child);
// Create the expression for the SPARQL `IRI()`/`URI()` built-ins.
// `child` is the operand to be converted to an IRI, `baseIri` is an expression
// that yields the base IRI against which the operand is resolved. The parser
// passes an `IriExpression` wrapping its current base IRI, which is an empty
// IRI when the query contains no `BASE` declaration.
Expr makeIriOrUriExpression(Expr child, SparqlExpression::Ptr baseIri) {
return make<IriOrUriExpression>(child, baseIri);
}

Expr makeStrlenExpression(Expr child) { return make<StrlenExpression>(child); }
Expand Down
40 changes: 33 additions & 7 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1162,8 +1162,16 @@ TripleComponent::Iri Visitor::visit(Parser::IriContext* ctx) {
}

// ____________________________________________________________________________________
string Visitor::visit(Parser::IrirefContext* ctx) {
return RdfEscaping::unescapeIriref(ctx->getText());
// Return the text of an IRIREF token, resolved against the current base IRI.
// Without an active `BASE` declaration (`baseIri_` is empty) the token text is
// returned verbatim. Otherwise the text is parsed by
// `Iri::fromIrirefConsiderBase` together with the two base variants obtained
// from `baseIri_.getBaseIri(false)` and `baseIri_.getBaseIri(true)`, and the
// string representation of the resulting IRI is returned.
string Visitor::visit(Parser::IrirefContext* ctx) const {
  const bool hasBase = !baseIri_.empty();
  if (hasBase) {
    // TODO<RobinTF> Avoid unnecessary string copies because of conversion.
    auto resolvedIri =
        ad_utility::triple_component::Iri::fromIrirefConsiderBase(
            ctx->getText(), baseIri_.getBaseIri(false),
            baseIri_.getBaseIri(true));
    // `resolvedIri` is about to be destroyed, so hand its string
    // representation to the caller by move instead of by copy.
    return std::move(resolvedIri.toStringRepresentation());
  }
  return ctx->getText();
}

// ____________________________________________________________________________________
Expand Down Expand Up @@ -1211,8 +1219,17 @@ DatasetClause SparqlQleverVisitor::visit(Parser::UsingClauseContext* ctx) {

// ____________________________________________________________________________________
void Visitor::visit(Parser::PrologueContext* ctx) {
visitVector(ctx->baseDecl());
visitVector(ctx->prefixDecl());
// Process in an interleaved way, so PREFIX statements are processed correctly
// to only use the BASE IRIs defined before them, not after them.
for (auto* child : ctx->children) {
if (auto* baseDecl = dynamic_cast<Parser::BaseDeclContext*>(child)) {
visit(baseDecl);
} else {
auto* prefixDecl = dynamic_cast<Parser::PrefixDeclContext*>(child);
AD_CORRECTNESS_CHECK(prefixDecl != nullptr);
visit(prefixDecl);
}
}
// Remember the whole prologue (we need this when we encounter a SERVICE
// clause, see `visit(ServiceGraphPatternContext*)` below.
if (ctx->getStart() && ctx->getStop()) {
Expand All @@ -1221,8 +1238,15 @@ void Visitor::visit(Parser::PrologueContext* ctx) {
}

// ____________________________________________________________________________________
void Visitor::visit(const Parser::BaseDeclContext* ctx) {
reportNotSupported(ctx, "BASE declarations are");
// Handle a `BASE <iri>` declaration: validate the IRI and store it in
// `baseIri_`, where it is picked up by subsequent IRIREF tokens and by the
// `IRI()`/`URI()` built-ins.
//
// The IRI must be absolute, i.e. it must start with a scheme followed by `:`.
// If it does not, the problem is reported via `reportError`, which is expected
// not to return (a relative base IRI surfaces as an
// `InvalidSparqlQueryException`).
void Visitor::visit(Parser::BaseDeclContext* ctx) {
  auto rawIri = ctx->iriref()->getText();
  // RFC 3986, section 3.1:
  //   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  // That is a leading letter followed by any number of scheme characters. The
  // `-` is escaped so that it is a literal hyphen and does not form the range
  // `+-.`, which would additionally admit `,`.
  bool hasScheme = ctre::starts_with<"<[A-Za-z][A-Za-z0-9+.\\-]*:">(rawIri);
  if (!hasScheme) {
    reportError(
        ctx,
        "The base IRI must be an absolute IRI with a scheme, was: " + rawIri);
  }
  // NOTE(review): `visit(ctx->iriref())` consults the previously active
  // `baseIri_` if there is one; for an absolute IRI this is presumably a
  // no-op -- confirm against `Iri::fromIrirefConsiderBase`.
  baseIri_ = TripleComponent::Iri::fromIriref(visit(ctx->iriref()));
}

// ____________________________________________________________________________________
Expand Down Expand Up @@ -2330,7 +2354,9 @@ ExpressionPtr Visitor::visit([[maybe_unused]] Parser::BuiltInCallContext* ctx) {
if (functionName == "str") {
return createUnary(&makeStrExpression);
} else if (functionName == "iri" || functionName == "uri") {
return createUnary(&makeIriOrUriExpression);
AD_CORRECTNESS_CHECK(argList.size() == 1, argList.size());
return makeIriOrUriExpression(std::move(argList[0]),
std::make_unique<IriExpression>(baseIri_));
} else if (functionName == "strlang") {
return createBinary(&makeStrLangTagExpression);
} else if (functionName == "strdt") {
Expand Down
5 changes: 3 additions & 2 deletions src/parser/sparqlParser/SparqlQleverVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class SparqlQleverVisitor {
// `addVisibleVariable`.
std::vector<Variable> visibleVariables_{};
PrefixMap prefixMap_{};
ad_utility::triple_component::Iri baseIri_{};
// We need to remember the prologue (prefix declarations) when we encounter it
// because we need it when we encounter a SERVICE query. When there is no
// prologue, this string simply remains empty.
Expand Down Expand Up @@ -122,7 +123,7 @@ class SparqlQleverVisitor {
void visit(Parser::PrologueContext* ctx);

// ___________________________________________________________________________
[[noreturn]] static void visit(const Parser::BaseDeclContext* ctx);
void visit(Parser::BaseDeclContext* ctx);

// ___________________________________________________________________________
void visit(Parser::PrefixDeclContext* ctx);
Expand Down Expand Up @@ -469,7 +470,7 @@ class SparqlQleverVisitor {

TripleComponent::Iri visit(Parser::IriContext* ctx);

static string visit(Parser::IrirefContext* ctx);
string visit(Parser::IrirefContext* ctx) const;

string visit(Parser::PrefixedNameContext* ctx);

Expand Down
12 changes: 10 additions & 2 deletions test/SparqlAntlrParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1587,8 +1587,16 @@ TEST(SparqlParser, builtInCall) {
expectBuiltInCall("ucaSe(?x)", matchUnary(&makeUppercaseExpression));
expectBuiltInCall("lCase(?x)", matchUnary(&makeLowercaseExpression));
expectBuiltInCall("StR(?x)", matchUnary(&makeStrExpression));
expectBuiltInCall("iRI(?x)", matchUnary(&makeIriOrUriExpression));
expectBuiltInCall("uRI(?x)", matchUnary(&makeIriOrUriExpression));
expectBuiltInCall(
"iRI(?x)",
matchNaryWithChildrenMatchers(
&makeIriOrUriExpression, variableExpressionMatcher(Variable{"?x"}),
matchLiteralExpression(ad_utility::triple_component::Iri{})));
expectBuiltInCall(
"uRI(?x)",
matchNaryWithChildrenMatchers(
&makeIriOrUriExpression, variableExpressionMatcher(Variable{"?x"}),
matchLiteralExpression(ad_utility::triple_component::Iri{})));
expectBuiltInCall("year(?x)", matchUnary(&makeYearExpression));
expectBuiltInCall("month(?x)", matchUnary(&makeMonthExpression));
expectBuiltInCall("tz(?x)", matchUnary(&makeTimezoneStrExpression));
Expand Down
53 changes: 38 additions & 15 deletions test/SparqlExpressionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,13 +573,15 @@ TEST(SparqlExpression, dateOperators) {
}

// _____________________________________________________________________________________
namespace {
auto checkStrlen = testUnaryExpression<&makeStrlenExpression>;
auto checkStr = testUnaryExpression<&makeStrExpression>;
auto checkIriOrUri = testUnaryExpression<&makeIriOrUriExpression>;
static auto makeStrlenWithStr = [](auto arg) {
auto checkIriOrUri = testNaryExpressionVec<&makeIriOrUriExpression>;
auto makeStrlenWithStr = [](auto arg) {
return makeStrlenExpression(makeStrExpression(std::move(arg)));
};
auto checkStrlenWithStrChild = testUnaryExpression<makeStrlenWithStr>;
} // namespace
TEST(SparqlExpression, stringOperators) {
// Test `StrlenExpression` and `StrExpression`.
checkStrlen(
Expand Down Expand Up @@ -614,20 +616,13 @@ TEST(SparqlExpression, stringOperators) {
DateYearOrDuration(11853, DateYearOrDuration::Type::Year));
// Test `iriOrUriExpression`.
// test invalid
checkIriOrUri(IdOrLiteralOrIriVec{U, IntId(2), DoubleId(12.99), dateDate,
dateLYear, T, F},
IdOrLiteralOrIriVec{U, U, U, U, U, U, U});
checkIriOrUri(IdOrLiteralOrIriVec{U, U, U, U, U, U, U},
std::tuple{IdOrLiteralOrIriVec{U, IntId(2), DoubleId(12.99),
dateDate, dateLYear, T, F},
IdOrLiteralOrIri{LocalVocabEntry{
ad_utility::triple_component::Iri{}}}});
// test valid
checkIriOrUri(
IdOrLiteralOrIriVec{
lit("bimbim"), iriref("<bambim>"),
lit("https://www.bimbimbam/2001/bamString"),
lit("http://www.w3.\torg/2001/\nXMLSchema#\runsignedShort"),
lit("http://www.w3.org/2001/XMLSchema#string"),
iriref("<http://www.w3.org/2001/XMLSchema#string>"),
testContext().notInVocabIri, testContext().notInVocabIriLit,
lit("http://example/"), iriref("<http://\t\t\nexample/>"),
lit("\t\n\r")},
IdOrLiteralOrIriVec{
iriref("<bimbim>"), iriref("<bambim>"),
iriref("<https://www.bimbimbam/2001/bamString>"),
Expand All @@ -637,7 +632,35 @@ TEST(SparqlExpression, stringOperators) {
iriref("<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>"),
iriref("<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>"),
iriref("<http://example/>"), iriref("<http://\t\t\nexample/>"),
iriref("<\t\n\r>")});
iriref("<\t\n\r>")},
std::tuple{
IdOrLiteralOrIriVec{
lit("bimbim"), iriref("<bambim>"),
lit("https://www.bimbimbam/2001/bamString"),
lit("http://www.w3.\torg/2001/\nXMLSchema#\runsignedShort"),
lit("http://www.w3.org/2001/XMLSchema#string"),
iriref("<http://www.w3.org/2001/XMLSchema#string>"),
testContext().notInVocabIri, testContext().notInVocabIriLit,
lit("http://example/"), iriref("<http://\t\t\nexample/>"),
lit("\t\n\r")},
IdOrLiteralOrIri{
LocalVocabEntry{ad_utility::triple_component::Iri{}}}});

// test with base iri
checkIriOrUri(
IdOrLiteralOrIriVec{
U,
iriref("<http://example.com/hi/bimbim>"),
iriref("<http://example.com/hi/bambim>"),
iriref("<https://www.bimbimbam/2001/bamString>"),
iriref("<http://example.com/hello>"),
iriref("<http://example.com/hello>"),
},
std::tuple{
IdOrLiteralOrIriVec{U, lit("bimbim"), iriref("<bambim>"),
lit("https://www.bimbimbam/2001/bamString"),
lit("/hello"), iriref("</hello>")},
IdOrLiteralOrIri{iriref("<http://example.com/hi>")}});

// A simple test for uniqueness of the cache key.
auto c1a = makeStrlenExpression(std::make_unique<IriExpression>(iri("<bim>")))
Expand Down
35 changes: 35 additions & 0 deletions test/SparqlParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1349,3 +1349,38 @@ TEST(ParserTest, HandlesSurrogatesCorrectly) {
// So writing unit tests for these cases is not possible without creating
// semi-invalid UTF-8 strings.
}

// _____________________________________________________________________________
// Check that `BASE` declarations in the prologue of a SPARQL query are applied
// to the IRIs of the query and that non-absolute base IRIs are rejected.
TEST(ParserTest, BaseDeclaration) {
  // A single base IRI: both relative IRIs of the triple are resolved against
  // it.
  {
    auto parsedQuery = SparqlParser::parseQuery(
        "BASE <http://example.org/> SELECT * WHERE { ?s <p> <test> }");
    EXPECT_EQ(
        getFirstTriple(parsedQuery),
        "{s: ?s, p: <http://example.org/p>, o: <http://example.org/test>}");
  }

  // Different kinds of IRIs in one triple: `</root>` replaces the path of the
  // base, `<p>` is appended to the base, and an absolute IRI is left as is.
  {
    auto parsedQuery = SparqlParser::parseQuery(
        "BASE <http://example.org/something> "
        "SELECT * WHERE { </root> <p> <http://other.example.org/p> }");
    EXPECT_EQ(getFirstTriple(parsedQuery),
              "{s: <http://example.org/root>,"
              " p: <http://example.org/something/p>,"
              " o: <http://other.example.org/p>}");
  }

  // Several `BASE` declarations interleaved with `PREFIX` declarations: each
  // prefix is resolved against the base that precedes it, and the query body
  // uses the last base.
  {
    auto parsedQuery = SparqlParser::parseQuery(
        "BASE <http://example.org/> "
        "PREFIX ex1: <ex1/> "
        "BASE <http://other.example.org/> "
        "PREFIX ex2: <ex2/> "
        "BASE <http://alternative.example.org/> "
        "SELECT * WHERE { ex2:hello <world> ex1:test }");
    EXPECT_EQ(getFirstTriple(parsedQuery),
              "{s: <http://other.example.org/ex2/hello>,"
              " p: <http://alternative.example.org/world>,"
              " o: <http://example.org/ex1/test>}");
  }

  // A base IRI without a scheme is an error, even if an absolute base IRI was
  // declared before it.
  AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(
      SparqlParser::parseQuery("BASE <http://example.com> BASE <relative> "
                               "SELECT * WHERE { ?s ?p ?o }"),
      ::testing::HasSubstr("absolute IRI"), InvalidSparqlQueryException);
}

0 comments on commit 6349abd

Please sign in to comment.