Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support BASE declarations in SPARQL queries #1786

Merged
merged 13 commits into from
Feb 14, 2025
3 changes: 2 additions & 1 deletion src/engine/sparqlExpressions/NaryExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ SparqlExpression::Ptr makeStrLangTagExpression(SparqlExpression::Ptr child1,
SparqlExpression::Ptr child2);

SparqlExpression::Ptr makeStrExpression(SparqlExpression::Ptr child);
SparqlExpression::Ptr makeIriOrUriExpression(SparqlExpression::Ptr child);
// Create the expression for the SPARQL functions `IRI()` / `URI()`. `child` is
// the argument of the function call, `baseIri` is an expression that yields the
// base IRI against which the result of `child` is resolved. The parser passes
// an `IriExpression` holding its current base IRI (a default-constructed `Iri`
// when the query contains no `BASE` declaration).
SparqlExpression::Ptr makeIriOrUriExpression(SparqlExpression::Ptr child,
SparqlExpression::Ptr baseIri);
SparqlExpression::Ptr makeStrlenExpression(SparqlExpression::Ptr child);
SparqlExpression::Ptr makeSubstrExpression(SparqlExpression::Ptr string,
SparqlExpression::Ptr start,
Expand Down
36 changes: 29 additions & 7 deletions src/engine/sparqlExpressions/StringExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,36 @@ struct LiftStringFunction {

// IRI or URI
//
// 1. Check for `BASE` URL and if it exists, prepend it.
// 2. What's the correct behavior for non-strings, like `1` or `true`?
// 1. What's the correct behavior for non-strings, like `1` or `true`?
//
// @1: TODO implement `BASE`
// @2: Only a `LiteralOrIri` or an `Id` from `Vocab`/`LocalVocab` is in
// @1: Only a `LiteralOrIri` or an `Id` from `Vocab`/`LocalVocab` is in
// consideration within the `IriOrUriValueGetter`, hence automatically
// ignores values like `1`, `true`, `Date` etc.
using IriOrUriExpression = NARY<1, FV<std::identity, IriOrUriValueGetter>>;

// Return the `Iri` contained in `litOrIri`. The argument is required to hold
// the `LocalVocabEntry` alternative and that entry is required to be an IRI;
// both requirements are enforced by `AD_CORRECTNESS_CHECK`.
// NOTE(review): the returned reference presumably refers to data owned by
// `litOrIri`, so it must not outlive the argument — confirm against
// `LocalVocabEntry::getIri`.
const Iri& extractIri(const IdOrLiteralOrIri& litOrIri) {
// The `Id` alternative is not supported here, callers have to handle it.
AD_CORRECTNESS_CHECK(std::holds_alternative<LocalVocabEntry>(litOrIri));
const auto& baseIriOrUri = std::get<LocalVocabEntry>(litOrIri);
// A literal is not acceptable, only an IRI can be extracted.
AD_CORRECTNESS_CHECK(baseIriOrUri.isIri());
return baseIriOrUri.getIri();
}

// Resolve `iri` against the base IRI stored in `base`. This is the function
// that `IriOrUriExpression` applies to its two children.
//
// * If `iri` holds an `Id`, that `Id` must be undefined and is returned as is.
// * If the IRI stored in `base` is empty (no base IRI), `iri` is returned as
//   is.
// * Otherwise the result is the IRI that `Iri::fromIrirefConsiderBase` builds
//   from the string representation of `iri` and the two variants of the base
//   IRI (`getBaseIri(false)` and `getBaseIri(true)`).
//
// `base` must hold a `LocalVocabEntry` that is an IRI (checked by
// `extractIri`); the same holds for `iri` whenever it is not an `Id` and the
// base IRI is not empty.
[[maybe_unused]] auto applyBaseIfPresent =
    [](IdOrLiteralOrIri iri, const IdOrLiteralOrIri& base) -> IdOrLiteralOrIri {
  // An `Id` is only expected for inputs that could not be converted to an IRI,
  // these stay undefined.
  if (std::holds_alternative<Id>(iri)) {
    AD_CORRECTNESS_CHECK(std::get<Id>(iri).isUndefined());
    return iri;
  }
  const auto& baseIri = extractIri(base);
  // Without a base IRI there is nothing to resolve against.
  if (baseIri.empty()) {
    return iri;
  }
  // TODO<RobinTF> Avoid unnecessary string copies because of conversion.
  return LiteralOrIri{Iri::fromIrirefConsiderBase(
      extractIri(iri).toStringRepresentation(), baseIri.getBaseIri(false),
      baseIri.getBaseIri(true))};
};
// The `IRI()` / `URI()` expression. It has two children: the argument of the
// function call and the base IRI.
// NOTE(review): presumably `FV<F, Getter>` reads every child via `Getter`
// (here `IriOrUriValueGetter`) and combines the values with `F` (here
// `applyBaseIfPresent`) — confirm against the definition of `FV`.
using IriOrUriExpression =
NARY<2, FV<decltype(applyBaseIfPresent), IriOrUriValueGetter>>;

// STRLEN
[[maybe_unused]] auto strlen = [](std::string_view s) {
Expand Down Expand Up @@ -551,8 +573,8 @@ Expr make(std::same_as<Expr> auto&... children) {
}
Expr makeStrExpression(Expr child) { return make<StrExpression>(child); }

Expr makeIriOrUriExpression(Expr child) {
return make<IriOrUriExpression>(child);
// Create an `IriOrUriExpression` from the argument of the `IRI()` / `URI()`
// call (`child`) and an expression that yields the base IRI (`baseIri`).
// Both parameters use the `Expr` alias, consistent with the other factory
// functions in this file (`make` requires all children to be exactly `Expr`).
Expr makeIriOrUriExpression(Expr child, Expr baseIri) {
  return make<IriOrUriExpression>(child, baseIri);
}

Expr makeStrlenExpression(Expr child) { return make<StrlenExpression>(child); }
Expand Down
40 changes: 33 additions & 7 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1162,8 +1162,16 @@ TripleComponent::Iri Visitor::visit(Parser::IriContext* ctx) {
}

// ____________________________________________________________________________________
string Visitor::visit(Parser::IrirefContext* ctx) {
return RdfEscaping::unescapeIriref(ctx->getText());
string Visitor::visit(Parser::IrirefContext* ctx) const {
  // A base IRI has been declared: resolve the IRI reference against it and
  // hand out the string representation of the result.
  if (!baseIri_.empty()) {
    // TODO<RobinTF> Avoid unnecessary string copies because of conversion.
    return std::move(
        ad_utility::triple_component::Iri::fromIrirefConsiderBase(
            ctx->getText(), baseIri_.getBaseIri(false),
            baseIri_.getBaseIri(true))
            .toStringRepresentation());
  }
  // No base IRI: the text of the IRI reference is returned unchanged.
  return ctx->getText();
}

// ____________________________________________________________________________________
Expand Down Expand Up @@ -1211,8 +1219,17 @@ DatasetClause SparqlQleverVisitor::visit(Parser::UsingClauseContext* ctx) {

// ____________________________________________________________________________________
void Visitor::visit(Parser::PrologueContext* ctx) {
visitVector(ctx->baseDecl());
visitVector(ctx->prefixDecl());
// Process in an interleaved way, so PREFIX statements are processed correctly
// to only use the BASE IRIs defined before them, not after them.
for (auto* child : ctx->children) {
RobinTF marked this conversation as resolved.
Show resolved Hide resolved
if (auto* baseDecl = dynamic_cast<Parser::BaseDeclContext*>(child)) {
visit(baseDecl);
} else {
auto* prefixDecl = dynamic_cast<Parser::PrefixDeclContext*>(child);
AD_CORRECTNESS_CHECK(prefixDecl != nullptr);
visit(prefixDecl);
}
}
// Remember the whole prologue (we need this when we encounter a SERVICE
// clause, see `visit(ServiceGraphPatternContext*)` below.
if (ctx->getStart() && ctx->getStop()) {
Expand All @@ -1221,8 +1238,15 @@ void Visitor::visit(Parser::PrologueContext* ctx) {
}

// ____________________________________________________________________________________
void Visitor::visit(const Parser::BaseDeclContext* ctx) {
reportNotSupported(ctx, "BASE declarations are");
// Handle a `BASE` declaration: check that the declared IRI is absolute, i.e.
// that it starts with a scheme, and store it in `baseIri_`. If the IRI has no
// scheme, an error is reported via `reportError`.
void Visitor::visit(Parser::BaseDeclContext* ctx) {
  auto rawIri = ctx->iriref()->getText();
  // Scheme syntax according to RFC 3986, section 3.1:
  //   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  // followed by ":". The leading `<` belongs to the IRIREF token. The `-` is
  // escaped so that it is a literal character and not part of a range (an
  // unescaped `+-.` would also match `,`).
  bool hasScheme = ctre::starts_with<"<[A-Za-z][A-Za-z0-9+.\\-]*:">(rawIri);
  if (!hasScheme) {
    reportError(
        ctx,
        "The base IRI must be an absolute IRI with a scheme, was: " + rawIri);
  }
  baseIri_ = TripleComponent::Iri::fromIriref(visit(ctx->iriref()));
}

// ____________________________________________________________________________________
Expand Down Expand Up @@ -2330,7 +2354,9 @@ ExpressionPtr Visitor::visit([[maybe_unused]] Parser::BuiltInCallContext* ctx) {
if (functionName == "str") {
return createUnary(&makeStrExpression);
} else if (functionName == "iri" || functionName == "uri") {
return createUnary(&makeIriOrUriExpression);
AD_CORRECTNESS_CHECK(argList.size() == 1, argList.size());
return makeIriOrUriExpression(std::move(argList[0]),
std::make_unique<IriExpression>(baseIri_));
} else if (functionName == "strlang") {
return createBinary(&makeStrLangTagExpression);
} else if (functionName == "strdt") {
Expand Down
5 changes: 3 additions & 2 deletions src/parser/sparqlParser/SparqlQleverVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class SparqlQleverVisitor {
// `addVisibleVariable`.
std::vector<Variable> visibleVariables_{};
PrefixMap prefixMap_{};
// The base IRI, set when a `BASE` declaration is visited. It is
// default-constructed until then; code that resolves IRIs checks
// `baseIri_.empty()` to detect that there is no base IRI.
ad_utility::triple_component::Iri baseIri_{};
// We need to remember the prologue (prefix declarations) when we encounter it
// because we need it when we encounter a SERVICE query. When there is no
// prologue, this string simply remains empty.
Expand Down Expand Up @@ -122,7 +123,7 @@ class SparqlQleverVisitor {
void visit(Parser::PrologueContext* ctx);

// ___________________________________________________________________________
[[noreturn]] static void visit(const Parser::BaseDeclContext* ctx);
void visit(Parser::BaseDeclContext* ctx);

// ___________________________________________________________________________
void visit(Parser::PrefixDeclContext* ctx);
Expand Down Expand Up @@ -469,7 +470,7 @@ class SparqlQleverVisitor {

TripleComponent::Iri visit(Parser::IriContext* ctx);

static string visit(Parser::IrirefContext* ctx);
string visit(Parser::IrirefContext* ctx) const;

string visit(Parser::PrefixedNameContext* ctx);

Expand Down
12 changes: 10 additions & 2 deletions test/SparqlAntlrParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1587,8 +1587,16 @@ TEST(SparqlParser, builtInCall) {
expectBuiltInCall("ucaSe(?x)", matchUnary(&makeUppercaseExpression));
expectBuiltInCall("lCase(?x)", matchUnary(&makeLowercaseExpression));
expectBuiltInCall("StR(?x)", matchUnary(&makeStrExpression));
expectBuiltInCall("iRI(?x)", matchUnary(&makeIriOrUriExpression));
expectBuiltInCall("uRI(?x)", matchUnary(&makeIriOrUriExpression));
expectBuiltInCall(
"iRI(?x)",
matchNaryWithChildrenMatchers(
&makeIriOrUriExpression, variableExpressionMatcher(Variable{"?x"}),
matchLiteralExpression(ad_utility::triple_component::Iri{})));
expectBuiltInCall(
"uRI(?x)",
matchNaryWithChildrenMatchers(
&makeIriOrUriExpression, variableExpressionMatcher(Variable{"?x"}),
matchLiteralExpression(ad_utility::triple_component::Iri{})));
expectBuiltInCall("year(?x)", matchUnary(&makeYearExpression));
expectBuiltInCall("month(?x)", matchUnary(&makeMonthExpression));
expectBuiltInCall("tz(?x)", matchUnary(&makeTimezoneStrExpression));
Expand Down
53 changes: 38 additions & 15 deletions test/SparqlExpressionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,13 +570,15 @@ TEST(SparqlExpression, dateOperators) {
}

// _____________________________________________________________________________________
namespace {
auto checkStrlen = testUnaryExpression<&makeStrlenExpression>;
auto checkStr = testUnaryExpression<&makeStrExpression>;
auto checkIriOrUri = testUnaryExpression<&makeIriOrUriExpression>;
static auto makeStrlenWithStr = [](auto arg) {
auto checkIriOrUri = testNaryExpressionVec<&makeIriOrUriExpression>;
auto makeStrlenWithStr = [](auto arg) {
return makeStrlenExpression(makeStrExpression(std::move(arg)));
};
auto checkStrlenWithStrChild = testUnaryExpression<makeStrlenWithStr>;
} // namespace
TEST(SparqlExpression, stringOperators) {
// Test `StrlenExpression` and `StrExpression`.
checkStrlen(
Expand Down Expand Up @@ -611,20 +613,13 @@ TEST(SparqlExpression, stringOperators) {
DateYearOrDuration(11853, DateYearOrDuration::Type::Year));
// Test `iriOrUriExpression`.
// test invalid
checkIriOrUri(IdOrLiteralOrIriVec{U, IntId(2), DoubleId(12.99), dateDate,
dateLYear, T, F},
IdOrLiteralOrIriVec{U, U, U, U, U, U, U});
checkIriOrUri(IdOrLiteralOrIriVec{U, U, U, U, U, U, U},
std::tuple{IdOrLiteralOrIriVec{U, IntId(2), DoubleId(12.99),
dateDate, dateLYear, T, F},
IdOrLiteralOrIri{LocalVocabEntry{
ad_utility::triple_component::Iri{}}}});
// test valid
checkIriOrUri(
IdOrLiteralOrIriVec{
lit("bimbim"), iriref("<bambim>"),
lit("https://www.bimbimbam/2001/bamString"),
lit("http://www.w3.\torg/2001/\nXMLSchema#\runsignedShort"),
lit("http://www.w3.org/2001/XMLSchema#string"),
iriref("<http://www.w3.org/2001/XMLSchema#string>"),
testContext().notInVocabIri, testContext().notInVocabIriLit,
lit("http://example/"), iriref("<http://\t\t\nexample/>"),
lit("\t\n\r")},
IdOrLiteralOrIriVec{
iriref("<bimbim>"), iriref("<bambim>"),
iriref("<https://www.bimbimbam/2001/bamString>"),
Expand All @@ -634,7 +629,35 @@ TEST(SparqlExpression, stringOperators) {
iriref("<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>"),
iriref("<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>"),
iriref("<http://example/>"), iriref("<http://\t\t\nexample/>"),
iriref("<\t\n\r>")});
iriref("<\t\n\r>")},
std::tuple{
IdOrLiteralOrIriVec{
lit("bimbim"), iriref("<bambim>"),
lit("https://www.bimbimbam/2001/bamString"),
lit("http://www.w3.\torg/2001/\nXMLSchema#\runsignedShort"),
lit("http://www.w3.org/2001/XMLSchema#string"),
iriref("<http://www.w3.org/2001/XMLSchema#string>"),
testContext().notInVocabIri, testContext().notInVocabIriLit,
lit("http://example/"), iriref("<http://\t\t\nexample/>"),
lit("\t\n\r")},
IdOrLiteralOrIri{
LocalVocabEntry{ad_utility::triple_component::Iri{}}}});

// test with base iri
checkIriOrUri(
IdOrLiteralOrIriVec{
U,
iriref("<http://example.com/hi/bimbim>"),
iriref("<http://example.com/hi/bambim>"),
iriref("<https://www.bimbimbam/2001/bamString>"),
iriref("<http://example.com/hello>"),
iriref("<http://example.com/hello>"),
},
std::tuple{
IdOrLiteralOrIriVec{U, lit("bimbim"), iriref("<bambim>"),
lit("https://www.bimbimbam/2001/bamString"),
lit("/hello"), iriref("</hello>")},
IdOrLiteralOrIri{iriref("<http://example.com/hi>")}});

// A simple test for uniqueness of the cache key.
auto c1a = makeStrlenExpression(std::make_unique<IriExpression>(iri("<bim>")))
Expand Down
35 changes: 35 additions & 0 deletions test/SparqlParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1349,3 +1349,38 @@ TEST(ParserTest, HandlesSurrogatesCorrectly) {
// So writing unit tests for these cases is not possible without creating
// semi-invalid UTF-8 strings.
}

// _____________________________________________________________________________
TEST(ParserTest, BaseDeclaration) {
  // A single `BASE` declaration is applied to all relative IRIs of the query.
  auto simpleQuery = SparqlParser::parseQuery(
      "BASE <http://example.org/> "
      "SELECT * WHERE { ?s <p> <test> }");
  EXPECT_EQ(
      getFirstTriple(simpleQuery),
      "{s: ?s, p: <http://example.org/p>, o: <http://example.org/test>}");

  // Root-relative, path-relative and absolute IRIs within the same triple.
  // Only the absolute IRI is left untouched.
  auto mixedQuery = SparqlParser::parseQuery(
      "BASE <http://example.org/something> SELECT * WHERE "
      "{ </root> <p> <http://other.example.org/p> }");
  EXPECT_EQ(getFirstTriple(mixedQuery),
            "{s: <http://example.org/root>, "
            "p: <http://example.org/something/p>, "
            "o: <http://other.example.org/p>}");

  // Several `BASE` declarations interleaved with `PREFIX` declarations: each
  // prefix uses the base IRI declared most recently before it, the query body
  // uses the last base IRI.
  auto cascadingQuery = SparqlParser::parseQuery(
      "BASE <http://example.org/> PREFIX ex1: <ex1/> "
      "BASE <http://other.example.org/> PREFIX ex2: <ex2/> "
      "BASE <http://alternative.example.org/> "
      "SELECT * WHERE { ex2:hello <world> ex1:test }");
  EXPECT_EQ(getFirstTriple(cascadingQuery),
            "{s: <http://other.example.org/ex2/hello>, "
            "p: <http://alternative.example.org/world>, "
            "o: <http://example.org/ex1/test>}");

  // A base IRI without a scheme is rejected.
  AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(
      SparqlParser::parseQuery("BASE <http://example.com> BASE <relative> "
                               "SELECT * WHERE { ?s ?p ?o }"),
      ::testing::HasSubstr("absolute IRI"), InvalidSparqlQueryException);
}
Loading