diff --git a/src/engine/SpatialJoin.h b/src/engine/SpatialJoin.h index af76c49a81..c85a25059a 100644 --- a/src/engine/SpatialJoin.h +++ b/src/engine/SpatialJoin.h @@ -174,6 +174,10 @@ class SpatialJoin : public Operation { return childRight_; } + PreparedSpatialJoinParams onlyForTestingGetPrepareJoin() const { + return prepareJoin(); + } + private: // helper function to generate a variable to column map from `childRight_` // that only contains the columns selected by `config_.payloadVariables_` diff --git a/src/engine/SpatialJoinAlgorithms.cpp b/src/engine/SpatialJoinAlgorithms.cpp index 82d5102df6..07338e49c9 100644 --- a/src/engine/SpatialJoinAlgorithms.cpp +++ b/src/engine/SpatialJoinAlgorithms.cpp @@ -12,7 +12,9 @@ #include #include +#include +#include "engine/ExportQueryExecutionTrees.h" #include "engine/SpatialJoin.h" #include "util/GeoSparqlHelpers.h" @@ -38,18 +40,100 @@ std::optional SpatialJoinAlgorithms::getPoint(const IdTable* restable, }; // ____________________________________________________________________________ -Id SpatialJoinAlgorithms::computeDist(const IdTable* idTableLeft, - const IdTable* idTableRight, - size_t rowLeft, size_t rowRight, - ColumnIndex leftPointCol, - ColumnIndex rightPointCol) const { - auto point1 = getPoint(idTableLeft, rowLeft, leftPointCol); - auto point2 = getPoint(idTableRight, rowRight, rightPointCol); - if (!point1.has_value() || !point2.has_value()) { - return Id::makeUndefined(); - } - return Id::makeFromDouble( - ad_utility::detail::wktDistImpl(point1.value(), point2.value())); +std::string_view SpatialJoinAlgorithms::betweenQuotes( + std::string_view extractFrom) const { + size_t pos1 = extractFrom.find("\"", 0); + size_t pos2 = extractFrom.find("\"", pos1 + 1); + if (pos1 != std::string::npos && pos2 != std::string::npos) { + return extractFrom.substr(pos1 + 1, pos2 - pos1 - 1); + } else { + return extractFrom; + } +} + +std::optional SpatialJoinAlgorithms::getAnyGeometry( + const IdTable* idtable, size_t row, 
size_t col) { + auto printWarning = [this, &spatialJoin = spatialJoin_]() { + if (this->numFailedParsedGeometries_ == 0) { + std::string warning = + "The input to a spatial join contained at least one element, " + "that is not a Point, Linestring, Polygon, MultiPoint, " + "MultiLinestring or MultiPolygon geometry and is thus skipped. Note " + "that QLever currently only accepts those geometries for " + "the spatial joins"; + AD_LOG_WARN << warning << std::endl; + this->numFailedParsedGeometries_ += 1; + if (spatialJoin.has_value()) { + AD_CORRECTNESS_CHECK(spatialJoin.value() != nullptr); + spatialJoin.value()->addWarning(warning); + } + } + }; + + // unfortunately, the current implementation requires the fully materialized + // string. In the future this might get changed. When only the bounding box + // is needed, one could store it in an ID similar to GeoPoint (but with less + // precision), and then the full geometry would only need to be read, when + // the exact distance is wanted + std::string str(betweenQuotes(ExportQueryExecutionTrees::idToStringAndType( + qec_->getIndex(), idtable->at(row, col), {}) + .value() + .first)); + AnyGeometry geometry; + try { + bg::read_wkt(str, geometry); + geometries_.push_back(std::move(geometry)); + } catch (...) 
{ + printWarning(); + return std::nullopt; + } + return geometries_.size() - 1; // index of the last element +} + +// ____________________________________________________________________________ +double SpatialJoinAlgorithms::computeDist(const size_t geometryIndex1, + const size_t geometryIndex2) const { + return boost::apply_visitor(ClosestPointVisitor(), + geometries_.at(geometryIndex1), + geometries_.at(geometryIndex2)); +}; + +// ____________________________________________________________________________ +size_t SpatialJoinAlgorithms::convertGeoPointToPoint(GeoPoint point) { + geometries_.emplace_back(Point(point.getLng(), point.getLat())); + return geometries_.size() - 1; // index of the last element +}; + +// ____________________________________________________________________________ +Id SpatialJoinAlgorithms::computeDist(RtreeEntry& geo1, RtreeEntry& geo2) { + auto convertPoint = [&](RtreeEntry& entry) { + if (entry.geoPoint_) { + return entry.geoPoint_.value(); + } + if (!entry.boundingBox_.has_value()) { + entry.boundingBox_ = boost::apply_visitor( + BoundingBoxVisitor(), geometries_.at(entry.geometryIndex_.value())); + } + Point p = calculateMidpointOfBox(entry.boundingBox_.value()); + return GeoPoint(p.get<1>(), p.get<0>()); + }; + + auto getIndex = [&](RtreeEntry& entry) { + if (!entry.geometryIndex_) { + entry.geometryIndex_ = convertGeoPointToPoint(entry.geoPoint_.value()); + } + return entry.geometryIndex_.value(); + }; + + // use the already parsed geometries to calculate the distance + if (useMidpointForAreas_ || + (geo1.geoPoint_.has_value() && geo2.geoPoint_.has_value())) { + return Id::makeFromDouble(ad_utility::detail::wktDistImpl( + convertPoint(geo1), convertPoint(geo2))); + } else { + // at least one area + return Id::makeFromDouble(computeDist(getIndex(geo1), getIndex(geo2))); + } } // ____________________________________________________________________________ @@ -118,10 +202,17 @@ Result SpatialJoinAlgorithms::BaselineAlgorithm() { 
decltype(compare)> intermediate(compare); + auto entryLeft = getRtreeEntry(idTableLeft, rowLeft, leftJoinCol); + // Inner loop of cartesian product for (size_t rowRight = 0; rowRight < idTableRight->size(); rowRight++) { - Id dist = computeDist(idTableLeft, idTableRight, rowLeft, rowRight, - leftJoinCol, rightJoinCol); + auto entryRight = getRtreeEntry(idTableRight, rowRight, rightJoinCol); + + if (!entryLeft || !entryRight) { + continue; + } + + Id dist = computeDist(entryLeft.value(), entryRight.value()); // Ensure `maxDist_` constraint if (dist.getDatatype() != Datatype::Double || @@ -240,8 +331,8 @@ Result SpatialJoinAlgorithms::S2geometryAlgorithm() { } // ____________________________________________________________________________ -std::vector SpatialJoinAlgorithms::computeBoundingBox( - const Point& startPoint) const { +std::vector SpatialJoinAlgorithms::computeQueryBox( + const Point& startPoint, double additionalDist) const { const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, rightJoinCol, rightSelectedCols, numColumns, maxDist, maxResults] = params_; @@ -254,13 +345,14 @@ std::vector SpatialJoinAlgorithms::computeBoundingBox( auto archaversine = [](double theta) { return std::acos(1 - 2 * theta); }; // safety buffer for numerical inaccuracies - double maxDistInMetersBuffer; - if (maxDist.value() < 10) { + double maxDistInMetersBuffer = + static_cast(maxDist.value()) + additionalDist; + if (maxDistInMetersBuffer < 10) { maxDistInMetersBuffer = 10; } else if (static_cast(maxDist.value()) < static_cast(std::numeric_limits::max()) / 1.02) { - maxDistInMetersBuffer = 1.01 * static_cast(maxDist.value()); + maxDistInMetersBuffer = 1.01 * maxDistInMetersBuffer; } else { maxDistInMetersBuffer = static_cast(std::numeric_limits::max()); @@ -270,7 +362,7 @@ std::vector SpatialJoinAlgorithms::computeBoundingBox( // a single bounding box for the whole planet, do an optimized version if (static_cast(maxDist.value()) > circumferenceMax_ / 4.0 && 
static_cast(maxDist.value()) < circumferenceMax_ / 2.01) { - return computeBoundingBoxForLargeDistances(startPoint); + return computeQueryBoxForLargeDistances(startPoint); } // compute latitude bound @@ -323,7 +415,7 @@ std::vector SpatialJoinAlgorithms::computeBoundingBox( } // ____________________________________________________________________________ -std::vector SpatialJoinAlgorithms::computeBoundingBoxForLargeDistances( +std::vector SpatialJoinAlgorithms::computeQueryBoxForLargeDistances( const Point& startPoint) const { const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, rightJoinCol, rightSelectedCols, numColumns, maxDist, @@ -442,22 +534,74 @@ std::array SpatialJoinAlgorithms::isAPoleTouched( return std::array{northPoleReached, southPoleReached}; } +// ____________________________________________________________________________ +Point SpatialJoinAlgorithms::calculateMidpointOfBox(const Box& box) const { + double lng = (box.min_corner().get<0>() + box.max_corner().get<0>()) / 2.0; + double lat = (box.min_corner().get<1>() + box.max_corner().get<1>()) / 2.0; + return Point(lng, lat); +} + +// ____________________________________________________________________________ +double SpatialJoinAlgorithms::getMaxDistFromMidpointToAnyPointInsideTheBox( + const Box& box, std::optional midpoint) const { + if (!midpoint) { + midpoint = calculateMidpointOfBox(box); + } + double distLng = + std::abs(box.min_corner().get<0>() - midpoint.value().get<0>()); + double distLat = + std::abs(box.min_corner().get<1>() - midpoint.value().get<1>()); + // convert to meters and return + return (distLng + distLat) * 40075000 / 360; +} + +// ____________________________________________________________________________ +std::optional SpatialJoinAlgorithms::getRtreeEntry( + const IdTable* idTable, const size_t row, const ColumnIndex col) { + RtreeEntry entry{row, std::nullopt, std::nullopt, std::nullopt}; + entry.geoPoint_ = getPoint(idTable, row, col); + + if 
(!entry.geoPoint_) { + entry.geometryIndex_ = getAnyGeometry(idTable, row, col); + if (!entry.geometryIndex_) { + return std::nullopt; + } + entry.boundingBox_ = boost::apply_visitor( + BoundingBoxVisitor(), geometries_.at(entry.geometryIndex_.value())); + } else { + entry.boundingBox_ = + Box(Point(entry.geoPoint_.value().getLng(), + entry.geoPoint_.value().getLat()), + Point(entry.geoPoint_.value().getLng() + 0.00000001, + entry.geoPoint_.value().getLat() + 0.00000001)); + } + return entry; +} + +// ____________________________________________________________________________ +std::vector SpatialJoinAlgorithms::getQueryBox( + const std::optional& entry) const { + if (!entry.value().geoPoint_) { + auto midpoint = calculateMidpointOfBox(entry.value().boundingBox_.value()); + return computeQueryBox(midpoint, + getMaxDistFromMidpointToAnyPointInsideTheBox( + entry.value().boundingBox_.value(), midpoint)); + } else { + return computeQueryBox(Point(entry.value().geoPoint_.value().getLng(), + entry.value().geoPoint_.value().getLat())); + } +} + // ____________________________________________________________________________ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { - auto printWarning = [alreadyWarned = false, - &spatialJoin = spatialJoin_]() mutable { - if (!alreadyWarned) { - std::string warning = - "The input to a spatial join contained at least one element, " - "that is not a point geometry and is thus skipped. Note that " - "QLever currently only accepts point geometries for the " - "spatial joins"; - AD_LOG_WARN << warning << std::endl; - alreadyWarned = true; - if (spatialJoin.has_value()) { - AD_CORRECTNESS_CHECK(spatialJoin.value() != nullptr); - spatialJoin.value()->addWarning(warning); - } + // helper struct to avoid duplicate entries for areas + struct AddedPair { + size_t rowLeft_; + size_t rowRight_; + + auto operator<=>(const AddedPair& other) const { + return (rowLeft_ == other.rowLeft_) ? 
(rowRight_ <=> other.rowRight_) + : (rowLeft_ <=> other.rowLeft_); } }; @@ -478,56 +622,65 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { std::swap(smallerResJoinCol, otherResJoinCol); } + // build rtree with one child bgi::rtree, bgi::indexable, bgi::equal_to, ad_utility::AllocatorWithLimit> rtree(bgi::quadratic<16>{}, bgi::indexable{}, bgi::equal_to{}, qec_->getAllocator()); for (size_t i = 0; i < smallerResult->numRows(); i++) { - // get point of row i - auto geopoint = getPoint(smallerResult, i, smallerResJoinCol); - - if (!geopoint) { - printWarning(); + // add every box together with the additional information into the rtree + std::optional entry = + getRtreeEntry(smallerResult, i, smallerResJoinCol); + if (!entry) { + // nothing to do. When parsing a point or an area fails, a warning + // message gets printed at another place and the point/area just gets + // skipped continue; } - - Point p(geopoint.value().getLng(), geopoint.value().getLat()); - - // add every point together with the row number into the rtree - rtree.insert(std::make_pair(std::move(p), i)); + rtree.insert(std::pair(entry.value().boundingBox_.value(), + std::move(entry.value()))); } + + // query rtree with the other child std::vector> results{ qec_->getAllocator()}; for (size_t i = 0; i < otherResult->numRows(); i++) { - auto geopoint1 = getPoint(otherResult, i, otherResJoinCol); - - if (!geopoint1) { - printWarning(); + std::optional entry = + getRtreeEntry(otherResult, i, otherResJoinCol); + if (!entry) { + // nothing to do. 
When parsing a point or an area fails, a warning + // message gets printed at another place and the point/area just gets + // skipped continue; } + std::vector queryBox = getQueryBox(entry); - Point p(geopoint1.value().getLng(), geopoint1.value().getLat()); - - // query the other rtree for every point using the following bounding box - std::vector bbox = computeBoundingBox(p); results.clear(); - ql::ranges::for_each(bbox, [&](const Box& bbox) { + ql::ranges::for_each(queryBox, [&](const Box& bbox) { rtree.query(bgi::intersects(bbox), std::back_inserter(results)); }); - ql::ranges::for_each(results, [&](const Value& res) { - size_t rowLeft = res.second; + std::set pairs; + ql::ranges::for_each(results, [&](Value& res) { + size_t rowLeft = res.second.row_; size_t rowRight = i; if (!leftResSmaller) { std::swap(rowLeft, rowRight); } - auto distance = computeDist(idTableLeft, idTableRight, rowLeft, rowRight, - leftJoinCol, rightJoinCol); + auto distance = computeDist(res.second, entry.value()); AD_CORRECTNESS_CHECK(distance.getDatatype() == Datatype::Double); if (distance.getDouble() * 1000 <= static_cast(maxDist.value())) { - addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, - rowRight, distance); + // make sure, that no duplicate elements are inserted in the result + // table. 
As duplicates can only occur, when areas are not approximated + // as midpoints, the additional runtime can be saved in that case + if (useMidpointForAreas_) { + addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, + rowRight, distance); + } else if (pairs.insert(AddedPair{rowLeft, rowRight}).second) { + addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, + rowRight, distance); + } } }); } diff --git a/src/engine/SpatialJoinAlgorithms.h b/src/engine/SpatialJoinAlgorithms.h index c3c2729267..e41beb42a3 100644 --- a/src/engine/SpatialJoinAlgorithms.h +++ b/src/engine/SpatialJoinAlgorithms.h @@ -10,9 +10,11 @@ #include #include #include +#include #include "engine/Result.h" #include "engine/SpatialJoin.h" +#include "util/GeoSparqlHelpers.h" namespace BoostGeometryNamespace { namespace bg = boost::geometry; @@ -20,10 +22,60 @@ namespace bgi = boost::geometry::index; using Point = bg::model::point; using Box = bg::model::box; -using Value = std::pair; +using Polygon = boost::geometry::model::polygon< + boost::geometry::model::d2::point_xy>; +using Linestring = bg::model::linestring; +using MultiPoint = bg::model::multi_point; +using MultiLinestring = bg::model::multi_linestring; +using MultiPolygon = bg::model::multi_polygon; +using AnyGeometry = boost::variant; +using Segment = boost::geometry::model::segment; + +// this struct is used to get the bounding box of an arbitrary geometry type. +struct BoundingBoxVisitor : public boost::static_visitor { + template + Box operator()(const Geometry& geometry) const { + Box box; + boost::geometry::envelope(geometry, box); + return box; + } +}; + +// this struct is used to calculate the distance between two arbitrary +// geometries. It calculates the two closest points (in euclidean geometry), +// transforms the two closest points, to a GeoPoint and then calculates the +// distance of the two points on the earth. 
As the closest points are calculated +// using euclidean geometry, this is only an approximation. On the sphere two +// other points might be closer. +struct ClosestPointVisitor : public boost::static_visitor { + template + double operator()(const Geometry1& geo1, const Geometry2& geo2) const { + Segment seg; + bg::closest_points(geo1, geo2, seg); + GeoPoint closestPoint1(bg::get<0, 1>(seg), bg::get<0, 0>(seg)); + GeoPoint closestPoint2(bg::get<1, 1>(seg), bg::get<1, 0>(seg)); + return ad_utility::detail::wktDistImpl(closestPoint1, closestPoint2); + } +}; + +struct RtreeEntry { + size_t row_; + std::optional geometryIndex_; + std::optional geoPoint_; + std::optional boundingBox_; +}; + +using Value = std::pair; + } // namespace BoostGeometryNamespace class SpatialJoinAlgorithms { + using Point = BoostGeometryNamespace::Point; + using Box = BoostGeometryNamespace::Box; + using AnyGeometry = BoostGeometryNamespace::AnyGeometry; + using RtreeEntry = BoostGeometryNamespace::RtreeEntry; + public: // initialize the Algorithm with the needed parameters SpatialJoinAlgorithms(QueryExecutionContext* qec, @@ -34,16 +86,57 @@ class SpatialJoinAlgorithms { Result S2geometryAlgorithm(); Result BoundingBoxAlgorithm(); - std::vector - OnlyForTestingWrapperComputeBoundingBox( - const BoostGeometryNamespace::Point& startPoint) const { - return computeBoundingBox(startPoint); + // This function computes the bounding box(es) which represent all points, + // which are in reach of the starting point with a distance of at most + // 'maxDistanceInMeters'. In theory there is always only one bounding box, but + // when mapping the spherical surface on a cartesian plane there are borders. + // So when the "single true" bounding box crosses the left or right (+/-180 + // longitude line) or the poles (+/- 90 latitude, which on the cartesian + // mapping is the top and bottom edge of the rectangular mapping) then the + // single box gets split into multiple boxes (i.e. 
one on the left and one on + // the right, which when seen on the sphere look like a single box, but on the + // map and in the internal representation it looks like two/more boxes). The + // additionalDist gets added on the max distance to compensate for areas being + // bigger than points. AdditionalDist must be the max distance from the + // midpoint of the bounding box of the area to any point inside the area. + // The function getMaxDistFromMidpointToAnyPointInsideTheBox() can be used to + // calculate it. + std::vector computeQueryBox(const Point& startPoint, + double additionalDist = 0) const; + + // This function returns true, iff the given point is contained in any of the + // bounding boxes + bool isContainedInBoundingBoxes(const std::vector& boundingBox, + Point point) const; + + // calculates the midpoint of the given Box + Point calculateMidpointOfBox(const Box& box) const; + + void setUseMidpointForAreas_(bool useMidpointForAreas) { + useMidpointForAreas_ = useMidpointForAreas; } - bool OnlyForTestingWrapperContainedInBoundingBoxes( - const std::vector& boundingBox, - const BoostGeometryNamespace::Point& point) const { - return isContainedInBoundingBoxes(boundingBox, point); + // Helper function, which computes the distance of two geometries, where each + // geometry has already been parsed and is available as an RtreeEntry + Id computeDist(RtreeEntry& geo1, RtreeEntry& geo2); + + // this function calculates the maximum distance from the midpoint of the box + // to any other point, which is contained in the box. If the midpoint has + // already been calculated, because it is needed in other places as well, it + // can be given to the function, otherwise the function calculates the + // midpoint itself + double getMaxDistFromMidpointToAnyPointInsideTheBox( + const Box& box, std::optional midpoint = std::nullopt) const; + + // this function reads the cell (row, col) of the idtable, parses it as a WKT + // geometry and stores the geometry in `geometries_`. It returns the index of + // the stored geometry, or std::nullopt (after a one-time warning) if the + // parsing fails. 
+ std::optional getAnyGeometry(const IdTable* idtable, size_t row, + size_t col); + + // wrapper to access non const private function for testing + std::optional onlyForTestingGetRtreeEntry(const IdTable* idTable, + const size_t row, + const ColumnIndex col) { + return getRtreeEntry(idTable, row, col); } private: @@ -52,11 +145,9 @@ class SpatialJoinAlgorithms { std::optional getPoint(const IdTable* restable, size_t row, ColumnIndex col) const; - // Helper function, which computes the distance of two points, where each - // point comes from a different result table - Id computeDist(const IdTable* resLeft, const IdTable* resRight, - size_t rowLeft, size_t rowRight, ColumnIndex leftPointCol, - ColumnIndex rightPointCol) const; + // returns everything between the first two quotes. If the string does not + // contain two quotes, the string is returned as a whole + std::string_view betweenQuotes(std::string_view extractFrom) const; // Helper function, which adds a row, which belongs to the result to the // result table. As inputs it uses a row of the left and a row of the right @@ -65,19 +156,6 @@ class SpatialJoinAlgorithms { const IdTable* resultRight, size_t rowLeft, size_t rowRight, Id distance) const; - // This function computes the bounding box(es) which represent all points, - // which are in reach of the starting point with a distance of at most - // 'maxDistanceInMeters'. In theory there is always only one bounding box, but - // when mapping the spherical surface on a cartesian plane there are borders. - // So when the "single true" bounding box crosses the left or right (+/-180 - // longitude line) or the poles (+/- 90 latitude, which on the cartesian - // mapping is the top and bottom edge of the rectangular mapping) then the - // single box gets split into multiple boxes (i.e. 
one on the left and one on - // the right, which when seen on the sphere look like a single box, but on the - // map and in the internal representation it looks like two/more boxes) - std::vector computeBoundingBox( - const BoostGeometryNamespace::Point& startPoint) const; - // This helper function calculates the bounding boxes based on a box, where // definitely no match can occur. This means every element in the anti // bounding box is guaranteed to be more than 'maxDistanceInMeters' away from @@ -86,20 +164,46 @@ class SpatialJoinAlgorithms { // gets used, when the usual procedure, would just result in taking a big // bounding box, which covers the whole planet (so for extremely large max // distances) - std::vector computeBoundingBoxForLargeDistances( - const BoostGeometryNamespace::Point& startPoint) const; - - // This function returns true, iff the given point is contained in any of the - // bounding boxes - bool isContainedInBoundingBoxes( - const std::vector& boundingBox, - BoostGeometryNamespace::Point point) const; + std::vector computeQueryBoxForLargeDistances( + const Point& startPoint) const; + + // this helper function computes the distance between two already parsed + // geometries, which are given by their indices into `geometries_`. It + // determines the two closest points of the geometries (using euclidean + // geometry, so this is only an approximation on the sphere) and returns + // the distance between these two points on the earth, as calculated by + // `ad_utility::detail::wktDistImpl`. An invalid index makes `at()` throw. + // Note, that this function is expensive and should only be + // called when needed + double computeDist(const size_t geometryIndex1, + const size_t geometryIndex2) const; + + // this helper function takes an idtable, a row and a column. It then tries + // to parse a geometry or a geoPoint of that cell in the idtable. 
If it + // succeeds, it returns an rtree entry of that geometry/geopoint + std::optional getRtreeEntry(const IdTable* idTable, + const size_t row, + const ColumnIndex col); + + // this helper function converts a GeoPoint into a boost geometry Point + size_t convertGeoPointToPoint(GeoPoint point); + + // this helper function calculates the query box. The query box, is the box, + // which contains the area, where all possible candidates of the max distance + // query must be contained in. It returns a vector, because if the box crosses + // the poles or the -180/180 longitude line, then it is disjoint in the + // cartesian coordinates. The boxes themselves are disjoint to each other. + std::vector getQueryBox(const std::optional& entry) const; QueryExecutionContext* qec_; PreparedSpatialJoinParams params_; SpatialJoinConfiguration config_; std::optional spatialJoin_; + // if the distance calculation should be approximated, by the midpoint of + // the area + bool useMidpointForAreas_ = true; + // circumference in meters at the equator (max) and the pole (min) (as the // earth is not exactly a sphere the circumference is different. Note that // the values are given in meters) @@ -111,8 +215,17 @@ class SpatialJoinAlgorithms { static constexpr double radius_ = 6'378'000; // convert coordinates to the usual ranges (-180 to 180 and -90 to 90) - void convertToNormalCoordinates(BoostGeometryNamespace::Point& point) const; + void convertToNormalCoordinates(Point& point) const; // return whether one of the poles is being touched std::array isAPoleTouched(const double& latitude) const; + + // number of times the parsing of a geometry failed. For now this is only used + // to print the warning once (it is only incremented on the first failure), + // but it could also be used to print how many geometries failed. It is + // modified by the non-const parsing function `getAnyGeometry`. 
+ size_t numFailedParsedGeometries_ = 0; + + // this vector stores the geometries, which have already been parsed + std::vector geometries_; }; diff --git a/test/engine/SpatialJoinAlgorithmsTest.cpp b/test/engine/SpatialJoinAlgorithmsTest.cpp index f4c9640d16..f2304bb7ea 100644 --- a/test/engine/SpatialJoinAlgorithmsTest.cpp +++ b/test/engine/SpatialJoinAlgorithmsTest.cpp @@ -31,9 +31,10 @@ using SJ = std::variant; namespace computeResultTest { // Represents from left to right: the algorithm, addLeftChildFirst, -// bigChildLeft, a spatial join task +// bigChildLeft, a spatial join task and if areas (=true) or points (=false) +// should be used using SpatialJoinTestParam = - std::tuple; + std::tuple; using Row = std::vector; using Rows = std::vector; @@ -131,9 +132,10 @@ class SpatialJoinParamTest bool containsWrongPointWarning = false; std::string warningMessage = "The input to a spatial join contained at least one element, " - "that is not a point geometry and is thus skipped. Note that " - "QLever currently only accepts point geometries for the " - "spatial joins"; + "that is not a Point, Linestring, Polygon, MultiPoint, " + "MultiLinestring or MultiPolygon geometry and is thus skipped. 
Note " + "that QLever currently only accepts those geometries for " + "the spatial joins"; for (const auto& warning : warnings) { if (warning == warningMessage) { containsWrongPointWarning = true; @@ -158,7 +160,7 @@ class SpatialJoinParamTest void buildAndTestSmallTestSetLargeChildren(SJ task, bool addLeftChildFirst, Rows expectedOutput, Row columnNames) { - auto qec = buildTestQEC(); + auto qec = buildTestQEC(std::get<4>(GetParam())); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); // ===================== build the first child @@ -191,7 +193,7 @@ class SpatialJoinParamTest void buildAndTestSmallTestSetSmallChildren(SJ task, bool addLeftChildFirst, Rows expectedOutput, Row columnNames) { - auto qec = buildTestQEC(); + auto qec = buildTestQEC(std::get<4>(GetParam())); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); // ====================== build inputs =================================== @@ -218,7 +220,7 @@ class SpatialJoinParamTest Rows expectedOutput, Row columnNames, bool bigChildLeft) { - auto qec = buildTestQEC(); + auto qec = buildTestQEC(std::get<4>(GetParam())); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); // ========================= build big child ============================= @@ -246,13 +248,15 @@ class SpatialJoinParamTest void testDiffSizeIdTables(SJ task, bool addLeftChildFirst, Rows expectedOutput, Row columnNames, bool bigChildLeft) { - auto qec = buildTestQEC(); + auto qec = buildTestQEC(std::get<4>(GetParam())); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); // ====================== build small input ============================== + std::string geometry = + std::get<4>(GetParam()) ? 
"" : ""; TripleComponent point1{Variable{"?point1"}}; TripleComponent subject{ - ad_utility::triple_component::Iri::fromIriref("")}; + ad_utility::triple_component::Iri::fromIriref(geometry)}; auto smallChild = ad_utility::makeExecutionTree( qec, Permutation::Enum::PSO, SparqlTriple{subject, std::string{""}, point1}); @@ -274,7 +278,7 @@ class SpatialJoinParamTest void testWrongPointInInput(SJ task, bool addLeftChildFirst, Rows expectedOutput, Row columnNames) { - auto kg = createSmallDatasetWithPoints(); + auto kg = createSmallDataset(); // make first point wrong: auto pos = kg.find("POINT("); kg = kg.insert(pos + 7, "wrongStuff"); @@ -314,478 +318,526 @@ class SpatialJoinParamTest } return std::nullopt; } -}; -Row mergeToRow(Row part1, Row part2, Row part3) { - Row result = part1; - for (size_t i = 0; i < part2.size(); i++) { - result.push_back(part2.at(i)); - } - for (size_t i = 0; i < part3.size(); i++) { - result.push_back(part3.at(i)); + Row mergeToRow(Row part1, Row part2, Row part3) { + Row result = part1; + for (size_t i = 0; i < part2.size(); i++) { + result.push_back(part2.at(i)); + } + for (size_t i = 0; i < part3.size(); i++) { + result.push_back(part3.at(i)); + } + return result; + }; + + std::string name1 = (std::get<4>(GetParam())) ? "\"Uni Freiburg TF Area\"" + : "\"Uni Freiburg TF\""; + std::string name2 = (std::get<4>(GetParam())) ? "\"Minster Freiburg Area\"" + : "\"Minster Freiburg\""; + std::string name3 = + (std::get<4>(GetParam())) ? "\"London Eye Area\"" : "\"London Eye\""; + std::string name4 = (std::get<4>(GetParam())) ? "\"Statue of liberty Area\"" + : "\"Statue of liberty\""; + std::string name5 = + (std::get<4>(GetParam())) ? "\"eiffel tower Area\"" : "\"eiffel tower\""; + + std::string node1 = (std::get<4>(GetParam())) ? "" : ""; + std::string node2 = (std::get<4>(GetParam())) ? "" : ""; + std::string node3 = (std::get<4>(GetParam())) ? "" : ""; + std::string node4 = (std::get<4>(GetParam())) ? 
"" : ""; + std::string node5 = (std::get<4>(GetParam())) ? "" : ""; + + std::string geometry1 = + (std::get<4>(GetParam())) ? "" : ""; + std::string geometry2 = + (std::get<4>(GetParam())) ? "" : ""; + std::string geometry3 = + (std::get<4>(GetParam())) ? "" : ""; + std::string geometry4 = + (std::get<4>(GetParam())) ? "" : ""; + std::string geometry5 = + (std::get<4>(GetParam())) ? "" : ""; + + std::string wktString1 = (std::get<4>(GetParam())) + ? SpatialJoinTestHelpers::areaUniFreiburg + : "POINT(7.835050 48.012670)"; + std::string wktString2 = (std::get<4>(GetParam())) + ? SpatialJoinTestHelpers::areaMuenster + : "POINT(7.852980 47.995570)"; + std::string wktString3 = (std::get<4>(GetParam())) + ? SpatialJoinTestHelpers::areaLondonEye + : "POINT(-0.119570 51.503330)"; + std::string wktString4 = (std::get<4>(GetParam())) + ? SpatialJoinTestHelpers::areaStatueOfLiberty + : "POINT(-74.044540 40.689250)"; + std::string wktString5 = (std::get<4>(GetParam())) + ? SpatialJoinTestHelpers::areaEiffelTower + : "POINT(2.294510 48.858250)"; + + Rows unordered_rows{{name1, node1, geometry1, wktString1}, + {name2, node2, geometry2, wktString2}, + {name3, node3, geometry3, wktString3}, + {name4, node4, geometry4, wktString4}, + {name5, node5, geometry5, wktString5}}; + + // Shortcuts + Row TF = unordered_rows.at(0); + Row Mun = unordered_rows.at(1); + Row Eye = unordered_rows.at(2); + Row Lib = unordered_rows.at(3); + Row Eif = unordered_rows.at(4); + + Rows unordered_rows_small{{geometry1, wktString1}, + {geometry2, wktString2}, + {geometry3, wktString3}, + {geometry4, wktString4}, + {geometry5, wktString5}}; + + // Shortcuts + Row sTF = unordered_rows_small.at(0); + Row sMun = unordered_rows_small.at(1); + Row sEye = unordered_rows_small.at(2); + Row sLib = unordered_rows_small.at(3); + Row sEif = unordered_rows_small.at(4); + + // in all calculations below, the factor 1000 is used to convert from km to m + + // distance from the object to itself should be zero + Row 
expectedDistSelf{"0"}; + + // helper functions + GeoPoint P(double x, double y) { return GeoPoint(y, x); } + + std::string expectedDist(const GeoPoint& p1, const GeoPoint& p2) { + auto p1_ = S2Point{S2LatLng::FromDegrees(p1.getLat(), p1.getLng())}; + auto p2_ = S2Point{S2LatLng::FromDegrees(p2.getLat(), p2.getLng())}; + + return std::to_string(S2Earth::ToKm(S1Angle(p1_, p2_))); } - return result; -}; -Rows unordered_rows{ - {"\"Uni Freiburg TF\"", "", "", - "POINT(7.835050 48.012670)"}, - {"\"Minster Freiburg\"", "", "", - "POINT(7.852980 47.995570)"}, - {"\"London Eye\"", "", "", "POINT(-0.119570 51.503330)"}, - {"\"Statue of liberty\"", "", "", - "POINT(-74.044540 40.689250)"}, - {"\"eiffel tower\"", "", "", - "POINT(2.294510 48.858250)"}, -}; + // Places for testing + GeoPoint PUni = P(7.83505, 48.01267); + GeoPoint PMun = P(7.85298, 47.99557); + GeoPoint PEif = P(2.29451, 48.85825); + GeoPoint PEye = P(-0.11957, 51.50333); + GeoPoint PLib = P(-74.04454, 40.68925); + std::vector testPlaces = std::vector{PUni, PMun, PEif, PEye, PLib}; -// Shortcuts -auto TF = unordered_rows.at(0); -auto Mun = unordered_rows.at(1); -auto Eye = unordered_rows.at(2); -auto Lib = unordered_rows.at(3); -auto Eif = unordered_rows.at(4); + // distance from Uni Freiburg to Freiburger Münster is 2,33 km according to + // google maps + Row expectedDistUniMun{expectedDist(PUni, PMun)}; -Rows unordered_rows_small{{"", "POINT(7.835050 48.012670)"}, - {"", "POINT(7.852980 47.995570)"}, - {"", "POINT(-0.119570 51.503330)"}, - {"", "POINT(-74.044540 40.689250)"}, - {"", "POINT(2.294510 48.858250)"}}; + // distance from Uni Freiburg to Eiffel Tower is 419,32 km according to + // google maps + Row expectedDistUniEif{expectedDist(PUni, PEif)}; -// Shortcuts -auto sTF = unordered_rows_small.at(0); -auto sMun = unordered_rows_small.at(1); -auto sEye = unordered_rows_small.at(2); -auto sLib = unordered_rows_small.at(3); -auto sEif = unordered_rows_small.at(4); + // distance from Minster Freiburg to 
eiffel tower is 421,09 km according to + // google maps + Row expectedDistMunEif{expectedDist(PMun, PEif)}; -// in all calculations below, the factor 1000 is used to convert from km to m + // distance from london eye to eiffel tower is 340,62 km according to + // google maps + Row expectedDistEyeEif{expectedDist(PEye, PEif)}; -// distance from the object to itself should be zero -Row expectedDistSelf{"0"}; + // distance from Uni Freiburg to London Eye is 690,18 km according to + // google maps + Row expectedDistUniEye{expectedDist(PUni, PEye)}; -// helper functions -auto P = [](double x, double y) { return GeoPoint(y, x); }; + // distance from Minster Freiburg to London Eye is 692,39 km according to + // google maps + Row expectedDistMunEye{expectedDist(PMun, PEye)}; -auto expectedDist = [](const GeoPoint& p1, const GeoPoint& p2) { - auto p1_ = S2Point{S2LatLng::FromDegrees(p1.getLat(), p1.getLng())}; - auto p2_ = S2Point{S2LatLng::FromDegrees(p2.getLat(), p2.getLng())}; + // distance from Uni Freiburg to Statue of Liberty is 6249,55 km according to + // google maps + Row expectedDistUniLib{expectedDist(PUni, PLib)}; - return std::to_string(S2Earth::ToKm(S1Angle(p1_, p2_))); -}; + // distance from Minster Freiburg to Statue of Liberty is 6251,58 km + // according to google maps + Row expectedDistMunLib{expectedDist(PMun, PLib)}; + + // distance from london eye to statue of liberty is 5575,08 km according to + // google maps + Row expectedDistEyeLib{expectedDist(PEye, PLib)}; + + // distance from eiffel tower to Statue of liberty is 5837,42 km according to + // google maps + Row expectedDistEifLib{expectedDist(PEif, PLib)}; + + using ExpectedRowsMaxDist = std::unordered_map; -// Places for testing -auto PUni = P(7.83505, 48.01267); -auto PMun = P(7.85298, 47.99557); -auto PEif = P(2.29451, 48.85825); -auto PEye = P(-0.11957, 51.50333); -auto PLib = P(-74.04454, 40.68925); -auto testPlaces = std::vector{PUni, PMun, PEif, PEye, PLib}; - -// distance from Uni Freiburg 
to Freiburger Münster is 2,33 km according to -// google maps -Row expectedDistUniMun{expectedDist(PUni, PMun)}; - -// distance from Uni Freiburg to Eiffel Tower is 419,32 km according to -// google maps -Row expectedDistUniEif{expectedDist(PUni, PEif)}; - -// distance from Minster Freiburg to eiffel tower is 421,09 km according to -// google maps -Row expectedDistMunEif{expectedDist(PMun, PEif)}; - -// distance from london eye to eiffel tower is 340,62 km according to -// google maps -Row expectedDistEyeEif{expectedDist(PEye, PEif)}; - -// distance from Uni Freiburg to London Eye is 690,18 km according to -// google maps -Row expectedDistUniEye{expectedDist(PUni, PEye)}; - -// distance from Minster Freiburg to London Eye is 692,39 km according to -// google maps -Row expectedDistMunEye{expectedDist(PMun, PEye)}; - -// distance from Uni Freiburg to Statue of Liberty is 6249,55 km according to -// google maps -Row expectedDistUniLib{expectedDist(PUni, PLib)}; - -// distance from Minster Freiburg to Statue of Liberty is 6251,58 km -// according to google maps -Row expectedDistMunLib{expectedDist(PMun, PLib)}; - -// distance from london eye to statue of liberty is 5575,08 km according to -// google maps -Row expectedDistEyeLib{expectedDist(PEye, PLib)}; - -// distance from eiffel tower to Statue of liberty is 5837,42 km according to -// google maps -Row expectedDistEifLib{expectedDist(PEif, PLib)}; - -using ExpectedRowsMaxDist = std::unordered_map; - -ExpectedRowsMaxDist expectedMaxDistRows = { - {1, - {mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf)}}, - {5000, - {mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - 
mergeToRow(Eif, Eif, expectedDistSelf)}}, - {500000, - {mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(TF, Eif, expectedDistUniEif), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Mun, Eif, expectedDistMunEif), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(Eif, TF, expectedDistUniEif), - mergeToRow(Eif, Mun, expectedDistMunEif), - mergeToRow(Eif, Eye, expectedDistEyeEif)}}, - {1000000, - {mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(TF, Eif, expectedDistUniEif), - mergeToRow(TF, Eye, expectedDistUniEye), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Mun, Eif, expectedDistMunEif), - mergeToRow(Mun, Eye, expectedDistMunEye), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Eye, TF, expectedDistUniEye), - mergeToRow(Eye, Mun, expectedDistMunEye), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(Eif, TF, expectedDistUniEif), - mergeToRow(Eif, Mun, expectedDistMunEif), - mergeToRow(Eif, Eye, expectedDistEyeEif)}}, - {10000000, - {mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(TF, Eif, expectedDistUniEif), - mergeToRow(TF, Eye, expectedDistUniEye), - mergeToRow(TF, Lib, expectedDistUniLib), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Mun, Eif, expectedDistMunEif), - mergeToRow(Mun, Eye, expectedDistMunEye), - mergeToRow(Mun, Lib, expectedDistMunLib), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Eye, TF, expectedDistUniEye), - mergeToRow(Eye, Mun, expectedDistMunEye), - mergeToRow(Eye, Lib, 
expectedDistEyeLib), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Lib, TF, expectedDistUniLib), - mergeToRow(Lib, Mun, expectedDistMunLib), - mergeToRow(Lib, Eye, expectedDistEyeLib), - mergeToRow(Lib, Eif, expectedDistEifLib), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(Eif, TF, expectedDistUniEif), - mergeToRow(Eif, Mun, expectedDistMunEif), - mergeToRow(Eif, Eye, expectedDistEyeEif), - mergeToRow(Eif, Lib, expectedDistEifLib)}}}; - -ExpectedRowsMaxDist expectedMaxDistRowsSmall = { - {1, - { - mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf), - }}, - {5000, - {mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sTF, sMun, expectedDistUniMun), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sTF, expectedDistUniMun), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf)}}, - {500000, - {mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sTF, sMun, expectedDistUniMun), - mergeToRow(sTF, sEif, expectedDistUniEif), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sTF, expectedDistUniMun), - mergeToRow(sMun, sEif, expectedDistMunEif), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sEye, sEif, expectedDistEyeEif), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf), - mergeToRow(sEif, sTF, expectedDistUniEif), - mergeToRow(sEif, sMun, expectedDistMunEif), - mergeToRow(sEif, sEye, expectedDistEyeEif)}}, - {1000000, - {mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sTF, sMun, expectedDistUniMun), - mergeToRow(sTF, sEif, expectedDistUniEif), - mergeToRow(sTF, sEye, expectedDistUniEye), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sTF, expectedDistUniMun), - mergeToRow(sMun, sEif, expectedDistMunEif), - 
mergeToRow(sMun, sEye, expectedDistMunEye), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sEye, sEif, expectedDistEyeEif), - mergeToRow(sEye, sTF, expectedDistUniEye), - mergeToRow(sEye, sMun, expectedDistMunEye), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf), - mergeToRow(sEif, sTF, expectedDistUniEif), - mergeToRow(sEif, sMun, expectedDistMunEif), - mergeToRow(sEif, sEye, expectedDistEyeEif)}}, - {10000000, - {mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sTF, sMun, expectedDistUniMun), - mergeToRow(sTF, sEif, expectedDistUniEif), - mergeToRow(sTF, sEye, expectedDistUniEye), - mergeToRow(sTF, sLib, expectedDistUniLib), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sTF, expectedDistUniMun), - mergeToRow(sMun, sEif, expectedDistMunEif), - mergeToRow(sMun, sEye, expectedDistMunEye), - mergeToRow(sMun, sLib, expectedDistMunLib), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sEye, sEif, expectedDistEyeEif), - mergeToRow(sEye, sTF, expectedDistUniEye), - mergeToRow(sEye, sMun, expectedDistMunEye), - mergeToRow(sEye, sLib, expectedDistEyeLib), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sLib, sTF, expectedDistUniLib), - mergeToRow(sLib, sMun, expectedDistMunLib), - mergeToRow(sLib, sEye, expectedDistEyeLib), - mergeToRow(sLib, sEif, expectedDistEifLib), - mergeToRow(sEif, sEif, expectedDistSelf), - mergeToRow(sEif, sTF, expectedDistUniEif), - mergeToRow(sEif, sMun, expectedDistMunEif), - mergeToRow(sEif, sEye, expectedDistEyeEif), - mergeToRow(sEif, sLib, expectedDistEifLib)}}}; - -ExpectedRowsMaxDist expectedMaxDistRowsSmallWrongPoint = { - {1, - { - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf), - }}, - {5000, - {mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sLib, sLib, expectedDistSelf), - 
mergeToRow(sEif, sEif, expectedDistSelf)}}, - {500000, - {mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sEif, expectedDistMunEif), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sEye, sEif, expectedDistEyeEif), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf), - mergeToRow(sEif, sMun, expectedDistMunEif), - mergeToRow(sEif, sEye, expectedDistEyeEif)}}, - {1000000, - {mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sEif, expectedDistMunEif), - mergeToRow(sMun, sEye, expectedDistMunEye), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sEye, sEif, expectedDistEyeEif), - mergeToRow(sEye, sMun, expectedDistMunEye), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf), - mergeToRow(sEif, sMun, expectedDistMunEif), - mergeToRow(sEif, sEye, expectedDistEyeEif)}}, - {10000000, - {mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sEif, expectedDistMunEif), - mergeToRow(sMun, sEye, expectedDistMunEye), - mergeToRow(sMun, sLib, expectedDistMunLib), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sEye, sEif, expectedDistEyeEif), - mergeToRow(sEye, sMun, expectedDistMunEye), - mergeToRow(sEye, sLib, expectedDistEyeLib), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sLib, sMun, expectedDistMunLib), - mergeToRow(sLib, sEye, expectedDistEyeLib), - mergeToRow(sLib, sEif, expectedDistEifLib), - mergeToRow(sEif, sEif, expectedDistSelf), - mergeToRow(sEif, sMun, expectedDistMunEif), - mergeToRow(sEif, sEye, expectedDistEyeEif), - mergeToRow(sEif, sLib, expectedDistEifLib)}}}; - -ExpectedRowsMaxDist expectedMaxDistRowsDiff = { - {1, - {mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Eif, sEif, expectedDistSelf)}}, - {5000, - {mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(TF, sMun, expectedDistUniMun), - 
mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Mun, sTF, expectedDistUniMun), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Eif, sEif, expectedDistSelf)}}, - {500000, - {mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(TF, sMun, expectedDistUniMun), - mergeToRow(TF, sEif, expectedDistUniEif), - mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Mun, sTF, expectedDistUniMun), - mergeToRow(Mun, sEif, expectedDistMunEif), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Eye, sEif, expectedDistEyeEif), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Eif, sEif, expectedDistSelf), - mergeToRow(Eif, sTF, expectedDistUniEif), - mergeToRow(Eif, sMun, expectedDistMunEif), - mergeToRow(Eif, sEye, expectedDistEyeEif)}}, - {1000000, - {mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(TF, sMun, expectedDistUniMun), - mergeToRow(TF, sEif, expectedDistUniEif), - mergeToRow(TF, sEye, expectedDistUniEye), - mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Mun, sTF, expectedDistUniMun), - mergeToRow(Mun, sEif, expectedDistMunEif), - mergeToRow(Mun, sEye, expectedDistMunEye), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Eye, sEif, expectedDistEyeEif), - mergeToRow(Eye, sTF, expectedDistUniEye), - mergeToRow(Eye, sMun, expectedDistMunEye), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Eif, sEif, expectedDistSelf), - mergeToRow(Eif, sTF, expectedDistUniEif), - mergeToRow(Eif, sMun, expectedDistMunEif), - mergeToRow(Eif, sEye, expectedDistEyeEif)}}, - {10000000, - {mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(TF, sMun, expectedDistUniMun), - mergeToRow(TF, sEif, expectedDistUniEif), - mergeToRow(TF, sEye, expectedDistUniEye), - mergeToRow(TF, sLib, expectedDistUniLib), - mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Mun, sTF, expectedDistUniMun), - mergeToRow(Mun, sEif, expectedDistMunEif), - mergeToRow(Mun, sEye, expectedDistMunEye), - mergeToRow(Mun, sLib, 
expectedDistMunLib), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Eye, sEif, expectedDistEyeEif), - mergeToRow(Eye, sTF, expectedDistUniEye), - mergeToRow(Eye, sMun, expectedDistMunEye), - mergeToRow(Eye, sLib, expectedDistEyeLib), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Lib, sTF, expectedDistUniLib), - mergeToRow(Lib, sMun, expectedDistMunLib), - mergeToRow(Lib, sEye, expectedDistEyeLib), - mergeToRow(Lib, sEif, expectedDistEifLib), - mergeToRow(Eif, sEif, expectedDistSelf), - mergeToRow(Eif, sTF, expectedDistUniEif), - mergeToRow(Eif, sMun, expectedDistMunEif), - mergeToRow(Eif, sEye, expectedDistEyeEif), - mergeToRow(Eif, sLib, expectedDistEifLib)}}}; - -ExpectedRowsMaxDist expectedMaxDistRowsDiffIDTable = { - {1, {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf)}}, - {5000, - {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), - mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun)}}, - {500000, - {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), - mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun), - mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif)}}, - {1000000, - {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), - mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun), - mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif), - mergeToRow({sTF.at(1)}, sEye, expectedDistUniEye)}}, - {10000000, - {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), - mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun), - mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif), - mergeToRow({sTF.at(1)}, sEye, expectedDistUniEye), - mergeToRow({sTF.at(1)}, sLib, expectedDistUniLib)}}}; - -// The expected values for nearest nneighbors are stored in a nested map. The -// key in the outer map is the maximum number of results and the key in the -// inner map is the maximum distance or std::nullopt. 
-using ExpectedRowsNearestNeighborsMaxDist = - std::unordered_map, Rows>; -using ExpectedRowsNearestNeighbors = - std::unordered_map; - -ExpectedRowsNearestNeighbors expectedNearestNeighbors = { - {1, - {{std::nullopt, + ExpectedRowsMaxDist expectedMaxDistRows = { + {1, {mergeToRow(TF, TF, expectedDistSelf), mergeToRow(Mun, Mun, expectedDistSelf), mergeToRow(Eye, Eye, expectedDistSelf), mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf)}}}}, - {2, - {{std::nullopt, + mergeToRow(Eif, Eif, expectedDistSelf)}}, + {5000, {mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Lib, Eye, expectedDistEyeLib), - mergeToRow(Eif, Eye, expectedDistEyeEif)}}, - {40, - {mergeToRow(TF, TF, expectedDistSelf), mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Mun, TF, expectedDistUniMun), mergeToRow(Eye, Eye, expectedDistSelf), mergeToRow(Lib, Lib, expectedDistSelf), mergeToRow(Eif, Eif, expectedDistSelf)}}, - {4000, + {500000, {mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(Mun, TF, expectedDistUniMun)}}, - {400000, - {mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(TF, Eif, expectedDistUniEif), mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Mun, Eif, expectedDistMunEif), mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Eye, Eif, expectedDistEyeEif), mergeToRow(Lib, Lib, expectedDistSelf), mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(Mun, TF, 
expectedDistUniMun), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Eif, Eye, expectedDistEyeEif)}}}}, - {3, - {{500000, + mergeToRow(Eif, TF, expectedDistUniEif), + mergeToRow(Eif, Mun, expectedDistMunEif), + mergeToRow(Eif, Eye, expectedDistEyeEif)}}, + {1000000, {mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(TF, Eif, expectedDistUniEif), + mergeToRow(TF, Eye, expectedDistUniEye), mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Mun, Eif, expectedDistMunEif), + mergeToRow(Mun, Eye, expectedDistMunEye), mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Eye, TF, expectedDistUniEye), + mergeToRow(Eye, Mun, expectedDistMunEye), mergeToRow(Lib, Lib, expectedDistSelf), mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(Eif, TF, expectedDistUniEif), + mergeToRow(Eif, Mun, expectedDistMunEif), + mergeToRow(Eif, Eye, expectedDistEyeEif)}}, + {10000000, + {mergeToRow(TF, TF, expectedDistSelf), mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(TF, Eif, expectedDistUniEif), + mergeToRow(TF, Eye, expectedDistUniEye), + mergeToRow(TF, Lib, expectedDistUniLib), + mergeToRow(Mun, Mun, expectedDistSelf), mergeToRow(Mun, TF, expectedDistUniMun), mergeToRow(Mun, Eif, expectedDistMunEif), - mergeToRow(TF, Eif, expectedDistUniEif), + mergeToRow(Mun, Eye, expectedDistMunEye), + mergeToRow(Mun, Lib, expectedDistMunLib), + mergeToRow(Eye, Eye, expectedDistSelf), mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Eye, TF, expectedDistUniEye), + mergeToRow(Eye, Mun, expectedDistMunEye), + mergeToRow(Eye, Lib, expectedDistEyeLib), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Lib, TF, expectedDistUniLib), + mergeToRow(Lib, Mun, expectedDistMunLib), + mergeToRow(Lib, Eye, expectedDistEyeLib), + mergeToRow(Lib, Eif, expectedDistEifLib), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(Eif, TF, expectedDistUniEif), + 
mergeToRow(Eif, Mun, expectedDistMunEif), mergeToRow(Eif, Eye, expectedDistEyeEif), - mergeToRow(Eif, TF, expectedDistUniEif)}}}}}; + mergeToRow(Eif, Lib, expectedDistEifLib)}}}; + + ExpectedRowsMaxDist expectedMaxDistRowsSmall = { + {1, + { + mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), + }}, + {5000, + {mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sTF, sMun, expectedDistUniMun), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sTF, expectedDistUniMun), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf)}}, + {500000, + {mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sTF, sMun, expectedDistUniMun), + mergeToRow(sTF, sEif, expectedDistUniEif), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sTF, expectedDistUniMun), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sTF, expectedDistUniEif), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif)}}, + {1000000, + {mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sTF, sMun, expectedDistUniMun), + mergeToRow(sTF, sEif, expectedDistUniEif), + mergeToRow(sTF, sEye, expectedDistUniEye), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sTF, expectedDistUniMun), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sMun, sEye, expectedDistMunEye), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sEye, sTF, expectedDistUniEye), + mergeToRow(sEye, sMun, expectedDistMunEye), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, 
sEif, expectedDistSelf), + mergeToRow(sEif, sTF, expectedDistUniEif), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif)}}, + {10000000, + {mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sTF, sMun, expectedDistUniMun), + mergeToRow(sTF, sEif, expectedDistUniEif), + mergeToRow(sTF, sEye, expectedDistUniEye), + mergeToRow(sTF, sLib, expectedDistUniLib), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sTF, expectedDistUniMun), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sMun, sEye, expectedDistMunEye), + mergeToRow(sMun, sLib, expectedDistMunLib), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sEye, sTF, expectedDistUniEye), + mergeToRow(sEye, sMun, expectedDistMunEye), + mergeToRow(sEye, sLib, expectedDistEyeLib), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sLib, sTF, expectedDistUniLib), + mergeToRow(sLib, sMun, expectedDistMunLib), + mergeToRow(sLib, sEye, expectedDistEyeLib), + mergeToRow(sLib, sEif, expectedDistEifLib), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sTF, expectedDistUniEif), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif), + mergeToRow(sEif, sLib, expectedDistEifLib)}}}; + + ExpectedRowsMaxDist expectedMaxDistRowsSmallWrongPoint = { + {1, + { + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), + }}, + {5000, + {mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf)}}, + {500000, + {mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sLib, sLib, expectedDistSelf), + 
mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif)}}, + {1000000, + {mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sMun, sEye, expectedDistMunEye), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sEye, sMun, expectedDistMunEye), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif)}}, + {10000000, + {mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sMun, sEye, expectedDistMunEye), + mergeToRow(sMun, sLib, expectedDistMunLib), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sEye, sMun, expectedDistMunEye), + mergeToRow(sEye, sLib, expectedDistEyeLib), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sLib, sMun, expectedDistMunLib), + mergeToRow(sLib, sEye, expectedDistEyeLib), + mergeToRow(sLib, sEif, expectedDistEifLib), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif), + mergeToRow(sEif, sLib, expectedDistEifLib)}}}; + + ExpectedRowsMaxDist expectedMaxDistRowsDiff = { + {1, + {mergeToRow(TF, sTF, expectedDistSelf), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Lib, sLib, expectedDistSelf), + mergeToRow(Eif, sEif, expectedDistSelf)}}, + {5000, + {mergeToRow(TF, sTF, expectedDistSelf), + mergeToRow(TF, sMun, expectedDistUniMun), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Mun, sTF, expectedDistUniMun), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Lib, sLib, expectedDistSelf), + mergeToRow(Eif, sEif, expectedDistSelf)}}, + {500000, + {mergeToRow(TF, sTF, expectedDistSelf), + 
mergeToRow(TF, sMun, expectedDistUniMun), + mergeToRow(TF, sEif, expectedDistUniEif), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Mun, sTF, expectedDistUniMun), + mergeToRow(Mun, sEif, expectedDistMunEif), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Eye, sEif, expectedDistEyeEif), + mergeToRow(Lib, sLib, expectedDistSelf), + mergeToRow(Eif, sEif, expectedDistSelf), + mergeToRow(Eif, sTF, expectedDistUniEif), + mergeToRow(Eif, sMun, expectedDistMunEif), + mergeToRow(Eif, sEye, expectedDistEyeEif)}}, + {1000000, + {mergeToRow(TF, sTF, expectedDistSelf), + mergeToRow(TF, sMun, expectedDistUniMun), + mergeToRow(TF, sEif, expectedDistUniEif), + mergeToRow(TF, sEye, expectedDistUniEye), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Mun, sTF, expectedDistUniMun), + mergeToRow(Mun, sEif, expectedDistMunEif), + mergeToRow(Mun, sEye, expectedDistMunEye), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Eye, sEif, expectedDistEyeEif), + mergeToRow(Eye, sTF, expectedDistUniEye), + mergeToRow(Eye, sMun, expectedDistMunEye), + mergeToRow(Lib, sLib, expectedDistSelf), + mergeToRow(Eif, sEif, expectedDistSelf), + mergeToRow(Eif, sTF, expectedDistUniEif), + mergeToRow(Eif, sMun, expectedDistMunEif), + mergeToRow(Eif, sEye, expectedDistEyeEif)}}, + {10000000, + {mergeToRow(TF, sTF, expectedDistSelf), + mergeToRow(TF, sMun, expectedDistUniMun), + mergeToRow(TF, sEif, expectedDistUniEif), + mergeToRow(TF, sEye, expectedDistUniEye), + mergeToRow(TF, sLib, expectedDistUniLib), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Mun, sTF, expectedDistUniMun), + mergeToRow(Mun, sEif, expectedDistMunEif), + mergeToRow(Mun, sEye, expectedDistMunEye), + mergeToRow(Mun, sLib, expectedDistMunLib), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Eye, sEif, expectedDistEyeEif), + mergeToRow(Eye, sTF, expectedDistUniEye), + mergeToRow(Eye, sMun, expectedDistMunEye), + mergeToRow(Eye, sLib, expectedDistEyeLib), + mergeToRow(Lib, sLib, 
expectedDistSelf), + mergeToRow(Lib, sTF, expectedDistUniLib), + mergeToRow(Lib, sMun, expectedDistMunLib), + mergeToRow(Lib, sEye, expectedDistEyeLib), + mergeToRow(Lib, sEif, expectedDistEifLib), + mergeToRow(Eif, sEif, expectedDistSelf), + mergeToRow(Eif, sTF, expectedDistUniEif), + mergeToRow(Eif, sMun, expectedDistMunEif), + mergeToRow(Eif, sEye, expectedDistEyeEif), + mergeToRow(Eif, sLib, expectedDistEifLib)}}}; + + ExpectedRowsMaxDist expectedMaxDistRowsDiffIDTable = { + {1, {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf)}}, + {5000, + {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), + mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun)}}, + {500000, + {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), + mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun), + mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif)}}, + {1000000, + {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), + mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun), + mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif), + mergeToRow({sTF.at(1)}, sEye, expectedDistUniEye)}}, + {10000000, + {mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), + mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun), + mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif), + mergeToRow({sTF.at(1)}, sEye, expectedDistUniEye), + mergeToRow({sTF.at(1)}, sLib, expectedDistUniLib)}}}; + + // The expected values for nearest neighbors are stored in a nested map. The + // key in the outer map is the maximum number of results and the key in the + // inner map is the maximum distance or std::nullopt. 
+ using ExpectedRowsNearestNeighborsMaxDist = + std::unordered_map, Rows>; + using ExpectedRowsNearestNeighbors = + std::unordered_map; + + ExpectedRowsNearestNeighbors expectedNearestNeighbors = { + {1, + {{std::nullopt, + {mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf)}}}}, + {2, + {{std::nullopt, + {mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Lib, Eye, expectedDistEyeLib), + mergeToRow(Eif, Eye, expectedDistEyeEif)}}, + {40, + {mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf)}}, + {4000, + {mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(Mun, TF, expectedDistUniMun)}}, + {400000, + {mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Eif, Eye, expectedDistEyeEif)}}}}, + {3, + {{500000, + {mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + 
mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Mun, Eif, expectedDistMunEif), + mergeToRow(TF, Eif, expectedDistUniEif), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Eif, Eye, expectedDistEyeEif), + mergeToRow(Eif, TF, expectedDistUniEif)}}}}}; + + // some combinations of the gtest parameters are invalid. Those cases should + // not be tested and are therefore excluded. With the area dataset (gtest + // parameter 4), a configuration is invalid if it has no maximum distance + // config or if it uses the S2_GEOMETRY algorithm (gtest parameter 0). + bool isInvalidAreaTestConfig(std::optional maxDistConfig) { + bool isAreaDataset = std::get<4>(GetParam()); + bool isS2geoAlg = + std::get<0>(GetParam()) == SpatialJoinAlgorithm::S2_GEOMETRY; + return isAreaDataset && (!maxDistConfig.has_value() || + (maxDistConfig.has_value() && isS2geoAlg)); + } +}; // test the compute result method on small examples TEST_P(SpatialJoinParamTest, computeResultSmallDatasetLargeChildren) { @@ -797,6 +849,10 @@ TEST_P(SpatialJoinParamTest, computeResultSmallDatasetLargeChildren) { auto nearestNeighborsTask = getNearestNeighbors(); auto maxDistTask = getMaxDist(); + if (isInvalidAreaTestConfig(maxDistTask)) { + return; + } + if (maxDistTask.has_value()) { buildAndTestSmallTestSetLargeChildren( maxDistTask.value(), addLeftChildFirst, @@ -818,6 +874,9 @@ TEST_P(SpatialJoinParamTest, computeResultSmallDatasetSmallChildren) { bool addLeftChildFirst = std::get<1>(GetParam()); auto maxDistTask = getMaxDist(); + if (isInvalidAreaTestConfig(maxDistTask)) { + return; + } if (maxDistTask.has_value()) { buildAndTestSmallTestSetSmallChildren( maxDistTask.value(), addLeftChildFirst, @@ -837,6 +896,9 @@ TEST_P(SpatialJoinParamTest, computeResultSmallDatasetDifferentSizeChildren) { bool bigChildLeft = std::get<2>(GetParam()); auto maxDistTask = getMaxDist(); + if (isInvalidAreaTestConfig(maxDistTask)) { + return; + } if (maxDistTask.has_value()) { buildAndTestSmallTestSetDiffSizeChildren( maxDistTask.value(), addLeftChildFirst, @@ -850,6 +912,10 @@ TEST_P(SpatialJoinParamTest, 
maxSizeMaxDistanceTest) { MaxDistanceConfig maxDistConf{maxDist}; bool addLeftChildFirst = std::get<1>(GetParam()); + if (isInvalidAreaTestConfig(maxDistConf)) { + return; + } + // test small children Row columnNames{"?obj1", "?point1", "?obj2", "?point2", "?distOfTheTwoObjectsAddedInternally"}; @@ -885,6 +951,9 @@ TEST_P(SpatialJoinParamTest, diffSizeIdTables) { bool bigChildLeft = std::get<2>(GetParam()); auto maxDistTask = getMaxDist(); + if (isInvalidAreaTestConfig(maxDistTask)) { + return; + } if (maxDistTask.has_value()) { testDiffSizeIdTables( maxDistTask.value(), addLeftChildFirst, @@ -900,7 +969,10 @@ TEST_P(SpatialJoinParamTest, wrongPointInInput) { bool addLeftChildFirst = std::get<1>(GetParam()); auto maxDistTask = getMaxDist(); - if (maxDistTask.has_value()) { + if (isInvalidAreaTestConfig(maxDistTask)) { + return; + } + if (maxDistTask.has_value() and !std::get<4>(GetParam())) { testWrongPointInInput( maxDistTask.value(), addLeftChildFirst, expectedMaxDistRowsSmallWrongPoint[maxDistTask.value().maxDist_], @@ -922,7 +994,8 @@ INSTANTIATE_TEST_SUITE_P( NearestNeighborsConfig{2, 400000}, NearestNeighborsConfig{2, 4000}, NearestNeighborsConfig{2, 40}, - NearestNeighborsConfig{3, 500000}))); + NearestNeighborsConfig{3, 500000}), + ::testing::Bool())); } // end of Namespace computeResultTest @@ -981,8 +1054,7 @@ void testBoundingBox(const size_t& maxDistInMeters, const Point& startPoint) { const std::vector& bbox, SpatialJoinAlgorithms* spatialJoinAlg) { // check if the point is contained in any bounding box - bool within = spatialJoinAlg->OnlyForTestingWrapperContainedInBoundingBoxes( - bbox, point1); + bool within = spatialJoinAlg->isContainedInBoundingBoxes(bbox, point1); if (!within) { GeoPoint geo1{point1.get<1>(), point1.get<0>()}; GeoPoint geo2{startPoint.get<1>(), startPoint.get<0>()}; @@ -991,25 +1063,10 @@ void testBoundingBox(const size_t& maxDistInMeters, const Point& startPoint) { } }; - PreparedSpatialJoinParams params{nullptr, - nullptr, - 
nullptr, - nullptr, - 0, - 0, - std::vector{}, - 1, - maxDistInMeters, - std::nullopt}; - - std::variant task{ - MaxDistanceConfig{maxDistInMeters}}; - SpatialJoinConfiguration config{task, Variable{"?x"}, Variable{"?y"}}; - - SpatialJoinAlgorithms spatialJoinAlgs{buildTestQEC(), params, config}; - - std::vector bbox = - spatialJoinAlgs.OnlyForTestingWrapperComputeBoundingBox(startPoint); + SpatialJoinAlgorithms spatialJoinAlgs = + getDummySpatialJoinAlgsForWrapperTesting(maxDistInMeters); + + std::vector bbox = spatialJoinAlgs.computeQueryBox(startPoint); // broad grid test for (int lon = -180; lon < 180; lon += 20) { for (int lat = -90; lat < 90; lat += 20) { @@ -1080,33 +1137,8 @@ TEST(SpatialJoin, computeBoundingBox) { } TEST(SpatialJoin, isContainedInBoundingBoxes) { - // build dummy join to access the containedInBoundingBox and - // computeBoundingBox functions - auto qec = buildTestQEC(); - MaxDistanceConfig task{1000}; - std::shared_ptr spatialJoinOperation = - ad_utility::makeExecutionTree( - qec, - SpatialJoinConfiguration{task, Variable{"?point1"}, - Variable{"?point2"}}, - std::nullopt, std::nullopt); - - std::shared_ptr op = spatialJoinOperation->getRootOperation(); - SpatialJoin* spatialJoin = static_cast(op.get()); - - PreparedSpatialJoinParams params{nullptr, - nullptr, - nullptr, - nullptr, - 0, - 0, - std::vector{}, - 1, - spatialJoin->getMaxDist(), - std::nullopt}; - - SpatialJoinAlgorithms spatialJoinAlgs{qec, params, - spatialJoin->onlyForTestingGetConfig()}; + SpatialJoinAlgorithms spatialJoinAlgs = + getDummySpatialJoinAlgsForWrapperTesting(); // note that none of the boxes is overlapping, therefore we can check, that // none of the points which should be contained in one box are contained in @@ -1184,20 +1216,16 @@ TEST(SpatialJoin, isContainedInBoundingBoxes) { // boxes for (size_t i = 0; i < shouldBeContained.size(); i++) { for (size_t k = 0; k < shouldBeContained.at(i).size(); k++) { - ASSERT_TRUE( - spatialJoinAlgs - 
.OnlyForTestingWrapperContainedInBoundingBoxes( - toTest, shouldBeContained.at(i).at(k))); + ASSERT_TRUE(spatialJoinAlgs.isContainedInBoundingBoxes( + toTest, shouldBeContained.at(i).at(k))); } } // test all points, which shouldn't be contained in the bounding // boxes for (size_t i = 0; i < shouldNotBeContained.size(); i++) { for (size_t k = 0; k < shouldNotBeContained.at(i).size(); k++) { - ASSERT_FALSE( - spatialJoinAlgs - .OnlyForTestingWrapperContainedInBoundingBoxes( - toTest, shouldNotBeContained.at(i).at(k))); + ASSERT_FALSE(spatialJoinAlgs.isContainedInBoundingBoxes( + toTest, shouldNotBeContained.at(i).at(k))); } } } @@ -1208,6 +1236,437 @@ TEST(SpatialJoin, isContainedInBoundingBoxes) { } } +void testBoundingBoxOfAreaOrMidpointOfBox(bool testArea = true) { + auto checkBoundingBox = [](Box box, double minLng, double minLat, + double maxLng, double maxLat) { + ASSERT_DOUBLE_EQ(minLng, box.min_corner().get<0>()); + ASSERT_DOUBLE_EQ(minLat, box.min_corner().get<1>()); + ASSERT_DOUBLE_EQ(maxLng, box.max_corner().get<0>()); + ASSERT_DOUBLE_EQ(maxLat, box.max_corner().get<1>()); + }; + + auto checkMidpoint = [](const Point& point, double lng, double lat) { + ASSERT_DOUBLE_EQ(point.get<0>(), lng); + ASSERT_DOUBLE_EQ(point.get<1>(), lat); + }; + + SpatialJoinAlgorithms sja = getDummySpatialJoinAlgsForWrapperTesting(); + + BoostGeometryNamespace::AnyGeometry geometryA; + std::string wktA = + "POLYGON((9.33 47.41, 9.31 47.45, 9.32 47.48, 9.35 47.42, 9.33 " + "47.41))"; // closed polygon + boost::geometry::read_wkt(wktA, geometryA); + Box a = boost::apply_visitor(BoostGeometryNamespace::BoundingBoxVisitor(), + geometryA); + + BoostGeometryNamespace::AnyGeometry geometryB; + std::string wktB = + "POLYGON((-4.1 10.0, -9.9 10.0, -9.9 -1.0, -4.1 -1.0))"; // not closed + // polygon + boost::geometry::read_wkt(wktB, geometryB); + Box b = boost::apply_visitor(BoostGeometryNamespace::BoundingBoxVisitor(), + geometryB); + + BoostGeometryNamespace::AnyGeometry geometryC; + 
std::string wktC = + "POLYGON((0.0 0.0, 1.1 0.0, 1.1 1.1, 0.0 1.1, 0.0 0.0))"; // closed + // polygon + boost::geometry::read_wkt(wktC, geometryC); + Box c = boost::apply_visitor(BoostGeometryNamespace::BoundingBoxVisitor(), + geometryC); + + if (testArea) { + checkBoundingBox(a, 9.31, 47.41, 9.35, 47.48); + checkBoundingBox(b, -9.9, -1.0, -4.1, 10.0); + checkBoundingBox(c, 0.0, 0.0, 1.1, 1.1); + } else { + checkMidpoint(sja.calculateMidpointOfBox(a), 9.33, 47.445); + checkMidpoint(sja.calculateMidpointOfBox(b), -7.0, 4.5); + checkMidpoint(sja.calculateMidpointOfBox(c), 0.55, 0.55); + } +} + +TEST(SpatialJoin, BoundingBoxOfArea) { testBoundingBoxOfAreaOrMidpointOfBox(); } + +TEST(SpatialJoin, MidpointOfBoundingBox) { + testBoundingBoxOfAreaOrMidpointOfBox(false); +} + +TEST(SpatialJoin, getMaxDistFromMidpointToAnyPointInsideTheBox) { + SpatialJoinAlgorithms sja = getDummySpatialJoinAlgsForWrapperTesting(); + + // the following polygon is from the eiffel tower + BoostGeometryNamespace::AnyGeometry geometryEiffel; + std::string wktEiffel = + "POLYGON((2.2933119 48.858248,2.2935432 48.8581003,2.2935574 " + "48.8581099,2.2935712 48.8581004,2.2936112 48.8581232,2.2936086 " + "48.8581249,2.293611 48.8581262,2.2936415 48.8581385,2.293672 " + "48.8581477,2.2937035 48.8581504,2.293734 48.858149,2.2937827 " + "48.8581439,2.2938856 48.8581182,2.2939778 48.8580882,2.2940648 " + "48.8580483,2.2941435 48.8579991,2.2941937 48.8579588,2.2942364 " + "48.8579197,2.2942775 48.8578753,2.2943096 48.8578312,2.2943307 " + "48.8577908,2.2943447 48.857745,2.2943478 48.8577118,2.2943394 " + "48.8576885,2.2943306 48.8576773,2.2943205 48.8576677,2.2943158 " + "48.8576707,2.2942802 48.8576465,2.2942977 48.8576355,2.2942817 " + "48.8576248,2.2942926 48.8576181,2.2944653 48.8575069,2.2945144 " + "48.8574753,2.2947414 48.8576291,2.294725 48.8576392,2.2947426 " + "48.857651,2.294706 48.8576751,2.294698 48.8576696,2.2946846 " + "48.8576782,2.2946744 48.8576865,2.2946881 48.8576957,2.2946548 " + 
"48.857717,2.2946554 48.8577213,2.2946713 48.8577905,2.2946982 " + "48.8578393,2.2947088 48.8578585,2.2947529 48.8579196,2.2948133 " + "48.8579803,2.2948836 48.85803,2.2949462 48.8580637,2.2950051 " + "48.8580923,2.2950719 48.85812,2.2951347 48.8581406,2.2951996 " + "48.8581564,2.2952689 48.8581663,2.295334 48.8581699,2.2953613 " + "48.8581518,2.2953739 48.8581604,2.2953965 48.8581497,2.2954016 " + "48.8581464,2.2953933 48.8581409,2.2954304 48.8581172,2.2954473 " + "48.8581285,2.2954631 48.8581182,2.2956897 48.8582718,2.295653 " + "48.8582954,2.2955837 48.85834,2.2954575 48.8584212,2.2954416 " + "48.858411,2.2954238 48.8584227,2.2953878 48.8583981,2.2953925 " + "48.858395,2.2953701 48.8583857,2.2953419 48.8583779,2.2953057 " + "48.8583737,2.2952111 48.8583776,2.2951081 48.858403,2.2950157 " + "48.8584326,2.2949284 48.8584723,2.2948889 48.8584961,2.2947988 " + "48.8585613,2.2947558 48.8586003,2.2947144 48.8586446,2.294682 " + "48.8586886,2.2946605 48.8587289,2.2946462 48.8587747,2.294644 " + "48.8587962,2.2946462 48.8588051,2.2946486 48.8588068,2.2946938 " + "48.8588377,2.2946607 48.8588587,2.294663 48.8588603,2.294681 " + "48.858849,2.2947169 48.8588737,2.2946988 48.858885,2.2947154 " + "48.8588961,2.2944834 48.8590453,2.2943809 48.8589771,2.2943708 " + "48.8589703,2.2942571 48.8588932,2.2942741 48.8588824,2.2942567 " + "48.8588708,2.2942893 48.8588493,2.294306 48.8588605,2.2943103 " + "48.8588577,2.2942883 48.8588426,2.2943122 48.8588275,2.2943227 " + "48.8588209,2.2943283 48.8588173,2.2943315 48.8588125,2.2943333 " + "48.8588018,2.2943166 48.8587327,2.294301 48.8586978,2.2942783 " + "48.8586648,2.2942406 48.8586191,2.2942064 48.858577,2.2941734 " + "48.8585464,2.2941015 48.8584943,2.2940384 48.8584609,2.2939792 " + "48.8584325,2.293912 48.8584052,2.2938415 48.8583828,2.293784 " + "48.8583695,2.2937145 48.8583599,2.2936514 48.8583593,2.2936122 " + "48.8583846,2.293606 48.8583807,2.2935688 48.8584044,2.2935515 " + "48.8583929,2.293536 48.8584028,2.2933119 
48.858248))"; + boost::geometry::read_wkt(wktEiffel, geometryEiffel); + Box boxEiffel = boost::apply_visitor( + BoostGeometryNamespace::BoundingBoxVisitor(), geometryEiffel); + auto midpoint_eiffel = sja.calculateMidpointOfBox(boxEiffel); + + // call the function without the precalculated midpoint, the upper bound max + // distance needs to be at least 125m (the tower has a square base of length + // 125m. Therefore from the midpoint to the side of the box and then to the + // top of the box results in 125m/2 + 125m/2 = 125m). As the tower is not that + // near to the equator and the square base has a worst case alignment to the + // longitude and latitude lines (45 degrees tilted), the distance estimate + // gets a little more than 125m (its upper bound estimate is 219m) + ASSERT_GE(sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxEiffel), 125); + ASSERT_DOUBLE_EQ(sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxEiffel), + sja.getMaxDistFromMidpointToAnyPointInsideTheBox( + boxEiffel, midpoint_eiffel)); + + // the following polygon is from the Minster of Freiburg + BoostGeometryNamespace::AnyGeometry geometryMinster; + std::string wktMinster = + "POLYGON((7.8520522 47.9956071,7.8520528 47.9955872,7.8521103 " + "47.995588,7.8521117 47.9955419,7.852113 47.9954975,7.8520523 " + "47.9954968,7.8520527 47.995477,7.8521152 47.9954775,7.8521154 " + "47.9954688,7.8521299 47.995469,7.8521311 47.9954303,7.8521611 " + "47.9954307,7.8521587 47.9954718,7.8522674 47.9954741,7.8522681 " + "47.9954676,7.8522746 47.9954643,7.8522832 47.9954599,7.8522976 " + "47.99546,7.8523031 47.995455,7.8523048 47.9954217,7.8522781 " + "47.9954213,7.8522786 47.9954058,7.8523123 47.9954065,7.852314 " + "47.9953744,7.8523383 47.9953748,7.8523373 47.9954062,7.8524164 " + "47.995408,7.8524176 47.9953858,7.852441 47.9953865,7.8524398 " + "47.9954085,7.8525077 47.9954101,7.8525088 47.9953886,7.8525316 " + "47.9953892,7.8525305 47.9954106,7.8526031 47.9954123,7.8526042 " + "47.9953915,7.8526276 
47.9953922,7.8526265 47.9954128,7.8526944 " + "47.9954144,7.8526954 47.9953943,7.8527183 47.9953949,7.8527173 " + "47.9954149,7.8527892 47.9954165,7.8527903 47.9953974,7.8528131 " + "47.9953979,7.8528122 47.9954171,7.852871 47.9954182,7.8528712 " + "47.995416,7.8528791 47.9954112,7.85289 47.9954113,7.8528971 " + "47.9954158,7.8528974 47.9954052,7.8528925 47.9954052,7.8528928 " + "47.9953971,7.8529015 47.9953972,7.8529024 47.9953702,7.852897 " + "47.9953701,7.8528972 47.9953645,7.8529037 47.9953645,7.8529038 " + "47.9953613,7.8529069 47.9953614,7.8529071 47.9953541,7.8529151 " + "47.9953542,7.8529149 47.9953581,7.8529218 47.9953582,7.8529217 " + "47.9953631,7.8529621 47.9953637,7.8529623 47.9953572,7.8529719 " + "47.9953573,7.8529716 47.9953642,7.8530114 47.9953648,7.8530116 " + "47.9953587,7.8530192 47.9953589,7.853019 47.995365,7.8530635 " + "47.9953657,7.8530637 47.9953607,7.8530716 47.9953608,7.8530715 " + "47.9953657,7.8530758 47.9953657,7.8530757 47.9953688,7.8530817 " + "47.9953689,7.8530815 47.9953742,7.8530747 47.9953741,7.8530737 " + "47.9954052,7.8530794 47.9954053,7.8530792 47.995413,7.8530717 " + "47.9954129,7.8530708 47.9954199,7.8531165 47.9954207,7.8531229 " + "47.9954131,7.8531292 47.9954209,7.8531444 47.9954211,7.8531444 " + "47.9954238,7.8531569 47.995424,7.8531661 47.9954152,7.853171 " + "47.9954201,7.853183 47.9954203,7.8531829 47.9954234,7.8531973 " + "47.9954236,7.8531977 47.9954138,7.8532142 47.9954141,7.8532141 " + "47.9954253,7.8532425 47.9954355,7.8532514 47.9954298,7.8532593 " + "47.9954353,7.8532915 47.9954255,7.8532923 47.9954155,7.8533067 " + "47.995416,7.8533055 47.9954261,7.8533304 47.9954368,7.8533399 " + "47.995431,7.85335 47.9954372,7.8533758 47.9954288,7.853377 " + "47.9954188,7.8533932 47.9954192,7.8533924 47.9954298,7.8534151 " + "47.9954395,7.8534278 47.9954345,7.8534373 47.995441,7.8534664 " + "47.995432,7.8534672 47.9954209,7.8534832 47.9954211,7.8534828 " + "47.9954322,7.8535077 47.9954449,7.8535224 47.9954375,7.8535325 " + 
"47.995448,7.8535644 47.9954403,7.8535717 47.9954305,7.8535866 " + "47.9954356,7.8535796 47.9954443,7.8536079 47.9954674,7.8536221 " + "47.9954629,7.8536221 47.9954735,7.8536573 47.9954801,7.8536707 " + "47.9954728,7.8536813 47.9954812,7.8536686 47.9954876,7.8536776 " + "47.9955168,7.8536958 47.9955192,7.8536876 47.9955286,7.8537133 " + "47.9955444,7.85373 47.9955428,7.8537318 47.9955528,7.8537154 " + "47.9955545,7.8537069 47.9955819,7.8537168 47.995588,7.8537044 " + "47.9955948,7.8537086 47.9956193,7.8537263 47.9956245,7.8537206 " + "47.9956347,7.8537069 47.9956317,7.8536802 47.9956473,7.8536819 " + "47.9956577,7.8536667 47.9956604,7.8536506 47.9956817,7.8536639 " + "47.9956902,7.8536543 47.9956981,7.8536394 47.9956887,7.8536331 " + "47.9956931,7.853609 47.9956954,7.8536024 47.9957048,7.8535868 " + "47.9957028,7.8535591 47.9957206,7.8535642 47.9957285,7.8535487 " + "47.9957327,7.8535423 47.9957215,7.853508 47.9957131,7.8534942 " + "47.9957215,7.8534818 47.9957186,7.8534587 47.9957284,7.853458 " + "47.9957389,7.8534421 47.9957388,7.8534424 47.9957273,7.853418 " + "47.995714,7.8534099 47.9957194,7.8534021 47.995713,7.8533721 " + "47.9957242,7.8533712 47.9957359,7.8533558 47.9957351,7.8533565 " + "47.9957247,7.8533269 47.9957094,7.8533171 47.9957165,7.8533073 " + "47.9957088,7.8532874 47.9957186,7.8532866 47.9957296,7.8532698 " + "47.9957295,7.8532698 47.9957189,7.8532466 47.9957048,7.8532372 " + "47.9957131,7.8532277 47.995705,7.8532014 47.9957171,7.8532009 " + "47.9957284,7.8531844 47.9957281,7.8531847 47.9957174,7.8531778 " + "47.9957102,7.853163 47.9957245,7.8530549 47.9957225,7.8530552 " + "47.9957161,7.8529541 47.9957138,7.8529535 47.9957236,7.8529578 " + "47.9957237,7.8529577 47.9957269,7.852953 47.9957268,7.8529529 " + "47.9957308,7.8529477 47.9957307,7.8529478 47.9957271,7.8528964 " + "47.9957256,7.8528963 47.9957288,7.8528915 47.9957287,7.8528916 " + "47.9957256,7.8528876 47.9957255,7.8528875 47.9957223,7.8528912 " + "47.9957224,7.8528908 
47.9957195,7.8528811 47.9957194,7.8527983 " + "47.9957162,7.8527981 47.9957192,7.8527723 47.9957185,7.8527732 " + "47.9957016,7.852703 47.9957003,7.8527021 47.9957175,7.8526791 " + "47.9957171,7.8526788 47.9957225,7.8526097 47.9957225,7.8526099 " + "47.995718,7.8525863 47.9957183,7.8525874 47.9956981,7.8525155 " + "47.9956967,7.8525144 47.995718,7.8524916 47.9957174,7.8524927 " + "47.9956963,7.8524241 47.995695,7.852423 47.9957153,7.8523996 " + "47.9957148,7.8524007 47.9956946,7.8523226 47.9956931,7.8523217 " + "47.9957212,7.8522948 47.9957208,7.8522957 47.9956927,7.8522663 " + "47.9956923,7.8522667 47.9956784,7.8522926 47.9956787,7.8522937 " + "47.9956433,7.8522882 47.995635,7.8522723 47.9956351,7.8522611 " + "47.9956281,7.8522613 47.9956189,7.8521543 47.9956174,7.852153 " + "47.9956591,7.8521196 47.9956587,7.8521209 47.995617,7.8521109 " + "47.9956168,7.8521111 47.9956079,7.8520522 47.9956071))"; + boost::geometry::read_wkt(wktMinster, geometryMinster); + Box boxMinster = boost::apply_visitor( + BoostGeometryNamespace::BoundingBoxVisitor(), geometryMinster); + auto midpointMinster = sja.calculateMidpointOfBox(boxMinster); + ASSERT_GE(sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxMinster), 80); + ASSERT_DOUBLE_EQ(sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxMinster), + sja.getMaxDistFromMidpointToAnyPointInsideTheBox( + boxMinster, midpointMinster)); + + // the following polygon is from the university building 101 in freiburg + BoostGeometryNamespace::AnyGeometry geometryUni; + std::string wktUni = + "POLYGON((7.8346338 48.0126612,7.8348921 48.0123905,7.8349457 " + "48.0124216,7.8349855 48.0124448,7.8353244 48.0126418,7.8354091 " + "48.0126911,7.8352246 48.0129047,7.8351668 48.0128798,7.8349471 " + "48.0127886,7.8347248 48.0126986,7.8346338 48.0126612))"; + boost::geometry::read_wkt(wktUni, geometryUni); + Box boxUni = boost::apply_visitor( + BoostGeometryNamespace::BoundingBoxVisitor(), geometryUni); + auto midpointUni = 
sja.calculateMidpointOfBox(boxUni); + ASSERT_GE(sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxUni), 40); + ASSERT_DOUBLE_EQ( + sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxUni), + sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxUni, midpointUni)); + + // the following polygon is from the London Eye + BoostGeometryNamespace::AnyGeometry geometryEye; + std::string wktEye = + "POLYGON((-0.1198608 51.5027451,-0.1197395 51.5027354,-0.1194922 " + "51.5039381,-0.1196135 51.5039478,-0.1198608 51.5027451))"; + boost::geometry::read_wkt(wktEye, geometryEye); + Box boxEye = boost::apply_visitor( + BoostGeometryNamespace::BoundingBoxVisitor(), geometryEye); + auto midpointEye = sja.calculateMidpointOfBox(boxEye); + ASSERT_GE(sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxEye), 70); + ASSERT_DOUBLE_EQ( + sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxEye), + sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxEye, midpointEye)); + + // the following polygon is from the Statue of liberty + BoostGeometryNamespace::AnyGeometry geometryStatue; + std::string wktStatue = + "POLYGON((-74.0451069 40.6893455,-74.045004 40.6892215,-74.0451023 " + "40.6891073,-74.0449107 40.6890721,-74.0449537 40.6889343,-74.0447746 " + "40.6889506,-74.0446495 40.6888049,-74.0445067 40.6889076,-74.0442008 " + "40.6888563,-74.0441463 40.6890663,-74.0441411 40.6890854,-74.0441339 " + "40.6890874,-74.0441198 40.6890912,-74.0439637 40.6891376,-74.0440941 " + "40.6892849,-74.0440057 40.6894071,-74.0441949 40.6894309,-74.0441638 " + "40.6895702,-74.0443261 40.6895495,-74.0443498 40.6895782,-74.0443989 " + "40.6896372,-74.0444277 40.6896741,-74.0445955 40.6895939,-74.0447392 " + "40.6896561,-74.0447498 40.6896615,-74.0447718 40.6895577,-74.0447983 " + "40.6895442,-74.0448287 40.6895279,-74.0449638 40.6895497,-74.0449628 " + "40.6895443,-74.044961 40.6895356,-74.0449576 40.6895192,-74.044935 " + "40.689421,-74.0451069 40.6893455))"; + boost::geometry::read_wkt(wktStatue, 
geometryStatue); + Box boxStatue = boost::apply_visitor( + BoostGeometryNamespace::BoundingBoxVisitor(), geometryStatue); + auto midpointStatue = sja.calculateMidpointOfBox(boxStatue); + ASSERT_GE(sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxStatue), 100); + ASSERT_DOUBLE_EQ(sja.getMaxDistFromMidpointToAnyPointInsideTheBox(boxStatue), + sja.getMaxDistFromMidpointToAnyPointInsideTheBox( + boxStatue, midpointStatue)); +} + +QueryExecutionContext* getAllGeometriesQEC() { + auto addRow = [](std::string& kg, std::string nr, std::string wktStr) { + kg += absl::StrCat(" \"", wktStr, "\".\n"); + }; + // each object (except for the point) has its leftmost coordinate at an + // integer number and its rightmost coordinate 0.5 units further right. The + // y coordinate will be 0 in those cases. All other points are in between + // these two points with different y coordinates. The reason for that is that + // this makes it very easy to calculate the shortest distance between two + // geometries. + std::string point = "POINT(1.5 0)"; + std::string linestring = "LINESTRING(2.0 0, 2.2 1, 2.5 0)"; + std::string polygon = "POLYGON((3.0 0, 3.1 1, 3.2 2, 3.5 0))"; + std::string multiPoint = "MULTIPOINT((4.0 0), (4.2 1), (4.5 0))"; + std::string multiLinestring = + "MULTILINESTRING((5.0 0, 5.2 1, 5.5 1), (5.1 3, 5.5 0))"; + std::string multiPolygon = + "MULTIPOLYGON(((6.0 0, 6.1 1, 6.3 5, 6.4 2)), ((6.2 1, 6.3 4, 6.4 3, 6.5 " + "0)))"; + + std::string kg = ""; // tiny test knowledge graph for all geometries + addRow(kg, "1", point); + addRow(kg, "2", linestring); + addRow(kg, "3", polygon); + addRow(kg, "4", multiPoint); + addRow(kg, "5", multiLinestring); + addRow(kg, "6", multiPolygon); + + auto qec = ad_utility::testing::getQec(kg, true, true, false, 16_MB, false, + true, std::nullopt, 10_kB); + return qec; +} + +TEST(SpatialJoin, areaFormat) { + auto qec = getAllGeometriesQEC(); + auto leftChild = + buildIndexScan(qec, {"?geo1", std::string{""}, "?obj1"}); + auto rightChild = + 
buildIndexScan(qec, {"?geo2", std::string{""}, "?obj2"}); + + std::shared_ptr spatialJoinOperation = + ad_utility::makeExecutionTree( + qec, + SpatialJoinConfiguration{MaxDistanceConfig(100000000), + Variable{"?obj1"}, Variable{"?obj2"}}, + leftChild, rightChild); + + std::shared_ptr op = spatialJoinOperation->getRootOperation(); + SpatialJoin* spatialJoin = static_cast(op.get()); + spatialJoin->selectAlgorithm(SpatialJoinAlgorithm::BOUNDING_BOX); + auto res = spatialJoin->getResult(); + // if all rows can be parsed correctly, the result should be the cross + // product. (Lines which can't be parsed will be ignored (and a warning gets + // printed) and therefore the cross product of all parsed lines would be + // smaller than 36) + ASSERT_EQ(res->idTable().numRows(), 36); +} + +TEST(SpatialJoin, trueAreaDistance) { + auto getDist = [](QueryExecutionContext* qec, std::string nr1, + std::string nr2, bool useMidpointForAreas) { + auto makeIndexScan = [&](std::string nr) { + auto subject = absl::StrCat(""); + auto objStr = absl::StrCat("?obj", nr); + TripleComponent object{Variable{objStr}}; + return ad_utility::makeExecutionTree( + qec, Permutation::Enum::PSO, + SparqlTriple{TripleComponent::Iri::fromIriref(subject), "", + object}); + }; + auto scan1 = makeIndexScan(nr1); + auto scan2 = makeIndexScan(nr2); + auto var1 = absl::StrCat("?obj", nr1); + auto var2 = absl::StrCat("?obj", nr2); + + std::shared_ptr spatialJoinOperation = + ad_utility::makeExecutionTree( + qec, + SpatialJoinConfiguration{MaxDistanceConfig(100000000), + Variable{var1}, Variable{var2}}, + scan1, scan2); + + std::shared_ptr op = spatialJoinOperation->getRootOperation(); + SpatialJoin* spatialJoin = static_cast(op.get()); + spatialJoin->selectAlgorithm(SpatialJoinAlgorithm::BOUNDING_BOX); + PreparedSpatialJoinParams params = + spatialJoin->onlyForTestingGetPrepareJoin(); + SpatialJoinAlgorithms algorithms{ + qec, params, spatialJoin->onlyForTestingGetConfig(), std::nullopt}; + 
algorithms.setUseMidpointForAreas_(useMidpointForAreas); + auto entryLeft = algorithms.onlyForTestingGetRtreeEntry( + params.idTableLeft_, 0, params.leftJoinCol_); + auto entryRight = algorithms.onlyForTestingGetRtreeEntry( + params.idTableRight_, 0, params.rightJoinCol_); + auto distID = algorithms.computeDist(entryLeft.value(), entryRight.value()); + return distID.getDouble(); + }; + auto qec = buildMixedAreaPointQEC(true); + + // the following tests all calculate the distance from germany to each point. + // When the areas get approximated by their midpoint, the distance should + // always be larger or at least equally large compared to areas not being + // approximated by their midpoint. + ASSERT_TRUE(getDist(qec, "Area6", "1", true) >= + getDist(qec, "Area6", "1", false)); + ASSERT_TRUE(getDist(qec, "Area6", "Area2", true) >= + getDist(qec, "Area6", "Area2", false)); + ASSERT_TRUE(getDist(qec, "Area6", "3", true) >= + getDist(qec, "Area6", "3", false)); + ASSERT_TRUE(getDist(qec, "Area6", "Area4", true) >= + getDist(qec, "Area6", "Area4", false)); + ASSERT_TRUE(getDist(qec, "Area6", "5", true) >= + getDist(qec, "Area6", "5", false)); + ASSERT_TRUE(getDist(qec, "Area6", "Area6", true) >= + getDist(qec, "Area6", "Area6", false)); +} + +TEST(SpatialJoin, mixedDataSet) { + auto testDist = [](QueryExecutionContext* qec, size_t maxDist, + size_t nrResultRows) { + auto leftChild = + buildIndexScan(qec, {"?obj1", std::string{""}, "?geo1"}); + auto rightChild = + buildIndexScan(qec, {"?obj2", std::string{""}, "?geo2"}); + + std::shared_ptr spatialJoinOperation = + ad_utility::makeExecutionTree( + qec, + SpatialJoinConfiguration{MaxDistanceConfig(maxDist), + Variable{"?geo1"}, Variable{"?geo2"}}, + leftChild, rightChild); + + std::shared_ptr op = spatialJoinOperation->getRootOperation(); + SpatialJoin* spatialJoin = static_cast(op.get()); + spatialJoin->selectAlgorithm(SpatialJoinAlgorithm::BOUNDING_BOX); + PreparedSpatialJoinParams params = + 
spatialJoin->onlyForTestingGetPrepareJoin(); + SpatialJoinAlgorithms algorithms{ + qec, params, spatialJoin->onlyForTestingGetConfig(), std::nullopt}; + algorithms.setUseMidpointForAreas_(false); + auto res = algorithms.BoundingBoxAlgorithm(); + // that the id table contains all the necessary other columns and gets + // constructed correctly has already been extensively tested elsewhere. + // Here we only test, that the distance between GeoPoints and areas gets + // computed correctly. For this purpose it is sufficient to check the number + // of rows in the result table + ASSERT_EQ(res.idTable().numRows(), nrResultRows); + }; + auto qec = buildMixedAreaPointQEC(); + testDist(qec, 1, 5); + testDist(qec, 5000, 7); + testDist(qec, 500000, 13); + testDist(qec, 1000000, 17); + testDist(qec, 10000000, 25); +} + } // namespace boundingBox } // namespace diff --git a/test/engine/SpatialJoinTest.cpp b/test/engine/SpatialJoinTest.cpp index 766801ee3b..9e3a4b74da 100644 --- a/test/engine/SpatialJoinTest.cpp +++ b/test/engine/SpatialJoinTest.cpp @@ -607,7 +607,7 @@ INSTANTIATE_TEST_SUITE_P(SpatialJoin, SpatialJoinKnownEmptyTest, namespace resultSortedOn { TEST(SpatialJoin, resultSortedOn) { - std::string kg = createSmallDatasetWithPoints(); + std::string kg = createSmallDataset(); ad_utility::MemorySize blocksizePermutations = 16_MB; auto qec = getQec(kg, true, true, false, blocksizePermutations, false); @@ -727,7 +727,7 @@ class SpatialJoinMultiplicityAndSizeEstimateTest }; const double doubleBound = 0.00001; - std::string kg = createSmallDatasetWithPoints(); + std::string kg = createSmallDataset(); // add multiplicities to test knowledge graph kg += " \"testing multiplicity\" ."; @@ -859,7 +859,7 @@ class SpatialJoinMultiplicityAndSizeEstimateTest // ================================ here the children are only index // scans, as they are perfectly predictable in relation to size and // multiplicity estimates - std::string kg = createSmallDatasetWithPoints(); + std::string kg 
= createSmallDataset(); // add multiplicities to test knowledge graph kg += " \"POINT(7.12345 48.12345)\"."; diff --git a/test/engine/SpatialJoinTestHelpers.h b/test/engine/SpatialJoinTestHelpers.h index 15274fe5be..b993bcca3c 100644 --- a/test/engine/SpatialJoinTestHelpers.h +++ b/test/engine/SpatialJoinTestHelpers.h @@ -18,6 +18,196 @@ auto makePointLiteral = [](std::string_view c1, std::string_view c2) { return absl::StrCat(" \"POINT(", c1, " ", c2, ")\"^^<", GEO_WKT_LITERAL, ">"); }; +auto makeAreaLiteral = [](std::string_view coordinateList) { + return absl::StrCat("\"POLYGON((", coordinateList, "))\"^^<", GEO_WKT_LITERAL, + ">"); +}; + +const std::string pointUniFreiburg = makePointLiteral("7.83505", "48.01267"); +const std::string pointMinster = makePointLiteral("7.85298", "47.99557"); +const std::string pointLondonEye = makePointLiteral("-0.11957", "51.50333"); +const std::string pointStatueOfLiberty = + makePointLiteral("-74.04454", "40.68925"); +const std::string pointEiffelTower = makePointLiteral("2.29451", "48.85825"); + +const std::string areaUniFreiburg = makeAreaLiteral( + "7.8346338 48.0126612,7.8348921 48.0123905,7.8349457 " + "48.0124216,7.8349855 48.0124448,7.8353244 48.0126418,7.8354091 " + "48.0126911,7.8352246 48.0129047,7.8351668 48.0128798,7.8349471 " + "48.0127886,7.8347248 48.0126986,7.8346338 48.0126612"); + +const std::string areaMuenster = makeAreaLiteral( + "7.8520522 47.9956071,7.8520528 47.9955872,7.8521103 " + "47.995588,7.8521117 47.9955419,7.852113 47.9954975,7.8520523 " + "47.9954968,7.8520527 47.995477,7.8521152 47.9954775,7.8521154 " + "47.9954688,7.8521299 47.995469,7.8521311 47.9954303,7.8521611 " + "47.9954307,7.8521587 47.9954718,7.8522674 47.9954741,7.8522681 " + "47.9954676,7.8522746 47.9954643,7.8522832 47.9954599,7.8522976 " + "47.99546,7.8523031 47.995455,7.8523048 47.9954217,7.8522781 " + "47.9954213,7.8522786 47.9954058,7.8523123 47.9954065,7.852314 " + "47.9953744,7.8523383 47.9953748,7.8523373 47.9954062,7.8524164 
" + "47.995408,7.8524176 47.9953858,7.852441 47.9953865,7.8524398 " + "47.9954085,7.8525077 47.9954101,7.8525088 47.9953886,7.8525316 " + "47.9953892,7.8525305 47.9954106,7.8526031 47.9954123,7.8526042 " + "47.9953915,7.8526276 47.9953922,7.8526265 47.9954128,7.8526944 " + "47.9954144,7.8526954 47.9953943,7.8527183 47.9953949,7.8527173 " + "47.9954149,7.8527892 47.9954165,7.8527903 47.9953974,7.8528131 " + "47.9953979,7.8528122 47.9954171,7.852871 47.9954182,7.8528712 " + "47.995416,7.8528791 47.9954112,7.85289 47.9954113,7.8528971 " + "47.9954158,7.8528974 47.9954052,7.8528925 47.9954052,7.8528928 " + "47.9953971,7.8529015 47.9953972,7.8529024 47.9953702,7.852897 " + "47.9953701,7.8528972 47.9953645,7.8529037 47.9953645,7.8529038 " + "47.9953613,7.8529069 47.9953614,7.8529071 47.9953541,7.8529151 " + "47.9953542,7.8529149 47.9953581,7.8529218 47.9953582,7.8529217 " + "47.9953631,7.8529621 47.9953637,7.8529623 47.9953572,7.8529719 " + "47.9953573,7.8529716 47.9953642,7.8530114 47.9953648,7.8530116 " + "47.9953587,7.8530192 47.9953589,7.853019 47.995365,7.8530635 " + "47.9953657,7.8530637 47.9953607,7.8530716 47.9953608,7.8530715 " + "47.9953657,7.8530758 47.9953657,7.8530757 47.9953688,7.8530817 " + "47.9953689,7.8530815 47.9953742,7.8530747 47.9953741,7.8530737 " + "47.9954052,7.8530794 47.9954053,7.8530792 47.995413,7.8530717 " + "47.9954129,7.8530708 47.9954199,7.8531165 47.9954207,7.8531229 " + "47.9954131,7.8531292 47.9954209,7.8531444 47.9954211,7.8531444 " + "47.9954238,7.8531569 47.995424,7.8531661 47.9954152,7.853171 " + "47.9954201,7.853183 47.9954203,7.8531829 47.9954234,7.8531973 " + "47.9954236,7.8531977 47.9954138,7.8532142 47.9954141,7.8532141 " + "47.9954253,7.8532425 47.9954355,7.8532514 47.9954298,7.8532593 " + "47.9954353,7.8532915 47.9954255,7.8532923 47.9954155,7.8533067 " + "47.995416,7.8533055 47.9954261,7.8533304 47.9954368,7.8533399 " + "47.995431,7.85335 47.9954372,7.8533758 47.9954288,7.853377 " + "47.9954188,7.8533932 
47.9954192,7.8533924 47.9954298,7.8534151 " + "47.9954395,7.8534278 47.9954345,7.8534373 47.995441,7.8534664 " + "47.995432,7.8534672 47.9954209,7.8534832 47.9954211,7.8534828 " + "47.9954322,7.8535077 47.9954449,7.8535224 47.9954375,7.8535325 " + "47.995448,7.8535644 47.9954403,7.8535717 47.9954305,7.8535866 " + "47.9954356,7.8535796 47.9954443,7.8536079 47.9954674,7.8536221 " + "47.9954629,7.8536221 47.9954735,7.8536573 47.9954801,7.8536707 " + "47.9954728,7.8536813 47.9954812,7.8536686 47.9954876,7.8536776 " + "47.9955168,7.8536958 47.9955192,7.8536876 47.9955286,7.8537133 " + "47.9955444,7.85373 47.9955428,7.8537318 47.9955528,7.8537154 " + "47.9955545,7.8537069 47.9955819,7.8537168 47.995588,7.8537044 " + "47.9955948,7.8537086 47.9956193,7.8537263 47.9956245,7.8537206 " + "47.9956347,7.8537069 47.9956317,7.8536802 47.9956473,7.8536819 " + "47.9956577,7.8536667 47.9956604,7.8536506 47.9956817,7.8536639 " + "47.9956902,7.8536543 47.9956981,7.8536394 47.9956887,7.8536331 " + "47.9956931,7.853609 47.9956954,7.8536024 47.9957048,7.8535868 " + "47.9957028,7.8535591 47.9957206,7.8535642 47.9957285,7.8535487 " + "47.9957327,7.8535423 47.9957215,7.853508 47.9957131,7.8534942 " + "47.9957215,7.8534818 47.9957186,7.8534587 47.9957284,7.853458 " + "47.9957389,7.8534421 47.9957388,7.8534424 47.9957273,7.853418 " + "47.995714,7.8534099 47.9957194,7.8534021 47.995713,7.8533721 " + "47.9957242,7.8533712 47.9957359,7.8533558 47.9957351,7.8533565 " + "47.9957247,7.8533269 47.9957094,7.8533171 47.9957165,7.8533073 " + "47.9957088,7.8532874 47.9957186,7.8532866 47.9957296,7.8532698 " + "47.9957295,7.8532698 47.9957189,7.8532466 47.9957048,7.8532372 " + "47.9957131,7.8532277 47.995705,7.8532014 47.9957171,7.8532009 " + "47.9957284,7.8531844 47.9957281,7.8531847 47.9957174,7.8531778 " + "47.9957102,7.853163 47.9957245,7.8530549 47.9957225,7.8530552 " + "47.9957161,7.8529541 47.9957138,7.8529535 47.9957236,7.8529578 " + "47.9957237,7.8529577 47.9957269,7.852953 47.9957268,7.8529529 
" + "47.9957308,7.8529477 47.9957307,7.8529478 47.9957271,7.8528964 " + "47.9957256,7.8528963 47.9957288,7.8528915 47.9957287,7.8528916 " + "47.9957256,7.8528876 47.9957255,7.8528875 47.9957223,7.8528912 " + "47.9957224,7.8528908 47.9957195,7.8528811 47.9957194,7.8527983 " + "47.9957162,7.8527981 47.9957192,7.8527723 47.9957185,7.8527732 " + "47.9957016,7.852703 47.9957003,7.8527021 47.9957175,7.8526791 " + "47.9957171,7.8526788 47.9957225,7.8526097 47.9957225,7.8526099 " + "47.995718,7.8525863 47.9957183,7.8525874 47.9956981,7.8525155 " + "47.9956967,7.8525144 47.995718,7.8524916 47.9957174,7.8524927 " + "47.9956963,7.8524241 47.995695,7.852423 47.9957153,7.8523996 " + "47.9957148,7.8524007 47.9956946,7.8523226 47.9956931,7.8523217 " + "47.9957212,7.8522948 47.9957208,7.8522957 47.9956927,7.8522663 " + "47.9956923,7.8522667 47.9956784,7.8522926 47.9956787,7.8522937 " + "47.9956433,7.8522882 47.995635,7.8522723 47.9956351,7.8522611 " + "47.9956281,7.8522613 47.9956189,7.8521543 47.9956174,7.852153 " + "47.9956591,7.8521196 47.9956587,7.8521209 47.995617,7.8521109 " + "47.9956168,7.8521111 47.9956079,7.8520522 47.9956071"); + +const std::string areaLondonEye = makeAreaLiteral( + "-0.1198608 51.5027451,-0.1197395 51.5027354,-0.1194922 " + "51.5039381,-0.1196135 51.5039478,-0.1198608 51.5027451"); + +const std::string areaStatueOfLiberty = makeAreaLiteral( + "-74.0451069 40.6893455,-74.045004 40.6892215,-74.0451023 " + "40.6891073,-74.0449107 40.6890721,-74.0449537 " + "40.6889343,-74.0447746 40.6889506,-74.0446495 " + "40.6888049,-74.0445067 40.6889076,-74.0442008 " + "40.6888563,-74.0441463 40.6890663,-74.0441411 " + "40.6890854,-74.0441339 40.6890874,-74.0441198 " + "40.6890912,-74.0439637 40.6891376,-74.0440941 " + "40.6892849,-74.0440057 40.6894071,-74.0441949 " + "40.6894309,-74.0441638 40.6895702,-74.0443261 " + "40.6895495,-74.0443498 40.6895782,-74.0443989 " + "40.6896372,-74.0444277 40.6896741,-74.0445955 " + "40.6895939,-74.0447392 40.6896561,-74.0447498 " 
+ "40.6896615,-74.0447718 40.6895577,-74.0447983 " + "40.6895442,-74.0448287 40.6895279,-74.0449638 " + "40.6895497,-74.0449628 40.6895443,-74.044961 40.6895356,-74.0449576 " + "40.6895192,-74.044935 40.689421,-74.0451069 40.6893455"); + +const std::string areaEiffelTower = makeAreaLiteral( + "2.2933119 48.858248,2.2935432 48.8581003,2.2935574 " + "48.8581099,2.2935712 48.8581004,2.2936112 48.8581232,2.2936086 " + "48.8581249,2.293611 48.8581262,2.2936415 48.8581385,2.293672 " + "48.8581477,2.2937035 48.8581504,2.293734 48.858149,2.2937827 " + "48.8581439,2.2938856 48.8581182,2.2939778 48.8580882,2.2940648 " + "48.8580483,2.2941435 48.8579991,2.2941937 48.8579588,2.2942364 " + "48.8579197,2.2942775 48.8578753,2.2943096 48.8578312,2.2943307 " + "48.8577908,2.2943447 48.857745,2.2943478 48.8577118,2.2943394 " + "48.8576885,2.2943306 48.8576773,2.2943205 48.8576677,2.2943158 " + "48.8576707,2.2942802 48.8576465,2.2942977 48.8576355,2.2942817 " + "48.8576248,2.2942926 48.8576181,2.2944653 48.8575069,2.2945144 " + "48.8574753,2.2947414 48.8576291,2.294725 48.8576392,2.2947426 " + "48.857651,2.294706 48.8576751,2.294698 48.8576696,2.2946846 " + "48.8576782,2.2946744 48.8576865,2.2946881 48.8576957,2.2946548 " + "48.857717,2.2946554 48.8577213,2.2946713 48.8577905,2.2946982 " + "48.8578393,2.2947088 48.8578585,2.2947529 48.8579196,2.2948133 " + "48.8579803,2.2948836 48.85803,2.2949462 48.8580637,2.2950051 " + "48.8580923,2.2950719 48.85812,2.2951347 48.8581406,2.2951996 " + "48.8581564,2.2952689 48.8581663,2.295334 48.8581699,2.2953613 " + "48.8581518,2.2953739 48.8581604,2.2953965 48.8581497,2.2954016 " + "48.8581464,2.2953933 48.8581409,2.2954304 48.8581172,2.2954473 " + "48.8581285,2.2954631 48.8581182,2.2956897 48.8582718,2.295653 " + "48.8582954,2.2955837 48.85834,2.2954575 48.8584212,2.2954416 " + "48.858411,2.2954238 48.8584227,2.2953878 48.8583981,2.2953925 " + "48.858395,2.2953701 48.8583857,2.2953419 48.8583779,2.2953057 " + "48.8583737,2.2952111 
48.8583776,2.2951081 48.858403,2.2950157 " + "48.8584326,2.2949284 48.8584723,2.2948889 48.8584961,2.2947988 " + "48.8585613,2.2947558 48.8586003,2.2947144 48.8586446,2.294682 " + "48.8586886,2.2946605 48.8587289,2.2946462 48.8587747,2.294644 " + "48.8587962,2.2946462 48.8588051,2.2946486 48.8588068,2.2946938 " + "48.8588377,2.2946607 48.8588587,2.294663 48.8588603,2.294681 " + "48.858849,2.2947169 48.8588737,2.2946988 48.858885,2.2947154 " + "48.8588961,2.2944834 48.8590453,2.2943809 48.8589771,2.2943708 " + "48.8589703,2.2942571 48.8588932,2.2942741 48.8588824,2.2942567 " + "48.8588708,2.2942893 48.8588493,2.294306 48.8588605,2.2943103 " + "48.8588577,2.2942883 48.8588426,2.2943122 48.8588275,2.2943227 " + "48.8588209,2.2943283 48.8588173,2.2943315 48.8588125,2.2943333 " + "48.8588018,2.2943166 48.8587327,2.294301 48.8586978,2.2942783 " + "48.8586648,2.2942406 48.8586191,2.2942064 48.858577,2.2941734 " + "48.8585464,2.2941015 48.8584943,2.2940384 48.8584609,2.2939792 " + "48.8584325,2.293912 48.8584052,2.2938415 48.8583828,2.293784 " + "48.8583695,2.2937145 48.8583599,2.2936514 48.8583593,2.2936122 " + "48.8583846,2.293606 48.8583807,2.2935688 48.8584044,2.2935515 " + "48.8583929,2.293536 48.8584028,2.2933119 48.858248"); + +// compared to the other areas, this one is not real, because it would be way +// too large. Here the borders of germany get approximated by just a few points +// to not make this file too crowded. 
As this geometry is only needed because +// the distance from the midpoint to the borders can't be ignored, it's not +// necessary to insert the complete geometry +const std::string approximatedAreaGermany = makeAreaLiteral( + "7.20369317867016 53.62121249029073, " + "9.335040870259194 54.77156944262062, 13.97127141588071 53.7058383745324, " + "14.77327338230339 51.01654754091759, 11.916828022441791 " + "50.36932046223437, " + "13.674640551587391 48.68663848319227, 12.773761630400273 " + "47.74969625921073, " + "7.720050609106677 47.64617710434852, 8.313312337693318 " + "48.997548751390326, " + "6.50056816701192 49.535220384133375, 6.0391423781112 51.804566644690524, " + "7.20369317867016 53.62121249029073"); + // helper function to create a vector of strings from a result table inline std::vector printTable(const QueryExecutionContext* qec, const Result* table) { @@ -77,37 +267,90 @@ inline std::vector createRowVectorFromColumnVector( return result; } -// create a small test dataset, which focuses on points as geometry objects. -// Note, that some of these objects have a polygon representation, but for -// testing purposes, they get represented as a point here. I took those +inline void addPoint(std::string& kg, std::string number, std::string name, + std::string point) { + kg += absl::StrCat(" ", name, " . . ", point, " ."); +} + +inline void addArea(std::string& kg, std::string number, std::string name, + std::string area) { + kg += absl::StrCat(" ", name, " . \n", + " .\n", " ", + area, " .\n"); +} + +// create a small test dataset, which focuses on points or polygons as geometry +// objects. Note, that some of these objects have a polygon representation, but +// when choosing points, they get represented a single point. I took those // points, such that it is obvious, which pair of objects should be included, // when the maximum distance is x meters. Please note, that these datapoints -// are not copied from a real input file. 
Copying the query will therefore -// likely not result in the same results as here (also the names, coordinates, -// etc. might be different in the real datasets) -inline std::string createSmallDatasetWithPoints() { - auto addPoint = [](std::string& kg, std::string number, std::string name, - std::string point) { - kg += absl::StrCat(" ", name, " . . ", point, " ."); - }; - std::string kg2; - auto p = makePointLiteral; - addPoint(kg2, "1", "\"Uni Freiburg TF\"", p("7.83505", "48.01267")); - addPoint(kg2, "2", "\"Minster Freiburg\"", p("7.85298", "47.99557")); - addPoint(kg2, "3", "\"London Eye\"", p("-0.11957", "51.50333")); - addPoint(kg2, "4", "\"Statue of liberty\"", p("-74.04454", "40.68925")); - addPoint(kg2, "5", "\"eiffel tower\"", p("2.29451", "48.85825")); - - return kg2; +// are only partially copied from a real input file. Copying the query will +// therefore likely not result in the same results as here (the names, +// coordinates, etc. might be different in the real datasets). If usePolygons is +// set to false, all objects are represented by a point, otherwise all objects +// are represented by their area. 
+inline std::string createSmallDataset(bool usePolygons = false) { + std::string kg; + if (usePolygons) { + addArea(kg, "1", "\"Uni Freiburg TF Area\"", areaUniFreiburg); + addArea(kg, "2", "\"Minster Freiburg Area\"", areaMuenster); + addArea(kg, "3", "\"London Eye Area\"", areaLondonEye); + addArea(kg, "4", "\"Statue of liberty Area\"", areaStatueOfLiberty); + addArea(kg, "5", "\"eiffel tower Area\"", areaEiffelTower); + } else { + addPoint(kg, "1", "\"Uni Freiburg TF\"", pointUniFreiburg); + addPoint(kg, "2", "\"Minster Freiburg\"", pointMinster); + addPoint(kg, "3", "\"London Eye\"", pointLondonEye); + addPoint(kg, "4", "\"Statue of liberty\"", pointStatueOfLiberty); + addPoint(kg, "5", "\"eiffel tower\"", pointEiffelTower); + } + return kg; +} + +inline std::string createMixedDataset() { + std::string kg; + addArea(kg, "1", "\"Uni Freiburg TF Area\"", areaUniFreiburg); + addPoint(kg, "2", "\"Minster Freiburg\"", pointMinster); + addArea(kg, "3", "\"London Eye Area\"", areaLondonEye); + addPoint(kg, "4", "\"Statue of liberty\"", pointStatueOfLiberty); + addArea(kg, "5", "\"eiffel tower Area\"", areaEiffelTower); + return kg; +} + +// a mixed dataset, which contains points and areas. 
One of them is the geometry +// of Germany, where the distance from the midpoint to the borders cannot be +// ignored or approximated as zero +inline std::string createTrueDistanceDataset() { + std::string kg; + addPoint(kg, "1", "\"Uni Freiburg TF\"", pointUniFreiburg); + addArea(kg, "2", "\"Minster Freiburg Area\"", areaMuenster); + addPoint(kg, "3", "\"London Eye\"", pointLondonEye); + addArea(kg, "4", "\"Statue of liberty Area\"", areaStatueOfLiberty); + addPoint(kg, "5", "\"eiffel tower\"", pointEiffelTower); + addArea(kg, "6", "\"Germany\"", approximatedAreaGermany); + return kg; } -inline QueryExecutionContext* buildTestQEC() { - std::string kg = createSmallDatasetWithPoints(); +inline QueryExecutionContext* buildTestQEC(bool useAreas = false) { + std::string kg = createSmallDataset(useAreas); ad_utility::MemorySize blocksizePermutations = 16_MB; - auto qec = ad_utility::testing::getQec(kg, true, true, false, - blocksizePermutations, false); + auto qec = + ad_utility::testing::getQec(kg, true, true, false, blocksizePermutations, + false, true, std::nullopt, 10_kB); + return qec; +} + +inline QueryExecutionContext* buildMixedAreaPointQEC( + bool useTrueDistanceDataset = false) { + std::string kg = useTrueDistanceDataset ? createTrueDistanceDataset() + : createMixedDataset(); + ad_utility::MemorySize blocksizePermutations = 16_MB; + auto qec = + ad_utility::testing::getQec(kg, true, true, false, blocksizePermutations, + false, true, std::nullopt, 10_kB); return qec; } @@ -151,4 +394,41 @@ inline std::shared_ptr buildSmallChild( return buildJoin(qec, scan1, scan2, joinVariable); } +// this function creates a minimum viable SpatialJoinAlgorithms class, which +// gets used in testing to access the wrapper methods. Note that not all +// functions of this class work properly, as many necessary parameters are +// defaulted as nullpointer or std::nullopt. 
The maxDist is necessary, because +// one of the wrapper classes needs a proper maxDistance, otherwise the wrapper +// can't be used to test the function +inline SpatialJoinAlgorithms getDummySpatialJoinAlgsForWrapperTesting( + size_t maxDist = 1000, + std::optional qec = std::nullopt) { + if (!qec) { + qec = buildTestQEC(); + } + MaxDistanceConfig task{maxDist}; + std::shared_ptr spatialJoinOperation = + ad_utility::makeExecutionTree( + qec.value(), + SpatialJoinConfiguration{task, Variable{"?point1"}, + Variable{"?point2"}}, + std::nullopt, std::nullopt); + + std::shared_ptr op = spatialJoinOperation->getRootOperation(); + SpatialJoin* spatialJoin = static_cast(op.get()); + + PreparedSpatialJoinParams params{nullptr, + nullptr, + nullptr, + nullptr, + 0, + 0, + std::vector{}, + 1, + spatialJoin->getMaxDist(), + std::nullopt}; + + return {qec.value(), params, spatialJoin->onlyForTestingGetConfig()}; +} + } // namespace SpatialJoinTestHelpers diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 8e1a693209..ab5453043f 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -13,7 +13,7 @@ namespace ad_utility::testing { // ______________________________________________________________ -Index makeIndexWithTestSettings() { +Index makeIndexWithTestSettings(ad_utility::MemorySize parserBufferSize) { Index index{ad_utility::makeUnlimitedAllocator()}; index.setNumTriplesPerBatch(2); EXTERNAL_ID_TABLE_SORTER_IGNORE_MEMORY_LIMIT_FOR_TESTING = true; @@ -23,7 +23,10 @@ Index makeIndexWithTestSettings() { BATCH_SIZE_VOCABULARY_MERGE = 2; DEFAULT_PROGRESS_BAR_BATCH_SIZE = 2; index.memoryLimitIndexBuilding() = 50_MB; - index.parserBufferSize() = 1_kB; + index.parserBufferSize() = + parserBufferSize; // Note that the default value remains unchanged, but + // some tests (i.e. 
polygon testing in Spatial Joins) + // require a larger buffer size return index; } @@ -142,7 +145,8 @@ Index makeTestIndex(const std::string& indexBasename, ad_utility::MemorySize blocksizePermutations, bool createTextIndex, bool addWordsFromLiterals, std::optional> - contentsOfWordsFileAndDocsFile) { + contentsOfWordsFileAndDocsFile, + ad_utility::MemorySize parserBufferSize) { // Ignore the (irrelevant) log output of the index building and loading during // these tests. static std::ostringstream ignoreLogStream; @@ -173,7 +177,7 @@ Index makeTestIndex(const std::string& indexBasename, settingsFile << settingsJson.dump(); } { - Index index = makeIndexWithTestSettings(); + Index index = makeIndexWithTestSettings(parserBufferSize); // This is enough for 2 triples per block. This is deliberately chosen as a // small value, s.t. the tiny knowledge graphs from unit tests also contain // multiple blocks. Should this value or the semantics of it (how many @@ -247,7 +251,8 @@ QueryExecutionContext* getQec(std::optional turtleInput, ad_utility::MemorySize blocksizePermutations, bool createTextIndex, bool addWordsFromLiterals, std::optional> - contentsOfWordsFileAndDocsFile) { + contentsOfWordsFileAndDocsFile, + ad_utility::MemorySize parserBufferSize) { // Similar to `absl::Cleanup`. Calls the `callback_` in the destructor, but // the callback is stored as a `std::function`, which allows to store // different types of callbacks in the same wrapper type. @@ -294,21 +299,22 @@ QueryExecutionContext* getQec(std::optional turtleInput, std::string testIndexBasename = "_staticGlobalTestIndex" + std::to_string(contextMap.size()); contextMap.emplace( - key, Context{TypeErasedCleanup{[testIndexBasename]() { - for (const std::string& indexFilename : - getAllIndexFilenames(testIndexBasename)) { - // Don't log when a file can't be deleted, - // because the logging might already be - // destroyed. 
- ad_utility::deleteFile(indexFilename, false); - } - }}, - std::make_unique(makeTestIndex( - testIndexBasename, turtleInput, loadAllPermutations, - usePatterns, usePrefixCompression, - blocksizePermutations, createTextIndex, - addWordsFromLiterals, contentsOfWordsFileAndDocsFile)), - std::make_unique()}); + key, + Context{TypeErasedCleanup{[testIndexBasename]() { + for (const std::string& indexFilename : + getAllIndexFilenames(testIndexBasename)) { + // Don't log when a file can't be deleted, + // because the logging might already be + // destroyed. + ad_utility::deleteFile(indexFilename, false); + } + }}, + std::make_unique(makeTestIndex( + testIndexBasename, turtleInput, loadAllPermutations, + usePatterns, usePrefixCompression, blocksizePermutations, + createTextIndex, addWordsFromLiterals, + contentsOfWordsFileAndDocsFile, parserBufferSize)), + std::make_unique()}); } auto* qec = contextMap.at(key).qec_.get(); qec->getIndex().getImpl().setGlobalIndexAndComparatorOnlyForTesting(); diff --git a/test/util/IndexTestHelpers.h b/test/util/IndexTestHelpers.h index cbbd5ea486..b43d8008e3 100644 --- a/test/util/IndexTestHelpers.h +++ b/test/util/IndexTestHelpers.h @@ -21,8 +21,10 @@ namespace ad_utility::testing { // Create an empty `Index` object that has certain default settings overwritten // such that very small indices, as they are typically used for unit tests, -// can be built without a lot of time and memory overhead. -Index makeIndexWithTestSettings(); +// can be built without a lot of time and memory overhead. Using the parameter +// parserBufferSize the buffer size can be increased, when needed for larger +// tests (like polygon testing in Spatial Joins). +Index makeIndexWithTestSettings(ad_utility::MemorySize parserBufferSize = 1_kB); // Get names of all index files for a given basename. Needed for cleaning up // after tests using a test index. 
@@ -38,7 +40,9 @@ std::vector getAllIndexFilenames(const std::string& indexBasename); // following properties: Its vocabulary contains the literals `"alpha", // "älpha", "A", "Beta"`. These vocabulary entries are expected by the tests // for the subclasses of `SparqlExpression`. -// The concrete triple contents are currently used in `GroupByTest.cpp`. +// The concrete triple contents are currently used in `GroupByTest.cpp`. Using +// the parameter parserBufferSize the buffer size can be increased, when needed +// for larger tests (like polygon testing in Spatial Joins). Index makeTestIndex(const std::string& indexBasename, std::optional turtleInput = std::nullopt, bool loadAllPermutations = true, bool usePatterns = true, @@ -47,12 +51,15 @@ Index makeTestIndex(const std::string& indexBasename, bool createTextIndex = false, bool addWordsFromLiterals = true, std::optional> - contentsOfWordsFileAndDocsfile = std::nullopt); + contentsOfWordsFileAndDocsfile = std::nullopt, + ad_utility::MemorySize parserBufferSize = 1_kB); // Return a static `QueryExecutionContext` that refers to an index that was // build using `makeTestIndex` (see above). The index (most notably its // vocabulary) is the only part of the `QueryExecutionContext` that is actually -// relevant for these tests, so the other members are defaulted. +// relevant for these tests, so the other members are defaulted. Using +// the parameter parserBufferSize the buffer size can be increased, when needed +// for larger tests (like polygon testing in Spatial Joins). 
QueryExecutionContext* getQec( std::optional turtleInput = std::nullopt, bool loadAllPermutations = true, bool usePatterns = true, @@ -60,7 +67,8 @@ QueryExecutionContext* getQec( ad_utility::MemorySize blocksizePermutations = 16_B, bool createTextIndex = false, bool addWordsFromLiterals = true, std::optional> - contentsOfWordsFileAndDocsfile = std::nullopt); + contentsOfWordsFileAndDocsfile = std::nullopt, + ad_utility::MemorySize parserBufferSize = 1_kB); // Return a lambda that takes a string and converts it into an ID by looking // it up in the vocabulary of `index`. An `AD_CONTRACT_CHECK` will fail if the