diff --git a/src/binder/visitor/property_collector.cpp b/src/binder/visitor/property_collector.cpp index 3a03283a8dd..d4712062a4b 100644 --- a/src/binder/visitor/property_collector.cpp +++ b/src/binder/visitor/property_collector.cpp @@ -50,13 +50,6 @@ void PropertyCollector::visitQueryPartSkipNodeRel(const NormalizedQueryPart& que void PropertyCollector::visitMatch(const BoundReadingClause& readingClause) { auto& matchClause = readingClause.constCast(); - for (auto& rel : matchClause.getQueryGraphCollection()->getQueryRels()) { - if (rel->isEmpty() || rel->getRelType() != QueryRelType::NON_RECURSIVE) { - // If a query rel is empty then it does not have an internal id property. - continue; - } - properties.insert(rel->getInternalIDProperty()); - } if (matchClause.hasPredicate()) { collectProperties(matchClause.getPredicate()); } @@ -89,21 +82,28 @@ void PropertyCollector::visitSet(const BoundUpdatingClause& updatingClause) { } collectProperties(info.columnData); } + for (const auto& info : boundSetClause.getRelInfos()) { + auto& rel = info.pattern->constCast(); + KU_ASSERT(!rel.isEmpty() && rel.getRelType() == QueryRelType::NON_RECURSIVE); + properties.insert(rel.getInternalIDProperty()); + } } void PropertyCollector::visitDelete(const BoundUpdatingClause& updatingClause) { auto& boundDeleteClause = updatingClause.constCast(); // Read primary key if we are deleting nodes; - for (auto& info : boundDeleteClause.getNodeInfos()) { + for (const auto& info : boundDeleteClause.getNodeInfos()) { auto& node = info.pattern->constCast(); - for (auto entry : node.getEntries()) { + for (const auto entry : node.getEntries()) { properties.insert(node.getPrimaryKey(entry->getTableID())); } } // Read rel internal id if we are deleting relationships. - for (auto& info : boundDeleteClause.getRelInfos()) { + for (const auto& info : boundDeleteClause.getRelInfos()) { auto& rel = info.pattern->constCast(); - properties.insert(rel.getInternalIDProperty()); + if (!rel.isEmpty() && rel.getRelType() == QueryRelType::NON_RECURSIVE) { + properties.insert(rel.getInternalIDProperty()); + } } } diff --git a/src/expression_evaluator/pattern_evaluator.cpp b/src/expression_evaluator/pattern_evaluator.cpp index 5025a2762e2..768e291c639 100644 --- a/src/expression_evaluator/pattern_evaluator.cpp +++ b/src/expression_evaluator/pattern_evaluator.cpp @@ -45,6 +45,7 @@ void PatternExpressionEvaluator::initFurther(const ResultSet&) { StructPackFunctions::compileFunc(nullptr, parameters, resultVector); const auto& dataType = expression->getDataType(); auto fieldIdx = StructType::getFieldIdx(dataType.copy(), InternalKeyword::ID); + KU_ASSERT(fieldIdx != INVALID_STRUCT_FIELD_IDX); idVector = StructVector::getFieldVector(resultVector.get(), fieldIdx).get(); } diff --git a/src/graph/on_disk_graph.cpp b/src/graph/on_disk_graph.cpp index 60f9794a85b..3270c9a2cfe 100644 --- a/src/graph/on_disk_graph.cpp +++ b/src/graph/on_disk_graph.cpp @@ -226,21 +226,20 @@ std::vector OnDiskGraph::scanBwdRandom(nodeID_t, GraphScanState&) { return result; } -void OnDiskGraph::scan(nodeID_t nodeID, RelTable* relTable, OnDiskGraphScanStates& scanState, - RelTableScanState& relTableScanState, std::vector& nbrNodeIDs) { - scanState.srcNodeIDVector->setValue(0, nodeID); - relTableScanState.resetState(); - relTable->initializeScanState(context->getTx(), relTableScanState); - while (relTableScanState.source != TableScanSource::NONE && - relTable->scan(context->getTx(), relTableScanState)) { - if (relTableScanState.IDVector->state->getSelVector().getSelSize() > 0) { - for (auto i = 0u; i < relTableScanState.IDVector->state->getSelVector().getSelSize(); - ++i) { - auto nbrID = relTableScanState.IDVector->getValue(i); - nbrNodeIDs.push_back(nbrID); - } - } - } +void OnDiskGraph::scan(nodeID_t, RelTable*, OnDiskGraphScanStates&, RelTableScanState&, + std::vector&) { + // scanState.srcNodeIDVector->setValue(0, nodeID); + // relTableScanState.resetState(); + // relTable->initializeScanState(context->getTx(), relTableScanState); + // while (relTableScanState.source != TableScanSource::NONE && + // relTable->scan(context->getTx(), relTableScanState)) { + // if (relTableScanState.outState->getSelVector().getSelSize() > 0) { + // for (auto i = 0u; i < relTableScanState.outState->getSelVector().getSelSize(); ++i) { + // auto nbrID = relTableScanState.IDVector->getValue(i); + // nbrNodeIDs.push_back(nbrID); + // } + // } + // } } } // namespace graph diff --git a/src/include/processor/operator/filtering_operator.h b/src/include/processor/operator/filtering_operator.h index a1be2ca8793..6fbff2db56c 100644 --- a/src/include/processor/operator/filtering_operator.h +++ b/src/include/processor/operator/filtering_operator.h @@ -18,7 +18,7 @@ class SelVectorOverWriter { virtual ~SelVectorOverWriter() = default; protected: - void restoreSelVector(common::DataChunkState& dataChunkState); + void restoreSelVector(common::DataChunkState& dataChunkState) const; void saveSelVector(common::DataChunkState& dataChunkState); diff --git a/src/include/processor/operator/flatten.h b/src/include/processor/operator/flatten.h index 04a18ddd3fb..76b6808a081 100644 --- a/src/include/processor/operator/flatten.h +++ b/src/include/processor/operator/flatten.h @@ -11,14 +11,14 @@ struct FlattenLocalState { uint64_t sizeToFlatten = 0; }; -class Flatten : public PhysicalOperator, SelVectorOverWriter { +class Flatten final : public PhysicalOperator, SelVectorOverWriter { static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::FLATTEN; public: Flatten(data_chunk_pos_t dataChunkToFlattenPos, std::unique_ptr child, uint32_t id, std::unique_ptr printInfo) : PhysicalOperator{type_, std::move(child), id, std::move(printInfo)}, - dataChunkToFlattenPos{dataChunkToFlattenPos} {} + dataChunkToFlattenPos{dataChunkToFlattenPos}, dataChunkState{nullptr} {} void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; diff --git a/src/include/processor/operator/persistent/delete_executor.h b/src/include/processor/operator/persistent/delete_executor.h index 5f6c44ed76a..ead0c4574df 100644 --- a/src/include/processor/operator/persistent/delete_executor.h +++ b/src/include/processor/operator/persistent/delete_executor.h @@ -136,6 +136,10 @@ struct RelDeleteInfo { common::ValueVector* dstNodeIDVector = nullptr; common::ValueVector* relIDVector = nullptr; + RelDeleteInfo() + : srcNodeIDPos{INVALID_DATA_CHUNK_POS, INVALID_VALUE_VECTOR_POS}, + dstNodeIDPos{INVALID_DATA_CHUNK_POS, INVALID_VALUE_VECTOR_POS}, + relIDPos{INVALID_DATA_CHUNK_POS, INVALID_VALUE_VECTOR_POS} {} RelDeleteInfo(DataPos srcNodeIDPos, DataPos dstNodeIDPos, DataPos relIDPos) : srcNodeIDPos{srcNodeIDPos}, dstNodeIDPos{dstNodeIDPos}, relIDPos{relIDPos} {} EXPLICIT_COPY_DEFAULT_MOVE(RelDeleteInfo); @@ -154,7 +158,7 @@ class RelDeleteExecutor { RelDeleteExecutor(const RelDeleteExecutor& other) : info{other.info.copy()} {} virtual ~RelDeleteExecutor() = default; - void init(ResultSet* resultSet, ExecutionContext* context); + virtual void init(ResultSet* resultSet, ExecutionContext* context); virtual void delete_(ExecutionContext* context) = 0; @@ -166,9 +170,11 @@ class RelDeleteExecutor { class EmptyRelDeleteExecutor final : public RelDeleteExecutor { public: - explicit EmptyRelDeleteExecutor(RelDeleteInfo info) : RelDeleteExecutor{std::move(info)} {} + explicit EmptyRelDeleteExecutor() : RelDeleteExecutor{RelDeleteInfo{}} {} EmptyRelDeleteExecutor(const EmptyRelDeleteExecutor& other) : RelDeleteExecutor{other} {} + void init(ResultSet*, ExecutionContext*) override {} + void delete_(ExecutionContext*) override {} std::unique_ptr copy() const override { diff --git a/src/include/processor/operator/scan/offset_scan_node_table.h b/src/include/processor/operator/scan/offset_scan_node_table.h index 627e5804243..da9aaeedce4 100644 --- a/src/include/processor/operator/scan/offset_scan_node_table.h +++ b/src/include/processor/operator/scan/offset_scan_node_table.h @@ -7,7 +7,7 @@ namespace processor { // OffsetScanNodeTable is only used as the source operator for RecursiveJoin and thus cannot be // executed in parallel. Therefore, it does not have a shared state. -class OffsetScanNodeTable : public ScanTable { +class OffsetScanNodeTable final : public ScanTable { static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::OFFSET_SCAN_NODE_TABLE; public: diff --git a/src/include/processor/operator/scan/scan_multi_rel_tables.h b/src/include/processor/operator/scan/scan_multi_rel_tables.h index 0b53701aa8b..e317b75af59 100644 --- a/src/include/processor/operator/scan/scan_multi_rel_tables.h +++ b/src/include/processor/operator/scan/scan_multi_rel_tables.h @@ -48,7 +48,7 @@ class RelTableCollectionScanner { uint32_t nextTableIdx = 0; }; -class ScanMultiRelTable : public ScanTable { +class ScanMultiRelTable final : public ScanTable { static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::SCAN_REL_TABLE; public: @@ -57,13 +57,14 @@ class ScanMultiRelTable : public ScanTable { std::unique_ptr child, uint32_t id, std::unique_ptr printInfo) : ScanTable{type_, std::move(info), std::move(child), id, std::move(printInfo)}, - directionInfo{std::move(directionInfo)}, scanners{std::move(scanners)} {} + directionInfo{std::move(directionInfo)}, boundNodeIDVector{nullptr}, + scanners{std::move(scanners)}, currentScanner{nullptr} {} - void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) final; + void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; - bool getNextTuplesInternal(ExecutionContext* context) final; + bool getNextTuplesInternal(ExecutionContext* context) override; - std::unique_ptr clone() final; + std::unique_ptr clone() override; private: void resetState(); @@ -71,9 +72,9 @@ class ScanMultiRelTable : public ScanTable { private: DirectionInfo directionInfo; - common::ValueVector* boundNodeIDVector = nullptr; + common::ValueVector* boundNodeIDVector; common::table_id_map_t scanners; - RelTableCollectionScanner* currentScanner = nullptr; + RelTableCollectionScanner* currentScanner; }; } // namespace processor diff --git a/src/include/storage/store/node_table.h b/src/include/storage/store/node_table.h index 75e604917dc..341201b09c2 100644 --- a/src/include/storage/store/node_table.h +++ b/src/include/storage/store/node_table.h @@ -28,15 +28,10 @@ struct NodeTableScanState final : TableScanState { // Scan state for un-committed data. // Ideally we shouldn't need columns to scan un-checkpointed but committed data. explicit NodeTableScanState(std::vector columnIDs) - : TableScanState{std::move(columnIDs), {}} { - nodeGroupScanState = std::make_unique(this->columnIDs.size()); - } - + : NodeTableScanState{std::move(columnIDs), {}} {} NodeTableScanState(std::vector columnIDs, std::vector columns) - : TableScanState{std::move(columnIDs), std::move(columns), - std::vector{}} { - nodeGroupScanState = std::make_unique(this->columnIDs.size()); - } + : NodeTableScanState{std::move(columnIDs), std::move(columns), + std::vector{}} {} NodeTableScanState(std::vector columnIDs, std::vector columns, std::vector columnPredicateSets) : TableScanState{std::move(columnIDs), std::move(columns), std::move(columnPredicateSets)} { diff --git a/src/include/storage/store/rel_table.h b/src/include/storage/store/rel_table.h index ba87af2424d..bc75944897b 100644 --- a/src/include/storage/store/rel_table.h +++ b/src/include/storage/store/rel_table.h @@ -29,18 +29,12 @@ struct RelTableScanState : TableScanState { const std::vector& columnIDs) : RelTableScanState(memoryManager, columnIDs, {}, nullptr, nullptr, common::RelDataDirection::FWD /* This is a dummy direction */, - std::vector{}) { - nodeGroupScanState = - std::make_unique(memoryManager, this->columnIDs.size()); - } + std::vector{}) {} RelTableScanState(MemoryManager& memoryManager, const std::vector& columnIDs, const std::vector& columns, Column* csrOffsetCol, Column* csrLengthCol, common::RelDataDirection direction) : RelTableScanState(memoryManager, columnIDs, columns, csrOffsetCol, csrLengthCol, - direction, std::vector{}) { - nodeGroupScanState = - std::make_unique(memoryManager, this->columnIDs.size()); - } + direction, std::vector{}) {} RelTableScanState(MemoryManager& memoryManager, const std::vector& columnIDs, const std::vector& columns, Column* csrOffsetCol, Column* csrLengthCol, common::RelDataDirection direction, @@ -76,7 +70,6 @@ struct LocalRelTableScanState final : RelTableScanState { LocalRelTableScanState(MemoryManager& memoryManager, const RelTableScanState& state, const std::vector& columnIDs, LocalRelTable* localRelTable) : RelTableScanState{memoryManager, columnIDs}, localRelTable{localRelTable} { - IDVector = state.IDVector; direction = state.direction; boundNodeIDVector = state.boundNodeIDVector; outputVectors = state.outputVectors; diff --git a/src/include/storage/store/table.h b/src/include/storage/store/table.h index 016b95a1c66..27d18e86e08 100644 --- a/src/include/storage/store/table.h +++ b/src/include/storage/store/table.h @@ -19,8 +19,9 @@ enum class TableScanSource : uint8_t { COMMITTED = 0, UNCOMMITTED = 1, NONE = 3 struct TableScanState { std::unique_ptr rowIdxVector; // Node/Rel ID vector. We assume all output vectors are within the same DataChunk as this one. - common::ValueVector* IDVector; + common::ValueVector* nodeIDVector; std::vector outputVectors; + common::DataChunkState* outState; std::vector columnIDs; common::NodeSemiMask* semiMask; @@ -36,21 +37,17 @@ struct TableScanState { common::ZoneMapCheckResult zoneMapResult = common::ZoneMapCheckResult::ALWAYS_SCAN; explicit TableScanState(std::vector columnIDs) - : IDVector(nullptr), columnIDs{std::move(columnIDs)}, semiMask{nullptr} { - rowIdxVector = std::make_unique(common::LogicalType::INT64()); - } + : TableScanState{std::move(columnIDs), {}, {}} {} + TableScanState(std::vector columnIDs, std::vector columns) + : TableScanState{std::move(columnIDs), std::move(columns), {}} {} TableScanState(std::vector columnIDs, std::vector columns, std::vector columnPredicateSets) - : IDVector(nullptr), columnIDs{std::move(columnIDs)}, semiMask{nullptr}, - columns{std::move(columns)}, columnPredicateSets{std::move(columnPredicateSets)} { - rowIdxVector = std::make_unique(common::LogicalType::INT64()); - } - explicit TableScanState(std::vector columnIDs, - std::vector columns) - : IDVector(nullptr), columnIDs{std::move(columnIDs)}, semiMask{nullptr}, - columns{std::move(columns)} { + : nodeIDVector{nullptr}, outState{nullptr}, columnIDs{std::move(columnIDs)}, + semiMask{nullptr}, columns{std::move(columns)}, + columnPredicateSets{std::move(columnPredicateSets)} { rowIdxVector = std::make_unique(common::LogicalType::INT64()); } + virtual ~TableScanState() = default; DELETE_COPY_DEFAULT_MOVE(TableScanState); diff --git a/src/main/storage_driver.cpp b/src/main/storage_driver.cpp index 40f808d2c23..cfd8ddf2d2c 100644 --- a/src/main/storage_driver.cpp +++ b/src/main/storage_driver.cpp @@ -144,7 +144,7 @@ void StorageDriver::scanColumn(storage::Table* table, column_id_t columnID, offs idVector->state = vectorState; columnVector->state = vectorState; scanState->rowIdxVector->state = vectorState; - scanState->IDVector = idVector.get(); + scanState->nodeIDVector = idVector.get(); scanState->outputVectors.push_back(columnVector.get()); // Scan // TODO: validate not more than 1 level nested diff --git a/src/optimizer/projection_push_down_optimizer.cpp b/src/optimizer/projection_push_down_optimizer.cpp index 3d3ad02c2bd..eab8dfff943 100644 --- a/src/optimizer/projection_push_down_optimizer.cpp +++ b/src/optimizer/projection_push_down_optimizer.cpp @@ -185,7 +185,10 @@ void ProjectionPushDownOptimizer::visitDelete(LogicalOperator* op) { auto& rel = info.pattern->constCast(); collectExpressionsInUse(rel.getSrcNode()->getInternalID()); collectExpressionsInUse(rel.getDstNode()->getInternalID()); - collectExpressionsInUse(rel.getInternalIDProperty()); + KU_ASSERT(rel.getRelType() == QueryRelType::NON_RECURSIVE); + if (!rel.isEmpty()) { + collectExpressionsInUse(rel.getInternalIDProperty()); + } } } break; default: diff --git a/src/planner/operator/persistent/logical_delete.cpp b/src/planner/operator/persistent/logical_delete.cpp index eeb7af729ca..3f2cc3738e8 100644 --- a/src/planner/operator/persistent/logical_delete.cpp +++ b/src/planner/operator/persistent/logical_delete.cpp @@ -21,7 +21,7 @@ std::string LogicalDelete::getExpressionsForPrinting() const { f_group_pos_set LogicalDelete::getGroupsPosToFlatten() const { KU_ASSERT(!infos.empty()); - auto childSchema = children[0]->getSchema(); + const auto childSchema = children[0]->getSchema(); f_group_pos_set dependentGroupPos; switch (infos[0].tableType) { case TableType::NODE: { diff --git a/src/planner/plan/append_extend.cpp b/src/planner/plan/append_extend.cpp index 74f217e23a2..2c6384e9397 100644 --- a/src/planner/plan/append_extend.cpp +++ b/src/planner/plan/append_extend.cpp @@ -6,7 +6,6 @@ #include "catalog/catalog.h" #include "catalog/catalog_entry/rel_table_catalog_entry.h" #include "common/enums/join_type.h" -#include "common/exception/runtime.h" #include "planner/join_order/cost_model.h" #include "planner/operator/extend/logical_extend.h" #include "planner/operator/extend/logical_recursive_extend.h" @@ -78,27 +77,10 @@ static std::shared_ptr getIRIProperty(const expression_vector& prope return nullptr; } -static void validatePropertiesContainRelID(const RelExpression& rel, - const expression_vector& properties) { - if (rel.isEmpty()) { - return; - } - for (auto& property : properties) { - if (*property == *rel.getInternalIDProperty()) { - return; - } - } - // LCOV_EXCL_START - throw RuntimeException(stringFormat( - "Internal ID of relationship {} is not scanned. This should not happen.", rel.toString())); - // LCOV_EXCL_STOP -} - void Planner::appendNonRecursiveExtend(const std::shared_ptr& boundNode, const std::shared_ptr& nbrNode, const std::shared_ptr& rel, ExtendDirection direction, bool extendFromSource, const expression_vector& properties, LogicalPlan& plan) { - validatePropertiesContainRelID(*rel, properties); // Filter bound node label if we know some incoming nodes won't have any outgoing rel. This // cannot be done at binding time because the pruning is affected by extend direction. auto boundNodeTableIDSet = getBoundNodeTableIDSet(*rel, direction); diff --git a/src/processor/map/map_delete.cpp b/src/processor/map/map_delete.cpp index 04400b5685a..b1bd9fbe612 100644 --- a/src/processor/map/map_delete.cpp +++ b/src/processor/map/map_delete.cpp @@ -90,15 +90,15 @@ std::unique_ptr PlanMapper::mapDeleteNode(LogicalOperator* log std::unique_ptr PlanMapper::getRelDeleteExecutor( const BoundDeleteInfo& boundInfo, const Schema& schema) const { - auto storageManager = clientContext->getStorageManager(); auto& rel = boundInfo.pattern->constCast(); + if (rel.isEmpty()) { + return std::make_unique(); + } + auto relIDPos = getDataPos(*rel.getInternalIDProperty(), schema); auto srcNodeIDPos = getDataPos(*rel.getSrcNode()->getInternalID(), schema); auto dstNodeIDPos = getDataPos(*rel.getDstNode()->getInternalID(), schema); - auto relIDPos = getDataPos(*rel.getInternalIDProperty(), schema); auto info = RelDeleteInfo(srcNodeIDPos, dstNodeIDPos, relIDPos); - if (rel.isEmpty()) { - return std::make_unique(std::move(info)); - } + auto storageManager = clientContext->getStorageManager(); if (rel.isMultiLabeled()) { common::table_id_map_t tableIDToTableMap; for (auto entry : rel.getEntries()) { diff --git a/src/processor/map/map_extend.cpp b/src/processor/map/map_extend.cpp index 03ac90d3afd..c91b9987b49 100644 --- a/src/processor/map/map_extend.cpp +++ b/src/processor/map/map_extend.cpp @@ -91,13 +91,12 @@ std::unique_ptr PlanMapper::mapExtend(LogicalOperator* logical auto prevOperator = mapOperator(logicalOperator->getChild(0).get()); auto inNodeIDPos = getDataPos(*boundNode->getInternalID(), *inFSchema); auto outNodeIDPos = getDataPos(*nbrNode->getInternalID(), *outFSchema); - auto relIDPos = getDataPos(*rel->getInternalIDProperty(), *outFSchema); std::vector outVectorsPos; outVectorsPos.push_back(outNodeIDPos); for (auto& expression : extend->getProperties()) { outVectorsPos.push_back(getDataPos(*expression, *outFSchema)); } - auto scanInfo = ScanTableInfo(relIDPos, outVectorsPos); + auto scanInfo = ScanTableInfo(inNodeIDPos, outVectorsPos); std::vector tableNames; auto storageManager = clientContext->getStorageManager(); for (auto entry : rel->getEntries()) { diff --git a/src/processor/operator/filtering_operator.cpp b/src/processor/operator/filtering_operator.cpp index 4d6b642575f..55875c5b38b 100644 --- a/src/processor/operator/filtering_operator.cpp +++ b/src/processor/operator/filtering_operator.cpp @@ -9,7 +9,7 @@ using namespace kuzu::common; namespace kuzu { namespace processor { -void SelVectorOverWriter::restoreSelVector(DataChunkState& dataChunkState) { +void SelVectorOverWriter::restoreSelVector(DataChunkState& dataChunkState) const { if (prevSelVector != nullptr) { dataChunkState.setSelVector(prevSelVector); } diff --git a/src/processor/operator/scan/primary_key_scan_node_table.cpp b/src/processor/operator/scan/primary_key_scan_node_table.cpp index 3189e473f08..5491b51c46b 100644 --- a/src/processor/operator/scan/primary_key_scan_node_table.cpp +++ b/src/processor/operator/scan/primary_key_scan_node_table.cpp @@ -68,7 +68,7 @@ bool PrimaryKeyScanNodeTable::getNextTuplesInternal(ExecutionContext* context) { return false; } auto nodeID = nodeID_t{nodeOffset, nodeInfo.table->getTableID()}; - nodeInfo.localScanState->IDVector->setValue(pos, nodeID); + nodeInfo.localScanState->nodeIDVector->setValue(pos, nodeID); if (nodeOffset >= StorageConstants::MAX_NUM_ROWS_IN_TABLE) { nodeInfo.localScanState->source = TableScanSource::UNCOMMITTED; nodeInfo.localScanState->nodeGroupIdx = diff --git a/src/processor/operator/scan/scan_multi_rel_tables.cpp b/src/processor/operator/scan/scan_multi_rel_tables.cpp index b7dfc9a4517..377581e4ac1 100644 --- a/src/processor/operator/scan/scan_multi_rel_tables.cpp +++ b/src/processor/operator/scan/scan_multi_rel_tables.cpp @@ -87,7 +87,7 @@ bool ScanMultiRelTable::getNextTuplesInternal(ExecutionContext* context) { resetState(); return false; } - const auto currentIdx = IDVector->state->getSelVector()[0]; + const auto currentIdx = boundNodeIDVector->state->getSelVector()[0]; if (boundNodeIDVector->isNull(currentIdx)) { outState->getSelVectorUnsafe().setSelSize(0); continue; diff --git a/src/processor/operator/scan/scan_node_table.cpp b/src/processor/operator/scan/scan_node_table.cpp index 76a68bfaed9..a267fd8d3c5 100644 --- a/src/processor/operator/scan/scan_node_table.cpp +++ b/src/processor/operator/scan/scan_node_table.cpp @@ -107,7 +107,7 @@ bool ScanNodeTable::getNextTuplesInternal(ExecutionContext* context) { if (!skipScan) { while (scanState.source != TableScanSource::NONE && info.table->scan(transaction, scanState)) { - if (scanState.IDVector->state->getSelVector().getSelSize() > 0) { + if (scanState.outState->getSelVector().getSelSize() > 0) { return true; } } diff --git a/src/processor/operator/scan/scan_rel_table.cpp b/src/processor/operator/scan/scan_rel_table.cpp index d23b2ee3ffd..7d7a9a655d4 100644 --- a/src/processor/operator/scan/scan_rel_table.cpp +++ b/src/processor/operator/scan/scan_rel_table.cpp @@ -94,7 +94,7 @@ bool ScanRelTable::getNextTuplesInternal(ExecutionContext* context) { if (!skipScan) { while (scanState.source != TableScanSource::NONE && relInfo.table->scan(transaction, scanState)) { - if (scanState.IDVector->state->getSelVector().getSelSize() > 0) { + if (scanState.outState->getSelVector().getSelSize() > 0) { return true; } } diff --git a/src/processor/operator/scan/scan_table.cpp b/src/processor/operator/scan/scan_table.cpp index d1f9c3bf45d..ebfa1e6c254 100644 --- a/src/processor/operator/scan/scan_table.cpp +++ b/src/processor/operator/scan/scan_table.cpp @@ -13,8 +13,11 @@ void ScanTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext*) } void ScanTable::initVectors(storage::TableScanState& state, const ResultSet& resultSet) const { - state.IDVector = resultSet.getValueVector(info.IDPos).get(); - state.rowIdxVector->state = IDVector->state; + state.nodeIDVector = resultSet.getValueVector(info.IDPos).get(); + state.rowIdxVector->state = info.outVectorsPos.empty() ? + state.nodeIDVector->state : + resultSet.getValueVector(info.outVectorsPos[0])->state; + state.outState = outState; for (auto& pos : info.outVectorsPos) { state.outputVectors.push_back(resultSet.getValueVector(pos).get()); } diff --git a/src/storage/local_storage/local_rel_table.cpp b/src/storage/local_storage/local_rel_table.cpp index 8b7bf4836ff..722abb150f6 100644 --- a/src/storage/local_storage/local_rel_table.cpp +++ b/src/storage/local_storage/local_rel_table.cpp @@ -218,7 +218,7 @@ row_idx_t LocalRelTable::findMatchingRow(MemoryManager& memoryManager, offset_t std::vector columnIDs; columnIDs.push_back(LOCAL_REL_ID_COLUMN_ID); const auto scanState = std::make_unique(memoryManager, columnIDs); - scanState->IDVector = scanChunk.getValueVector(0).get(); + scanState->outState = scanChunk.state.get(); scanState->rowIdxVector->state = scanChunk.state; scanState->outputVectors.push_back(scanChunk.getValueVector(0).get()); scanChunk.state->getSelVectorUnsafe().setSelSize(intersectRows.size()); diff --git a/src/storage/store/chunked_node_group.cpp b/src/storage/store/chunked_node_group.cpp index 7611daedd4b..463c69579f2 100644 --- a/src/storage/store/chunked_node_group.cpp +++ b/src/storage/store/chunked_node_group.cpp @@ -204,7 +204,7 @@ void ChunkedNodeGroup::scan(const Transaction* transaction, const TableScanState rowIdxInGroup, numRowsToScan); hasValuesToScan = selVector->getSelSize() > 0; } - auto& anchorSelVector = scanState.IDVector->state->getSelVectorUnsafe(); + auto& anchorSelVector = scanState.outState->getSelVectorUnsafe(); if (selVector && selVector->getSelSize() != numRowsToScan) { std::memcpy(anchorSelVector.getMultableBuffer().data(), selVector->getMultableBuffer().data(), selVector->getSelSize() * sizeof(sel_t)); diff --git a/src/storage/store/csr_node_group.cpp b/src/storage/store/csr_node_group.cpp index 3bbf5b0e554..e64354cd30e 100644 --- a/src/storage/store/csr_node_group.cpp +++ b/src/storage/store/csr_node_group.cpp @@ -97,13 +97,13 @@ NodeGroupScanResult CSRNodeGroup::scan(Transaction* transaction, TableScanState& scanCommittedInMemSequential(transaction, relScanState, nodeGroupScanState) : scanCommittedInMemRandom(transaction, relScanState, nodeGroupScanState); if (result == NODE_GROUP_SCAN_EMMPTY_RESULT) { - relScanState.IDVector->state->getSelVectorUnsafe().setSelSize(0); + relScanState.outState->getSelVectorUnsafe().setSelSize(0); return NODE_GROUP_SCAN_EMMPTY_RESULT; } return result; } case CSRNodeGroupScanSource::NONE: { - relScanState.IDVector->state->getSelVectorUnsafe().setSelSize(0); + relScanState.outState->getSelVectorUnsafe().setSelSize(0); return NODE_GROUP_SCAN_EMMPTY_RESULT; } default: { @@ -179,7 +179,7 @@ NodeGroupScanResult CSRNodeGroup::scanCommittedInMemRandom(Transaction* transact nextRow++; } nodeGroupScanState.nextRowToScan += numRows; - tableState.IDVector->state->getSelVectorUnsafe().setSelSize(numSelected); + tableState.outState->getSelVectorUnsafe().setSelSize(numSelected); return NodeGroupScanResult{0, numRows}; } @@ -653,6 +653,7 @@ void CSRNodeGroup::checkpointInMemOnly(const UniqLock& lock, NodeGroupCheckpoint auto numRowsToAppend = 0u; for (auto i = 0u; i < maxNumRowsToAppend; i++) { const auto row = rows[numRowsTryAppended + i]; + // TODO(Guodong): Should skip deleted rows here. if (row == INVALID_ROW_IDX) { continue; } @@ -661,7 +662,7 @@ void CSRNodeGroup::checkpointInMemOnly(const UniqLock& lock, NodeGroupCheckpoint } scanChunkState->getSelVectorUnsafe().setSelSize(numRowsToAppend); if (numRowsToAppend > 0) { - NodeGroup::lookup(lock, &DUMMY_CHECKPOINT_TRANSACTION, *scanState); + lookup(lock, &DUMMY_CHECKPOINT_TRANSACTION, *scanState); for (auto idx = 0u; idx < numColumnsToCheckpoint; idx++) { dataChunksToFlush[idx]->getData().append(scanChunk.valueVectors[idx].get(), scanChunkState->getSelVector()); @@ -712,6 +713,7 @@ void CSRNodeGroup::initInMemScanChunkAndScanState(const CSRNodeGroupCheckpointSt dataChunk.insert(i, valueVector); } scanState.rowIdxVector->setState(dataChunk.state); + scanState.outState = dataChunk.state.get(); scanState.nodeGroupScanState = std::make_unique(csrState.columnIDs.size()); for (auto i = 0u; i < csrState.columnIDs.size(); i++) { scanState.outputVectors.push_back(dataChunk.valueVectors[i].get()); diff --git a/src/storage/store/node_group.cpp b/src/storage/store/node_group.cpp index e5f8bba5c86..7edbece3c5e 100644 --- a/src/storage/store/node_group.cpp +++ b/src/storage/store/node_group.cpp @@ -147,7 +147,7 @@ NodeGroupScanResult NodeGroup::scan(Transaction* transaction, TableScanState& st const auto startNodeOffset = nodeGroupScanState.nextRowToScan + StorageUtils::getStartOffsetOfNodeGroup(state.nodeGroupIdx); if (!state.semiMask->isMasked(startNodeOffset, startNodeOffset + numRowsToScan - 1)) { - state.IDVector->state->getSelVectorUnsafe().setSelSize(0); + state.outState->getSelVectorUnsafe().setSelSize(0); nodeGroupScanState.nextRowToScan += numRowsToScan; return NodeGroupScanResult{nodeGroupScanState.nextRowToScan, 0}; } diff --git a/src/storage/store/node_table.cpp b/src/storage/store/node_table.cpp index 91ea516bfdd..8b1dd8fe501 100644 --- a/src/storage/store/node_table.cpp +++ b/src/storage/store/node_table.cpp @@ -92,7 +92,6 @@ bool NodeTable::scanInternal(Transaction* transaction, TableScanState& scanState KU_ASSERT(scanState.source != TableScanSource::NONE && scanState.columns.size() == scanState.outputVectors.size()); for (const auto& outputVector : scanState.outputVectors) { - KU_ASSERT(outputVector->state == scanState.IDVector->state); KU_UNUSED(outputVector); } const auto scanResult = scanState.nodeGroup->scan(transaction, scanState); @@ -104,19 +103,19 @@ bool NodeTable::scanInternal(Transaction* transaction, TableScanState& scanState nodeGroupStartOffset += StorageConstants::MAX_NUM_ROWS_IN_TABLE; } for (auto i = 0u; i < scanResult.numRows; i++) { - scanState.IDVector->setValue(i, + scanState.nodeIDVector->setValue(i, nodeID_t{nodeGroupStartOffset + scanResult.startRow + i, tableID}); } return true; } bool NodeTable::lookup(Transaction* transaction, const TableScanState& scanState) const { - KU_ASSERT(scanState.IDVector->state->getSelVector().getSelSize() == 1); - const auto nodeIDPos = scanState.IDVector->state->getSelVector()[0]; - if (scanState.IDVector->isNull(nodeIDPos)) { + KU_ASSERT(scanState.nodeIDVector->state->getSelVector().getSelSize() == 1); + const auto nodeIDPos = scanState.nodeIDVector->state->getSelVector()[0]; + if (scanState.nodeIDVector->isNull(nodeIDPos)) { return false; } - const auto nodeOffset = scanState.IDVector->readNodeOffset(nodeIDPos); + const auto nodeOffset = scanState.nodeIDVector->readNodeOffset(nodeIDPos); offset_t rowIdxInGroup = INVALID_OFFSET; if (nodeOffset >= StorageConstants::MAX_NUM_ROWS_IN_TABLE) { rowIdxInGroup = nodeOffset - StorageConstants::MAX_NUM_ROWS_IN_TABLE - @@ -318,10 +317,10 @@ void NodeTable::commit(Transaction* transaction, LocalTable* localTable) { nodeIDVector.setState(dataChunk->state); const auto numNodeGroupsToScan = localNodeTable.getNumNodeGroups(); const auto scanState = std::make_unique(columnIDs); - scanState->IDVector = &nodeIDVector; for (auto& vector : dataChunk->valueVectors) { scanState->outputVectors.push_back(vector.get()); } + scanState->outState = dataChunk->state.get(); scanState->source = TableScanSource::UNCOMMITTED; node_group_idx_t nodeGroupToScan = 0u; while (nodeGroupToScan < numNodeGroupsToScan) { @@ -336,9 +335,9 @@ void NodeTable::commit(Transaction* transaction, LocalTable* localTable) { break; } for (auto i = 0u; i < scanResult.numRows; i++) { - scanState->IDVector->setValue(i, nodeID_t{startNodeOffset + i, tableID}); + nodeIDVector.setValue(i, nodeID_t{startNodeOffset + i, tableID}); } - insertPK(transaction, *scanState->IDVector, *scanState->outputVectors[0]); + insertPK(transaction, nodeIDVector, *scanState->outputVectors[0]); startNodeOffset += scanResult.numRows; } nodeGroupToScan++; diff --git a/src/storage/store/rel_table.cpp b/src/storage/store/rel_table.cpp index ceef3a009cb..58eb4b3fb7b 100644 --- a/src/storage/store/rel_table.cpp +++ b/src/storage/store/rel_table.cpp @@ -90,7 +90,7 @@ void RelTable::initializeLocalRelScanState(RelTableScanState& relScanState) { bool RelTable::scanInternal(Transaction* transaction, TableScanState& scanState) { auto& relScanState = scanState.cast(); - relScanState.IDVector->state->getSelVectorUnsafe().setToUnfiltered(); + relScanState.outState->getSelVectorUnsafe().setToUnfiltered(); switch (relScanState.source) { case TableScanSource::COMMITTED: { const auto scanResult = relScanState.nodeGroup->scan(transaction, scanState); @@ -214,15 +214,15 @@ void RelTable::detachDelete(Transaction* transaction, RelDataDirection direction relReadState->boundNodeIDVector = &deleteState->srcNodeIDVector; relReadState->outputVectors = std::vector{&deleteState->dstNodeIDVector, &deleteState->relIDVector}; - relReadState->IDVector = relReadState->outputVectors[1]; - relReadState->rowIdxVector->state = relReadState->IDVector->state; + relReadState->rowIdxVector->state = relReadState->outputVectors[1]->state; + relReadState->outState = relReadState->rowIdxVector->state.get(); if (const auto localRelTable = transaction->getLocalStorage()->getLocalTable(tableID, LocalStorage::NotExistAction::RETURN_NULL)) { auto localTableColumnIDs = LocalRelTable::rewriteLocalColumnIDs(direction, relReadState->columnIDs); relReadState->localTableScanState = std::make_unique(memoryManager, *relReadState, localTableColumnIDs, localRelTable->ptrCast()); - relReadState->localTableScanState->rowIdxVector->state = relReadState->IDVector->state; + relReadState->localTableScanState->rowIdxVector->state = relReadState->rowIdxVector->state; } initializeScanState(transaction, *relReadState); detachDeleteForCSRRels(transaction, tableData, reverseTableData, relReadState.get(), diff --git a/src/storage/store/rel_table_data.cpp b/src/storage/store/rel_table_data.cpp index b20bb7fb8a5..ac5b359a370 100644 --- a/src/storage/store/rel_table_data.cpp +++ b/src/storage/store/rel_table_data.cpp @@ -127,22 +127,23 @@ std::pair RelTableData::findMatchingRow(Trans csrHeaderColumns.offset.get(), csrHeaderColumns.length.get(), direction); scanState->boundNodeIDVector = &boundNodeIDVector; scanState->outputVectors.push_back(scanChunk.getValueVector(0).get()); - scanState->IDVector = scanState->outputVectors[0]; - scanState->rowIdxVector->state = scanState->IDVector->state; + const auto scannedIDVector = scanState->outputVectors[0]; + scanState->outState = scannedIDVector->state.get(); + scanState->rowIdxVector->state = scanState->outputVectors[0]->state; scanState->source = TableScanSource::COMMITTED; scanState->boundNodeOffset = boundNodeOffset; scanState->nodeGroup = getNodeGroup(nodeGroupIdx); scanState->nodeGroup->initializeScanState(transaction, *scanState); row_idx_t matchingRowIdx = INVALID_ROW_IDX; - CSRNodeGroupScanSource source = CSRNodeGroupScanSource::NONE; + auto source = CSRNodeGroupScanSource::NONE; while (true) { const auto scanResult = scanState->nodeGroup->scan(transaction, *scanState); if (scanResult == NODE_GROUP_SCAN_EMMPTY_RESULT) { break; } - for (auto i = 0u; i < scanState->IDVector->state->getSelVector().getSelSize(); i++) { - const auto pos = scanState->IDVector->state->getSelVector()[i]; - if (scanState->IDVector->getValue(pos).offset == relOffset) { + for (auto i = 0u; i < scanState->outState->getSelVector().getSelSize(); i++) { + const auto pos = scanState->outState->getSelVector()[i]; + if (scannedIDVector->getValue(pos).offset == relOffset) { const auto rowIdxPos = scanState->rowIdxVector->state->getSelVector()[i]; matchingRowIdx = scanState->rowIdxVector->getValue(rowIdxPos); source = scanState->nodeGroupScanState->cast().source; @@ -174,7 +175,7 @@ void RelTableData::checkIfNodeHasRels(Transaction* transaction, csrHeaderColumns.offset.get(), csrHeaderColumns.length.get(), direction); scanState->boundNodeIDVector = srcNodeIDVector; scanState->outputVectors.push_back(scanChunk.getValueVector(0).get()); - scanState->IDVector = scanState->outputVectors[0]; + scanState->outState = scanState->outputVectors[0]->state.get(); scanState->source = TableScanSource::COMMITTED; scanState->boundNodeOffset = nodeOffset; scanState->nodeGroup = getNodeGroup(nodeGroupIdx); @@ -184,7 +185,7 @@ void RelTableData::checkIfNodeHasRels(Transaction* transaction, if (scanResult == NODE_GROUP_SCAN_EMMPTY_RESULT) { break; } - if (scanState->outputVectors[0]->state->getSelVector().getSelSize() > 0) { + if (scanState->outState->getSelVector().getSelSize() > 0) { throw RuntimeException(ExceptionMessage::violateDeleteNodeWithConnectedEdgesConstraint( tableName, std::to_string(nodeOffset), RelDataDirectionUtils::relDirectionToString(direction)));