From ad50d366555963f17ece587eb2da618a34e4c017 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 7 Jan 2025 09:00:13 -0500 Subject: [PATCH 1/9] add support for writing metadata to mysql --- components/core/src/clp_s/CMakeLists.txt | 1 + .../clp_s/metadata_uploader/CMakeLists.txt | 101 ++++++++++++++ .../CommandLineArguments.cpp | 123 ++++++++++++++++++ .../CommandLineArguments.hpp | 50 +++++++ .../MySQLTableMetadataDB.cpp | 98 ++++++++++++++ .../MySQLTableMetadataDB.hpp | 72 ++++++++++ .../TableMetadataManager.cpp | 101 ++++++++++++++ .../TableMetadataManager.hpp | 56 ++++++++ .../metadata_uploader/metadata_uploader.cpp | 43 ++++++ 9 files changed, 645 insertions(+) create mode 100644 components/core/src/clp_s/metadata_uploader/CMakeLists.txt create mode 100644 components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp create mode 100644 components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp create mode 100644 components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp create mode 100644 components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp create mode 100644 components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp create mode 100644 components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp create mode 100644 components/core/src/clp_s/metadata_uploader/metadata_uploader.cpp diff --git a/components/core/src/clp_s/CMakeLists.txt b/components/core/src/clp_s/CMakeLists.txt index 997a03488..2482e8ff8 100644 --- a/components/core/src/clp_s/CMakeLists.txt +++ b/components/core/src/clp_s/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(search/kql) +add_subdirectory(metadata_uploader) set( CLP_SOURCES diff --git a/components/core/src/clp_s/metadata_uploader/CMakeLists.txt b/components/core/src/clp_s/metadata_uploader/CMakeLists.txt new file mode 100644 index 000000000..7b44adf85 --- /dev/null +++ b/components/core/src/clp_s/metadata_uploader/CMakeLists.txt @@ -0,0 +1,101 @@ +set( + METADATA_UPLOADER_SOURCES + ../../clp/aws/AwsAuthenticationSigner.cpp + ../../clp/aws/AwsAuthenticationSigner.hpp + ../../clp/BoundedReader.cpp + ../../clp/BoundedReader.hpp + ../../clp/CurlDownloadHandler.cpp + ../../clp/CurlDownloadHandler.hpp + ../../clp/CurlEasyHandle.hpp + ../../clp/CurlGlobalInstance.cpp + ../../clp/CurlGlobalInstance.hpp + ../../clp/CurlOperationFailed.hpp + ../../clp/CurlStringList.hpp + ../../clp/database_utils.cpp + ../../clp/database_utils.hpp + ../../clp/FileReader.cpp + ../../clp/FileReader.hpp + ../../clp/GlobalMetadataDBConfig.cpp + ../../clp/GlobalMetadataDBConfig.hpp + ../../clp/hash_utils.cpp + ../../clp/hash_utils.hpp + ../../clp/MySQLDB.cpp + ../../clp/MySQLDB.hpp + ../../clp/MySQLParamBindings.cpp + ../../clp/MySQLParamBindings.hpp + ../../clp/MySQLPreparedStatement.cpp + ../../clp/MySQLPreparedStatement.hpp + ../../clp/NetworkReader.cpp + ../../clp/NetworkReader.hpp + ../../clp/ReaderInterface.cpp + ../../clp/ReaderInterface.hpp + ../../clp/Thread.cpp + ../../clp/Thread.hpp + ../ArchiveReader.cpp + ../ArchiveReader.hpp + ../ArchiveReaderAdaptor.cpp + ../ArchiveReaderAdaptor.hpp + ../ColumnReader.cpp + ../ColumnReader.hpp + ../DictionaryReader.hpp + ../DictionaryEntry.cpp + ../DictionaryEntry.hpp + ../FileReader.cpp + ../FileReader.hpp + ../FileWriter.cpp + ../FileWriter.hpp + ../InputConfig.cpp + ../InputConfig.hpp + ../PackedStreamReader.cpp + ../PackedStreamReader.hpp + ../ReaderUtils.cpp + ../ReaderUtils.hpp + ../SchemaReader.cpp + ../SchemaReader.hpp + ../SchemaTree.cpp + ../SchemaTree.hpp + ../TimestampDictionaryReader.cpp + ../TimestampDictionaryReader.hpp + ../TimestampEntry.cpp + ../TimestampEntry.hpp + ../TimestampPattern.cpp + ../TimestampPattern.hpp + ../Utils.cpp + ../Utils.hpp + ../VariableDecoder.cpp + ../VariableDecoder.hpp + ../ZstdCompressor.cpp + ../ZstdCompressor.hpp + ../ZstdDecompressor.cpp + ../ZstdDecompressor.hpp + CommandLineArguments.cpp + CommandLineArguments.hpp + metadata_uploader.cpp + MySQLTableMetadataDB.cpp + MySQLTableMetadataDB.hpp + TableMetadataManager.cpp + TableMetadataManager.hpp +) + +add_executable(metadata-uploader ${METADATA_UPLOADER_SOURCES}) +target_compile_features(metadata-uploader PRIVATE cxx_std_20) +target_include_directories(metadata-uploader PRIVATE "${PROJECT_SOURCE_DIR}/submodules") +target_link_libraries(metadata-uploader + PRIVATE + absl::flat_hash_map + Boost::iostreams Boost::program_options Boost::url + ${CURL_LIBRARIES} + clp::string_utils + MariaDBClient::MariaDBClient + OpenSSL::Crypto + simdjson + spdlog::spdlog + yaml-cpp::yaml-cpp + ZStd::ZStd +) +# Put the built executable at the root of the build directory +set_target_properties( + metadata-uploader + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}" +) diff --git a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp b/components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp new file mode 100644 index 000000000..b8a187093 --- /dev/null +++ b/components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp @@ -0,0 +1,123 @@ +#include "CommandLineArguments.hpp" + +#include + +#include +#include + + +namespace po = boost::program_options; + +namespace clp_s::metadata_uploader { +CommandLineArguments::ParsingResult +CommandLineArguments::parse_arguments(int argc, char const** argv) { + // Print out basic usage if user doesn't specify any options + if (1 == argc) { + print_basic_usage(); + return ParsingResult::Failure; + } + + // Define general options + po::options_description general_options("General Options"); + general_options.add_options()("help,h", "Print help"); + + // Define output options + po::options_description output_options("Output Options"); + std::string metadata_db_config_file_path; + // clang-format off + output_options.add_options()( + "db-config-file", + po::value(&metadata_db_config_file_path)->value_name("FILE") + ->default_value(metadata_db_config_file_path), + "Table metadata DB YAML config" + ); + // clang-format on + + // Define visible options + po::options_description visible_options; + visible_options.add(general_options); + visible_options.add(output_options); + + // Define hidden positional options (not shown in Boost's program options help message) + po::options_description positional_options; + // clang-format off + positional_options.add_options() + ("archive-dir", po::value(&m_archive_dir)) + ("archive-id", po::value(&m_archive_id)); + // clang-format on + po::positional_options_description positional_options_description; + positional_options_description.add("archive-dir", 1); + positional_options_description.add("archive-id", 1); + + // Aggregate all options + po::options_description all_options; + all_options.add(general_options); + all_options.add(output_options); + all_options.add(positional_options); + + // Parse options + try { + // Parse options specified on the command line + po::parsed_options parsed = po::command_line_parser(argc, argv) + .options(all_options) + .positional(positional_options_description) + .run(); + po::variables_map parsed_command_line_options; + store(parsed, parsed_command_line_options); + + notify(parsed_command_line_options); + + // Handle --help + if (parsed_command_line_options.count("help")) { + if (argc > 2) { + SPDLOG_WARN("Ignoring all options besides --help."); + } + + print_basic_usage(); + + std::cerr << visible_options << std::endl; + return ParsingResult::InfoCommand; + } + + // Validate required parameters + if (m_archive_dir.empty()) { + throw std::invalid_argument("ARCHIVE_DIR not specified or empty."); + } + if (m_archive_id.empty()) { + throw std::invalid_argument("ARCHIVE_ID not specified or empty."); + } + if (false == metadata_db_config_file_path.empty()) { + clp::GlobalMetadataDBConfig metadata_db_config; + try { + metadata_db_config.parse_config_file(metadata_db_config_file_path); + } catch (std::exception& e) { + SPDLOG_ERROR("Failed to validate metadata database config - {}.", e.what()); + return ParsingResult::Failure; + } + + if (clp::GlobalMetadataDBConfig::MetadataDBType::MySQL + != metadata_db_config.get_metadata_db_type()) + { + SPDLOG_ERROR( + "Invalid metadata database type for {}; only supported type is MySQL.", + m_program_name + ); + return ParsingResult::Failure; + } + + m_metadata_db_config = std::move(metadata_db_config); + } + } catch (std::exception& e) { + SPDLOG_ERROR("{}", e.what()); + print_basic_usage(); + return ParsingResult::Failure; + } + + return ParsingResult::Success; +} + +void CommandLineArguments::print_basic_usage() const { + std::cerr << "Usage: " << get_program_name() << " [OPTIONS] ARCHIVE_DIR ARCHIVE_ID" + << std::endl; +} +} // namespace clp_s::metadata_uploader diff --git a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp b/components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp new file mode 100644 index 000000000..3d7c219be --- /dev/null +++ b/components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp @@ -0,0 +1,50 @@ +#ifndef CLP_S_METADATA_UPLOADER_COMMANDLINEARGUMENTS_HPP +#define CLP_S_METADATA_UPLOADER_COMMANDLINEARGUMENTS_HPP + +#include + +#include "../../clp/GlobalMetadataDBConfig.hpp" + +namespace clp_s::metadata_uploader { +/** + * Class to parse command line arguments + */ +class CommandLineArguments { +public: + // Types + enum class ParsingResult { + Success = 0, + InfoCommand, + Failure + }; + + // Constructors + explicit CommandLineArguments(std::string const& program_name) : m_program_name(program_name) {} + + // Methods + ParsingResult parse_arguments(int argc, char const* argv[]); + + std::string const& get_program_name() const { return m_program_name; } + + std::string const& get_archive_dir() const { return m_archive_dir; } + + std::string const& get_archive_id() const { return m_archive_id; } + + std::optional const& get_db_config() const { + return m_metadata_db_config; + } + +private: + // Methods + void print_basic_usage() const; + + // Variables + std::string m_program_name; + std::string m_archive_dir; + std::string m_archive_id; + + std::optional m_metadata_db_config; +}; +} // namespace clp_s::metadata_uploader + +#endif // CLP_S_METADATA_UPLOADER_COMMANDLINEARGUMENTS_HPP diff --git a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp new file mode 100644 index 000000000..fad877e76 --- /dev/null +++ b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp @@ -0,0 +1,98 @@ +#include "MySQLTableMetadataDB.hpp" + +#include +#include + +#include "../../clp/database_utils.hpp" +#include "../../clp/type_utils.hpp" + +enum class TableMetadataFieldIndexes : uint16_t { + Name = 0, + Type, + Length, +}; + +namespace clp_s::metadata_uploader { +void MySQLTableMetadataDB::open() { + if (m_is_open) { + throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__); + } + + m_db.open(m_host, m_port, m_username, m_password, m_database_name); + m_is_open = true; +} + +void MySQLTableMetadataDB::init(std::string const& table_name) { + if (false == m_is_open) { + throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__); + } + + m_db.execute_query(fmt::format( + "CREATE TABLE IF NOT EXISTS {}{} (" + "name VARCHAR(512) NOT NULL, " + "type BIGINT NOT NULL," + "PRIMARY KEY (name, type)" + ")", + m_table_prefix, + table_name + )); + + m_insert_field_statement.reset(); + + std::vector table_metadata_field_names( + clp::enum_to_underlying_type(TableMetadataFieldIndexes::Length) + ); + table_metadata_field_names[clp::enum_to_underlying_type(TableMetadataFieldIndexes::Name)] + = "name"; + table_metadata_field_names[clp::enum_to_underlying_type(TableMetadataFieldIndexes::Type)] + = "type"; + fmt::memory_buffer statement_buffer; + auto statement_buffer_ix = std::back_inserter(statement_buffer); + + fmt::format_to( + statement_buffer_ix, + "INSERT IGNORE INTO {}{} ({}) VALUES ({})", + m_table_prefix, + table_name, + clp::get_field_names_sql(table_metadata_field_names), + clp::get_placeholders_sql(table_metadata_field_names.size()) + ); + SPDLOG_DEBUG("{:.{}}", statement_buffer.data(), statement_buffer.size()); + m_insert_field_statement = std::make_unique( + m_db.prepare_statement(statement_buffer.data(), statement_buffer.size()) + ); + + m_is_init = true; +} + +void MySQLTableMetadataDB::add_field(std::string const& field_name, + NodeType field_type) { + if (false == m_is_init) { + throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__); + } + + auto& statement_bindings = m_insert_field_statement->get_statement_bindings(); + statement_bindings.bind_varchar( + clp::enum_to_underlying_type(TableMetadataFieldIndexes::Name), + field_name.c_str(), + field_name.length() + ); + + uint64_t field_type_value = static_cast(field_type); + statement_bindings.bind_uint64( + clp::enum_to_underlying_type(TableMetadataFieldIndexes::Type), + field_type_value + ); + + if (false == m_insert_field_statement->execute()) { + throw OperationFailed(ErrorCodeFailure, __FILENAME__, __LINE__); + } +} + +void MySQLTableMetadataDB::close() { + m_insert_field_statement.reset(); + m_db.close(); + m_is_open = false; + m_is_init = false; +} +} // namespace clp_s::metadata_uploader diff --git a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp new file mode 100644 index 000000000..b1a7b0a66 --- /dev/null +++ b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp @@ -0,0 +1,72 @@ +#ifndef CLP_S_METADATA_UPLOADER_MYSQLTABLEMETADATADB_HPP +#define CLP_S_METADATA_UPLOADER_MYSQLTABLEMETADATADB_HPP + +#include "../../clp/MySQLDB.hpp" +#include "../../clp/MySQLPreparedStatement.hpp" +#include "../SchemaTree.hpp" +#include "../TraceableException.hpp" + +using clp::MySQLDB; +using clp::MySQLPreparedStatement; + +namespace clp_s::metadata_uploader { +/** + * Class representing a MySQL table metadata database + */ +class MySQLTableMetadataDB { +public: + // Types + class OperationFailed : public TraceableException { + public: + // Constructors + OperationFailed(ErrorCode error_code, char const* const filename, int line_number) + : TraceableException(error_code, filename, line_number) {} + + // Methods + char const* what() const noexcept override { + return "MySQLTableMetadataDB operation failed"; + } + }; + + // Constructors + MySQLTableMetadataDB( + std::string const& host, + int port, + std::string const& username, + std::string const& password, + std::string const& database_name, + std::string const& table_prefix + ) + : m_is_open(false), + m_is_init(false), + m_host(host), + m_port(port), + m_username(username), + m_password(password), + m_database_name(database_name), + m_table_prefix(table_prefix) {} + + // Methods + void open(); + void init(std::string const& table_name); + void close(); + void add_field(std::string const& field_name, NodeType field_type); + +private: + // Variables + bool m_is_open; + bool m_is_init; + std::string m_host; + int m_port; + std::string m_username; + std::string m_password; + std::string m_database_name; + std::string m_table_prefix; + + MySQLDB m_db; + + std::unique_ptr m_insert_field_statement; +}; +} // namespace clp_s::metadata_uploader + +#endif // CLP_S_METADATA_UPLOADER_MYSQLTABLEMETADATADB_HPP diff --git a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp new file mode 100644 index 000000000..4edae99a3 --- /dev/null +++ b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp @@ -0,0 +1,101 @@ +#include "TableMetadataManager.hpp" + +#include + +namespace clp_s::metadata_uploader { +TableMetadataManager::TableMetadataManager( + std::optional const& db_config +) { + if (db_config.has_value()) { + m_table_metadata_db = std::make_unique( + db_config->get_metadata_db_host(), + db_config->get_metadata_db_port(), + db_config->get_metadata_db_username(), + db_config->get_metadata_db_password(), + db_config->get_metadata_db_name(), + db_config->get_metadata_table_prefix() + ); + m_table_metadata_db->open(); + m_output_type = OutputType::Database; + } else { + throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__); + } +} + +TableMetadataManager::~TableMetadataManager() { + if (m_output_type == OutputType::Database) { + m_table_metadata_db->close(); + } +} + +void TableMetadataManager::update_metadata( + std::string const& archive_dir, + std::string const& archive_id +) { + m_table_metadata_db->init(archive_dir); + + auto archive_path = std::filesystem::path(archive_dir) / archive_id; + std::error_code ec; + if (false == std::filesystem::exists(archive_path, ec) || ec) { + throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__); + } + + ArchiveReader archive_reader; + archive_reader.open( + clp_s::Path{.source = clp_s::InputSource::Filesystem, .path = archive_path.string()}, + NetworkAuthOption{} + ); + + auto schema_tree = archive_reader.get_schema_tree(); + auto field_pairs = traverse_schema_tree(schema_tree); + if (OutputType::Database == m_output_type) { + for (auto& [name, type] : field_pairs) { + m_table_metadata_db->add_field(name, type); + } + } +} + +std::vector> TableMetadataManager::traverse_schema_tree( + std::shared_ptr const& schema_tree +) { + std::vector> fields; + if (nullptr == schema_tree) { + return fields; + } + + std::string path_buffer; + // Stack of pairs of node_id and path_length + std::stack> s; + for (auto &node : schema_tree->get_nodes()) { + if (node.get_parent_id() == -1 && clp_s::NodeType::Metadata != node.get_type()) { + s.push({node.get_id(), 0}); + break; + } + } + + while (!s.empty()) { + auto [node_id, path_length] = s.top(); + s.pop(); + + auto& node = schema_tree->get_node(node_id); + auto& children_ids = node.get_children_ids(); + auto node_type = node.get_type(); + path_buffer.resize(path_length); + if (false == path_buffer.empty()) { + path_buffer += "."; + } + path_buffer += node.get_key_name(); + if (children_ids.empty() && clp_s::NodeType::Object != node_type + && clp_s::NodeType::Unknown != node_type) + { + fields.push_back({path_buffer, node_type}); + } + + for (auto child_id : children_ids) { + s.push({child_id, path_buffer.size()}); + } + } + + return fields; +} +} // namespace clp_s::metadata_uploader diff --git a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp new file mode 100644 index 000000000..eedb80035 --- /dev/null +++ b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp @@ -0,0 +1,56 @@ +#ifndef CLP_S_METADATA_UPLOADER_TABLEMETADATAMANAGER_HPP +#define CLP_S_METADATA_UPLOADER_TABLEMETADATAMANAGER_HPP + +#include "../../clp/GlobalMetadataDBConfig.hpp" +#include "../ArchiveReader.hpp" +#include "MySQLTableMetadataDB.hpp" + +namespace clp_s::metadata_uploader { +/** + * Class to manage the metadata for a table + */ +class TableMetadataManager { +public: + // Types + class OperationFailed : public TraceableException { + public: + // Constructors + OperationFailed(ErrorCode error_code, char const* const filename, int line_number) + : TraceableException(error_code, filename, line_number) {} + }; + + enum class OutputType: uint8_t { + Database + }; + + // Constructors + TableMetadataManager( + std::optional const& db_config = std::nullopt + ); + + // Destructor + ~TableMetadataManager(); + + // Methods + /** + * Updates the metadata for a given archive + * @param archive_dir used as the table name + * @param archive_id + */ + void update_metadata(std::string const& archive_dir, std::string const& archive_id); + +private: + /** + * Traverses the schema tree and returns a list of path names and their types + * @param schema_tree + * @return a list of path names and their types + */ + std::vector> traverse_schema_tree( + std::shared_ptr const& schema_tree + ); + + std::shared_ptr m_table_metadata_db; + OutputType m_output_type; +}; +} // namespace clp_s::metadata_uploader +#endif // CLP_S_METADATA_UPLOADER_TABLEMETADATAMANAGER_HPP diff --git a/components/core/src/clp_s/metadata_uploader/metadata_uploader.cpp b/components/core/src/clp_s/metadata_uploader/metadata_uploader.cpp new file mode 100644 index 000000000..bfd330172 --- /dev/null +++ b/components/core/src/clp_s/metadata_uploader/metadata_uploader.cpp @@ -0,0 +1,43 @@ +#include + +#include +#include + +#include "../FileReader.hpp" +#include "../ReaderUtils.hpp" +#include "../ZstdDecompressor.hpp" +#include "CommandLineArguments.hpp" +#include "MySQLTableMetadataDB.hpp" +#include "TableMetadataManager.hpp" + +using clp_s::metadata_uploader::CommandLineArguments; + +int main(int argc, char const* argv[]) { + try { + auto stderr_logger = spdlog::stderr_logger_st("stderr"); + spdlog::set_default_logger(stderr_logger); + spdlog::set_pattern("%Y-%m-%dT%H:%M:%S.%e%z [%l] %v"); + } catch (std::exception& e) { + return 1; + } + + CommandLineArguments command_line_arguments("metadata-uploader"); + auto parsing_result = command_line_arguments.parse_arguments(argc, argv); + switch (parsing_result) { + case CommandLineArguments::ParsingResult::Failure: + return 1; + case CommandLineArguments::ParsingResult::InfoCommand: + return 0; + case CommandLineArguments::ParsingResult::Success: + // Continue processing + break; + } + + clp_s::metadata_uploader::TableMetadataManager table_metadata_manager( + command_line_arguments.get_db_config() + ); + table_metadata_manager.update_metadata( + command_line_arguments.get_archive_dir(), + command_line_arguments.get_archive_id() + ); +} From 4997e0dbe78437d91afe155e588e3a355492ea0b Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 17 Jan 2025 09:18:31 -0500 Subject: [PATCH 2/9] fix lint errors --- .../core/src/clp_s/metadata_uploader/CommandLineArguments.cpp | 1 - .../core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp | 3 +-- .../core/src/clp_s/metadata_uploader/TableMetadataManager.cpp | 2 +- .../core/src/clp_s/metadata_uploader/TableMetadataManager.hpp | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp b/components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp index b8a187093..f882b0b5b 100644 --- a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp +++ b/components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp @@ -5,7 +5,6 @@ #include #include - namespace po = boost::program_options; namespace clp_s::metadata_uploader { diff --git a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp index fad877e76..3a1528470 100644 --- a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp +++ b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp @@ -65,8 +65,7 @@ void MySQLTableMetadataDB::init(std::string const& table_name) { m_is_init = true; } -void MySQLTableMetadataDB::add_field(std::string const& field_name, - NodeType field_type) { +void MySQLTableMetadataDB::add_field(std::string const& field_name, NodeType field_type) { if (false == m_is_init) { throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__); } diff --git a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp index 4edae99a3..351e877dd 100644 --- a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp +++ b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp @@ -66,7 +66,7 @@ std::vector> TableMetadataManager::trave std::string path_buffer; // Stack of pairs of node_id and path_length std::stack> s; - for (auto &node : schema_tree->get_nodes()) { + for (auto& node : schema_tree->get_nodes()) { if (node.get_parent_id() == -1 && clp_s::NodeType::Metadata != node.get_type()) { s.push({node.get_id(), 0}); break; diff --git a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp index eedb80035..0a35d22d0 100644 --- a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp +++ b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp @@ -19,7 +19,7 @@ class TableMetadataManager { : TraceableException(error_code, filename, line_number) {} }; - enum class OutputType: uint8_t { + enum class OutputType : uint8_t { Database }; From 4b59affb54441deae52ed43c24b1be57b75e69c6 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 20 Jan 2025 09:16:32 -0500 Subject: [PATCH 3/9] address comments --- .../CommandLineArguments.hpp | 1 + .../MySQLTableMetadataDB.hpp | 26 ++++++++++++++++--- .../TableMetadataManager.cpp | 2 +- .../TableMetadataManager.hpp | 8 ++++-- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp b/components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp index 3d7c219be..df59f8f91 100644 --- a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp +++ b/components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp @@ -2,6 +2,7 @@ #define CLP_S_METADATA_UPLOADER_COMMANDLINEARGUMENTS_HPP #include +#include #include "../../clp/GlobalMetadataDBConfig.hpp" diff --git a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp index b1a7b0a66..d5cbb84c8 100644 --- a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp +++ b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp @@ -47,17 +47,35 @@ class MySQLTableMetadataDB { m_table_prefix(table_prefix) {} // Methods + /** + * Opens the database connection + */ void open(); + + /** + * Creates the table if it does not exist and prepares the insert statement + * @param table_name + */ void init(std::string const& table_name); + + /** + * Closes the database connection + */ void close(); + + /** + * Adds a field to the table + * @param field_name + * @param field_type + */ void add_field(std::string const& field_name, NodeType field_type); private: // Variables - bool m_is_open; - bool m_is_init; - std::string m_host; - int m_port; + bool m_is_open{}; + bool m_is_init{}; + std::string m_host{}; + int m_port{}; std::string m_username; std::string m_password; std::string m_database_name; diff --git a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp index 351e877dd..009ca4d93 100644 --- a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp +++ b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp @@ -67,7 +67,7 @@ std::vector> TableMetadataManager::trave // Stack of pairs of node_id and path_length std::stack> s; for (auto& node : schema_tree->get_nodes()) { - if (node.get_parent_id() == -1 && clp_s::NodeType::Metadata != node.get_type()) { + if (constants::cRootNodeId == node.get_parent_id() && clp_s::NodeType::Metadata != node.get_type()) { s.push({node.get_id(), 0}); break; } diff --git a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp index 0a35d22d0..e1b558e2a 100644 --- a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp +++ b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp @@ -7,7 +7,11 @@ namespace clp_s::metadata_uploader { /** - * Class to manage the metadata for a table + * Class used to updates field names (e.g., JSON full paths) and data types for a specified archive + * directory. It currently stores the results in a database. An archive directory consists of + * multiple archives on the same topic, which can be queried using SQL. The directory name serves as + * the table name, and its metadata (field names and data types) is used by the SQL engine to define + * the table schema. */ class TableMetadataManager { public: @@ -50,7 +54,7 @@ class TableMetadataManager { ); std::shared_ptr m_table_metadata_db; - OutputType m_output_type; + OutputType m_output_type{OutputType::Database}; }; } // namespace clp_s::metadata_uploader #endif // CLP_S_METADATA_UPLOADER_TABLEMETADATAMANAGER_HPP From b36bf031bef1ade192acb12fd12d67283bfb4ec9 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 20 Jan 2025 09:26:02 -0500 Subject: [PATCH 4/9] minor fix --- .../core/src/clp_s/metadata_uploader/CMakeLists.txt | 1 + .../clp_s/metadata_uploader/MySQLTableMetadataDB.hpp | 12 +++++++----- .../clp_s/metadata_uploader/TableMetadataManager.cpp | 6 +++++- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/components/core/src/clp_s/metadata_uploader/CMakeLists.txt b/components/core/src/clp_s/metadata_uploader/CMakeLists.txt index 7b44adf85..635571dd2 100644 --- a/components/core/src/clp_s/metadata_uploader/CMakeLists.txt +++ b/components/core/src/clp_s/metadata_uploader/CMakeLists.txt @@ -31,6 +31,7 @@ set( ../../clp/ReaderInterface.hpp ../../clp/Thread.cpp ../../clp/Thread.hpp + ../archive_constants.hpp ../ArchiveReader.cpp ../ArchiveReader.hpp ../ArchiveReaderAdaptor.cpp diff --git a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp index d5cbb84c8..f22e586e0 100644 --- a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp +++ b/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp @@ -15,6 +15,8 @@ namespace clp_s::metadata_uploader { */ class MySQLTableMetadataDB { public: + static constexpr char cTableMetadataPrefix[] = "table_metadata_"; + // Types class OperationFailed : public TraceableException { public: @@ -44,7 +46,7 @@ class MySQLTableMetadataDB { m_username(username), m_password(password), m_database_name(database_name), - m_table_prefix(table_prefix) {} + m_table_prefix(table_prefix + cTableMetadataPrefix) {} // Methods /** @@ -64,10 +66,10 @@ class MySQLTableMetadataDB { void close(); /** - * Adds a field to the table - * @param field_name - * @param field_type - */ + * Adds a field to the table + * @param field_name + * @param field_type + */ void add_field(std::string const& field_name, NodeType field_type); private: diff --git a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp index 009ca4d93..1ac92befc 100644 --- a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp +++ b/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp @@ -2,6 +2,8 @@ #include +#include "../archive_constants.hpp" + namespace clp_s::metadata_uploader { TableMetadataManager::TableMetadataManager( std::optional const& db_config @@ -67,7 +69,9 @@ std::vector> TableMetadataManager::trave // Stack of pairs of node_id and path_length std::stack> s; for (auto& node : schema_tree->get_nodes()) { - if (constants::cRootNodeId == node.get_parent_id() && clp_s::NodeType::Metadata != node.get_type()) { + if (constants::cRootNodeId == node.get_parent_id() + && clp_s::NodeType::Metadata != node.get_type()) + { s.push({node.get_id(), 0}); break; } From 0baf07e3109ac852d503e71798fc0f14eaa8ab6f Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 21 Jan 2025 09:20:00 -0500 Subject: [PATCH 5/9] rename the program and class names --- components/core/src/clp_s/CMakeLists.txt | 2 +- .../CMakeLists.txt | 22 ++++++------- .../CommandLineArguments.cpp | 4 +-- .../CommandLineArguments.hpp | 10 +++--- .../IndexManager.cpp} | 17 +++++----- .../IndexManager.hpp} | 31 ++++++++++--------- .../MySQLIndexStorage.cpp} | 14 ++++----- .../MySQLIndexStorage.hpp} | 28 ++++++++--------- .../indexer.cpp} | 25 ++++++++------- 9 files changed, 79 insertions(+), 74 deletions(-) rename components/core/src/clp_s/{metadata_uploader => indexer}/CMakeLists.txt (85%) rename components/core/src/clp_s/{metadata_uploader => indexer}/CommandLineArguments.cpp (98%) rename components/core/src/clp_s/{metadata_uploader => indexer}/CommandLineArguments.hpp (81%) rename components/core/src/clp_s/{metadata_uploader/TableMetadataManager.cpp => indexer/IndexManager.cpp} (87%) rename components/core/src/clp_s/{metadata_uploader/TableMetadataManager.hpp => indexer/IndexManager.hpp} (66%) rename components/core/src/clp_s/{metadata_uploader/MySQLTableMetadataDB.cpp => indexer/MySQLIndexStorage.cpp} (88%) rename components/core/src/clp_s/{metadata_uploader/MySQLTableMetadataDB.hpp => indexer/MySQLIndexStorage.hpp} (72%) rename components/core/src/clp_s/{metadata_uploader/metadata_uploader.cpp => indexer/indexer.cpp} (65%) diff --git a/components/core/src/clp_s/CMakeLists.txt b/components/core/src/clp_s/CMakeLists.txt index 2482e8ff8..32a925a70 100644 --- a/components/core/src/clp_s/CMakeLists.txt +++ b/components/core/src/clp_s/CMakeLists.txt @@ -1,5 +1,5 @@ add_subdirectory(search/kql) -add_subdirectory(metadata_uploader) +add_subdirectory(indexer) set( CLP_SOURCES diff --git a/components/core/src/clp_s/metadata_uploader/CMakeLists.txt b/components/core/src/clp_s/indexer/CMakeLists.txt similarity index 85% rename from components/core/src/clp_s/metadata_uploader/CMakeLists.txt rename to components/core/src/clp_s/indexer/CMakeLists.txt index 635571dd2..8c2738d99 100644 --- a/components/core/src/clp_s/metadata_uploader/CMakeLists.txt +++ b/components/core/src/clp_s/indexer/CMakeLists.txt @@ -1,5 +1,5 @@ set( - METADATA_UPLOADER_SOURCES + INDEXER_SOURCES ../../clp/aws/AwsAuthenticationSigner.cpp ../../clp/aws/AwsAuthenticationSigner.hpp ../../clp/BoundedReader.cpp @@ -71,17 +71,17 @@ set( ../ZstdDecompressor.hpp CommandLineArguments.cpp CommandLineArguments.hpp - metadata_uploader.cpp - MySQLTableMetadataDB.cpp - MySQLTableMetadataDB.hpp - TableMetadataManager.cpp - TableMetadataManager.hpp + indexer.cpp + IndexManager.cpp + IndexManager.hpp + MySQLIndexStorage.cpp + MySQLIndexStorage.hpp ) -add_executable(metadata-uploader ${METADATA_UPLOADER_SOURCES}) -target_compile_features(metadata-uploader PRIVATE cxx_std_20) -target_include_directories(metadata-uploader PRIVATE "${PROJECT_SOURCE_DIR}/submodules") -target_link_libraries(metadata-uploader +add_executable(indexer ${INDEXER_SOURCES}) +target_compile_features(indexer PRIVATE cxx_std_20) +target_include_directories(indexer PRIVATE "${PROJECT_SOURCE_DIR}/submodules") +target_link_libraries(indexer PRIVATE absl::flat_hash_map Boost::iostreams Boost::program_options Boost::url @@ -96,7 +96,7 @@ target_link_libraries(metadata-uploader ) # Put the built executable at the root of the build directory set_target_properties( - metadata-uploader + indexer PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}" ) diff --git a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp b/components/core/src/clp_s/indexer/CommandLineArguments.cpp similarity index 98% rename from components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp rename to components/core/src/clp_s/indexer/CommandLineArguments.cpp index f882b0b5b..5f3a0a75a 100644 --- a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.cpp +++ b/components/core/src/clp_s/indexer/CommandLineArguments.cpp @@ -7,7 +7,7 @@ namespace po = boost::program_options; -namespace clp_s::metadata_uploader { +namespace clp_s::indexer { CommandLineArguments::ParsingResult CommandLineArguments::parse_arguments(int argc, char const** argv) { // Print out basic usage if user doesn't specify any options @@ -119,4 +119,4 @@ void CommandLineArguments::print_basic_usage() const { std::cerr << "Usage: " << get_program_name() << " [OPTIONS] ARCHIVE_DIR ARCHIVE_ID" << std::endl; } -} // namespace clp_s::metadata_uploader +} // namespace clp_s::indexer diff --git a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp b/components/core/src/clp_s/indexer/CommandLineArguments.hpp similarity index 81% rename from components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp rename to components/core/src/clp_s/indexer/CommandLineArguments.hpp index df59f8f91..1640eb12d 100644 --- a/components/core/src/clp_s/metadata_uploader/CommandLineArguments.hpp +++ b/components/core/src/clp_s/indexer/CommandLineArguments.hpp @@ -1,12 +1,12 @@ -#ifndef CLP_S_METADATA_UPLOADER_COMMANDLINEARGUMENTS_HPP -#define CLP_S_METADATA_UPLOADER_COMMANDLINEARGUMENTS_HPP +#ifndef CLP_S_INDEXER_COMMANDLINEARGUMENTS_HPP +#define CLP_S_INDEXER_COMMANDLINEARGUMENTS_HPP #include #include #include "../../clp/GlobalMetadataDBConfig.hpp" -namespace clp_s::metadata_uploader { +namespace clp_s::indexer { /** * Class to parse command line arguments */ @@ -46,6 +46,6 @@ class CommandLineArguments { std::optional m_metadata_db_config; }; -} // namespace clp_s::metadata_uploader +} // namespace clp_s::indexer -#endif // CLP_S_METADATA_UPLOADER_COMMANDLINEARGUMENTS_HPP +#endif // CLP_S_INDEXER_COMMANDLINEARGUMENTS_HPP diff --git a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp b/components/core/src/clp_s/indexer/IndexManager.cpp similarity index 87% rename from components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp rename to components/core/src/clp_s/indexer/IndexManager.cpp index 1ac92befc..2b5805878 100644 --- a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.cpp +++ b/components/core/src/clp_s/indexer/IndexManager.cpp @@ -1,15 +1,16 @@ -#include "TableMetadataManager.hpp" +#include "IndexManager.hpp" #include +#include #include "../archive_constants.hpp" -namespace clp_s::metadata_uploader { -TableMetadataManager::TableMetadataManager( +namespace clp_s::indexer { +IndexManager::IndexManager( std::optional const& db_config ) { if (db_config.has_value()) { - m_table_metadata_db = std::make_unique( + m_table_metadata_db = std::make_unique( db_config->get_metadata_db_host(), db_config->get_metadata_db_port(), db_config->get_metadata_db_username(), @@ -24,13 +25,13 @@ TableMetadataManager::TableMetadataManager( } } -TableMetadataManager::~TableMetadataManager() { +IndexManager::~IndexManager() { if (m_output_type == OutputType::Database) { m_table_metadata_db->close(); } } -void TableMetadataManager::update_metadata( +void IndexManager::update_metadata( std::string const& archive_dir, std::string const& archive_id ) { @@ -57,7 +58,7 @@ void TableMetadataManager::update_metadata( } } -std::vector> TableMetadataManager::traverse_schema_tree( +std::vector> IndexManager::traverse_schema_tree( std::shared_ptr const& schema_tree ) { std::vector> fields; @@ -102,4 +103,4 @@ std::vector> TableMetadataManager::trave return fields; } -} // namespace clp_s::metadata_uploader +} // namespace clp_s::indexer diff --git a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp b/components/core/src/clp_s/indexer/IndexManager.hpp similarity index 66% rename from components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp rename to components/core/src/clp_s/indexer/IndexManager.hpp index e1b558e2a..fdc79c361 100644 --- a/components/core/src/clp_s/metadata_uploader/TableMetadataManager.hpp +++ b/components/core/src/clp_s/indexer/IndexManager.hpp @@ -1,19 +1,19 @@ -#ifndef CLP_S_METADATA_UPLOADER_TABLEMETADATAMANAGER_HPP -#define CLP_S_METADATA_UPLOADER_TABLEMETADATAMANAGER_HPP +#ifndef CLP_S_INDEXER_TABLEMETADATAMANAGER_HPP +#define CLP_S_INDEXER_TABLEMETADATAMANAGER_HPP #include "../../clp/GlobalMetadataDBConfig.hpp" #include "../ArchiveReader.hpp" -#include "MySQLTableMetadataDB.hpp" +#include "MySQLIndexStorage.hpp" -namespace clp_s::metadata_uploader { +namespace clp_s::indexer { /** * Class used to updates field names (e.g., JSON full paths) and data types for a specified archive * directory. It currently stores the results in a database. An archive directory consists of * multiple archives on the same topic, which can be queried using SQL. The directory name serves as - * the table name, and its metadata (field names and data types) is used by the SQL engine to define - * the table schema. + * the table name, and its schema (field names and data types) is used by the SQL engine to resolve + * column metadata. */ -class TableMetadataManager { +class IndexManager { public: // Types class OperationFailed : public TraceableException { @@ -21,6 +21,11 @@ class TableMetadataManager { // Constructors OperationFailed(ErrorCode error_code, char const* const filename, int line_number) : TraceableException(error_code, filename, line_number) {} + + // Methods + [[nodiscard]] char const* what() const noexcept override { + return "IndexManager operation failed"; + } }; enum class OutputType : uint8_t { @@ -28,12 +33,10 @@ class TableMetadataManager { }; // Constructors - TableMetadataManager( - std::optional const& db_config = std::nullopt - ); + IndexManager(std::optional const& db_config = std::nullopt); // Destructor - ~TableMetadataManager(); + ~IndexManager(); // Methods /** @@ -53,8 +56,8 @@ class TableMetadataManager { std::shared_ptr const& schema_tree ); - std::shared_ptr m_table_metadata_db; + std::shared_ptr m_table_metadata_db; OutputType m_output_type{OutputType::Database}; }; -} // namespace clp_s::metadata_uploader -#endif // CLP_S_METADATA_UPLOADER_TABLEMETADATAMANAGER_HPP +} // namespace clp_s::indexer +#endif // CLP_S_INDEXER_TABLEMETADATAMANAGER_HPP diff --git a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp b/components/core/src/clp_s/indexer/MySQLIndexStorage.cpp similarity index 88% rename from components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp rename to components/core/src/clp_s/indexer/MySQLIndexStorage.cpp index 3a1528470..6839424d2 100644 --- a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.cpp +++ b/components/core/src/clp_s/indexer/MySQLIndexStorage.cpp @@ -1,4 +1,4 @@ -#include "MySQLTableMetadataDB.hpp" +#include "MySQLIndexStorage.hpp" #include #include @@ -12,8 +12,8 @@ enum class TableMetadataFieldIndexes : uint16_t { Length, }; -namespace clp_s::metadata_uploader { -void MySQLTableMetadataDB::open() { +namespace clp_s::indexer { +void MySQLIndexStorage::open() { if (m_is_open) { throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__); } @@ -22,7 +22,7 @@ void MySQLTableMetadataDB::open() { m_is_open = true; } -void MySQLTableMetadataDB::init(std::string const& table_name) { +void MySQLIndexStorage::init(std::string const& table_name) { if (false == m_is_open) { throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__); } @@ -65,7 +65,7 @@ void MySQLTableMetadataDB::init(std::string const& table_name) { m_is_init = true; } -void MySQLTableMetadataDB::add_field(std::string const& field_name, NodeType field_type) { +void MySQLIndexStorage::add_field(std::string const& field_name, NodeType field_type) { if (false == m_is_init) { throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__); } @@ -88,10 +88,10 @@ void MySQLTableMetadataDB::add_field(std::string const& field_name, NodeType fie } } -void MySQLTableMetadataDB::close() { +void MySQLIndexStorage::close() { m_insert_field_statement.reset(); m_db.close(); m_is_open = false; m_is_init = false; } -} // namespace clp_s::metadata_uploader +} // namespace clp_s::indexer diff --git a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp b/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp similarity index 72% rename from components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp rename to components/core/src/clp_s/indexer/MySQLIndexStorage.hpp index f22e586e0..5ab915c69 100644 --- a/components/core/src/clp_s/metadata_uploader/MySQLTableMetadataDB.hpp +++ b/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp @@ -1,5 +1,5 @@ -#ifndef CLP_S_METADATA_UPLOADER_MYSQLTABLEMETADATADB_HPP -#define CLP_S_METADATA_UPLOADER_MYSQLTABLEMETADATADB_HPP +#ifndef CLP_S_INDEXER_MYSQLTABLEMETADATADB_HPP +#define CLP_S_INDEXER_MYSQLTABLEMETADATADB_HPP #include "../../clp/MySQLDB.hpp" #include "../../clp/MySQLPreparedStatement.hpp" @@ -9,13 +9,13 @@ using clp::MySQLDB; using clp::MySQLPreparedStatement; -namespace clp_s::metadata_uploader { +namespace clp_s::indexer { /** - * Class representing a MySQL table metadata database + * Class representing a MySQL storage for column metadata (column names and types) */ -class MySQLTableMetadataDB { +class MySQLIndexStorage { public: - static constexpr char cTableMetadataPrefix[] = "table_metadata_"; + static constexpr char cColumnMetadataPrefix[] = "column_metadata_"; // Types class OperationFailed : public TraceableException { @@ -25,13 +25,13 @@ class MySQLTableMetadataDB { : TraceableException(error_code, filename, line_number) {} // Methods - char const* what() const noexcept override { - return "MySQLTableMetadataDB operation failed"; + [[nodiscard]] char const* what() const noexcept override { + return "MySQLIndexStorage operation failed"; } }; // Constructors - MySQLTableMetadataDB( + MySQLIndexStorage( std::string const& host, int port, std::string const& username, @@ -46,7 +46,7 @@ class MySQLTableMetadataDB { m_username(username), m_password(password), m_database_name(database_name), - m_table_prefix(table_prefix + cTableMetadataPrefix) {} + m_table_prefix(table_prefix + cColumnMetadataPrefix) {} // Methods /** @@ -66,7 +66,7 @@ class MySQLTableMetadataDB { void close(); /** - * Adds a field to the table + * Adds a field (column) to the table * @param field_name * @param field_type */ @@ -76,7 +76,7 @@ class MySQLTableMetadataDB { // Variables bool m_is_open{}; bool m_is_init{}; - std::string m_host{}; + std::string m_host; int m_port{}; std::string m_username; std::string m_password; @@ -87,6 +87,6 @@ class MySQLTableMetadataDB { std::unique_ptr m_insert_field_statement; }; -} // namespace clp_s::metadata_uploader +} // namespace clp_s::indexer -#endif // CLP_S_METADATA_UPLOADER_MYSQLTABLEMETADATADB_HPP +#endif // CLP_S_INDEXER_MYSQLTABLEMETADATADB_HPP diff --git a/components/core/src/clp_s/metadata_uploader/metadata_uploader.cpp b/components/core/src/clp_s/indexer/indexer.cpp similarity index 65% rename from components/core/src/clp_s/metadata_uploader/metadata_uploader.cpp rename to components/core/src/clp_s/indexer/indexer.cpp index bfd330172..6b992dc25 100644 --- a/components/core/src/clp_s/metadata_uploader/metadata_uploader.cpp +++ b/components/core/src/clp_s/indexer/indexer.cpp @@ -1,16 +1,14 @@ +#include #include #include #include -#include "../FileReader.hpp" #include "../ReaderUtils.hpp" -#include "../ZstdDecompressor.hpp" #include "CommandLineArguments.hpp" -#include "MySQLTableMetadataDB.hpp" -#include "TableMetadataManager.hpp" +#include "IndexManager.hpp" -using clp_s::metadata_uploader::CommandLineArguments; +using clp_s::indexer::CommandLineArguments; int main(int argc, char const* argv[]) { try { @@ -33,11 +31,14 @@ int main(int argc, char const* argv[]) { break; } - clp_s::metadata_uploader::TableMetadataManager table_metadata_manager( - command_line_arguments.get_db_config() - ); - table_metadata_manager.update_metadata( - command_line_arguments.get_archive_dir(), - command_line_arguments.get_archive_id() - ); + try { + clp_s::indexer::IndexManager index_manager(command_line_arguments.get_db_config()); + index_manager.update_metadata( + command_line_arguments.get_archive_dir(), + command_line_arguments.get_archive_id() + ); + } catch (std::exception& e) { + SPDLOG_ERROR("Failed to update metadata: {}", e.what()); + return 1; + } } From 0180f454dd9f2718b7dfb357e1343429f90dd9fe Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 21 Jan 2025 09:44:59 -0500 Subject: [PATCH 6/9] fix lint errors --- .../core/src/clp_s/indexer/CommandLineArguments.hpp | 2 +- components/core/src/clp_s/indexer/IndexManager.cpp | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/components/core/src/clp_s/indexer/CommandLineArguments.hpp b/components/core/src/clp_s/indexer/CommandLineArguments.hpp index 1640eb12d..a6e0c756e 100644 --- a/components/core/src/clp_s/indexer/CommandLineArguments.hpp +++ b/components/core/src/clp_s/indexer/CommandLineArguments.hpp @@ -1,8 +1,8 @@ #ifndef CLP_S_INDEXER_COMMANDLINEARGUMENTS_HPP #define CLP_S_INDEXER_COMMANDLINEARGUMENTS_HPP -#include #include +#include #include "../../clp/GlobalMetadataDBConfig.hpp" diff --git a/components/core/src/clp_s/indexer/IndexManager.cpp b/components/core/src/clp_s/indexer/IndexManager.cpp index 2b5805878..b103c8160 100644 --- a/components/core/src/clp_s/indexer/IndexManager.cpp +++ b/components/core/src/clp_s/indexer/IndexManager.cpp @@ -6,9 +6,7 @@ #include "../archive_constants.hpp" namespace clp_s::indexer { -IndexManager::IndexManager( - std::optional const& db_config -) { +IndexManager::IndexManager(std::optional const& db_config) { if (db_config.has_value()) { m_table_metadata_db = std::make_unique( db_config->get_metadata_db_host(), @@ -31,10 +29,7 @@ IndexManager::~IndexManager() { } } -void IndexManager::update_metadata( - std::string const& archive_dir, - std::string const& archive_id -) { +void IndexManager::update_metadata(std::string const& archive_dir, std::string const& archive_id) { m_table_metadata_db->init(archive_dir); auto archive_path = std::filesystem::path(archive_dir) / archive_id; From 51c82ae02fb66efebbf32fe2cb844036acf6ab70 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 21 Jan 2025 13:33:57 -0500 Subject: [PATCH 7/9] add escape function --- .../core/src/clp_s/indexer/IndexManager.cpp | 53 +++++++++++++++++-- .../core/src/clp_s/indexer/IndexManager.hpp | 13 +++-- .../src/clp_s/indexer/MySQLIndexStorage.hpp | 6 +-- components/core/src/clp_s/indexer/indexer.cpp | 1 + 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/components/core/src/clp_s/indexer/IndexManager.cpp b/components/core/src/clp_s/indexer/IndexManager.cpp index b103c8160..9c12347d6 100644 --- a/components/core/src/clp_s/indexer/IndexManager.cpp +++ b/components/core/src/clp_s/indexer/IndexManager.cpp @@ -53,6 +53,53 @@ void IndexManager::update_metadata(std::string const& archive_dir, std::string c } } +std::string IndexManager::escape_key_name(std::string_view const key_name) { + std::string escaped_key_name; + escaped_key_name.reserve(key_name.size()); + for (auto c : key_name) { + switch (c) { + case '\"': + escaped_key_name += "\\\""; + break; + case '\\': + escaped_key_name += "\\\\"; + break; + case '\n': + escaped_key_name += "\\n"; + break; + case '\t': + escaped_key_name += "\\t"; + break; + case '\r': + escaped_key_name += "\\r"; + break; + case '\b': + escaped_key_name += "\\b"; + break; + case '\f': + escaped_key_name += "\\f"; + break; + case '.': + escaped_key_name += "\\."; + break; + default: + if (std::isprint(c)) { + escaped_key_name += c; + } else { + char buffer[7]; + std::snprintf( + buffer, + sizeof(buffer), + "\\u00%02x", + static_cast(c) + ); + escaped_key_name += buffer; + } + } + } + return escaped_key_name; +} + std::vector> IndexManager::traverse_schema_tree( std::shared_ptr const& schema_tree ) { @@ -73,18 +120,18 @@ std::vector> IndexManager::traverse_sche } } - while (!s.empty()) { + while (false == s.empty()) { auto [node_id, path_length] = s.top(); s.pop(); - auto& node = schema_tree->get_node(node_id); + auto const& node = schema_tree->get_node(node_id); auto& children_ids = node.get_children_ids(); auto node_type = node.get_type(); path_buffer.resize(path_length); if (false == path_buffer.empty()) { path_buffer += "."; } - path_buffer += node.get_key_name(); + path_buffer += escape_key_name(node.get_key_name()); if (children_ids.empty() && clp_s::NodeType::Object != node_type && clp_s::NodeType::Unknown != node_type) { diff --git a/components/core/src/clp_s/indexer/IndexManager.hpp b/components/core/src/clp_s/indexer/IndexManager.hpp index fdc79c361..bc8319893 100644 --- a/components/core/src/clp_s/indexer/IndexManager.hpp +++ b/components/core/src/clp_s/indexer/IndexManager.hpp @@ -1,5 +1,5 @@ -#ifndef CLP_S_INDEXER_TABLEMETADATAMANAGER_HPP -#define CLP_S_INDEXER_TABLEMETADATAMANAGER_HPP +#ifndef CLP_S_INDEXER_INDEXMANAGER_HPP +#define CLP_S_INDEXER_INDEXMANAGER_HPP #include "../../clp/GlobalMetadataDBConfig.hpp" #include "../ArchiveReader.hpp" @@ -47,6 +47,13 @@ class IndexManager { void update_metadata(std::string const& archive_dir, std::string const& archive_id); private: + /** + * Escapes a key name + * @param key_name + * @return the escaped key name + */ + static std::string escape_key_name(std::string_view const key_name); + /** * Traverses the schema tree and returns a list of path names and their types * @param schema_tree @@ -60,4 +67,4 @@ class IndexManager { OutputType m_output_type{OutputType::Database}; }; } // namespace clp_s::indexer -#endif // CLP_S_INDEXER_TABLEMETADATAMANAGER_HPP +#endif // CLP_S_INDEXER_INDEXMANAGER_HPP diff --git a/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp b/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp index 5ab915c69..81e8721d5 100644 --- a/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp +++ b/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp @@ -1,5 +1,5 @@ -#ifndef CLP_S_INDEXER_MYSQLTABLEMETADATADB_HPP -#define CLP_S_INDEXER_MYSQLTABLEMETADATADB_HPP +#ifndef CLP_S_INDEXER_MYSQLINDEXSTORAGE_HPP +#define CLP_S_INDEXER_MYSQLINDEXSTORAGE_HPP #include "../../clp/MySQLDB.hpp" #include "../../clp/MySQLPreparedStatement.hpp" @@ -89,4 +89,4 @@ class MySQLIndexStorage { }; } // namespace clp_s::indexer -#endif // CLP_S_INDEXER_MYSQLTABLEMETADATADB_HPP +#endif // CLP_S_INDEXER_MYSQLINDEXSTORAGE_HPP diff --git a/components/core/src/clp_s/indexer/indexer.cpp b/components/core/src/clp_s/indexer/indexer.cpp index 6b992dc25..8f860ef47 100644 --- a/components/core/src/clp_s/indexer/indexer.cpp +++ b/components/core/src/clp_s/indexer/indexer.cpp @@ -41,4 +41,5 @@ int main(int argc, char const* argv[]) { SPDLOG_ERROR("Failed to update metadata: {}", e.what()); return 1; } + return 0; } From 5ae0d51bcdb52b9f26ac30943bcbe12eaef33936 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 21 Jan 2025 13:39:43 -0500 Subject: [PATCH 8/9] use emplace instead --- components/core/src/clp_s/indexer/IndexManager.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/components/core/src/clp_s/indexer/IndexManager.cpp b/components/core/src/clp_s/indexer/IndexManager.cpp index 9c12347d6..bc3e11fd5 100644 --- a/components/core/src/clp_s/indexer/IndexManager.cpp +++ b/components/core/src/clp_s/indexer/IndexManager.cpp @@ -111,11 +111,11 @@ std::vector> IndexManager::traverse_sche std::string path_buffer; // Stack of pairs of node_id and path_length std::stack> s; - for (auto& node : schema_tree->get_nodes()) { + for (const auto& node : schema_tree->get_nodes()) { if (constants::cRootNodeId == node.get_parent_id() && clp_s::NodeType::Metadata != node.get_type()) { - s.push({node.get_id(), 0}); + s.emplace(node.get_id(), 0); break; } } @@ -125,7 +125,7 @@ std::vector> IndexManager::traverse_sche s.pop(); auto const& node = schema_tree->get_node(node_id); - auto& children_ids = node.get_children_ids(); + const auto& children_ids = node.get_children_ids(); auto node_type = node.get_type(); path_buffer.resize(path_length); if (false == path_buffer.empty()) { @@ -135,11 +135,11 @@ std::vector> IndexManager::traverse_sche if (children_ids.empty() && clp_s::NodeType::Object != node_type && clp_s::NodeType::Unknown != node_type) { - fields.push_back({path_buffer, node_type}); + fields.emplace_back(path_buffer, node_type); } for (auto child_id : children_ids) { - s.push({child_id, path_buffer.size()}); + s.emplace(child_id, path_buffer.size()); } } From 81142043201075046ca612a7bddc1d0087cf27b6 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 21 Jan 2025 14:10:49 -0500 Subject: [PATCH 9/9] remove what() impl --- components/core/src/clp_s/indexer/IndexManager.cpp | 4 ++-- components/core/src/clp_s/indexer/IndexManager.hpp | 5 ----- components/core/src/clp_s/indexer/MySQLIndexStorage.hpp | 5 ----- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/components/core/src/clp_s/indexer/IndexManager.cpp b/components/core/src/clp_s/indexer/IndexManager.cpp index bc3e11fd5..dcb2318d0 100644 --- a/components/core/src/clp_s/indexer/IndexManager.cpp +++ b/components/core/src/clp_s/indexer/IndexManager.cpp @@ -111,7 +111,7 @@ std::vector> IndexManager::traverse_sche std::string path_buffer; // Stack of pairs of node_id and path_length std::stack> s; - for (const auto& node : schema_tree->get_nodes()) { + for (auto const& node : schema_tree->get_nodes()) { if (constants::cRootNodeId == node.get_parent_id() && clp_s::NodeType::Metadata != node.get_type()) { @@ -125,7 +125,7 @@ std::vector> IndexManager::traverse_sche s.pop(); auto const& node = schema_tree->get_node(node_id); - const auto& children_ids = node.get_children_ids(); + auto const& children_ids = node.get_children_ids(); auto node_type = node.get_type(); path_buffer.resize(path_length); if (false == path_buffer.empty()) { diff --git a/components/core/src/clp_s/indexer/IndexManager.hpp b/components/core/src/clp_s/indexer/IndexManager.hpp index bc8319893..2c447729f 100644 --- a/components/core/src/clp_s/indexer/IndexManager.hpp +++ b/components/core/src/clp_s/indexer/IndexManager.hpp @@ -21,11 +21,6 @@ class IndexManager { // Constructors OperationFailed(ErrorCode error_code, char const* const filename, int line_number) : TraceableException(error_code, filename, line_number) {} - - // Methods - [[nodiscard]] char const* what() const noexcept override { - return "IndexManager operation failed"; - } }; enum class OutputType : uint8_t { diff --git a/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp b/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp index 81e8721d5..4c4bc0dd9 100644 --- a/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp +++ b/components/core/src/clp_s/indexer/MySQLIndexStorage.hpp @@ -23,11 +23,6 @@ class MySQLIndexStorage { // Constructors OperationFailed(ErrorCode error_code, char const* const filename, int line_number) : TraceableException(error_code, filename, line_number) {} - - // Methods - [[nodiscard]] char const* what() const noexcept override { - return "MySQLIndexStorage operation failed"; - } }; // Constructors