diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index ce74f04cc..ae9ed4620 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -505,6 +505,9 @@ set(SOURCE_FILES_unitTest src/clp/streaming_archive/reader/Segment.hpp src/clp/streaming_archive/reader/SegmentManager.cpp src/clp/streaming_archive/reader/SegmentManager.hpp + src/clp/streaming_archive/single_file_archive/Defs.hpp + src/clp/streaming_archive/single_file_archive/writer.cpp + src/clp/streaming_archive/single_file_archive/writer.hpp src/clp/streaming_archive/writer/Archive.cpp src/clp/streaming_archive/writer/Archive.hpp src/clp/streaming_archive/writer/File.cpp diff --git a/components/core/src/clp/clp/CMakeLists.txt b/components/core/src/clp/clp/CMakeLists.txt index eff32ce46..c241e3bed 100644 --- a/components/core/src/clp/clp/CMakeLists.txt +++ b/components/core/src/clp/clp/CMakeLists.txt @@ -108,6 +108,9 @@ set( ../streaming_archive/reader/Segment.hpp ../streaming_archive/reader/SegmentManager.cpp ../streaming_archive/reader/SegmentManager.hpp + ../streaming_archive/single_file_archive/Defs.hpp + ../streaming_archive/single_file_archive/writer.cpp + ../streaming_archive/single_file_archive/writer.hpp ../streaming_archive/writer/Archive.cpp ../streaming_archive/writer/Archive.hpp ../streaming_archive/writer/File.cpp diff --git a/components/core/src/clp/clp/CommandLineArguments.cpp b/components/core/src/clp/clp/CommandLineArguments.cpp index cb44d96d8..266d1be62 100644 --- a/components/core/src/clp/clp/CommandLineArguments.cpp +++ b/components/core/src/clp/clp/CommandLineArguments.cpp @@ -373,6 +373,10 @@ CommandLineArguments::parse_arguments(int argc, char const* argv[]) { ->default_value(m_schema_file_path), "Path to a schema file. If not specified, heuristics are used to determine " "dictionary variables. See README-Schema.md for details." 
+ )( + "single-file-archive", + po::bool_switch(&m_single_file_archive), + "Output archive as a single-file archive" ); po::options_description all_compression_options; diff --git a/components/core/src/clp/clp/CommandLineArguments.hpp b/components/core/src/clp/clp/CommandLineArguments.hpp index 6e14a4b3b..307427210 100644 --- a/components/core/src/clp/clp/CommandLineArguments.hpp +++ b/components/core/src/clp/clp/CommandLineArguments.hpp @@ -23,6 +23,7 @@ class CommandLineArguments : public CommandLineArgumentsBase { explicit CommandLineArguments(std::string const& program_name) : CommandLineArgumentsBase(program_name), m_show_progress(false), + m_single_file_archive(false), m_sort_input_files(true), m_print_archive_stats_progress(false), m_target_segment_uncompressed_size(1L * 1024 * 1024 * 1024), @@ -45,6 +46,8 @@ class CommandLineArguments : public CommandLineArgumentsBase { bool show_progress() const { return m_show_progress; } + [[nodiscard]] auto single_file_archive() const -> bool { return m_single_file_archive; } + bool sort_input_files() const { return m_sort_input_files; } bool print_archive_stats_progress() const { return m_print_archive_stats_progress; } @@ -92,6 +95,7 @@ class CommandLineArguments : public CommandLineArgumentsBase { std::string m_output_dir; std::string m_schema_file_path; bool m_show_progress; + bool m_single_file_archive; bool m_print_archive_stats_progress; size_t m_target_encoded_file_size; size_t m_target_segment_uncompressed_size; diff --git a/components/core/src/clp/clp/FileCompressor.cpp b/components/core/src/clp/clp/FileCompressor.cpp index 9898602cc..d71f8ac6d 100644 --- a/components/core/src/clp/clp/FileCompressor.cpp +++ b/components/core/src/clp/clp/FileCompressor.cpp @@ -243,7 +243,9 @@ void FileCompressor::parse_and_encode_with_heuristic( // Parse content from file while (m_message_parser.parse_next_message(true, reader, m_parsed_message)) { - if (archive_writer.get_data_size_of_dictionaries() >= 
target_data_size_of_dicts) { + if (archive_writer.get_data_size_of_dictionaries() >= target_data_size_of_dicts + && false == archive_writer.get_use_single_file_archive()) + { split_file_and_archive( archive_user_config, path_for_compression, @@ -337,7 +339,9 @@ bool FileCompressor::try_compressing_as_archive( parent_directories.emplace(file_parent_path); } - if (archive_writer.get_data_size_of_dictionaries() >= target_data_size_of_dicts) { + if (archive_writer.get_data_size_of_dictionaries() >= target_data_size_of_dicts + && false == archive_writer.get_use_single_file_archive()) + { split_archive(archive_user_config, archive_writer); } @@ -537,7 +541,9 @@ std::error_code FileCompressor::compress_ir_stream_by_encoding( } // Split archive/encoded file if necessary before writing the new event - if (archive.get_data_size_of_dictionaries() >= target_data_size_of_dicts) { + if (archive.get_data_size_of_dictionaries() >= target_data_size_of_dicts + && false == archive.get_use_single_file_archive()) + { split_file_and_archive( archive_user_config, path, diff --git a/components/core/src/clp/clp/compression.cpp b/components/core/src/clp/clp/compression.cpp index a0d5bf276..05a001851 100644 --- a/components/core/src/clp/clp/compression.cpp +++ b/components/core/src/clp/clp/compression.cpp @@ -107,6 +107,7 @@ bool compress( archive_user_config.global_metadata_db = global_metadata_db.get(); archive_user_config.print_archive_stats_progress = command_line_args.print_archive_stats_progress(); + archive_user_config.use_single_file_archive = command_line_args.single_file_archive(); // Open Archive streaming_archive::writer::Archive archive_writer; @@ -135,7 +136,9 @@ bool compress( ); } for (auto it = files_to_compress.cbegin(); it != files_to_compress.cend(); ++it) { - if (archive_writer.get_data_size_of_dictionaries() >= target_data_size_of_dictionaries) { + if (archive_writer.get_data_size_of_dictionaries() >= target_data_size_of_dictionaries + && false == 
archive_writer.get_use_single_file_archive()) + { split_archive(archive_user_config, archive_writer); } if (false @@ -163,7 +166,9 @@ bool compress( file_group_id_comparator); // Compress grouped files for (auto const& file_to_compress : grouped_files_to_compress) { - if (archive_writer.get_data_size_of_dictionaries() >= target_data_size_of_dictionaries) { + if (archive_writer.get_data_size_of_dictionaries() >= target_data_size_of_dictionaries + && false == archive_writer.get_use_single_file_archive()) + { split_archive(archive_user_config, archive_writer); } if (false diff --git a/components/core/src/clp/streaming_archive/ArchiveMetadata.hpp b/components/core/src/clp/streaming_archive/ArchiveMetadata.hpp index 1a1edc894..747b59e6a 100644 --- a/components/core/src/clp/streaming_archive/ArchiveMetadata.hpp +++ b/components/core/src/clp/streaming_archive/ArchiveMetadata.hpp @@ -4,11 +4,16 @@ #include #include "../Defs.h" +#include "../ffi/encoding_methods.hpp" #include "../FileReader.hpp" #include "../FileWriter.hpp" #include "Constants.hpp" +#include "msgpack.hpp" namespace clp::streaming_archive { + +static constexpr std::string_view cCompressionTypeZstd = "ZSTD"; + /** * A class to encapsulate metadata directly relating to an archive. 
*/ @@ -79,6 +84,18 @@ class ArchiveMetadata { [[nodiscard]] auto get_end_timestamp() const { return m_end_timestamp; } + [[nodiscard]] auto get_variable_encoding_methods_version() const -> std::string const& { + return m_variable_encoding_methods_version; + } + + [[nodiscard]] auto get_variables_schema_version() const -> std::string const& { + return m_variables_schema_version; + } + + [[nodiscard]] auto get_compression_type() const -> std::string const& { + return m_compression_type; + } + /** * Expands the archive's time range based to encompass the given time range * @param begin_timestamp @@ -88,6 +105,20 @@ class ArchiveMetadata { void write_to_file(FileWriter& file_writer) const; + // MsgPack serialization used for single-file archive format. Variables are renamed when + // serialized to match single-file archive specification. + MSGPACK_DEFINE_MAP( + MSGPACK_NVP("archive_format_version", m_archive_format_version), + MSGPACK_NVP("variable_encoding_methods_version", m_variable_encoding_methods_version), + MSGPACK_NVP("variables_schema_version", m_variables_schema_version), + MSGPACK_NVP("compression_type", m_compression_type), + MSGPACK_NVP("creator_id", m_creator_id), + MSGPACK_NVP("begin_timestamp", m_begin_timestamp), + MSGPACK_NVP("end_timestamp", m_end_timestamp), + MSGPACK_NVP("uncompressed_size", m_uncompressed_size), + MSGPACK_NVP("compressed_size", m_compressed_size) + ); + private: // Variables archive_format_version_t m_archive_format_version{cArchiveFormatVersion}; @@ -102,6 +133,12 @@ class ArchiveMetadata { // The size of the archive uint64_t m_compressed_size{0}; uint64_t m_dynamic_compressed_size{0}; + // TODO: The following fields are used in single-file archive; however, they are not + // currently part of multi-file archive metadata. Modifying multi-file archive metadata + // disk format is potentially a breaking change and not currently required. 
+ std::string m_variable_encoding_methods_version{ffi::cVariableEncodingMethodsVersion}; + std::string m_variables_schema_version{ffi::cVariablesSchemaVersion}; + std::string m_compression_type{cCompressionTypeZstd}; }; } // namespace clp::streaming_archive diff --git a/components/core/src/clp/streaming_archive/single_file_archive/Defs.hpp b/components/core/src/clp/streaming_archive/single_file_archive/Defs.hpp new file mode 100644 index 000000000..9d44e64f7 --- /dev/null +++ b/components/core/src/clp/streaming_archive/single_file_archive/Defs.hpp @@ -0,0 +1,63 @@ +#ifndef CLP_STREAMING_ARCHIVE_SINGLE_FILE_ARCHIVE_DEFS_HPP +#define CLP_STREAMING_ARCHIVE_SINGLE_FILE_ARCHIVE_DEFS_HPP + +#include +#include + +#include "../../Defs.h" +#include "../ArchiveMetadata.hpp" +#include "../Constants.hpp" +#include "msgpack.hpp" + +namespace clp::streaming_archive::single_file_archive { + +using single_file_archive_format_version_t = uint32_t; + +// Single file archive version. +constexpr uint8_t cArchiveMajorVersion{0}; +constexpr uint8_t cArchiveMinorVersion{1}; +constexpr uint16_t cArchivePatchVersion{1}; +constexpr single_file_archive_format_version_t cArchiveVersion{ + cArchiveMajorVersion << 24 | cArchiveMinorVersion << 16 | cArchivePatchVersion +}; + +static constexpr size_t cNumMagicNumberChars{4}; +static constexpr std::array + cUnstructuredSfaMagicNumber{'Y', 'C', 'L', 'P'}; +static constexpr std::string_view cUnstructuredSfaExtension{".clp"}; +static constexpr size_t cFileSizeWarningThreshold{100L * 1024 * 1024}; + +static constexpr size_t cNumStaticFiles{5}; +constexpr std::array cStaticArchiveFileNames{ + cMetadataDBFileName, + cLogTypeDictFilename, + cLogTypeSegmentIndexFilename, + cVarDictFilename, + cVarSegmentIndexFilename +}; + +static constexpr size_t cNumUnused{6}; + +struct __attribute__((packed)) SingleFileArchiveHeader { + std::array magic; + single_file_archive_format_version_t version; + uint64_t metadata_size; + std::array unused; +}; + +struct 
FileInfo { + std::string name; + uint64_t offset; + // Variables are renamed when serialized to match single-file archive specification. + MSGPACK_DEFINE_MAP(MSGPACK_NVP("n", name), MSGPACK_NVP("o", offset)); +}; + +struct SingleFileArchiveMetadata { + std::vector archive_files; + ArchiveMetadata archive_metadata; + uint64_t num_segments; + MSGPACK_DEFINE_MAP(archive_files, archive_metadata, num_segments); +}; +} // namespace clp::streaming_archive::single_file_archive + +#endif // CLP_STREAMING_ARCHIVE_SINGLE_FILE_ARCHIVE_DEFS_HPP diff --git a/components/core/src/clp/streaming_archive/single_file_archive/writer.cpp b/components/core/src/clp/streaming_archive/single_file_archive/writer.cpp new file mode 100644 index 000000000..561cd8aa5 --- /dev/null +++ b/components/core/src/clp/streaming_archive/single_file_archive/writer.cpp @@ -0,0 +1,268 @@ +#include "writer.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../../Defs.h" +#include "../../ErrorCode.hpp" +#include "../../FileReader.hpp" +#include "../../FileWriter.hpp" +#include "../../TraceableException.hpp" +#include "../ArchiveMetadata.hpp" +#include "../Constants.hpp" +#include "Defs.hpp" + +namespace clp::streaming_archive::single_file_archive { + +namespace { +constexpr size_t cReadBlockSize = 4096; + +/** + * Gets the size of a file specified by `file_path` and adds it to file section `offset`. + * @param file_path + * @param[out] offset File section offset for the single-file archive. The returned offset + * represents the starting position of the next file in single-file archive. + * @throws OperationFailed if error getting file size. + */ +auto get_file_size_and_update_offset(std::filesystem::path const& file_path, uint64_t& offset) + -> void; + +/** + * Generates metadata for the file section of a single-file archive. The metadata consists + * of a list of file names and their corresponding starting offsets. 
+ *
+ * @param multi_file_archive_path
+ * @param next_segment_id
+ * @return Vector containing a `FileInfo` struct for every file in the multi-file archive.
+ * @throws Propagates `get_file_size_and_update_offset`'s exceptions.
+ */
+[[nodiscard]] auto
+get_file_infos(std::filesystem::path const& multi_file_archive_path, segment_id_t next_segment_id)
+        -> std::vector<FileInfo>;
+
+/**
+ * Combines file section metadata, multi-file archive metadata, and the number of segments into
+ * single-file archive metadata. Once combined, serializes the metadata into MsgPack format.
+ *
+ * @param multi_file_archive_metadata
+ * @param multi_file_archive_path
+ * @param next_segment_id
+ * @return Packed metadata.
+ */
+[[nodiscard]] auto pack_single_file_archive_metadata(
+        ArchiveMetadata const& multi_file_archive_metadata,
+        std::filesystem::path const& multi_file_archive_path,
+        segment_id_t next_segment_id
+) -> std::stringstream;
+
+/**
+ * Writes single-file archive header.
+ *
+ * @param archive_writer
+ * @param packed_metadata_size
+ */
+auto write_archive_header(FileWriter& archive_writer, size_t packed_metadata_size) -> void;
+
+/**
+ * Writes single-file archive metadata.
+ *
+ * @param archive_writer
+ * @param packed_metadata Packed metadata.
+ */
+auto write_archive_metadata(FileWriter& archive_writer, std::stringstream const& packed_metadata)
+        -> void;
+
+/**
+ * Reads the content of a file and writes it to the single-file archive.
+ * @param file_path
+ * @param archive_writer
+ * @throws OperationFailed if reading the file fails.
+ */
+auto write_archive_file(std::filesystem::path const& file_path, FileWriter& archive_writer) -> void;
+
+/**
+ * Iterates over files in the multi-file archive copying their contents to the single-file archive.
+ * Skips metadata file since already written in `write_archive_metadata`.
+ *
+ * @param archive_writer
+ * @param multi_file_archive_path
+ * @param next_segment_id
+ * @throws Propagates `write_archive_file`'s exceptions.
+ */
+auto write_archive_files(
+        FileWriter& archive_writer,
+        std::filesystem::path const& multi_file_archive_path,
+        segment_id_t next_segment_id
+) -> void;
+
+auto get_file_size_and_update_offset(std::filesystem::path const& file_path, uint64_t& offset)
+        -> void {
+    try {
+        auto size = std::filesystem::file_size(file_path);
+        offset += size;
+    } catch (std::filesystem::filesystem_error const& e) {
+        throw OperationFailed(
+                ErrorCode_Failure,
+                __FILENAME__,
+                __LINE__,
+                fmt::format("Failed to get file size: {}", e.what())
+        );
+    }
+}
+
+auto
+get_file_infos(std::filesystem::path const& multi_file_archive_path, segment_id_t next_segment_id)
+        -> std::vector<FileInfo> {
+    std::vector<FileInfo> files;
+    uint64_t offset = 0;
+
+    for (auto const& static_archive_file_name : cStaticArchiveFileNames) {
+        files.emplace_back(FileInfo{std::string(static_archive_file_name), offset});
+        get_file_size_and_update_offset(multi_file_archive_path / static_archive_file_name, offset);
+    }
+
+    std::filesystem::path segment_dir_path = multi_file_archive_path / cSegmentsDirname;
+
+    for (size_t i = 0; i < next_segment_id; ++i) {
+        auto const segment_id = std::to_string(i);
+        files.emplace_back(FileInfo{segment_id, offset});
+        get_file_size_and_update_offset(segment_dir_path / segment_id, offset);
+    }
+
+    // Add sentinel indicating total size of all files.
+    files.emplace_back(FileInfo{"", offset});
+
+    // Decompression of large single-file archives will consume excessive memory since
+    // single-file archives are not split.
+    if (offset > cFileSizeWarningThreshold) {
+        SPDLOG_WARN(
+                "Single file archive size exceeded {}. "
+                "The single-file archive format is not intended for large archives, "
+                "consider using multi-file archive format instead.",
+                cFileSizeWarningThreshold
+        );
+    }
+
+    return files;
+}
+
+auto pack_single_file_archive_metadata(
+        ArchiveMetadata const& multi_file_archive_metadata,
+        std::filesystem::path const& multi_file_archive_path,
+        segment_id_t next_segment_id
+) -> std::stringstream {
+    SingleFileArchiveMetadata single_file_archive{
+            .archive_files = get_file_infos(multi_file_archive_path, next_segment_id),
+            .archive_metadata = multi_file_archive_metadata,
+            .num_segments = next_segment_id,
+    };
+
+    std::stringstream buf;
+    msgpack::pack(buf, single_file_archive);
+
+    return buf;
+}
+
+auto write_archive_header(FileWriter& archive_writer, size_t packed_metadata_size) -> void {
+    SingleFileArchiveHeader header{
+            .magic{},
+            .version = cArchiveVersion,
+            .metadata_size = packed_metadata_size,
+            .unused{}
+    };
+
+    static_assert(cUnstructuredSfaMagicNumber.size() == header.magic.size());
+    std::memcpy(
+            &header.magic,
+            cUnstructuredSfaMagicNumber.data(),
+            cUnstructuredSfaMagicNumber.size()
+    );
+    archive_writer.write(reinterpret_cast<char const*>(&header), sizeof(header));
+}
+
+auto write_archive_metadata(FileWriter& archive_writer, std::stringstream const& packed_metadata)
+        -> void {
+    archive_writer.write(packed_metadata.str().data(), packed_metadata.str().size());
+}
+
+auto write_archive_file(std::filesystem::path const& file_path, FileWriter& archive_writer)
+        -> void {
+    FileReader reader(file_path.string());
+    std::array<char, cReadBlockSize> read_buffer{};
+    while (true) {
+        size_t num_bytes_read{};
+        ErrorCode const error_code
+                = reader.try_read(read_buffer.data(), cReadBlockSize, num_bytes_read);
+        if (ErrorCode_EndOfFile == error_code) {
+            break;
+        }
+        if (ErrorCode_Success != error_code) {
+            throw OperationFailed(error_code, __FILENAME__, __LINE__);
+        }
+        archive_writer.write(read_buffer.data(), num_bytes_read);
+    }
+}
+
+auto write_archive_files(
+        FileWriter& archive_writer,
+        std::filesystem::path const& multi_file_archive_path,
+        segment_id_t next_segment_id
+) -> void {
+    for (auto const& static_archive_file_name : cStaticArchiveFileNames) {
+        std::filesystem::path static_archive_file_path
+                = multi_file_archive_path / static_archive_file_name;
+        write_archive_file(static_archive_file_path, archive_writer);
+    }
+
+    std::filesystem::path segment_dir_path = multi_file_archive_path / cSegmentsDirname;
+    for (size_t i = 0; i < next_segment_id; ++i) {
+        std::filesystem::path segment_path = segment_dir_path / std::to_string(i);
+        write_archive_file(segment_path, archive_writer);
+    }
+}
+}  // namespace
+
+auto write_single_file_archive(
+        ArchiveMetadata const& multi_file_archive_metadata,
+        std::filesystem::path const& multi_file_archive_path,
+        segment_id_t next_segment_id
+) -> void {
+    FileWriter archive_writer;
+    std::filesystem::path single_file_archive_path
+            = multi_file_archive_path.string()
+              + std::string(single_file_archive::cUnstructuredSfaExtension);
+
+    if (std::filesystem::exists(single_file_archive_path)) {
+        throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__);
+    }
+
+    archive_writer.open(
+            single_file_archive_path.string(),
+            FileWriter::OpenMode::CREATE_FOR_WRITING
+    );
+
+    auto const packed_metadata = pack_single_file_archive_metadata(
+            multi_file_archive_metadata,
+            multi_file_archive_path,
+            next_segment_id
+    );
+
+    write_archive_header(archive_writer, packed_metadata.str().size());
+    write_archive_metadata(archive_writer, packed_metadata);
+    write_archive_files(archive_writer, multi_file_archive_path, next_segment_id);
+
+    archive_writer.close();
+    try {
+        std::filesystem::remove_all(multi_file_archive_path);
+    } catch (std::filesystem::filesystem_error const& e) {
+        throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__, e.what());
+    }
+}
+}  // namespace clp::streaming_archive::single_file_archive
diff --git a/components/core/src/clp/streaming_archive/single_file_archive/writer.hpp
b/components/core/src/clp/streaming_archive/single_file_archive/writer.hpp new file mode 100644 index 000000000..25ccb0175 --- /dev/null +++ b/components/core/src/clp/streaming_archive/single_file_archive/writer.hpp @@ -0,0 +1,52 @@ +#ifndef CLP_STREAMING_ARCHIVE_SINGLE_FILE_ARCHIVE_WRITER_HPP +#define CLP_STREAMING_ARCHIVE_SINGLE_FILE_ARCHIVE_WRITER_HPP + +#include +#include + +#include + +#include "../../Defs.h" +#include "../../ErrorCode.hpp" +#include "../../TraceableException.hpp" +#include "../ArchiveMetadata.hpp" + +namespace clp::streaming_archive::single_file_archive { + +class OperationFailed : public TraceableException { +public: + // Constructors + OperationFailed( + ErrorCode error_code, + char const* const filename, + int line_number, + std::string message = "streaming_archive::single_file_archive operation failed" + ) + : TraceableException{error_code, filename, line_number}, + m_message{std::move(message)} {} + + // Methods + [[nodiscard]] auto what() const noexcept -> char const* override { return m_message.c_str(); } + +private: + std::string m_message; +}; + +/** + * Writes header, metadata and archive files in single-file format then + * removes existing multi-file archive. + * + * @param multi_file_archive_metadata + * @param multi_file_archive_path + * @param next_segment_id ID of the next segment to be created in the archive. + * @throws OperationFailed if single-file archive path already exists. 
+ */ +auto write_single_file_archive( + ArchiveMetadata const& multi_file_archive_metadata, + std::filesystem::path const& multi_file_archive_path, + segment_id_t next_segment_id +) -> void; + +} // namespace clp::streaming_archive::single_file_archive + +#endif // CLP_STREAMING_ARCHIVE_SINGLE_FILE_ARCHIVE_WRITER_HPP diff --git a/components/core/src/clp/streaming_archive/writer/Archive.cpp b/components/core/src/clp/streaming_archive/writer/Archive.cpp index 6804fac7a..0b2ea0072 100644 --- a/components/core/src/clp/streaming_archive/writer/Archive.cpp +++ b/components/core/src/clp/streaming_archive/writer/Archive.cpp @@ -13,12 +13,15 @@ #include #include #include +#include +#include "../../Defs.h" #include "../../EncodedVariableInterpreter.hpp" #include "../../ir/types.hpp" #include "../../spdlog_with_specializations.hpp" #include "../../Utils.hpp" #include "../Constants.hpp" +#include "../single_file_archive/writer.hpp" #include "utils.hpp" using clp::ir::eight_byte_encoded_variable_t; @@ -56,6 +59,7 @@ void Archive::open(UserConfig const& user_config) { m_creator_id_as_string = boost::uuids::to_string(m_creator_id); m_creation_num = user_config.creation_num; m_print_archive_stats_progress = user_config.print_archive_stats_progress; + m_use_single_file_archive = user_config.use_single_file_archive; std::error_code std_error_code; @@ -242,6 +246,10 @@ void Archive::close() { m_metadata_db.close(); + if (m_use_single_file_archive) { + create_single_file_archive(); + } + m_creator_id_as_string.clear(); m_id_as_string.clear(); m_path.clear(); @@ -330,7 +338,9 @@ void Archive::write_msg_using_schema(LogEventView const& log_view) { m_old_ts_pattern = timestamp_pattern; } } - if (get_data_size_of_dictionaries() >= m_target_data_size_of_dicts) { + if (get_data_size_of_dictionaries() >= m_target_data_size_of_dicts + && false == m_use_single_file_archive) + { split_file_and_archive( m_archive_user_config, m_path_for_compression, @@ -650,6 +660,20 @@ void 
Archive::update_metadata() { } } +void Archive::create_single_file_archive() { + if (false == m_local_metadata.has_value()) { + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } + + auto const& multi_file_archive_metadata = m_local_metadata.value(); + + clp::streaming_archive::single_file_archive::write_single_file_archive( + multi_file_archive_metadata, + m_path, + m_next_segment_id + ); +} + // Explicitly declare template specializations so that we can define the template methods in this // file template void Archive::write_log_event_ir( diff --git a/components/core/src/clp/streaming_archive/writer/Archive.hpp b/components/core/src/clp/streaming_archive/writer/Archive.hpp index cd5c5d99f..a0be67ce0 100644 --- a/components/core/src/clp/streaming_archive/writer/Archive.hpp +++ b/components/core/src/clp/streaming_archive/writer/Archive.hpp @@ -48,6 +48,7 @@ class Archive { std::string output_dir; GlobalMetadataDB* global_metadata_db; bool print_archive_stats_progress; + bool use_single_file_archive; }; class OperationFailed : public TraceableException { @@ -193,6 +194,10 @@ class Archive { return m_logtype_dict.get_data_size() + m_var_dict.get_data_size(); } + [[nodiscard]] auto get_use_single_file_archive() const -> bool { + return m_use_single_file_archive; + } + private: // Types /** @@ -279,6 +284,11 @@ class Archive { */ void update_metadata(); + /** + * Writes archive to disk in single-file format then removes existing multi-file archive. + */ + auto create_single_file_archive() -> void; + // Variables boost::uuids::uuid m_id; std::string m_id_as_string; @@ -341,6 +351,7 @@ class Archive { GlobalMetadataDB* m_global_metadata_db; bool m_print_archive_stats_progress; + bool m_use_single_file_archive{false}; }; } // namespace clp::streaming_archive::writer