From 32af4bfbd81e2e7e7788a967fe8e717870d1895a Mon Sep 17 00:00:00 2001 From: ArrayRecord Team Date: Tue, 14 Jun 2022 11:37:12 +0000 Subject: [PATCH] Internal change PiperOrigin-RevId: 454828490 --- .bazelrc | 8 + WORKSPACE | 136 ++++++++++++++ cpp/BUILD | 220 ++++++++++------------- cpp/array_record_fuzzer.cc | 107 ----------- cpp/array_record_reader.cc | 50 +++--- cpp/array_record_reader.h | 30 ++-- cpp/array_record_reader_test.cc | 91 +++++----- cpp/array_record_writer.cc | 62 +++---- cpp/array_record_writer.h | 29 ++- cpp/array_record_writer_test.cc | 32 ++-- cpp/common.h | 6 +- cpp/layout.proto | 2 - cpp/masked_reader.cc | 16 +- cpp/masked_reader.h | 6 +- cpp/masked_reader_test.cc | 8 +- cpp/parallel_for.h | 12 +- cpp/parallel_for_test.cc | 12 +- cpp/sequenced_chunk_writer.cc | 22 +-- cpp/sequenced_chunk_writer.h | 20 +-- cpp/sequenced_chunk_writer_test.cc | 48 ++--- cpp/test_utils.cc | 4 +- cpp/test_utils_test.cc | 10 +- cpp/thread_compatible_shared_ptr.h | 2 +- cpp/thread_compatible_shared_ptr_test.cc | 14 +- cpp/thread_pool.cc | 4 +- python/BUILD | 18 +- python/array_record_module.cc | 17 +- python/array_record_module_test.py | 4 +- 28 files changed, 501 insertions(+), 489 deletions(-) create mode 100644 .bazelrc create mode 100644 WORKSPACE delete mode 100644 cpp/array_record_fuzzer.cc diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 0000000..3a7994e --- /dev/null +++ b/.bazelrc @@ -0,0 +1,8 @@ +build -c opt +build --cxxopt=-std=c++17 +build --host_cxxopt=-std=c++17 + +# TODO(fchern): Use non-hardcode path. +build --action_env=PYTHON_BIN_PATH="/usr/bin/python3" +build --repo_env=PYTHON_BIN_PATH="/usr/bin/python3" +build --python_path="/usr/bin/python3" diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 0000000..1983e74 --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,136 @@ +workspace(name = "com_google_array_record") + +# Might be better than http_archive +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +# Abseil LTS 20211102 +http_archive( + name = "com_google_absl", + sha256 = "237e2e6aec7571ae90d961d02de19f56861a7417acbbc15713b8926e39d461ed", # SHARED_ABSL_SHA + strip_prefix = "abseil-cpp-215105818dfde3174fe799600bb0f3cae233d0bf", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/215105818dfde3174fe799600bb0f3cae233d0bf.tar.gz", + "https://github.com/abseil/abseil-cpp/archive/215105818dfde3174fe799600bb0f3cae233d0bf.tar.gz", + ], +) +http_archive( + name = "com_google_googletest", + strip_prefix = "googletest-eb9225ce361affe561592e0912320b9db84985d0", + url = "https://github.com/google/googletest/archive/eb9225ce361affe561592e0912320b9db84985d0.zip", + sha256 = "a7db7d1295ce46b93f3d1a90dbbc55a48409c00d19684fcd87823037add88118", +) + +# V3.4.0, 20210818 +http_archive( + name = "eigen3", + sha256 = "b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626", + strip_prefix = "eigen-3.4.0", + urls = [ + "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.bz2", + ], + build_file_content = +""" +cc_library( + name = 'eigen3', + srcs = [], + includes = ['.'], + hdrs = glob(['Eigen/**', 'unsupported/Eigen/**']), + visibility = ['//visibility:public'], +) +""" +) + +## `pybind11_bazel` +# See https://github.com/pybind/pybind11_bazel +http_archive( + name = "pybind11_bazel", + strip_prefix = "pybind11_bazel-72cbbf1fbc830e487e3012862b7b720001b70672", + sha256 = "516c1b3a10d87740d2b7de6f121f8e19dde2c372ecbfe59aef44cd1872c10395", + urls = ["https://github.com/pybind/pybind11_bazel/archive/72cbbf1fbc830e487e3012862b7b720001b70672.tar.gz"], +) +# V2.9.2, 20220330 +http_archive( + name = "pybind11", + build_file = "@pybind11_bazel//:pybind11.BUILD", + strip_prefix = "pybind11-2.9.2", + urls = ["https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.zip"], +) +load("@pybind11_bazel//:python_configure.bzl", "python_configure") +python_configure(name = "local_config_python") + +# V3.20.1, 20220421 +# proto_library, cc_proto_library, and java_proto_library rules implicitly +# depend on @com_google_protobuf for protoc and proto runtimes. +# This statement defines the @com_google_protobuf repo. +http_archive( + name = "com_google_protobuf", + sha256 = "8b28fdd45bab62d15db232ec404248901842e5340299a57765e48abe8a80d930", + strip_prefix = "protobuf-3.20.1", + urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.20.1.tar.gz"], +) + +load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") +protobuf_deps() + +# Riegeli does not cut releases, so we reference the head +http_archive( + name = "com_google_riegeli", + strip_prefix = "riegeli-master", + urls = [ + "https://github.com/google/riegeli/archive/master.zip", + ], +) +# Dependencies required by riegeli +http_archive( + name = "org_brotli", + patch_args = ["-p1"], + patches = ["@com_google_riegeli//third_party:brotli.patch"], + sha256 = "fec5a1d26f3dd102c542548aaa704f655fecec3622a24ec6e97768dcb3c235ff", + strip_prefix = "brotli-68f1b90ad0d204907beb58304d0bd06391001a4d", + urls = ["https://github.com/google/brotli/archive/68f1b90ad0d204907beb58304d0bd06391001a4d.zip"], # 2021-08-18 +) +http_archive( + name = "net_zstd", + build_file = "@com_google_riegeli//third_party:net_zstd.BUILD", + sha256 = "b6c537b53356a3af3ca3e621457751fa9a6ba96daf3aebb3526ae0f610863532", + strip_prefix = "zstd-1.4.5/lib", + urls = ["https://github.com/facebook/zstd/archive/v1.4.5.zip"], # 2020-05-22 +) +http_archive( + name = "lz4", + build_file = "@com_google_riegeli//third_party:lz4.BUILD", + sha256 = "4ec935d99aa4950eadfefbd49c9fad863185ac24c32001162c44a683ef61b580", + strip_prefix = "lz4-1.9.3/lib", + urls = ["https://github.com/lz4/lz4/archive/refs/tags/v1.9.3.zip"], # 2020-11-16 +) +http_archive( + name = "snappy", + build_file = "@com_google_riegeli//third_party:snappy.BUILD", + sha256 = "38b4aabf88eb480131ed45bfb89c19ca3e2a62daeb081bdf001cfb17ec4cd303", + strip_prefix = "snappy-1.1.8", + urls = ["https://github.com/google/snappy/archive/1.1.8.zip"], # 2020-01-14 +) + +http_archive( + name = "crc32c", + build_file = "@com_google_riegeli//third_party:crc32.BUILD", + sha256 = "338f1d9d95753dc3cdd882dfb6e176bbb4b18353c29c411ebcb7b890f361722e", + strip_prefix = "crc32c-1.1.0", + urls = ["https://github.com/google/crc32c/archive/1.1.0.zip"], # 2019-05-24 +) + +http_archive( + name = "zlib", + build_file = "@com_google_riegeli//third_party:zlib.BUILD", + sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", + strip_prefix = "zlib-1.2.11", + urls = ["http://zlib.net/fossils/zlib-1.2.11.tar.gz"], # 2017-01-15 +) + +http_archive( + name = "highwayhash", + build_file = "@com_google_riegeli//third_party:highwayhash.BUILD", + sha256 = "cf891e024699c82aabce528a024adbe16e529f2b4e57f954455e0bf53efae585", + strip_prefix = "highwayhash-276dd7b4b6d330e4734b756e97ccfb1b69cc2e12", + urls = ["https://github.com/google/highwayhash/archive/276dd7b4b6d330e4734b756e97ccfb1b69cc2e12.zip"], # 2019-02-22 +) diff --git a/cpp/BUILD b/cpp/BUILD index a428cbf..5ebb84d 100644 --- a/cpp/BUILD +++ b/cpp/BUILD @@ -1,8 +1,6 @@ # ArrayRecord is a new file format for IO intensive applications. # It supports efficient random access and various compression algorithms. -load("//security/fuzzing/blaze:cc_fuzz_target.bzl", "cc_fuzz_target") - package(default_visibility = ["//visibility:public"]) licenses(["notice"]) @@ -21,9 +19,9 @@ cc_library( name = "common", hdrs = ["common.h"], deps = [ - "//third_party/absl/base:core_headers", - "//third_party/absl/status", - "//third_party/absl/strings:str_format", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", ], ) @@ -33,18 +31,18 @@ cc_library( hdrs = ["sequenced_chunk_writer.h"], deps = [ ":common", - "//third_party/absl/base:core_headers", - "//third_party/absl/status", - "//third_party/absl/status:statusor", - "//third_party/absl/strings", - "//third_party/absl/strings:str_format", - "//third_party/absl/synchronization", - "//third_party/riegeli/base", - "//third_party/riegeli/base:status", - "//third_party/riegeli/bytes:writer", - "//third_party/riegeli/chunk_encoding:chunk", - "//third_party/riegeli/chunk_encoding:constants", - "//third_party/riegeli/records:chunk_writer", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/synchronization", + "@com_google_riegeli//riegeli/base", + "@com_google_riegeli//riegeli/base:status", + "@com_google_riegeli//riegeli/bytes:writer", + "@com_google_riegeli//riegeli/chunk_encoding:chunk", + "@com_google_riegeli//riegeli/chunk_encoding:constants", + "@com_google_riegeli//riegeli/records:chunk_writer", ], ) @@ -53,7 +51,7 @@ cc_library( srcs = ["thread_pool.cc"], hdrs = ["thread_pool.h"], deps = [ - "//third_party/absl/flags:flag", + "@com_google_absl//absl/flags:flag", "@eigen3//:eigen3", ], ) @@ -64,17 +62,17 @@ cc_library( deps = [ ":common", ":thread_pool", - "//third_party/absl/base:core_headers", - "//third_party/absl/functional:function_ref", - "//third_party/absl/status", - "//third_party/absl/synchronization", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/status", + "@com_google_absl//absl/synchronization", ], ) cc_library( name = "thread_compatible_shared_ptr", hdrs = ["thread_compatible_shared_ptr.h"], - deps = ["//third_party/absl/synchronization"], + deps = ["@com_google_absl//absl/synchronization"], ) cc_library( @@ -91,8 +89,8 @@ cc_test( deps = [ ":common", ":test_utils", - "//testing/base/public:gunit_main", - "//third_party/absl/strings", + "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/strings", ], ) @@ -105,25 +103,24 @@ cc_library( ":layout_cc_proto", ":sequenced_chunk_writer", ":thread_pool", - "//third_party/absl/base:core_headers", - "//third_party/absl/status", - "//third_party/absl/status:statusor", - "//third_party/absl/strings", - "//third_party/absl/synchronization", - "//third_party/absl/types:span", - "//third_party/riegeli/base", - "//third_party/riegeli/base:options_parser", - "//third_party/riegeli/bytes:chain_writer", - "//third_party/riegeli/bytes:file_writer", - "//third_party/riegeli/chunk_encoding:chunk", - "//third_party/riegeli/chunk_encoding:chunk_encoder", - "//third_party/riegeli/chunk_encoding:compressor_options", - "//third_party/riegeli/chunk_encoding:constants", - "//third_party/riegeli/chunk_encoding:deferred_encoder", - "//third_party/riegeli/chunk_encoding:simple_encoder", - "//third_party/riegeli/chunk_encoding:transpose_encoder", - "//third_party/riegeli/records:records_metadata_cc_proto", "@com_google_protobuf//:protobuf_lite", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:span", + "@com_google_riegeli//riegeli/base", + "@com_google_riegeli//riegeli/base:options_parser", + "@com_google_riegeli//riegeli/bytes:chain_writer", + "@com_google_riegeli//riegeli/chunk_encoding:chunk", + "@com_google_riegeli//riegeli/chunk_encoding:chunk_encoder", + "@com_google_riegeli//riegeli/chunk_encoding:compressor_options", + "@com_google_riegeli//riegeli/chunk_encoding:constants", + "@com_google_riegeli//riegeli/chunk_encoding:deferred_encoder", + "@com_google_riegeli//riegeli/chunk_encoding:simple_encoder", + "@com_google_riegeli//riegeli/chunk_encoding:transpose_encoder", + "@com_google_riegeli//riegeli/records:records_metadata_cc_proto", ], ) @@ -133,13 +130,13 @@ cc_library( hdrs = ["masked_reader.h"], deps = [ ":common", - "//third_party/absl/memory", - "//third_party/absl/status", - "//third_party/absl/time", - "//third_party/absl/types:optional", - "//third_party/riegeli/base", - "//third_party/riegeli/base:status", - "//third_party/riegeli/bytes:reader", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "@com_google_riegeli//riegeli/base", + "@com_google_riegeli//riegeli/base:status", + "@com_google_riegeli//riegeli/bytes:reader", ], ) @@ -154,23 +151,23 @@ cc_library( ":parallel_for", ":thread_compatible_shared_ptr", ":thread_pool", - "//third_party/absl/base:core_headers", - "//third_party/absl/functional:bind_front", - "//third_party/absl/functional:function_ref", - "//third_party/absl/status", - "//third_party/absl/status:statusor", - "//third_party/absl/strings", - "//third_party/absl/synchronization", - "//third_party/absl/types:span", - "//third_party/riegeli/base", - "//third_party/riegeli/base:options_parser", - "//third_party/riegeli/base:status", - "//third_party/riegeli/bytes:reader", - "//third_party/riegeli/chunk_encoding:chunk_decoder", - "//third_party/riegeli/records:chunk_reader", - "//third_party/riegeli/records:record_position", - "//third_party/riegeli/records:record_reader", "@com_google_protobuf//:protobuf_lite", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/functional:bind_front", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:span", + "@com_google_riegeli//riegeli/base", + "@com_google_riegeli//riegeli/base:options_parser", + "@com_google_riegeli//riegeli/base:status", + "@com_google_riegeli//riegeli/bytes:reader", + "@com_google_riegeli//riegeli/chunk_encoding:chunk_decoder", + "@com_google_riegeli//riegeli/records:chunk_reader", + "@com_google_riegeli//riegeli/records:record_position", + "@com_google_riegeli//riegeli/records:record_reader", ], ) @@ -181,18 +178,18 @@ cc_test( ":common", ":sequenced_chunk_writer", ":thread_pool", - "//testing/base/public:gunit_main", - "//third_party/absl/flags:flag", - "//third_party/absl/status", - "//third_party/absl/strings:cord", - "//third_party/riegeli/bytes:chain_writer", - "//third_party/riegeli/bytes:cord_writer", - "//third_party/riegeli/bytes:string_reader", - "//third_party/riegeli/bytes:string_writer", - "//third_party/riegeli/chunk_encoding:chunk", - "//third_party/riegeli/chunk_encoding:compressor_options", - "//third_party/riegeli/chunk_encoding:simple_encoder", - "//third_party/riegeli/records:record_reader", + "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:cord", + "@com_google_riegeli//riegeli/bytes:chain_writer", + "@com_google_riegeli//riegeli/bytes:cord_writer", + "@com_google_riegeli//riegeli/bytes:string_reader", + "@com_google_riegeli//riegeli/bytes:string_writer", + "@com_google_riegeli//riegeli/chunk_encoding:chunk", + "@com_google_riegeli//riegeli/chunk_encoding:compressor_options", + "@com_google_riegeli//riegeli/chunk_encoding:simple_encoder", + "@com_google_riegeli//riegeli/records:record_reader", ], ) @@ -203,8 +200,8 @@ cc_test( ":common", ":thread_compatible_shared_ptr", ":thread_pool", - "//testing/base/public:gunit_main", - "//third_party/absl/time", + "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/time", ], ) @@ -219,14 +216,14 @@ cc_test( ":layout_cc_proto", ":test_utils", ":thread_pool", - "//testing/base/public:gunit_main", - "//third_party/absl/strings", - "//third_party/riegeli/bytes:string_reader", - "//third_party/riegeli/bytes:string_writer", - "//third_party/riegeli/chunk_encoding:constants", - "//third_party/riegeli/records:record_reader", - "//third_party/riegeli/records:record_writer", - "//third_party/riegeli/records:records_metadata_cc_proto", + "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/strings", + "@com_google_riegeli//riegeli/bytes:string_reader", + "@com_google_riegeli//riegeli/bytes:string_writer", + "@com_google_riegeli//riegeli/chunk_encoding:constants", + "@com_google_riegeli//riegeli/records:record_reader", + "@com_google_riegeli//riegeli/records:record_writer", + "@com_google_riegeli//riegeli/records:records_metadata_cc_proto", ], ) @@ -235,8 +232,8 @@ cc_test( srcs = ["masked_reader_test.cc"], deps = [ ":masked_reader", - "//testing/base/public:gunit_main", - "//third_party/riegeli/bytes:string_reader", + "@com_google_googletest//:gtest_main", + "@com_google_riegeli//riegeli/bytes:string_reader", ], ) @@ -247,9 +244,9 @@ cc_test( deps = [ ":parallel_for", ":thread_pool", - "//testing/base/public:gunit_main", - "//third_party/absl/functional:function_ref", - "//third_party/absl/status", + "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/status", ], ) @@ -264,34 +261,13 @@ cc_test( ":layout_cc_proto", ":test_utils", ":thread_pool", - "//testing/base/public:gunit_main", - "//third_party/absl/functional:function_ref", - "//third_party/absl/status", - "//third_party/absl/strings", - "//third_party/riegeli/bytes:string_reader", - "//third_party/riegeli/bytes:string_writer", - "//third_party/riegeli/chunk_encoding:chunk_decoder", - "//third_party/riegeli/records:chunk_reader", - ], -) - -cc_fuzz_target( - name = "array_record_fuzzer", - srcs = ["array_record_fuzzer.cc"], - componentid = 85655, # Buganizer ID: Research > ScaM - deps = [ - ":array_record_reader", - ":array_record_writer", - ":layout_cc_proto", - ":thread_pool", - "//base:logging", - "//base:sysinfo", - "//research/scam/utils:threads", - "//third_party/absl/status", - "//third_party/absl/types:span", - "//third_party/riegeli/bytes:string_reader", - "//third_party/riegeli/bytes:string_writer", - "//third_party/riegeli/records:record_reader", - "//third_party/riegeli/records:records_metadata_cc_proto", + "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@com_google_riegeli//riegeli/bytes:string_reader", + "@com_google_riegeli//riegeli/bytes:string_writer", + "@com_google_riegeli//riegeli/chunk_encoding:chunk_decoder", + "@com_google_riegeli//riegeli/records:chunk_reader", ], ) diff --git a/cpp/array_record_fuzzer.cc b/cpp/array_record_fuzzer.cc deleted file mode 100644 index d12f1c6..0000000 --- a/cpp/array_record_fuzzer.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright 2022 Google LLC. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include -#include -#include -#include -#include - -#include "base/logging.h" -#include "base/sysinfo.h" -#include "fuzzer/FuzzedDataProvider.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/types/span.h" -#include "third_party/array_record/cpp/array_record_reader.h" -#include "third_party/array_record/cpp/array_record_writer.h" -#include "third_party/array_record/cpp/layout.proto.h" -#include "third_party/array_record/cpp/thread_pool.h" -#include "third_party/riegeli/bytes/string_reader.h" -#include "third_party/riegeli/bytes/string_writer.h" -#include "third_party/riegeli/records/record_reader.h" -#include "third_party/riegeli/records/records_metadata.proto.h" - -namespace array_record { - -void TestArrayRecord(absl::string_view options_text, - absl::Span ground_truth_records, - ARThreadPool* pool) { - LOG(INFO) << "Testing with options: " << options_text - << " num_records: " << ground_truth_records.size(); - auto options = - ArrayRecordWriterBase::Options::FromString(options_text).ValueOrDie(); - std::string encoded; - auto writer = ArrayRecordWriter>( - std::forward_as_tuple(&encoded), options, pool); - - for (const auto& record : ground_truth_records) { - CHECK(writer.WriteRecord(record)) << writer.status(); - } - CHECK(writer.Close()) << writer.status(); - - auto reader = ArrayRecordReader>( - std::forward_as_tuple(encoded), - ArrayRecordReaderBase::Options().set_readahead_buffer_size(2048), pool); - - // Test sequential read - for (auto i : Seq(reader.NumRecords())) { - absl::string_view record; - CHECK(reader.ReadRecord(&record)); - CHECK_EQ(record, ground_truth_records[i]); - } - - // Test parallel read - CHECK_OK(reader.ParallelReadRecords( - [&](uint64_t record_idx, absl::string_view record) -> absl::Status { - CHECK_EQ(record, ground_truth_records[record_idx]); - return absl::OkStatus(); - })); - - CHECK(reader.Close()) << reader.status(); - LOG(INFO) << "TEST PASSED"; -} - -} // namespace array_record - -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - FuzzedDataProvider fuzzed_data_provider(data, size); - std::vector ground_truth_records; - - while (fuzzed_data_provider.remaining_bytes()) { - ground_truth_records.push_back( - fuzzed_data_provider.ConsumeRandomLengthString()); - } - - auto* pool = array_record::ArrayRecordGlobalPool(); - - array_record::TestArrayRecord("uncompressed", ground_truth_records, pool); - - array_record::TestArrayRecord("brotli", ground_truth_records, pool); - - array_record::TestArrayRecord("zstd", ground_truth_records, pool); - - array_record::TestArrayRecord("snappy", ground_truth_records, pool); - - array_record::TestArrayRecord("uncompressed,transpose", ground_truth_records, - pool); - - array_record::TestArrayRecord("brotli,transpose", ground_truth_records, pool); - - array_record::TestArrayRecord("zstd,transpose", ground_truth_records, pool); - - array_record::TestArrayRecord("snappy,transpose", ground_truth_records, pool); - return 0; -} diff --git a/cpp/array_record_reader.cc b/cpp/array_record_reader.cc index c7948bb..a442afd 100644 --- a/cpp/array_record_reader.cc +++ b/cpp/array_record_reader.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/array_record_reader.h" +#include "cpp/array_record_reader.h" #include #include @@ -29,26 +29,26 @@ limitations under the License. #include #include -#include "third_party/absl/base/optimization.h" -#include "third_party/absl/base/thread_annotations.h" -#include "third_party/absl/functional/bind_front.h" -#include "third_party/absl/functional/function_ref.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/absl/synchronization/mutex.h" -#include "third_party/absl/types/span.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/array_record/cpp/layout.proto.h" -#include "third_party/array_record/cpp/masked_reader.h" -#include "third_party/array_record/cpp/parallel_for.h" -#include "third_party/array_record/cpp/thread_compatible_shared_ptr.h" -#include "third_party/riegeli/base/object.h" -#include "third_party/riegeli/base/options_parser.h" -#include "third_party/riegeli/base/status.h" -#include "third_party/riegeli/bytes/reader.h" -#include "third_party/riegeli/chunk_encoding/chunk_decoder.h" -#include "third_party/riegeli/records/chunk_reader.h" -#include "third_party/riegeli/records/record_position.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/functional/bind_front.h" +#include "absl/functional/function_ref.h" +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" +#include "cpp/common.h" +#include "cpp/layout.pb.h" +#include "cpp/masked_reader.h" +#include "cpp/parallel_for.h" +#include "cpp/thread_compatible_shared_ptr.h" +#include "riegeli/base/object.h" +#include "riegeli/base/options_parser.h" +#include "riegeli/base/status.h" +#include "riegeli/bytes/reader.h" +#include "riegeli/chunk_encoding/chunk_decoder.h" +#include "riegeli/records/chunk_reader.h" +#include "riegeli/records/record_position.h" namespace array_record { @@ -58,7 +58,7 @@ constexpr size_t kRiegeliBlockSize = (1 << 16); // This number should rarely change unless there's a new great layout design // that wasn't backward compatible and justifies its performance and reliability // worth us to implement. -constexpr uint32 kArrayRecordV1 = 1; +constexpr uint32_t kArrayRecordV1 = 1; // Magic number for ArrayRecord constexpr uint64_t kMagic = 0x71930e704fdae05eULL; @@ -185,7 +185,7 @@ void ArrayRecordReaderBase::Initialize() { } uint32_t max_parallelism = 1; if (state_->pool) { - max_parallelism = state_->pool->num_threads(); + max_parallelism = state_->pool->NumThreads(); if (state_->options.max_parallelism().has_value()) { max_parallelism = std::min(max_parallelism, state_->options.max_parallelism().value()); @@ -491,12 +491,12 @@ bool ArrayRecordReaderBase::SeekRecord(uint64_t record_index) { return true; } -bool ArrayRecordReaderBase::ReadRecord(proto2::MessageLite* record) { +bool ArrayRecordReaderBase::ReadRecord(google::protobuf::MessageLite* record) { absl::string_view result_view; if (!ReadRecord(&result_view)) { return false; } - return record->ParsePartialFromString(result_view); + return record->ParsePartialFromString(result_view.data()); } bool ArrayRecordReaderBase::ReadRecord(absl::string_view* record) { diff --git a/cpp/array_record_reader.h b/cpp/array_record_reader.h index 9c32e55..67cacb1 100644 --- a/cpp/array_record_reader.h +++ b/cpp/array_record_reader.h @@ -42,18 +42,18 @@ limitations under the License. #include #include "google/protobuf/message_lite.h" -#include "third_party/absl/functional/function_ref.h" -#include "third_party/absl/status/statusor.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/array_record/cpp/layout.proto.h" -#include "third_party/array_record/cpp/thread_compatible_shared_ptr.h" -#include "third_party/array_record/cpp/thread_pool.h" -#include "third_party/riegeli/base/object.h" -#include "third_party/riegeli/bytes/reader.h" -#include "third_party/riegeli/chunk_encoding/chunk_decoder.h" -#include "third_party/riegeli/records/chunk_reader.h" -#include "third_party/riegeli/records/record_reader.h" +#include "absl/functional/function_ref.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "cpp/common.h" +#include "cpp/layout.pb.h" +#include "cpp/thread_compatible_shared_ptr.h" +#include "cpp/thread_pool.h" +#include "riegeli/base/object.h" +#include "riegeli/bytes/reader.h" +#include "riegeli/chunk_encoding/chunk_decoder.h" +#include "riegeli/records/chunk_reader.h" +#include "riegeli/records/record_reader.h" namespace array_record { @@ -138,7 +138,7 @@ class ArrayRecordReaderBase : public riegeli::Object { // query `status()` for diagnosing. template >> + std::is_base_of_v>> absl::Status ParallelReadRecords(FunctionT callback) const { return ParallelReadRecords( [&](uint64_t record_idx, absl::string_view record) -> absl::Status { @@ -195,7 +195,7 @@ class ArrayRecordReaderBase : public riegeli::Object { // query `status()` for diagnosing. template >> + std::is_base_of_v>> absl::Status ParallelReadRecordsWithIndices( absl::Span indices, FunctionT callback) const { return ParallelReadRecordsWithIndices( @@ -234,7 +234,7 @@ class ArrayRecordReaderBase : public riegeli::Object { // `true` (when `ok()`, `record` is set) - success // `false` (when `ok()`) - data ends // `false` (when `!ok()`) - failure - bool ReadRecord(proto2::MessageLite* record); + bool ReadRecord(google::protobuf::MessageLite* record); // Reads thenext record `RecordIndex()` pointed to. // diff --git a/cpp/array_record_reader_test.cc b/cpp/array_record_reader_test.cc index 051ac46..26c133b 100644 --- a/cpp/array_record_reader_test.cc +++ b/cpp/array_record_reader_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/array_record_reader.h" +#include "cpp/array_record_reader.h" #include #include @@ -22,20 +22,20 @@ limitations under the License. #include #include -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/absl/functional/function_ref.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/array_record/cpp/array_record_writer.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/array_record/cpp/layout.proto.h" -#include "third_party/array_record/cpp/test_utils.h" -#include "third_party/array_record/cpp/thread_pool.h" -#include "third_party/riegeli/bytes/string_reader.h" -#include "third_party/riegeli/bytes/string_writer.h" -#include "third_party/riegeli/chunk_encoding/chunk_decoder.h" -#include "third_party/riegeli/records/chunk_reader.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/functional/function_ref.h" +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "cpp/array_record_writer.h" +#include "cpp/common.h" +#include "cpp/layout.pb.h" +#include "cpp/test_utils.h" +#include "cpp/thread_pool.h" +#include "riegeli/bytes/string_reader.h" +#include "riegeli/bytes/string_writer.h" +#include "riegeli/chunk_encoding/chunk_decoder.h" +#include "riegeli/records/chunk_reader.h" constexpr uint32_t kDatasetSize = 10000; @@ -92,13 +92,16 @@ TEST_P(ArrayRecordReaderTest, MoveTest) { auto reader_before_move = ArrayRecordReader>( std::forward_as_tuple(encoded), ArrayRecordReaderBase::Options(), use_thread_pool() ? get_pool() : nullptr); - ASSERT_OK(reader_before_move.status()); + ASSERT_TRUE(reader_before_move.status().ok()); - ASSERT_OK(reader_before_move.ParallelReadRecords( - [&](uint64_t record_index, absl::string_view record) -> absl::Status { - EXPECT_EQ(record, test_str[record_index]); - return absl::OkStatus(); - })); + ASSERT_TRUE( + reader_before_move + .ParallelReadRecords([&](uint64_t record_index, + absl::string_view record) -> absl::Status { + EXPECT_EQ(record, test_str[record_index]); + return absl::OkStatus(); + }) + .ok()); ArrayRecordReader> reader = std::move(reader_before_move); @@ -106,12 +109,15 @@ TEST_P(ArrayRecordReaderTest, MoveTest) { ASSERT_FALSE(reader_before_move.is_open()); // NOLINT std::vector indices = {1, 2, 4}; - ASSERT_OK(reader.ParallelReadRecordsWithIndices( - indices, - [&](uint64_t indices_idx, absl::string_view record) -> absl::Status { - EXPECT_EQ(record, test_str[indices[indices_idx]]); - return absl::OkStatus(); - })); + ASSERT_TRUE(reader + .ParallelReadRecordsWithIndices( + indices, + [&](uint64_t indices_idx, + absl::string_view record) -> absl::Status { + EXPECT_EQ(record, test_str[indices[indices_idx]]); + return absl::OkStatus(); + }) + .ok()); absl::string_view record_view; for (auto i : IndicesOf(test_str)) { @@ -157,23 +163,28 @@ TEST_P(ArrayRecordReaderTest, RandomDatasetTest) { std::forward_as_tuple(encoded), ArrayRecordReaderBase::Options().set_readahead_buffer_size(2048), use_thread_pool() ? get_pool() : nullptr); - ASSERT_OK(reader.status()); + ASSERT_TRUE(reader.status().ok()); EXPECT_EQ(reader.NumRecords(), kDatasetSize); - ASSERT_OK(reader.ParallelReadRecords( - [&](uint64_t record_index, - absl::string_view result_view) -> absl::Status { - EXPECT_EQ(result_view, records[record_index]); - return absl::OkStatus(); - })); + ASSERT_TRUE(reader + .ParallelReadRecords( + [&](uint64_t record_index, + absl::string_view result_view) -> absl::Status { + EXPECT_EQ(result_view, records[record_index]); + return absl::OkStatus(); + }) + .ok()); std::vector indices = {0, 3, 5, 7, 101, 2000}; - ASSERT_OK(reader.ParallelReadRecordsWithIndices( - indices, - [&](uint64_t indices_idx, absl::string_view result_view) -> absl::Status { - EXPECT_EQ(result_view, records[indices[indices_idx]]); - return absl::OkStatus(); - })); + ASSERT_TRUE(reader + .ParallelReadRecordsWithIndices( + indices, + [&](uint64_t indices_idx, + absl::string_view result_view) -> absl::Status { + EXPECT_EQ(result_view, records[indices[indices_idx]]); + return absl::OkStatus(); + }) + .ok()); // Test sequential read absl::string_view result_view; diff --git a/cpp/array_record_writer.cc b/cpp/array_record_writer.cc index 5cf1ea5..6b77edc 100644 --- a/cpp/array_record_writer.cc +++ b/cpp/array_record_writer.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/array_record_writer.h" +#include "cpp/array_record_writer.h" #include #include @@ -28,26 +28,26 @@ limitations under the License. #include #include "google/protobuf/message_lite.h" -#include "third_party/absl/base/thread_annotations.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/status/statusor.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/absl/synchronization/mutex.h" -#include "third_party/absl/types/span.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/array_record/cpp/layout.proto.h" -#include "third_party/array_record/cpp/sequenced_chunk_writer.h" -#include "third_party/array_record/cpp/thread_pool.h" -#include "third_party/riegeli/base/object.h" -#include "third_party/riegeli/base/options_parser.h" -#include "third_party/riegeli/bytes/chain_writer.h" -#include "third_party/riegeli/chunk_encoding/chunk.h" -#include "third_party/riegeli/chunk_encoding/chunk_encoder.h" -#include "third_party/riegeli/chunk_encoding/compressor_options.h" -#include "third_party/riegeli/chunk_encoding/constants.h" -#include "third_party/riegeli/chunk_encoding/deferred_encoder.h" -#include "third_party/riegeli/chunk_encoding/simple_encoder.h" -#include "third_party/riegeli/chunk_encoding/transpose_encoder.h" +#include "absl/base/thread_annotations.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" +#include "cpp/common.h" +#include "cpp/layout.pb.h" +#include "cpp/sequenced_chunk_writer.h" +#include "cpp/thread_pool.h" +#include "riegeli/base/object.h" +#include "riegeli/base/options_parser.h" +#include "riegeli/bytes/chain_writer.h" +#include "riegeli/chunk_encoding/chunk.h" +#include "riegeli/chunk_encoding/chunk_encoder.h" +#include "riegeli/chunk_encoding/compressor_options.h" +#include "riegeli/chunk_encoding/constants.h" +#include "riegeli/chunk_encoding/deferred_encoder.h" +#include "riegeli/chunk_encoding/simple_encoder.h" +#include "riegeli/chunk_encoding/transpose_encoder.h" namespace array_record { @@ -203,7 +203,7 @@ class ArrayRecordWriterBase::SubmitChunkCallback const CompressorOptions compression_options_; absl::Mutex mu_; - const uint32_t max_parallelism_; + const int32_t max_parallelism_; int32_t num_concurrent_chunk_writers_ ABSL_GUARDED_BY(mu_) = 0; friend class absl::Condition; @@ -261,7 +261,7 @@ ArrayRecordWriterBase& ArrayRecordWriterBase::operator=( void ArrayRecordWriterBase::Initialize() { uint32_t max_parallelism = 1; if (pool_) { - max_parallelism = pool_->num_threads(); + max_parallelism = pool_->NumThreads(); if (options_.max_parallelism().has_value()) { max_parallelism = std::min(max_parallelism, options_.max_parallelism().value()); @@ -333,7 +333,7 @@ std::unique_ptr ArrayRecordWriterBase::CreateEncoder() { return encoder; } -bool ArrayRecordWriterBase::WriteRecord(const proto2::MessageLite& record) { +bool ArrayRecordWriterBase::WriteRecord(const google::protobuf::MessageLite& record) { return WriteRecordImpl(record); } @@ -358,23 +358,25 @@ bool ArrayRecordWriterBase::WriteRecordImpl(Record&& record) { if (chunk_encoder_->num_records() >= options_.group_size()) { auto writer = get_writer(); auto encoder = std::move(chunk_encoder_); - std::promise> chunk_promise; - if (!writer->CommitFutureChunk(chunk_promise.get_future())) { + auto chunk_promise = + std::make_shared>>(); + if (!writer->CommitFutureChunk(chunk_promise->get_future())) { Fail(writer->status()); return false; } chunk_encoder_ = CreateEncoder(); if (pool_) { + std::shared_ptr shared_encoder = + std::move(encoder); submit_chunk_callback_->TrackConcurrentChunkWriters(); - pool_->Schedule([writer, encoder = std::move(encoder), - chunk_promise = std::move(chunk_promise)]() mutable { + pool_->Schedule([writer, shared_encoder, chunk_promise]() mutable { AR_ENDO_TASK("Encode riegeli chunk"); - chunk_promise.set_value(EncodeChunk(encoder.get())); + chunk_promise->set_value(EncodeChunk(shared_encoder.get())); writer->SubmitFutureChunks(false); }); return true; } - chunk_promise.set_value(EncodeChunk(encoder.get())); + chunk_promise->set_value(EncodeChunk(encoder.get())); if (!writer->SubmitFutureChunks(true)) { Fail(writer->status()); return false; diff --git a/cpp/array_record_writer.h b/cpp/array_record_writer.h index ff08c7a..d40bca5 100644 --- a/cpp/array_record_writer.h +++ b/cpp/array_record_writer.h @@ -65,17 +65,16 @@ limitations under the License. #include #include "google/protobuf/message_lite.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/array_record/cpp/sequenced_chunk_writer.h" -#include "third_party/array_record/cpp/thread_pool.h" -#include "third_party/riegeli/base/base.h" -#include "third_party/riegeli/base/object.h" -#include "third_party/riegeli/bytes/file_writer.h" -#include "third_party/riegeli/chunk_encoding/chunk.h" -#include "third_party/riegeli/chunk_encoding/chunk_encoder.h" -#include "third_party/riegeli/chunk_encoding/compressor_options.h" -#include "third_party/riegeli/records/records_metadata.proto.h" +#include "absl/strings/string_view.h" +#include "cpp/common.h" +#include "cpp/sequenced_chunk_writer.h" +#include "cpp/thread_pool.h" +#include "riegeli/base/base.h" +#include "riegeli/base/object.h" +#include "riegeli/chunk_encoding/chunk.h" +#include "riegeli/chunk_encoding/chunk_encoder.h" +#include "riegeli/chunk_encoding/compressor_options.h" +#include "riegeli/records/records_metadata.pb.h" namespace array_record { @@ -280,7 +279,7 @@ class ArrayRecordWriterBase : public riegeli::Object { }; // Write records of various types. - bool WriteRecord(const proto2::MessageLite& record); + bool WriteRecord(const google::protobuf::MessageLite& record); bool WriteRecord(absl::string_view record); bool WriteRecord(const void* data, size_t num_bytes); template @@ -315,7 +314,7 @@ class ArrayRecordWriterBase : public riegeli::Object { class SubmitChunkCallback; Options options_; - ThreadPool* pool_; + ARThreadPool* pool_; std::unique_ptr chunk_encoder_; std::unique_ptr submit_chunk_callback_; }; @@ -361,7 +360,7 @@ class ArrayRecordWriter : public ArrayRecordWriterBase { // Ctor by taking the ownership of the other riegeli writer. explicit ArrayRecordWriter(Dest&& dest, Options options = Options(), - ThreadPool* pool = nullptr) + ARThreadPool* pool = nullptr) : ArrayRecordWriterBase(std::move(options), pool), dest_(std::make_shared>(std::move(dest))) { Initialize(); @@ -371,7 +370,7 @@ class ArrayRecordWriter : public ArrayRecordWriterBase { template explicit ArrayRecordWriter(std::tuple dest_args, Options options = Options(), - ThreadPool* pool = nullptr) + ARThreadPool* pool = nullptr) : ArrayRecordWriterBase(std::move(options), pool), dest_(std::make_shared>( std::move(dest_args))) { diff --git a/cpp/array_record_writer_test.cc b/cpp/array_record_writer_test.cc index 507f822..8582873 100644 --- a/cpp/array_record_writer_test.cc +++ b/cpp/array_record_writer_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/array_record_writer.h" +#include "cpp/array_record_writer.h" #include #include @@ -22,19 +22,19 @@ limitations under the License. #include #include -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/array_record/cpp/layout.proto.h" -#include "third_party/array_record/cpp/test_utils.h" -#include "third_party/array_record/cpp/thread_pool.h" -#include "third_party/riegeli/bytes/string_reader.h" -#include "third_party/riegeli/bytes/string_writer.h" -#include "third_party/riegeli/chunk_encoding/constants.h" -#include "third_party/riegeli/records/record_reader.h" -#include "third_party/riegeli/records/record_writer.h" -#include "third_party/riegeli/records/records_metadata.proto.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "cpp/common.h" +#include "cpp/layout.pb.h" +#include "cpp/test_utils.h" +#include "cpp/thread_pool.h" +#include "riegeli/bytes/string_reader.h" +#include "riegeli/bytes/string_writer.h" +#include "riegeli/chunk_encoding/constants.h" +#include "riegeli/records/record_reader.h" +#include "riegeli/records/record_writer.h" +#include "riegeli/records/records_metadata.pb.h" namespace array_record { @@ -101,7 +101,7 @@ TEST_P(ArrayRecordWriterTest, MoveTest) { // Once moved we can no longer write records. EXPECT_FALSE(writer.WriteRecord(test_str[3])); - ASSERT_OK(moved_writer.status()); + ASSERT_TRUE(moved_writer.status().ok()); EXPECT_TRUE(moved_writer.WriteRecord(test_str[3])); EXPECT_TRUE(moved_writer.WriteRecord(test_str[4])); ASSERT_TRUE(moved_writer.Close()); @@ -175,7 +175,7 @@ TEST_P(ArrayRecordWriterTest, RandomDatasetTest) { // Verify we can access the file randomly by chunk_offset recorded in the // footer - for (int i = 0; i < num_chunks; ++i) { + for (auto i = 0UL; i < num_chunks; ++i) { ASSERT_TRUE(reader.Seek(footers[i].chunk_offset())); absl::string_view result_view; ASSERT_TRUE(reader.ReadRecord(result_view)) << reader.status(); diff --git a/cpp/common.h b/cpp/common.h index 9fb74f4..0bffea9 100644 --- a/cpp/common.h +++ b/cpp/common.h @@ -16,9 +16,9 @@ limitations under the License. #ifndef ARRAY_RECORD_CPP_COMMON_H_ #define ARRAY_RECORD_CPP_COMMON_H_ -#include "third_party/absl/base/attributes.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/strings/str_format.h" +#include "absl/base/attributes.h" +#include "absl/status/status.h" +#include "absl/strings/str_format.h" namespace array_record { diff --git a/cpp/layout.proto b/cpp/layout.proto index 0641045..c088e98 100644 --- a/cpp/layout.proto +++ b/cpp/layout.proto @@ -20,8 +20,6 @@ syntax = "proto2"; package array_record; -option cc_api_version = 2; - // Riegeli files are composed in data chunks. Each data chunk contains multiple // records, and a record can be a serialized proto (with a size limit of 2GB) or // arbitrary bytes without size limits. diff --git a/cpp/masked_reader.cc b/cpp/masked_reader.cc index 5b2b44c..cac73ec 100644 --- a/cpp/masked_reader.cc +++ b/cpp/masked_reader.cc @@ -13,19 +13,19 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/masked_reader.h" +#include "cpp/masked_reader.h" #include #include #include -#include "third_party/absl/memory/memory.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/time/clock.h" -#include "third_party/absl/time/time.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/riegeli/base/base.h" -#include "third_party/riegeli/base/status.h" +#include "absl/memory/memory.h" +#include "absl/status/status.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "cpp/common.h" +#include "riegeli/base/base.h" +#include "riegeli/base/status.h" namespace array_record { diff --git a/cpp/masked_reader.h b/cpp/masked_reader.h index 19ad94e..e60661f 100644 --- a/cpp/masked_reader.h +++ b/cpp/masked_reader.h @@ -19,9 +19,9 @@ limitations under the License. #include #include -#include "third_party/absl/types/optional.h" -#include "third_party/riegeli/base/base.h" -#include "third_party/riegeli/bytes/reader.h" +#include "absl/types/optional.h" +#include "riegeli/base/base.h" +#include "riegeli/bytes/reader.h" namespace array_record { diff --git a/cpp/masked_reader_test.cc b/cpp/masked_reader_test.cc index f9366f0..298ff89 100644 --- a/cpp/masked_reader_test.cc +++ b/cpp/masked_reader_test.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/masked_reader.h" +#include "cpp/masked_reader.h" #include #include -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/riegeli/bytes/string_reader.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "riegeli/bytes/string_reader.h" namespace array_record { namespace { diff --git a/cpp/parallel_for.h b/cpp/parallel_for.h index 6a545c1..1c9390f 100644 --- a/cpp/parallel_for.h +++ b/cpp/parallel_for.h @@ -25,12 +25,12 @@ limitations under the License. #include #include -#include "third_party/absl/base/thread_annotations.h" -#include "third_party/absl/functional/function_ref.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/synchronization/mutex.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/array_record/cpp/thread_pool.h" +#include "absl/base/thread_annotations.h" +#include "absl/functional/function_ref.h" +#include "absl/status/status.h" +#include "absl/synchronization/mutex.h" +#include "cpp/common.h" +#include "cpp/thread_pool.h" namespace array_record { diff --git a/cpp/parallel_for_test.cc b/cpp/parallel_for_test.cc index 68a0458..a878828 100644 --- a/cpp/parallel_for_test.cc +++ b/cpp/parallel_for_test.cc @@ -14,7 +14,7 @@ limitations under the License. ==============================================================================*/ // Tests for parallel_for.h. -#include "third_party/array_record/cpp/parallel_for.h" +#include "cpp/parallel_for.h" #include #include @@ -23,11 +23,11 @@ limitations under the License. #include #include -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/absl/functional/function_ref.h" -#include "third_party/absl/status/status.h" -#include "third_party/array_record/cpp/thread_pool.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/functional/function_ref.h" +#include "absl/status/status.h" +#include "cpp/thread_pool.h" namespace array_record { diff --git a/cpp/sequenced_chunk_writer.cc b/cpp/sequenced_chunk_writer.cc index 8754868..6095ad7 100644 --- a/cpp/sequenced_chunk_writer.cc +++ b/cpp/sequenced_chunk_writer.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/sequenced_chunk_writer.h" +#include "cpp/sequenced_chunk_writer.h" #include #include // NOLINT(build/c++11) @@ -21,16 +21,16 @@ limitations under the License. #include #include -#include "third_party/absl/base/thread_annotations.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/status/statusor.h" -#include "third_party/absl/strings/str_format.h" -#include "third_party/absl/strings/str_join.h" -#include "third_party/absl/synchronization/mutex.h" -#include "third_party/riegeli/base/base.h" -#include "third_party/riegeli/base/status.h" -#include "third_party/riegeli/chunk_encoding/chunk.h" -#include "third_party/riegeli/chunk_encoding/constants.h" +#include "absl/base/thread_annotations.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/synchronization/mutex.h" +#include "riegeli/base/base.h" +#include "riegeli/base/status.h" +#include "riegeli/chunk_encoding/chunk.h" +#include "riegeli/chunk_encoding/constants.h" namespace array_record { diff --git a/cpp/sequenced_chunk_writer.h b/cpp/sequenced_chunk_writer.h index 5190cfa..38f96ba 100644 --- a/cpp/sequenced_chunk_writer.h +++ b/cpp/sequenced_chunk_writer.h @@ -31,16 +31,16 @@ limitations under the License. #include #include -#include "third_party/absl/base/thread_annotations.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/status/statusor.h" -#include "third_party/absl/synchronization/mutex.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/riegeli/base/base.h" -#include "third_party/riegeli/base/object.h" -#include "third_party/riegeli/bytes/writer.h" -#include "third_party/riegeli/chunk_encoding/chunk.h" -#include "third_party/riegeli/records/chunk_writer.h" +#include "absl/base/thread_annotations.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/synchronization/mutex.h" +#include "cpp/common.h" +#include "riegeli/base/base.h" +#include "riegeli/base/object.h" +#include "riegeli/bytes/writer.h" +#include "riegeli/chunk_encoding/chunk.h" +#include "riegeli/records/chunk_writer.h" namespace array_record { diff --git a/cpp/sequenced_chunk_writer_test.cc b/cpp/sequenced_chunk_writer_test.cc index 9d275e7..8f8ace3 100644 --- a/cpp/sequenced_chunk_writer_test.cc +++ b/cpp/sequenced_chunk_writer_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/sequenced_chunk_writer.h" +#include "cpp/sequenced_chunk_writer.h" #include // NOLINT(build/c++11) #include @@ -21,22 +21,21 @@ limitations under the License. #include #include -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/status/status.h" -#include "third_party/absl/strings/cord.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/array_record/cpp/thread_pool.h" -#include "third_party/riegeli/bytes/chain_writer.h" -#include "third_party/riegeli/bytes/cord_writer.h" -#include "third_party/riegeli/bytes/file_writer.h" -#include "third_party/riegeli/bytes/string_reader.h" -#include "third_party/riegeli/bytes/string_writer.h" -#include "third_party/riegeli/chunk_encoding/chunk.h" -#include "third_party/riegeli/chunk_encoding/compressor_options.h" -#include "third_party/riegeli/chunk_encoding/simple_encoder.h" -#include "third_party/riegeli/records/record_reader.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/status/status.h" +#include "absl/strings/cord.h" +#include "cpp/common.h" +#include "cpp/thread_pool.h" +#include "riegeli/bytes/chain_writer.h" +#include "riegeli/bytes/cord_writer.h" +#include "riegeli/bytes/string_reader.h" +#include "riegeli/bytes/string_writer.h" +#include "riegeli/chunk_encoding/chunk.h" +#include "riegeli/chunk_encoding/compressor_options.h" +#include "riegeli/chunk_encoding/simple_encoder.h" +#include "riegeli/records/record_reader.h" namespace array_record { namespace { @@ -70,14 +69,6 @@ TEST(SequencedChunkWriterTest, RvalCtorTest) { std::make_unique>>( std::move(cord_writer)); } - { - File* tmp_file = - file::CreateTempFile(absl::GetFlag(FLAGS_test_tmpdir)).ValueOrDie(); - auto file_writer = riegeli::FileWriter(tmp_file); - auto to_file = - std::make_unique>>( - std::move(file_writer)); - } } TEST(SequencedChunkWriterTest, DestArgsCtorTest) { @@ -124,7 +115,6 @@ class TestCommitChunkCallback }; TEST(SequencedChunkWriterTest, SanityTestCodeSnippet) { - auto pool = ArrayRecordGlobalPool(); std::string encoded; auto callback = TestCommitChunkCallback(); @@ -152,10 +142,8 @@ TEST(SequencedChunkWriterTest, SanityTestCodeSnippet) { return chunk; }); ASSERT_TRUE(writer->CommitFutureChunk(encoding_task.get_future())); - pool->Schedule([=, encoding_task = std::move(encoding_task)]() mutable { - encoding_task(); - writer->SubmitFutureChunks(false); - }); + encoding_task(); + writer->SubmitFutureChunks(false); } // Calling SubmitFutureChunks(true) blocks the current thread until all // encoding tasks complete. diff --git a/cpp/test_utils.cc b/cpp/test_utils.cc index ffb0f5d..ec76e3f 100644 --- a/cpp/test_utils.cc +++ b/cpp/test_utils.cc @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/test_utils.h" +#include "cpp/test_utils.h" #include #include -#include "third_party/array_record/cpp/common.h" +#include "cpp/common.h" namespace array_record { diff --git a/cpp/test_utils_test.cc b/cpp/test_utils_test.cc index a24e528..47cfab6 100644 --- a/cpp/test_utils_test.cc +++ b/cpp/test_utils_test.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/test_utils.h" +#include "cpp/test_utils.h" #include -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/array_record/cpp/common.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "cpp/common.h" namespace array_record { namespace { diff --git a/cpp/thread_compatible_shared_ptr.h b/cpp/thread_compatible_shared_ptr.h index 38123ad..1886024 100644 --- a/cpp/thread_compatible_shared_ptr.h +++ b/cpp/thread_compatible_shared_ptr.h @@ -47,7 +47,7 @@ limitations under the License. #include #include -#include "third_party/absl/synchronization/mutex.h" +#include "absl/synchronization/mutex.h" namespace array_record { diff --git a/cpp/thread_compatible_shared_ptr_test.cc b/cpp/thread_compatible_shared_ptr_test.cc index ab09505..b492431 100644 --- a/cpp/thread_compatible_shared_ptr_test.cc +++ b/cpp/thread_compatible_shared_ptr_test.cc @@ -13,18 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/thread_compatible_shared_ptr.h" +#include "cpp/thread_compatible_shared_ptr.h" #include #include #include -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/absl/time/clock.h" -#include "third_party/absl/time/time.h" -#include "third_party/array_record/cpp/common.h" -#include "third_party/array_record/cpp/thread_pool.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "cpp/common.h" +#include "cpp/thread_pool.h" namespace array_record { namespace { diff --git a/cpp/thread_pool.cc b/cpp/thread_pool.cc index 33b6ec5..7bc66f0 100644 --- a/cpp/thread_pool.cc +++ b/cpp/thread_pool.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "third_party/array_record/cpp/thread_pool.h" +#include "cpp/thread_pool.h" -#include "third_party/absl/flags/flag.h" +#include "absl/flags/flag.h" ABSL_FLAG(uint32_t, array_record_global_pool_size, 64, "Number of threads for ArrayRecordGlobalPool"); diff --git a/python/BUILD b/python/BUILD index bbb2583..0b54355 100644 --- a/python/BUILD +++ b/python/BUILD @@ -10,19 +10,21 @@ pybind_extension( name = "array_record_module", srcs = ["array_record_module.cc"], deps = [ - "//third_party/absl/status", - "//third_party/absl/strings", - "//third_party/array_record/cpp:array_record_reader", - "//third_party/array_record/cpp:array_record_writer", - "//third_party/array_record/cpp:thread_pool", - "//third_party/riegeli/bytes:fd_reader", - "//third_party/riegeli/bytes:fd_writer", + "//cpp:array_record_reader", + "//cpp:array_record_writer", + "//cpp:thread_pool", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@com_google_riegeli//riegeli/bytes:fd_reader", + "@com_google_riegeli//riegeli/bytes:fd_writer", ], ) -py3_test( +py_test( name = "array_record_module_test", srcs = ["array_record_module_test.py"], + python_version = "PY3", + srcs_version = "PY3", deps = [ ":array_record_module", "//third_party/py/absl/testing:absltest", diff --git a/python/array_record_module.cc b/python/array_record_module.cc index 923c303..430cecf 100644 --- a/python/array_record_module.cc +++ b/python/array_record_module.cc @@ -17,18 +17,17 @@ limitations under the License. #include #include -#include "third_party/absl/status/status.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/array_record/cpp/array_record_reader.h" -#include "third_party/array_record/cpp/array_record_writer.h" -#include "third_party/array_record/cpp/thread_pool.h" -#include "third_party/pybind11/include/pybind11/pytypes.h" -#include "third_party/riegeli/bytes/fd_reader.h" -#include "third_party/riegeli/bytes/fd_writer.h" - +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "cpp/array_record_reader.h" +#include "cpp/array_record_writer.h" +#include "cpp/thread_pool.h" #include "pybind11/gil.h" #include "pybind11/pybind11.h" +#include "pybind11/pytypes.h" #include "pybind11/stl.h" +#include "riegeli/bytes/fd_reader.h" +#include "riegeli/bytes/fd_writer.h" namespace py = pybind11; diff --git a/python/array_record_module_test.py b/python/array_record_module_test.py index 2383ac3..c242aa2 100644 --- a/python/array_record_module_test.py +++ b/python/array_record_module_test.py @@ -18,8 +18,8 @@ from absl.testing import absltest -from array_record.python.array_record_module import ArrayRecordReader -from array_record.python.array_record_module import ArrayRecordWriter +from python.array_record_module import ArrayRecordReader +from python.array_record_module import ArrayRecordWriter class ArrayRecordModuleTest(absltest.TestCase):