From 5d2e93e4709ac84787150ae7a86e5cb513fb8f90 Mon Sep 17 00:00:00 2001 From: Vineeth Yeevani Date: Sun, 29 Sep 2024 14:00:40 -0700 Subject: [PATCH] - support for macos - patch protobuf to prevent racy initialization in python - removing eigen thread pool dependency to eliminate faults with copying on macos. seemingly related to libc++ vs libstdc++ - adding some vscode debugging - added multi-platform support - adding support for building the wheel directly from bazel to avoid more scripts - adding git workflow for macos and linux --- .bazelrc | 15 +- .github/workflows/build.yml | 74 ++++++ .gitignore | 8 + .vscode/launch.json | 22 ++ __init__.py => .vscode/settings.json | 0 BUILD | 28 ++- MODULE.bazel | 37 +++ MODULE.bazel.lock | 229 ++++++++++++++++++ WORKSPACE | 152 ------------ array_record/BUILD | 6 + {python => array_record}/__init__.py | 0 array_record/beam/BUILD | 16 ++ {beam => array_record/beam}/README.md | 0 {beam => array_record/beam}/__init__.py | 0 {beam => array_record/beam}/arrayrecordio.py | 0 {beam => array_record/beam}/demo.py | 0 {beam => array_record/beam}/dofns.py | 0 {beam => array_record/beam}/example.py | 0 .../beam}/examples/example_full_demo_cli.sh | 0 .../beam}/examples/example_gcs_conversion.py | 0 .../beam}/examples/example_sink_conversion.py | 0 .../beam}/examples/requirements.txt | 0 {beam => array_record/beam}/options.py | 0 {beam => array_record/beam}/pipelines.py | 0 {beam => array_record/beam}/testdata.py | 0 {python => array_record/python}/BUILD | 26 +- array_record/python/__init__.py | 0 .../python}/array_record_data_source.py | 2 +- .../python}/array_record_data_source_test.py | 4 +- .../python}/array_record_module.cc | 0 .../python}/array_record_module_test.py | 1 - .../python}/testdata/BUILD | 0 .../digits.array_record-00000-of-00002 | Bin .../digits.array_record-00001-of-00002 | Bin setup.py => array_record/setup.py | 1 - cpp/BUILD | 10 +- cpp/array_record_reader.cc | 34 +-- cpp/array_record_reader.h | 2 +- 
cpp/array_record_writer.cc | 2 +- cpp/shareable_dependency.h | 10 +- cpp/shareable_dependency_test.cc | 14 +- cpp/thread_pool.cc | 55 ++++- cpp/thread_pool.h | 41 +++- oss/README.md | 15 -- oss/build.Dockerfile | 33 --- oss/build_whl.sh | 89 ------- oss/runner_common.sh | 44 ---- protobuf.diff | 17 ++ requirements.in | 1 + requirements_lock.txt | 26 ++ 50 files changed, 616 insertions(+), 398 deletions(-) create mode 100644 .github/workflows/build.yml create mode 100644 .gitignore create mode 100644 .vscode/launch.json rename __init__.py => .vscode/settings.json (100%) create mode 100644 MODULE.bazel create mode 100644 MODULE.bazel.lock delete mode 100644 WORKSPACE create mode 100644 array_record/BUILD rename {python => array_record}/__init__.py (100%) create mode 100644 array_record/beam/BUILD rename {beam => array_record/beam}/README.md (100%) rename {beam => array_record/beam}/__init__.py (100%) rename {beam => array_record/beam}/arrayrecordio.py (100%) rename {beam => array_record/beam}/demo.py (100%) rename {beam => array_record/beam}/dofns.py (100%) rename {beam => array_record/beam}/example.py (100%) rename {beam => array_record/beam}/examples/example_full_demo_cli.sh (100%) rename {beam => array_record/beam}/examples/example_gcs_conversion.py (100%) rename {beam => array_record/beam}/examples/example_sink_conversion.py (100%) rename {beam => array_record/beam}/examples/requirements.txt (100%) rename {beam => array_record/beam}/options.py (100%) rename {beam => array_record/beam}/pipelines.py (100%) rename {beam => array_record/beam}/testdata.py (100%) rename {python => array_record/python}/BUILD (64%) create mode 100644 array_record/python/__init__.py rename {python => array_record/python}/array_record_data_source.py (99%) rename {python => array_record/python}/array_record_data_source_test.py (99%) rename {python => array_record/python}/array_record_module.cc (100%) rename {python => array_record/python}/array_record_module_test.py (99%) rename {python => 
array_record/python}/testdata/BUILD (100%) rename {python => array_record/python}/testdata/digits.array_record-00000-of-00002 (100%) rename {python => array_record/python}/testdata/digits.array_record-00001-of-00002 (100%) rename setup.py => array_record/setup.py (95%) delete mode 100644 oss/README.md delete mode 100644 oss/build.Dockerfile delete mode 100755 oss/build_whl.sh delete mode 100644 oss/runner_common.sh create mode 100644 protobuf.diff create mode 100644 requirements.in create mode 100644 requirements_lock.txt diff --git a/.bazelrc b/.bazelrc index d60866a..b8e0f8c 100644 --- a/.bazelrc +++ b/.bazelrc @@ -1,10 +1,11 @@ -build -c opt +common --enable_bzlmod +build -c dbg build --cxxopt=-std=c++17 build --host_cxxopt=-std=c++17 build --experimental_repo_remote_exec - -# TODO(fchern): Use non-hardcode path. -build --action_env=PYTHON_BIN_PATH="/usr/bin/python3" -build --action_env=PYTHON_LIB_PATH="/usr/lib/python3" -build --repo_env=PYTHON_BIN_PATH="/usr/bin/python3" -build --python_path="/usr/bin/python3" +build --copt=-pthread +build --features=debug_prefix_map_pwd_is_dot +build --apple_generate_dsym +build --strip=never +build --features=oso_prefix_is_pwd +build --spawn_strategy=local \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..6e98f52 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,74 @@ +name: Build and Save Python Wheel + +on: + push: + branches: + - '**' # Build on all branches + tags: + - 'v*' # Trigger the release workflow only when a tag is pushed (e.g., v1.0.0) + +jobs: + build: + strategy: + matrix: + os: [macos-latest, ubuntu-latest] # Specify both macOS and Linux environments + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 # Ensures that the full history is cloned to handle any branch-specific build needs + + - name: Setup Python + uses: actions/setup-python@v5 + with: 
+ python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install setuptools wheel + + # Run the build script and capture the output directory + - name: Run build script + id: build + run: | + bazel build ... + + - name: Run Bazel Tests + run: | + bazel test ... + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + name: built-wheels-${{ matrix.os }} + path: bazel-bin/*.whl + + release: + runs-on: ubuntu-latest + needs: build + if: startsWith(github.ref, 'refs/tags/v') # Only run this job if a tag starting with "v" is pushed + steps: + - name: Download Artifacts from macOS + uses: actions/download-artifact@v3 + with: + name: built-wheels-macos-latest + path: ./artifacts/macos + + - name: Download Artifacts from Linux + uses: actions/download-artifact@v3 + with: + name: built-wheels-ubuntu-latest + path: ./artifacts/linux + + - name: Create GitHub Release + uses: softprops/action-gh-release@v1 + with: + tag_name: ${{ github.ref_name }} # Use the current tag name as the release version + files: | + artifacts/macos/*.whl + artifacts/linux/*.whl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3b20fcd --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +# Bazel related files +bazel-* +*.bazel +bazel-bin/ +bazel-out/ +bazel-testlogs/ +bazel-genfiles/ +# End Generation Here diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..43f44bf --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,22 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Debug Sharedable dependency test", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/bazel-bin/cpp/shareable_dependency_test", + "args": [], + "stopAtEntry": false, + "cwd": "${fileDirname}", + "environment": [], + "externalConsole": false, + "MIMode": "lldb", + "targetArchitecture": "arm64" + } + + ] + } \ No newline at end of file diff --git a/__init__.py b/.vscode/settings.json similarity index 100% rename from __init__.py rename to .vscode/settings.json diff --git a/BUILD b/BUILD index f20b80d..c43020b 100644 --- a/BUILD +++ b/BUILD @@ -1,6 +1,28 @@ +load("@rules_python//python:defs.bzl", "py_binary", "py_test") +load("@rules_python//python:pip.bzl", "compile_pip_requirements") +load("@rules_python//python:packaging.bzl", "py_wheel", 'py_package') +package(default_visibility = ["//visibility:public"]) -py_library( - name = "setup", - srcs = ["setup.py"], +compile_pip_requirements( + name = "requirements", + src = "requirements.in", + requirements_txt = "requirements_lock.txt", +) + +py_wheel( + name = "array_record_wheel", + distribution = "array_record", + version = "0.6.0", + platform = select({ + "@platforms//os:macos": "macosx_14_0_arm64", + "@platforms//os:linux": "manylinux2014_x86_64", + }), + deps = [ + "//array_record/python:array_record_data_source", + "//array_record/python:array_record_module", + "//array_record/python:init", + "//array_record/beam:beam", + "//array_record:package_info", + ], ) diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000..7ff89bb --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,37 @@ +module( + name = "array_record", + version = "0.1.0", +) + +bazel_dep(name = "abseil-cpp", version = "20240116.2", repo_name = "com_google_absl") +bazel_dep(name = "abseil-py", version = "2.1.0", repo_name = "com_google_absl_py") +bazel_dep(name = "brotli", 
version = "1.1.0") +bazel_dep(name = "googletest", version = "1.15.2", repo_name = "com_google_googletest") +bazel_dep(name = "protobuf", version = "24.4", repo_name = "com_google_protobuf") +single_version_override(module_name="protobuf", patches=["//:protobuf.diff"], patch_strip=1) +bazel_dep(name = "riegeli", version = "0.0.0-20240606-973b6f0", repo_name = "com_google_riegeli") +bazel_dep(name = "highwayhash", version = "0.0.0-20240305-5ad3bf8") +bazel_dep(name = "rules_python", version = "0.36.0") +bazel_dep(name = "pybind11_bazel", version = "2.12.0") +bazel_dep(name = "platforms", version = "0.0.10") + +SUPPORTED_PYTHON_VERSIONS = [ + "3.10", +] +DEFAULT_PYTHON_VERSION = SUPPORTED_PYTHON_VERSIONS[-1] +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +[ + python.toolchain( + python_version = version, + is_default = version == DEFAULT_PYTHON_VERSION, + ) + for version in SUPPORTED_PYTHON_VERSIONS +] + +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") +pip.parse( + hub_name = "pypi_array_record", + python_version = "3.10", + requirements_lock = "//:requirements_lock.txt", +) +use_repo(pip, "pypi_array_record") \ No newline at end of file diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock new file mode 100644 index 0000000..aa7a48a --- /dev/null +++ b/MODULE.bazel.lock @@ -0,0 +1,229 @@ +{ + "lockFileVersion": 11, + "registryFileHashes": { + "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497", + "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2", + "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589", + "https://bcr.bazel.build/modules/abseil-cpp/20230125.1/MODULE.bazel": "89047429cb0207707b2dface14ba7f8df85273d484c2572755be4bab7ce9c3a0", + 
"https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/MODULE.bazel": "1c8cec495288dccd14fdae6e3f95f772c1c91857047a098fad772034264cc8cb", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/MODULE.bazel": "d253ae36a8bd9ee3c5955384096ccb6baf16a1b1e93e858370da0a3b94f77c16", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.2/MODULE.bazel": "73939767a4686cd9a520d16af5ab440071ed75cec1a876bf2fcfaf1f71987a16", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.2/source.json": "750d5e29326fb59cbe61116a7b803c8a1d0a7090a9c8ed89888d188e3c473fc7", + "https://bcr.bazel.build/modules/abseil-py/2.1.0/MODULE.bazel": "5ebe5bf853769c65707e5c28f216798f7a4b1042015e6a36e6d03094d94bec8a", + "https://bcr.bazel.build/modules/abseil-py/2.1.0/source.json": "0e8fc4f088ce07099c1cd6594c20c7ddbb48b4b3c0849b7d94ba94be88ff042b", + "https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85", + "https://bcr.bazel.build/modules/apple_support/1.15.1/source.json": "517f2b77430084c541bc9be2db63fdcbb7102938c5f64c17ee60ffda2e5cf07b", + "https://bcr.bazel.build/modules/apple_support/1.5.0/MODULE.bazel": "50341a62efbc483e8a2a6aec30994a58749bd7b885e18dd96aa8c33031e558ef", + "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd", + "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8", + "https://bcr.bazel.build/modules/bazel_features/1.11.0/source.json": "c9320aa53cd1c441d24bd6b716da087ad7e4ff0d9742a9884587596edfe53015", + "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a", + "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": "bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8", + 
"https://bcr.bazel.build/modules/bazel_skylib/1.1.1/MODULE.bazel": "1add3e7d93ff2e6998f9e118022c84d163917d912f5afafb3058e3d2f1545b5e", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.0/MODULE.bazel": "44fe84260e454ed94ad326352a698422dbe372b21a1ac9f3eab76eb531223686", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a", + "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5", + "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d", + "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138", + "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917", + "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/source.json": "082ed5f9837901fada8c68c2f3ddc958bb22b6d654f71dd73f3df30d45d4b749", + "https://bcr.bazel.build/modules/boringssl/0.0.0-20240530-2db0eb3/MODULE.bazel": "d0405b762c5e87cd445b7015f2b8da5400ef9a8dbca0bfefa6c1cea79d528a97", + "https://bcr.bazel.build/modules/boringssl/0.0.0-20240530-2db0eb3/source.json": "0d413869349e82e5d679802abe9ce23e0326bbf56daa97ae9e7dbdcec72982fc", + "https://bcr.bazel.build/modules/brotli/1.1.0/MODULE.bazel": "3b5b90488995183419c4b5c9b063a164f6c0bc4d0d6b40550a612a5e860cc0fe", + "https://bcr.bazel.build/modules/brotli/1.1.0/source.json": "098a4fd315527166e8dfe1fd1537c96a737a83764be38fc43f4da231d600f3d0", + "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", + "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8", + "https://bcr.bazel.build/modules/bzip2/1.0.8/MODULE.bazel": 
"83ee443b286b0b91566e5ee77e74ba6445895f3135467893871560f9e4ebc159", + "https://bcr.bazel.build/modules/bzip2/1.0.8/source.json": "b64f3a2f973749cf5f6ee32b3d804af56a35a746228a7845ed5daa31c8cc8af1", + "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb", + "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4", + "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6", + "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f", + "https://bcr.bazel.build/modules/googletest/1.15.2/MODULE.bazel": "6de1edc1d26cafb0ea1a6ab3f4d4192d91a312fd2d360b63adaa213cd00b2108", + "https://bcr.bazel.build/modules/googletest/1.15.2/source.json": "dbdda654dcb3a0d7a8bc5d0ac5fc7e150b58c2a986025ae5bc634bb2cb61f470", + "https://bcr.bazel.build/modules/highwayhash/0.0.0-20240305-5ad3bf8/MODULE.bazel": "5c7f29d5bd70feff14b0f65b39584957e18e4a8d555e5a29a4c36019afbb44b9", + "https://bcr.bazel.build/modules/highwayhash/0.0.0-20240305-5ad3bf8/source.json": "211c0937ef5f537da6c3c135d12e60927c71b380642e207e4a02b86d29c55e85", + "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902", + "https://bcr.bazel.build/modules/lz4/1.9.4/MODULE.bazel": "e3d307b1d354d70f6c809167eafecf5d622c3f27e3971ab7273410f429c7f83a", + "https://bcr.bazel.build/modules/lz4/1.9.4/source.json": "233f0bdfc21f254e3dda14683ddc487ca68c6a3a83b7d5db904c503f85bd089b", + "https://bcr.bazel.build/modules/platforms/0.0.10/MODULE.bazel": "8cb8efaf200bdeb2150d93e162c40f388529a25852b332cec879373771e48ed5", + "https://bcr.bazel.build/modules/platforms/0.0.10/source.json": "f22828ff4cf021a6b577f1bf6341cb9dcd7965092a439f64fc1bb3b7a5ae4bd5", + 
"https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee", + "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37", + "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": "ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615", + "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814", + "https://bcr.bazel.build/modules/platforms/0.0.8/MODULE.bazel": "9f142c03e348f6d263719f5074b21ef3adf0b139ee4c5133e2aa35664da9eb2d", + "https://bcr.bazel.build/modules/platforms/0.0.9/MODULE.bazel": "4a87a60c927b56ddd67db50c89acaa62f4ce2a1d2149ccb63ffd871d5ce29ebc", + "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7", + "https://bcr.bazel.build/modules/protobuf/23.1/MODULE.bazel": "88b393b3eb4101d18129e5db51847cd40a5517a53e81216144a8c32dfeeca52a", + "https://bcr.bazel.build/modules/protobuf/24.4/MODULE.bazel": "7bc7ce5f2abf36b3b7b7c8218d3acdebb9426aeb35c2257c96445756f970eb12", + "https://bcr.bazel.build/modules/protobuf/24.4/source.json": "ace4b8c65d4cfe64efe544f09fc5e5df77faf3a67fbb29c5341e0d755d9b15d6", + "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0", + "https://bcr.bazel.build/modules/protobuf/3.19.6/MODULE.bazel": "9233edc5e1f2ee276a60de3eaa47ac4132302ef9643238f23128fea53ea12858", + "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e", + "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/MODULE.bazel": "e6f4c20442eaa7c90d7190d8dc539d0ab422f95c65a57cc59562170c58ae3d34", + "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/source.json": 
"6900fdc8a9e95866b8c0d4ad4aba4d4236317b5c1cd04c502df3f0d33afed680", + "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206", + "https://bcr.bazel.build/modules/re2/2024-07-02/MODULE.bazel": "0eadc4395959969297cbcf31a249ff457f2f1d456228c67719480205aa306daa", + "https://bcr.bazel.build/modules/re2/2024-07-02/source.json": "547d0111a9d4f362db32196fef805abbf3676e8d6afbe44d395d87816c1130ca", + "https://bcr.bazel.build/modules/riegeli/0.0.0-20240606-973b6f0/MODULE.bazel": "3e8067b12d3a3bb4bc297b29c66a778af0c1da0cddbfde37d18c077ffc365602", + "https://bcr.bazel.build/modules/riegeli/0.0.0-20240606-973b6f0/source.json": "7383c71350a45496e37b4e55974fb5c9e90ec7397070a4d6fc7c9a00117f95cf", + "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647", + "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c", + "https://bcr.bazel.build/modules/rules_cc/0.0.6/MODULE.bazel": "abf360251023dfe3efcef65ab9d56beefa8394d4176dd29529750e1c57eaa33f", + "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e", + "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5", + "https://bcr.bazel.build/modules/rules_cc/0.0.9/source.json": "1f1ba6fea244b616de4a554a0f4983c91a9301640c8fe0dd1d410254115c8430", + "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6", + "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74", + "https://bcr.bazel.build/modules/rules_java/7.1.0/MODULE.bazel": "30d9135a2b6561c761bd67bd4990da591e6bdc128790ce3e7afd6a3558b2fb64", + 
"https://bcr.bazel.build/modules/rules_java/7.6.5/MODULE.bazel": "481164be5e02e4cab6e77a36927683263be56b7e36fef918b458d7a8a1ebadb1", + "https://bcr.bazel.build/modules/rules_java/7.6.5/source.json": "a805b889531d1690e3c72a7a7e47a870d00323186a9904b36af83aa3d053ee8d", + "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7", + "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909", + "https://bcr.bazel.build/modules/rules_jvm_external/5.1/source.json": "5abb45cc9beb27b77aec6a65a11855ef2b55d95dfdc358e9f312b78ae0ba32d5", + "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0", + "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d", + "https://bcr.bazel.build/modules/rules_license/0.0.7/source.json": "355cc5737a0f294e560d52b1b7a6492d4fff2caf0bef1a315df5a298fca2d34a", + "https://bcr.bazel.build/modules/rules_pkg/0.7.0/MODULE.bazel": "df99f03fc7934a4737122518bb87e667e62d780b610910f0447665a7e2be62dc", + "https://bcr.bazel.build/modules/rules_pkg/0.7.0/source.json": "c2557066e0c0342223ba592510ad3d812d4963b9024831f7f66fd0584dd8c66c", + "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06", + "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7", + "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/MODULE.bazel": "1e5b502e2e1a9e825eef74476a5a1ee524a92297085015a052510b09a1a09483", + "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/source.json": "8d8448e71706df7450ced227ca6b3812407ff5e2ccad74a43a9fbe79c84e34e0", + "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": 
"cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f", + "https://bcr.bazel.build/modules/rules_python/0.22.1/MODULE.bazel": "26114f0c0b5e93018c0c066d6673f1a2c3737c7e90af95eff30cfee38d0bbac7", + "https://bcr.bazel.build/modules/rules_python/0.25.0/MODULE.bazel": "72f1506841c920a1afec76975b35312410eea3aa7b63267436bfb1dd91d2d382", + "https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": "cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed", + "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58", + "https://bcr.bazel.build/modules/rules_python/0.33.2/MODULE.bazel": "3e036c4ad8d804a4dad897d333d8dce200d943df4827cb849840055be8d2e937", + "https://bcr.bazel.build/modules/rules_python/0.36.0/MODULE.bazel": "a4ce1ccea92b9106c7d16ab9ee51c6183107e78ba4a37aa65055227b80cd480c", + "https://bcr.bazel.build/modules/rules_python/0.36.0/source.json": "b79cbb7b2ae1751949e2f6ee6692822e4ffd13ca1e959ce99abec4ac7666162a", + "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c", + "https://bcr.bazel.build/modules/snappy/1.2.0/MODULE.bazel": "cc7a727b46089c7fdae0ede21b1fd65bdb14d01823da118ef5c48044f40b6b27", + "https://bcr.bazel.build/modules/snappy/1.2.0/source.json": "17f5527e15d30a9d9eebf79ed73b280b56cac44f8c8fea696666d99943f84c33", + "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", + "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", + "https://bcr.bazel.build/modules/stardoc/0.5.3/source.json": "cd53fe968dc8cd98197c052db3db6d82562960c87b61e7a90ee96f8e4e0dda97", + "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43", + 
"https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/MODULE.bazel": "c0df5e35ad55e264160417fd0875932ee3c9dda63d9fccace35ac62f45e1b6f9", + "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/source.json": "b2150404947339e8b947c6b16baa39fa75657f4ddec5e37272c7b11c7ab533bc", + "https://bcr.bazel.build/modules/xz/5.4.5.bcr.1/MODULE.bazel": "c037f75fa1b7e1ff15fbd15d807a8ce545e9b02f02df0a9777aa9aa7d8b268bb", + "https://bcr.bazel.build/modules/xz/5.4.5.bcr.1/source.json": "766f28499a16fa9ed8dc94382d50e80ceda0d0ab80b79b7b104a67074ab10e1f", + "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0", + "https://bcr.bazel.build/modules/zlib/1.2.12/MODULE.bazel": "3b1a8834ada2a883674be8cbd36ede1b6ec481477ada359cd2d3ddc562340b27", + "https://bcr.bazel.build/modules/zlib/1.2.13/MODULE.bazel": "aa6deb1b83c18ffecd940c4119aff9567cd0a671d7bba756741cb2ef043a29d5", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.3/MODULE.bazel": "af322bc08976524477c79d1e45e241b6efbeb918c497e8840b8ab116802dda79", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.3/source.json": "2be409ac3c7601245958cd4fcdff4288be79ed23bd690b4b951f500d54ee6e7d", + "https://bcr.bazel.build/modules/zstd/1.5.6/MODULE.bazel": "471ebe7d3cdd8c6469390fcf623eb4779ff55fbee0a87f1dc57a1def468b96d4", + "https://bcr.bazel.build/modules/zstd/1.5.6/source.json": "02010c3333fc89b44fe861db049968decb6e688411f7f9d4f6791d74f9adfb51" + }, + "selectedYankedVersions": {}, + "moduleExtensions": { + "@@apple_support~//crosstool:setup.bzl%apple_cc_configure_extension": { + "general": { + "bzlTransitiveDigest": "ltCGFbl/LQQZXn/LEMXfKX7pGwyqNiOCHcmiQW0tmjM=", + "usagesDigest": "RkqDb8JtSSm4rLheCLMw/Dx3QQE7dZbl4taOVEYaQZg=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "local_config_apple_cc": { + "bzlFile": "@@apple_support~//crosstool:setup.bzl", + "ruleClassName": "_apple_cc_autoconf", + 
"attributes": {} + }, + "local_config_apple_cc_toolchains": { + "bzlFile": "@@apple_support~//crosstool:setup.bzl", + "ruleClassName": "_apple_cc_autoconf_toolchains", + "attributes": {} + } + }, + "recordedRepoMappingEntries": [ + [ + "apple_support~", + "bazel_tools", + "bazel_tools" + ] + ] + } + }, + "@@platforms//host:extension.bzl%host_platform": { + "general": { + "bzlTransitiveDigest": "xelQcPZH8+tmuOHVjL9vDxMnnQNMlwj0SlvgoqBkm4U=", + "usagesDigest": "V1R2Y2oMxKNfx2WCWpSCaUV1WefW1o8HZGm3v1vHgY4=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "host_platform": { + "bzlFile": "@@platforms//host:extension.bzl", + "ruleClassName": "host_platform_repo", + "attributes": {} + } + }, + "recordedRepoMappingEntries": [] + } + }, + "@@protobuf~//:non_module_deps.bzl%non_module_deps": { + "general": { + "bzlTransitiveDigest": "jsbfONl9OksDWiAs7KDFK5chH/tYI3DngdM30NKdk5Y=", + "usagesDigest": "eVrT3hFCIZNRuTKpfWDzSIwTi2p6U6PWbt+tNWl/Tqk=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "utf8_range": { + "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", + "ruleClassName": "http_archive", + "attributes": { + "urls": [ + "https://github.com/protocolbuffers/utf8_range/archive/de0b4a8ff9b5d4c98108bdfe723291a33c52c54f.zip" + ], + "strip_prefix": "utf8_range-de0b4a8ff9b5d4c98108bdfe723291a33c52c54f", + "sha256": "5da960e5e5d92394c809629a03af3c7709d2d3d0ca731dacb3a9fb4bf28f7702" + } + } + }, + "recordedRepoMappingEntries": [ + [ + "protobuf~", + "bazel_tools", + "bazel_tools" + ] + ] + } + }, + "@@pybind11_bazel~//:internal_configure.bzl%internal_configure_extension": { + "general": { + "bzlTransitiveDigest": "+F47SE20NlARCHVGbd4r7kkjg4OA0eCJcOd5fqKq4fQ=", + "usagesDigest": "iH2lKTfsNEpn2MqtGpBNwJrxbb2C7DiYmh/XuKgDtr8=", + "recordedFileInputs": { + "@@pybind11_bazel~//MODULE.bazel": 
"e6f4c20442eaa7c90d7190d8dc539d0ab422f95c65a57cc59562170c58ae3d34" + }, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "pybind11": { + "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", + "ruleClassName": "http_archive", + "attributes": { + "build_file": "@@pybind11_bazel~//:pybind11-BUILD.bazel", + "strip_prefix": "pybind11-2.12.0", + "urls": [ + "https://github.com/pybind/pybind11/archive/v2.12.0.zip" + ] + } + } + }, + "recordedRepoMappingEntries": [ + [ + "pybind11_bazel~", + "bazel_tools", + "bazel_tools" + ] + ] + } + } + } +} diff --git a/WORKSPACE b/WORKSPACE deleted file mode 100644 index 7cfe32b..0000000 --- a/WORKSPACE +++ /dev/null @@ -1,152 +0,0 @@ -workspace(name = "array_record") - -load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - -# Abseil LTS 20230125.0 -http_archive( - name = "com_google_absl", - sha256 = "3ea49a7d97421b88a8c48a0de16c16048e17725c7ec0f1d3ea2683a2a75adc21", # SHARED_ABSL_SHA - strip_prefix = "abseil-cpp-20230125.0", - urls = [ - "https://github.com/abseil/abseil-cpp/archive/refs/tags/20230125.0.tar.gz", - ], -) -# Version: pypi-v0.11.0, 2020/10/27 -git_repository( - name = "com_google_absl_py", - remote = "https://github.com/abseil/abseil-py", - commit = "127c98870edf5f03395ce9cf886266fa5f24455e", -) -# Needed by com_google_riegeli -http_archive( - name = "org_brotli", - sha256 = "84a9a68ada813a59db94d83ea10c54155f1d34399baf377842ff3ab9b3b3256e", - strip_prefix = "brotli-3914999fcc1fda92e750ef9190aa6db9bf7bdb07", - urls = ["https://github.com/google/brotli/archive/3914999fcc1fda92e750ef9190aa6db9bf7bdb07.zip"], # 2022-11-17 -) -# GoogleTest/GoogleMock framework. Used by most unit-tests. 
-http_archive( - name = "com_google_googletest", - urls = ["https://github.com/google/googletest/archive/main.zip"], - strip_prefix = "googletest-main", -) - -# V3.4.0, 20210818 -http_archive( - name = "eigen3", - sha256 = "b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626", - strip_prefix = "eigen-3.4.0", - urls = [ - "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.bz2", - ], - build_file_content = -""" -cc_library( - name = 'eigen3', - srcs = [], - includes = ['.'], - hdrs = glob(['Eigen/**', 'unsupported/Eigen/**']), - visibility = ['//visibility:public'], -) -""" -) - -# `pybind11_bazel` (https://github.com/pybind/pybind11_bazel): 20230130 -http_archive( - name = "pybind11_bazel", - strip_prefix = "pybind11_bazel-5f458fa53870223a0de7eeb60480dd278b442698", - sha256 = "b35f3abc3d52ee5c753fdeeb2b5129b99e796558754ca5d245e28e51c1072a21", - urls = ["https://github.com/pybind/pybind11_bazel/archive/5f458fa53870223a0de7eeb60480dd278b442698.tar.gz"], -) -# V2.10.3, 20230130 -http_archive( - name = "pybind11", - build_file = "@pybind11_bazel//:pybind11.BUILD", - strip_prefix = "pybind11-2.10.3", - sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb", - urls = ["https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip"], -) -load("@pybind11_bazel//:python_configure.bzl", "python_configure") -python_configure(name = "local_config_python") - -# V21.12, 20230130 -# proto_library, cc_proto_library, and java_proto_library rules implicitly -# depend on @com_google_protobuf for protoc and proto runtimes. -# This statement defines the @com_google_protobuf repo. 
-http_archive( - name = "com_google_protobuf", - sha256 = "22fdaf641b31655d4b2297f9981fa5203b2866f8332d3c6333f6b0107bb320de", - strip_prefix = "protobuf-21.12", - urls = ["https://github.com/protocolbuffers/protobuf/archive/v21.12.tar.gz"], -) - -load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") -protobuf_deps() - -# Riegeli does not cut releases, so we reference the head -http_archive( - name = "com_google_riegeli", - strip_prefix = "riegeli-master", - urls = [ - "https://github.com/google/riegeli/archive/master.zip", - ], -) -# Riegeli's dependencies -http_archive( - name = "net_zstd", - build_file = "@com_google_riegeli//third_party:net_zstd.BUILD", - sha256 = "b6c537b53356a3af3ca3e621457751fa9a6ba96daf3aebb3526ae0f610863532", - strip_prefix = "zstd-1.4.5/lib", - urls = ["https://github.com/facebook/zstd/archive/v1.4.5.zip"], # 2020-05-22 -) -http_archive( - name = "lz4", - build_file = "@com_google_riegeli//third_party:lz4.BUILD", - sha256 = "4ec935d99aa4950eadfefbd49c9fad863185ac24c32001162c44a683ef61b580", - strip_prefix = "lz4-1.9.3/lib", - urls = ["https://github.com/lz4/lz4/archive/refs/tags/v1.9.3.zip"], # 2020-11-16 -) -http_archive( - name = "snappy", - build_file = "@com_google_riegeli//third_party:snappy.BUILD", - sha256 = "7ee7540b23ae04df961af24309a55484e7016106e979f83323536a1322cedf1b", - strip_prefix = "snappy-1.2.0", - urls = ["https://github.com/google/snappy/archive/1.2.0.zip"], # 2024-04-04 -) -http_archive( - name = "crc32c", - build_file = "@com_google_riegeli//third_party:crc32.BUILD", - sha256 = "338f1d9d95753dc3cdd882dfb6e176bbb4b18353c29c411ebcb7b890f361722e", - strip_prefix = "crc32c-1.1.0", - urls = ["https://github.com/google/crc32c/archive/1.1.0.zip"], # 2019-05-24 -) -http_archive( - name = "zlib", - build_file = "@com_google_riegeli//third_party:zlib.BUILD", - sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", - strip_prefix = "zlib-1.2.11", - urls = 
["http://zlib.net/fossils/zlib-1.2.11.tar.gz"], # 2017-01-15 -) -http_archive( - name = "highwayhash", - build_file = "@com_google_riegeli//third_party:highwayhash.BUILD", - sha256 = "5380cb7cf19e7c9591f31792b7794d48084f6a3ab7c03d637cd6a32cf2ee8686", - strip_prefix = "highwayhash-a7f68e2f95fac08b24327d74747521cf634d5aff", - urls = ["https://github.com/google/highwayhash/archive/a7f68e2f95fac08b24327d74747521cf634d5aff.zip"], # 2023-08-09 -) - -# Tensorflow, 20230705 -http_archive( - name = "org_tensorflow", - strip_prefix = "tensorflow-2.12.1", - sha256 = "63025cb60d00d9aa7a88807651305a38abb9bb144464e2419c03f13a089d19a6", - urls = ["https://github.com/tensorflow/tensorflow/archive/v2.12.1.zip"], -) - -# This import (along with the org_tensorflow archive) is necessary to provide the devtoolset-9 toolchain -load("@org_tensorflow//tensorflow/tools/toolchains/remote_config:configs.bzl", "initialize_rbe_configs") # buildifier: disable=load-on-top -load("@org_tensorflow//tensorflow/tools/toolchains:cpus/aarch64/aarch64_compiler_configure.bzl", "aarch64_compiler_configure") # buildifier: disable=load-on-top - -initialize_rbe_configs() -aarch64_compiler_configure() diff --git a/array_record/BUILD b/array_record/BUILD new file mode 100644 index 0000000..6977c27 --- /dev/null +++ b/array_record/BUILD @@ -0,0 +1,6 @@ +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "package_info", + srcs = ["setup.py", "__init__.py"], +) \ No newline at end of file diff --git a/python/__init__.py b/array_record/__init__.py similarity index 100% rename from python/__init__.py rename to array_record/__init__.py diff --git a/array_record/beam/BUILD b/array_record/beam/BUILD new file mode 100644 index 0000000..115a5bd --- /dev/null +++ b/array_record/beam/BUILD @@ -0,0 +1,16 @@ +load("@rules_python//python:defs.bzl", "py_library") +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "beam", + srcs = [ + "__init__.py", + "arrayrecordio.py", + 
"demo.py", + "dofns.py", + "example.py", + "options.py", + "pipelines.py", + "testdata.py", + ] +) \ No newline at end of file diff --git a/beam/README.md b/array_record/beam/README.md similarity index 100% rename from beam/README.md rename to array_record/beam/README.md diff --git a/beam/__init__.py b/array_record/beam/__init__.py similarity index 100% rename from beam/__init__.py rename to array_record/beam/__init__.py diff --git a/beam/arrayrecordio.py b/array_record/beam/arrayrecordio.py similarity index 100% rename from beam/arrayrecordio.py rename to array_record/beam/arrayrecordio.py diff --git a/beam/demo.py b/array_record/beam/demo.py similarity index 100% rename from beam/demo.py rename to array_record/beam/demo.py diff --git a/beam/dofns.py b/array_record/beam/dofns.py similarity index 100% rename from beam/dofns.py rename to array_record/beam/dofns.py diff --git a/beam/example.py b/array_record/beam/example.py similarity index 100% rename from beam/example.py rename to array_record/beam/example.py diff --git a/beam/examples/example_full_demo_cli.sh b/array_record/beam/examples/example_full_demo_cli.sh similarity index 100% rename from beam/examples/example_full_demo_cli.sh rename to array_record/beam/examples/example_full_demo_cli.sh diff --git a/beam/examples/example_gcs_conversion.py b/array_record/beam/examples/example_gcs_conversion.py similarity index 100% rename from beam/examples/example_gcs_conversion.py rename to array_record/beam/examples/example_gcs_conversion.py diff --git a/beam/examples/example_sink_conversion.py b/array_record/beam/examples/example_sink_conversion.py similarity index 100% rename from beam/examples/example_sink_conversion.py rename to array_record/beam/examples/example_sink_conversion.py diff --git a/beam/examples/requirements.txt b/array_record/beam/examples/requirements.txt similarity index 100% rename from beam/examples/requirements.txt rename to array_record/beam/examples/requirements.txt diff --git a/beam/options.py 
b/array_record/beam/options.py similarity index 100% rename from beam/options.py rename to array_record/beam/options.py diff --git a/beam/pipelines.py b/array_record/beam/pipelines.py similarity index 100% rename from beam/pipelines.py rename to array_record/beam/pipelines.py diff --git a/beam/testdata.py b/array_record/beam/testdata.py similarity index 100% rename from beam/testdata.py rename to array_record/beam/testdata.py diff --git a/python/BUILD b/array_record/python/BUILD similarity index 64% rename from python/BUILD rename to array_record/python/BUILD index c207fa1..c00227e 100644 --- a/python/BUILD +++ b/array_record/python/BUILD @@ -1,6 +1,7 @@ # Python binding for ArrayRecord - load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") +load("@pypi_array_record//:requirements.bzl", "requirement") +load("@rules_python//python:packaging.bzl", "py_wheel") package(default_visibility = ["//visibility:public"]) @@ -18,15 +19,17 @@ pybind_extension( "@com_google_riegeli//riegeli/bytes:fd_reader", "@com_google_riegeli//riegeli/bytes:fd_writer", ], + visibility = ["//visibility:public"], ) py_test( name = "array_record_module_test", srcs = ["array_record_module_test.py"], - data = [":array_record_module.so"], + data = [":array_record_module"], deps = [ "@com_google_absl_py//absl/testing:absltest", ], + imports = ["../.."] ) py_library( @@ -34,23 +37,32 @@ py_library( srcs = ["array_record_data_source.py"], data = [":array_record_module.so"], deps = [ - # Implicit etils (/epath) dependency. 
+ requirement("etils") ], + visibility = ["//visibility:public"], +) + +py_library( + name = "init", + srcs = ["__init__.py"], + visibility = ["//visibility:public"], ) py_test( name = "array_record_data_source_test", srcs = ["array_record_data_source_test.py"], - args = ["--test_srcdir=python/testdata"], + args = ["--test_srcdir=array_record/python/testdata"], data = [ - ":array_record_module.so", - "//python/testdata:digits.array_record-00000-of-00002", - "//python/testdata:digits.array_record-00001-of-00002", + ":array_record_module", + "//array_record/python/testdata:digits.array_record-00000-of-00002", + "//array_record/python/testdata:digits.array_record-00001-of-00002", ], deps = [ + ":init", ":array_record_data_source", "@com_google_absl_py//absl/testing:absltest", "@com_google_absl_py//absl/testing:flagsaver", "@com_google_absl_py//absl/testing:parameterized", ], + imports = ["../.."] ) diff --git a/array_record/python/__init__.py b/array_record/python/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/array_record_data_source.py b/array_record/python/array_record_data_source.py similarity index 99% rename from python/array_record_data_source.py rename to array_record/python/array_record_data_source.py index 9ffe588..c21af5f 100644 --- a/python/array_record_data_source.py +++ b/array_record/python/array_record_data_source.py @@ -46,7 +46,7 @@ def __getitem__(self, record_keys: Sequence[int]) -> Sequence[T]: from absl import logging from etils import epath -from . 
import array_record_module +from array_record.python import array_record_module # TODO(jolesiak): Decide what to do with these flags, e.g., remove them (could # be appropriate if we decide to use asyncio) or move them somewhere else and diff --git a/python/array_record_data_source_test.py b/array_record/python/array_record_data_source_test.py similarity index 99% rename from python/array_record_data_source_test.py rename to array_record/python/array_record_data_source_test.py index 5196d99..98f422c 100644 --- a/python/array_record_data_source_test.py +++ b/array_record/python/array_record_data_source_test.py @@ -24,9 +24,7 @@ from absl.testing import flagsaver from absl.testing import parameterized -from array_record.python import array_record_data_source -from array_record.python import array_record_module - +from array_record.python import array_record_module, array_record_data_source FLAGS = flags.FLAGS diff --git a/python/array_record_module.cc b/array_record/python/array_record_module.cc similarity index 100% rename from python/array_record_module.cc rename to array_record/python/array_record_module.cc diff --git a/python/array_record_module_test.py b/array_record/python/array_record_module_test.py similarity index 99% rename from python/array_record_module_test.py rename to array_record/python/array_record_module_test.py index 4df8ff2..84969a9 100644 --- a/python/array_record_module_test.py +++ b/array_record/python/array_record_module_test.py @@ -21,7 +21,6 @@ from array_record.python.array_record_module import ArrayRecordReader from array_record.python.array_record_module import ArrayRecordWriter - class ArrayRecordModuleTest(absltest.TestCase): def setUp(self): diff --git a/python/testdata/BUILD b/array_record/python/testdata/BUILD similarity index 100% rename from python/testdata/BUILD rename to array_record/python/testdata/BUILD diff --git a/python/testdata/digits.array_record-00000-of-00002 
b/array_record/python/testdata/digits.array_record-00000-of-00002 similarity index 100% rename from python/testdata/digits.array_record-00000-of-00002 rename to array_record/python/testdata/digits.array_record-00000-of-00002 diff --git a/python/testdata/digits.array_record-00001-of-00002 b/array_record/python/testdata/digits.array_record-00001-of-00002 similarity index 100% rename from python/testdata/digits.array_record-00001-of-00002 rename to array_record/python/testdata/digits.array_record-00001-of-00002 diff --git a/setup.py b/array_record/setup.py similarity index 95% rename from setup.py rename to array_record/setup.py index cfb0bac..c182899 100644 --- a/setup.py +++ b/array_record/setup.py @@ -42,7 +42,6 @@ def has_ext_modules(self): classifiers=[ 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', ], zip_safe=False, distclass=BinaryDistribution, diff --git a/cpp/BUILD b/cpp/BUILD index adc266f..ff5c2a6 100644 --- a/cpp/BUILD +++ b/cpp/BUILD @@ -15,11 +15,6 @@ cc_proto_library( deps = [":layout_proto"], ) -go_proto_library( - name = "layout_go_proto", - deps = [":layout_proto"], -) - cc_library( name = "common", hdrs = ["common.h"], @@ -58,7 +53,6 @@ cc_library( hdrs = ["thread_pool.h"], deps = [ "@com_google_absl//absl/flags:flag", - "@eigen3//:eigen3", ], ) @@ -119,7 +113,7 @@ cc_library( ":sequenced_chunk_writer", ":shareable_dependency", ":thread_pool", - "//third_party/protobuf:protobuf_lite", + "@com_google_protobuf//:protobuf_lite", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", @@ -171,7 +165,7 @@ cc_library( ":parallel_for", ":shareable_dependency", ":thread_pool", - "//third_party/protobuf:protobuf_lite", + "@com_google_protobuf//:protobuf_lite", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/functional:function_ref", diff --git 
a/cpp/array_record_reader.cc b/cpp/array_record_reader.cc index b5731a0..37a8f28 100644 --- a/cpp/array_record_reader.cc +++ b/cpp/array_record_reader.cc @@ -40,7 +40,7 @@ limitations under the License. #include "cpp/masked_reader.h" #include "cpp/parallel_for.h" #include "cpp/thread_pool.h" -#include "third_party/protobuf/message_lite.h" +#include "google/protobuf/message_lite.h" #include "riegeli/base/object.h" #include "riegeli/base/options_parser.h" #include "riegeli/base/status.h" @@ -325,15 +325,15 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecords( return absl::OkStatus(); } uint64_t num_chunk_groups = - CeilOfRatio(state_->chunk_offsets.size(), state_->chunk_group_size); + CeilOfRatio(static_cast(state_->chunk_offsets.size()), static_cast(state_->chunk_group_size)); const auto reader = get_backing_reader(); auto status = ParallelForWithStatus<1>( - Seq(num_chunk_groups), state_->pool, [&](size_t buf_idx) -> absl::Status { + Seq(num_chunk_groups), state_->pool, absl::AnyInvocable([&](size_t buf_idx) -> absl::Status { uint64_t chunk_idx_start = buf_idx * state_->chunk_group_size; // inclusive index, not the conventional exclusive index. 
uint64_t last_chunk_idx = - std::min((buf_idx + 1) * state_->chunk_group_size - 1, - state_->chunk_offsets.size() - 1); + std::min(static_cast((buf_idx + 1) * state_->chunk_group_size - 1), + static_cast(state_->chunk_offsets.size() - 1)); uint64_t buf_len = state_->ChunkEndOffset(last_chunk_idx) - state_->chunk_offsets[chunk_idx_start]; AR_ENDO_JOB( @@ -379,7 +379,7 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecords( } } return absl::OkStatus(); - }); + })); return status; } @@ -404,13 +404,13 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecordsInRange( const auto reader = get_backing_reader(); auto status = ParallelForWithStatus<1>( - Seq(num_chunk_groups), state_->pool, [&](size_t buf_idx) -> absl::Status { + Seq(num_chunk_groups), state_->pool, absl::AnyInvocable([&](size_t buf_idx) -> absl::Status { uint64_t chunk_idx_start = chunk_idx_begin + buf_idx * state_->chunk_group_size; // inclusive index, not the conventional exclusive index. uint64_t last_chunk_idx = std::min( - chunk_idx_begin + (buf_idx + 1) * state_->chunk_group_size - 1, - chunk_idx_end - 1); + static_cast(chunk_idx_begin + (buf_idx + 1) * state_->chunk_group_size - 1), + static_cast(chunk_idx_end - 1)); uint64_t buf_len = state_->ChunkEndOffset(last_chunk_idx) - state_->chunk_offsets[chunk_idx_start]; AR_ENDO_JOB( @@ -466,7 +466,7 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecordsInRange( } } return absl::OkStatus(); - }); + })); return status; } @@ -528,7 +528,7 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecordsWithIndices( const auto reader = get_backing_reader(); auto status = ParallelForWithStatus<1>( IndicesOf(chunk_indices_per_buffer), state_->pool, - [&](size_t buf_idx) -> absl::Status { + absl::AnyInvocable([&](size_t buf_idx) -> absl::Status { auto buffer_chunks = absl::MakeConstSpan(chunk_indices_per_buffer[buf_idx]); uint64_t buf_len = state_->ChunkEndOffset(buffer_chunks.back()) - @@ -575,7 +575,7 @@ absl::Status 
ArrayRecordReaderBase::ParallelReadRecordsWithIndices( } } return absl::OkStatus(); - }); + })); return status; } @@ -654,8 +654,8 @@ bool ArrayRecordReaderBase::ReadAheadFromBuffer(uint64_t buffer_idx) { std::vector decoders; decoders.reserve(state_->chunk_group_size); uint64_t chunk_start = buffer_idx * state_->chunk_group_size; - uint64_t chunk_end = std::min(state_->chunk_offsets.size(), - (buffer_idx + 1) * state_->chunk_group_size); + uint64_t chunk_end = std::min(static_cast(state_->chunk_offsets.size()), + static_cast((buffer_idx + 1) * state_->chunk_group_size)); const auto reader = get_backing_reader(); for (uint64_t chunk_idx = chunk_start; chunk_idx < chunk_end; ++chunk_idx) { uint64_t chunk_offset = state_->chunk_offsets[chunk_idx]; @@ -674,7 +674,7 @@ bool ArrayRecordReaderBase::ReadAheadFromBuffer(uint64_t buffer_idx) { } // Used for running one extra task in this thread. - std::function current_task = []{}; + absl::AnyInvocable current_task = []{}; while (state_->future_decoders.size() < max_parallelism) { uint64_t buffer_to_add = buffer_idx + state_->future_decoders.size(); @@ -695,8 +695,8 @@ bool ArrayRecordReaderBase::ReadAheadFromBuffer(uint64_t buffer_idx) { chunk_offsets.reserve(state_->chunk_group_size); uint64_t chunk_start = buffer_to_add * state_->chunk_group_size; uint64_t chunk_end = - std::min(state_->chunk_offsets.size(), - (buffer_to_add + 1) * state_->chunk_group_size); + std::min(static_cast(state_->chunk_offsets.size()), + static_cast((buffer_to_add + 1) * state_->chunk_group_size)); for (uint64_t chunk_idx = chunk_start; chunk_idx < chunk_end; ++chunk_idx) { chunk_offsets.push_back(state_->chunk_offsets[chunk_idx]); } diff --git a/cpp/array_record_reader.h b/cpp/array_record_reader.h index 62d8116..d754759 100644 --- a/cpp/array_record_reader.h +++ b/cpp/array_record_reader.h @@ -48,7 +48,7 @@ limitations under the License. 
#include "cpp/common.h" #include "cpp/shareable_dependency.h" #include "cpp/thread_pool.h" -#include "third_party/protobuf/message_lite.h" +#include "google/protobuf/message_lite.h" #include "riegeli/base/initializer.h" #include "riegeli/base/object.h" #include "riegeli/bytes/reader.h" diff --git a/cpp/array_record_writer.cc b/cpp/array_record_writer.cc index e61a5e6..4e28c4c 100644 --- a/cpp/array_record_writer.cc +++ b/cpp/array_record_writer.cc @@ -39,7 +39,7 @@ limitations under the License. #include "cpp/layout.pb.h" #include "cpp/sequenced_chunk_writer.h" #include "cpp/thread_pool.h" -#include "third_party/protobuf/message_lite.h" +#include "google/protobuf/message_lite.h" #include "riegeli/base/object.h" #include "riegeli/base/options_parser.h" #include "riegeli/base/status.h" diff --git a/cpp/shareable_dependency.h b/cpp/shareable_dependency.h index 691f866..06615a6 100644 --- a/cpp/shareable_dependency.h +++ b/cpp/shareable_dependency.h @@ -169,7 +169,9 @@ template struct DependencyShare::Sharing { explicit Sharing(Handle handle) : handle(std::move(handle)) {} - void Ref() const { ref_count.Ref(); } + void Ref() const { + ref_count.Ref(); + } void Unref() const { // Notify the `ShareableDependency` if there are no more shares. 
absl::MutexLock l(&mu); @@ -178,8 +180,10 @@ struct DependencyShare::Sharing { << "The last DependencyShare outlived the ShareableDependency"; } } - bool HasUniqueOwner() const { return ref_count.HasUniqueOwner(); } - void WaitUntilUnique() const { + bool HasUniqueOwner() const { + return ref_count.HasUniqueOwner(); + } + void WaitUntilUnique() const { absl::MutexLock l(&mu, absl::Condition(this, &Sharing::HasUniqueOwner)); } diff --git a/cpp/shareable_dependency_test.cc b/cpp/shareable_dependency_test.cc index 2083dd7..19db07b 100644 --- a/cpp/shareable_dependency_test.cc +++ b/cpp/shareable_dependency_test.cc @@ -73,12 +73,12 @@ TEST_F(ShareableDependencyTest, SanityTest) { EXPECT_FALSE(new_main.IsUnique()); // NOLINT(bugprone-use-after-move) absl::Notification notification; - pool_->Schedule([refobj = main.Share(), &notification] { + pool_->Schedule([refobj = std::make_shared>(main.Share()), &notification] { notification.WaitForNotification(); absl::SleepFor(absl::Milliseconds(10)); - EXPECT_EQ(refobj->value(), 1); - const auto second_ref = refobj; - refobj->add_value(1); + EXPECT_EQ(refobj.get()->get()->value(), 1); + const auto second_ref = refobj.get(); + refobj.get()->get()->add_value(1); }); EXPECT_FALSE(main.IsUnique()); notification.Notify(); @@ -97,12 +97,12 @@ TEST_F(ShareableDependencyTest, SanityTestWithReset) { EXPECT_TRUE(main.IsUnique()); absl::Notification notification; - pool_->Schedule([refobj = main.Share(), &notification] { + pool_->Schedule([refobj = std::make_shared>(main.Share()), &notification] { notification.WaitForNotification(); absl::SleepFor(absl::Milliseconds(10)); - EXPECT_EQ(refobj->value(), 1); + EXPECT_EQ(refobj.get()->get()->value(), 1); const auto second_ref = refobj; - refobj->add_value(1); + refobj.get()->get()->add_value(1); }); EXPECT_FALSE(main.IsUnique()); notification.Notify(); diff --git a/cpp/thread_pool.cc b/cpp/thread_pool.cc index 7bc66f0..5be3662 100644 --- a/cpp/thread_pool.cc +++ b/cpp/thread_pool.cc @@ -1,3 +1,55 @@
+#include "thread_pool.h" + +ThreadPool::ThreadPool(int num_threads) : num_threads_(num_threads) { + threads_.reserve(num_threads); + for (int i = 0; i < num_threads; ++i) { + threads_.emplace_back(&ThreadPool::WorkLoop, this); + } +} + +ThreadPool::~ThreadPool() { + { + absl::MutexLock l(&mu_); + for (size_t i = 0; i < threads_.size(); ++i) { + queue_.push(nullptr); // Shutdown signal. + } + } + for (auto &t : threads_) { + t.join(); + } +} + +void ThreadPool::Schedule(absl::AnyInvocable func) { + assert(func != nullptr); + absl::MutexLock l(&mu_); + queue_.push(std::move(func)); +} + +bool ThreadPool::WorkAvailable() const { + return !queue_.empty(); +} + +void ThreadPool::WorkLoop() { + while (true) { + absl::AnyInvocable func; + { + absl::MutexLock l(&mu_); + mu_.Await(absl::Condition(this, &ThreadPool::WorkAvailable)); + func = std::move(queue_.front()); + queue_.pop(); + } + if (func == nullptr) { // Shutdown signal. + break; + } + func(); + } +} + +uint64_t ThreadPool::NumThreads() { + return this->num_threads_; +} + + /* Copyright 2022 Google LLC. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +66,6 @@ limitations under the License. 
==============================================================================*/ #include "cpp/thread_pool.h" - #include "absl/flags/flag.h" ABSL_FLAG(uint32_t, array_record_global_pool_size, 64, @@ -25,7 +76,7 @@ namespace array_record { ARThreadPool* ArrayRecordGlobalPool() { static ARThreadPool* pool_ = []() -> ARThreadPool* { ARThreadPool* pool = new - Eigen::ThreadPool(absl::GetFlag(FLAGS_array_record_global_pool_size)); + ThreadPool(absl::GetFlag(FLAGS_array_record_global_pool_size)); return pool; }(); return pool_; diff --git a/cpp/thread_pool.h b/cpp/thread_pool.h index decb28d..447070b 100644 --- a/cpp/thread_pool.h +++ b/cpp/thread_pool.h @@ -1,3 +1,40 @@ +#ifndef THREAD_POOL_H_ +#define THREAD_POOL_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/thread_annotations.h" +#include "absl/functional/any_invocable.h" +#include "absl/synchronization/mutex.h" + +// A simple ThreadPool implementation for tests. +class ThreadPool { + public: + explicit ThreadPool(int num_threads); + ThreadPool(const ThreadPool &) = delete; + ThreadPool &operator=(const ThreadPool &) = delete; + ~ThreadPool(); + uint64_t NumThreads(); + + // Schedule a function to be run on a ThreadPool thread immediately. + void Schedule(absl::AnyInvocable func); + + private: + bool WorkAvailable() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + void WorkLoop(); + uint64_t num_threads_; + absl::Mutex mu_; + std::queue> queue_ ABSL_GUARDED_BY(mu_); + std::vector threads_; +}; + +#endif // THREAD_POOL_H_ /* Copyright 2022 Google LLC. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,12 +53,10 @@ limitations under the License. 
#ifndef ARRAY_RECORD_CPP_THREAD_POOL_H_ #define ARRAY_RECORD_CPP_THREAD_POOL_H_ -#define EIGEN_USE_CUSTOM_THREAD_POOL -#include "unsupported/Eigen/CXX11/ThreadPool" namespace array_record { -using ARThreadPool = Eigen::ThreadPoolInterface; +using ARThreadPool = ThreadPool; ARThreadPool* ArrayRecordGlobalPool(); diff --git a/oss/README.md b/oss/README.md deleted file mode 100644 index e227f33..0000000 --- a/oss/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Steps to build a new array_record pip package - -1. Update the version number in setup.py - -2. In the root folder, run - - ``` - ./oss/build_whl.sh - ``` - to use the current `python3` version. Otherwise, optionally set - ``` - PYTHON_VERSION=3.9 ./oss/build_whl.sh - ``` - -3. Wheels are in `all_dist/`. diff --git a/oss/build.Dockerfile b/oss/build.Dockerfile deleted file mode 100644 index 5fefa86..0000000 --- a/oss/build.Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -# Constructs the environment within which we will build the pip wheels. - - -ARG AUDITWHEEL_PLATFORM - -FROM quay.io/pypa/${AUDITWHEEL_PLATFORM} - -ARG PYTHON_VERSION -ARG PYTHON_BIN -ARG BAZEL_VERSION - -ENV DEBIAN_FRONTEND=noninteractive - -RUN yum install -y rsync -ENV PATH="${PYTHON_BIN}:${PATH}" - -# Download the correct bazel version and make sure it's on path. -RUN BAZEL_ARCH_SUFFIX="$(uname -m | sed s/aarch64/arm64/)" \ - && curl -sSL --fail -o /usr/local/bin/bazel "https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-linux-$BAZEL_ARCH_SUFFIX" \ - && chmod a+x /usr/local/bin/bazel - -# Install dependencies needed for array_record. 
-RUN --mount=type=cache,target=/root/.cache \ - ${PYTHON_BIN}/python -m pip install -U \ - absl-py \ - auditwheel \ - etils[epath] \ - patchelf \ - setuptools \ - twine \ - wheel; - -WORKDIR "/tmp/array_record" \ No newline at end of file diff --git a/oss/build_whl.sh b/oss/build_whl.sh deleted file mode 100755 index 275c868..0000000 --- a/oss/build_whl.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash -# Build wheel for the python version specified by $PYTHON_VERSION. -# Optionally, can set the environment variable $PYTHON_BIN to refer to a -# specific python interpreter. - -set -e -x - -if [ -z ${PYTHON_BIN} ]; then - if [ -z ${PYTHON_VERSION} ]; then - PYTHON_BIN=$(which python3) - else - PYTHON_BIN=$(which python${PYTHON_VERSION}) - fi -fi - -PYTHON_MAJOR_VERSION=$(${PYTHON_BIN} -c 'import sys; print(sys.version_info.major)') -PYTHON_MINOR_VERSION=$(${PYTHON_BIN} -c 'import sys; print(sys.version_info.minor)') -PYTHON_VERSION="${PYTHON_MAJOR_VERSION}.${PYTHON_MINOR_VERSION}" -export PYTHON_VERSION="${PYTHON_VERSION}" - -function write_to_bazelrc() { - echo "$1" >> .bazelrc -} - -function main() { - # Remove .bazelrc if it already exists - [ -e .bazelrc ] && rm .bazelrc - - write_to_bazelrc "build -c opt" - write_to_bazelrc "build --cxxopt=-std=c++17" - write_to_bazelrc "build --host_cxxopt=-std=c++17" - write_to_bazelrc "build --linkopt=\"-lrt -lm\"" - write_to_bazelrc "build --experimental_repo_remote_exec" - write_to_bazelrc "build --python_path=\"${PYTHON_BIN}\"" - - if [ -n "${CROSSTOOL_TOP}" ]; then - write_to_bazelrc "build --crosstool_top=${CROSSTOOL_TOP}" - write_to_bazelrc "test --crosstool_top=${CROSSTOOL_TOP}" - fi - - # Using a previous version of Blaze to avoid: - # https://github.com/bazelbuild/bazel/issues/8622 - export USE_BAZEL_VERSION=5.4.0 - bazel clean - bazel build ... - bazel test --verbose_failures --test_output=errors ... 
- - DEST="/tmp/array_record/all_dist" - # Create the directory, then do dirname on a non-existent file inside it to - # give us an absolute paths with tilde characters resolved to the destination - # directory. - mkdir -p "${DEST}" - echo "=== destination directory: ${DEST}" - - TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXX) - - echo $(date) : "=== Using tmpdir: ${TMPDIR}" - mkdir "${TMPDIR}/array_record" - - echo $(date) : "=== Copy array_record files" - - cp setup.py "${TMPDIR}" - cp LICENSE "${TMPDIR}" - rsync -avm -L --exclude="bazel-*/" . "${TMPDIR}/array_record" - rsync -avm -L --include="*.so" --include="*_pb2.py" \ - --exclude="*.runfiles" --exclude="*_obj" --include="*/" --exclude="*" \ - bazel-bin/cpp "${TMPDIR}/array_record" - rsync -avm -L --include="*.so" --include="*_pb2.py" \ - --exclude="*.runfiles" --exclude="*_obj" --include="*/" --exclude="*" \ - bazel-bin/python "${TMPDIR}/array_record" - - pushd ${TMPDIR} - echo $(date) : "=== Building wheel" - ${PYTHON_BIN} setup.py bdist_wheel --python-tag py3${PYTHON_MINOR_VERSION} - - if [ -n "${AUDITWHEEL_PLATFORM}" ]; then - echo $(date) : "=== Auditing wheel" - auditwheel repair --plat ${AUDITWHEEL_PLATFORM} -w dist dist/*.whl - fi - - echo $(date) : "=== Listing wheel" - ls -lrt dist/*.whl - cp dist/*.whl "${DEST}" - popd - - echo $(date) : "=== Output wheel file is in: ${DEST}" -} - -main diff --git a/oss/runner_common.sh b/oss/runner_common.sh deleted file mode 100644 index 2ee2c8c..0000000 --- a/oss/runner_common.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# Builds ArrayRecord from source code located in SOURCE_DIR producing wheels -# under $SOURCE_DIR/all_dist. -function build_and_test_array_record() { - SOURCE_DIR=$1 - - # Automatically decide which platform to build for by checking on which - # platform this runs. 
- AUDITWHEEL_PLATFORM="manylinux2014_$(uname -m)" - - # Using a previous version of Blaze to avoid: - # https://github.com/bazelbuild/bazel/issues/8622 - export BAZEL_VERSION="5.4.0" - - # Build wheels for multiple Python minor versions. - PYTHON_MAJOR_VERSION=3 - for PYTHON_MINOR_VERSION in 9 10 11 12 - do - PYTHON_VERSION=${PYTHON_MAJOR_VERSION}.${PYTHON_MINOR_VERSION} - PYTHON_BIN=/opt/python/cp${PYTHON_MAJOR_VERSION}${PYTHON_MINOR_VERSION}-cp${PYTHON_MAJOR_VERSION}${PYTHON_MINOR_VERSION}/bin - - # Cleanup older images. - docker rmi -f array_record:${PYTHON_VERSION} - docker rm -f array_record - - DOCKER_BUILDKIT=1 docker build --progress=plain --no-cache \ - --build-arg AUDITWHEEL_PLATFORM=${AUDITWHEEL_PLATFORM} \ - --build-arg PYTHON_VERSION=${PYTHON_VERSION} \ - --build-arg PYTHON_BIN=${PYTHON_BIN} \ - --build-arg BAZEL_VERSION=${BAZEL_VERSION} \ - -t array_record:${PYTHON_VERSION} - < ${SOURCE_DIR}/oss/build.Dockerfile - - docker run --rm -a stdin -a stdout -a stderr \ - --env PYTHON_BIN="${PYTHON_BIN}/python" \ - --env BAZEL_VERSION=${BAZEL_VERSION} \ - --env AUDITWHEEL_PLATFORM=${AUDITWHEEL_PLATFORM} \ - -v $SOURCE_DIR:/tmp/array_record \ - --name array_record array_record:${PYTHON_VERSION} \ - bash oss/build_whl.sh - done - - ls ${SOURCE_DIR}/all_dist/*.whl -} \ No newline at end of file diff --git a/protobuf.diff b/protobuf.diff new file mode 100644 index 0000000..070f22b --- /dev/null +++ b/protobuf.diff @@ -0,0 +1,17 @@ +diff --git a/src/google/protobuf/message_lite.cc b/src/google/protobuf/message_lite.cc +index c0456c3f2..5006d2901 100644 +--- a/src/google/protobuf/message_lite.cc ++++ b/src/google/protobuf/message_lite.cc +@@ -696,9 +696,9 @@ void OnShutdown(void (*func)()) { + } + + void OnShutdownRun(void (*f)(const void*), const void* arg) { +- auto shutdown_data = ShutdownData::get(); +- absl::MutexLock lock(&shutdown_data->mutex); +- shutdown_data->functions.push_back(std::make_pair(f, arg)); ++ // auto shutdown_data = ShutdownData::get(); ++ 
// absl::MutexLock lock(&shutdown_data->mutex); ++ // shutdown_data->functions.push_back(std::make_pair(f, arg)); + } + + } // namespace internal \ No newline at end of file diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..5595518 --- /dev/null +++ b/requirements.in @@ -0,0 +1 @@ +etils[epath] \ No newline at end of file diff --git a/requirements_lock.txt b/requirements_lock.txt new file mode 100644 index 0000000..aa1da93 --- /dev/null +++ b/requirements_lock.txt @@ -0,0 +1,26 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# bazel run //:requirements.update +# +etils[epath,epy]==1.9.4 \ + --hash=sha256:4387e7a4911a3b5cc4b92b99a9211386d176b43bae1dac8e2fe345fc2cb95e4b \ + --hash=sha256:fad950414f0a1ca58c70c70915b0014f9953dd9bcf8aa951a0f75ff9becbeb24 + # via -r requirements.in +fsspec==2024.9.0 \ + --hash=sha256:4b0afb90c2f21832df142f292649035d80b421f60a9e1c027802e5a0da2b04e8 \ + --hash=sha256:a0947d552d8a6efa72cc2c730b12c41d043509156966cca4fb157b0f2a0c574b + # via etils +importlib-resources==6.4.5 \ + --hash=sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065 \ + --hash=sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717 + # via etils +typing-extensions==4.12.2 \ + --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ + --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 + # via etils +zipp==3.20.2 \ + --hash=sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350 \ + --hash=sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29 + # via etils