Merge branch 'dev' into eschnett/julia-bindings
* dev:
  Fix CMake: HDF5 Libs are PUBLIC (openPMD#1520)
  Fix `chmod` in `download_samples.sh` (openPMD#1518)
  CI: Old CTest (openPMD#1519)
  Python: Fix ODR Violation (openPMD#1521)
  replace extent in weighting and displacement (openPMD#1510)
  CMake: Warn and Continue on Empty HDF5_VERSION (openPMD#1512)
  Replace openPMD_Datatypes global with function (openPMD#1509)
  Streaming examples: Set WAN as default transport (openPMD#1511)
  TOML Backend (openPMD#1436)
  make it possible to manually set chunks when loading dask arrays (openPMD#1477)
  [pre-commit.ci] pre-commit autoupdate (openPMD#1504)
  Optional debugging output for AbstractIOHandlerImpl::flush() (openPMD#1495)
  Python: 3.8+ (openPMD#1502)

# Conflicts:
#	.github/workflows/linux.yml
#	src/binding/python/Series.cpp
eschnett committed Sep 5, 2023
2 parents d59773a + e01f721 commit 724806e
Showing 61 changed files with 1,000 additions and 617 deletions.
27 changes: 18 additions & 9 deletions .github/workflows/linux.yml
@@ -37,7 +37,8 @@ jobs:
-DopenPMD_USE_INVASIVE_TESTS=ON \
-DCMAKE_VERBOSE_MAKEFILE=ON
cmake --build build --parallel 2
-ctest --test-dir build --output-on-failure
+cd build
+ctest --output-on-failure
clang7_nopy_ompi_h5_ad2_libcpp:
runs-on: ubuntu-20.04
@@ -74,11 +75,13 @@ jobs:
-DopenPMD_USE_INVASIVE_TESTS=ON \
-DCMAKE_VERBOSE_MAKEFILE=ON
cmake --build build --parallel 2
-ctest --test-dir build --output-on-failure
+cd build
+ctest --output-on-failure
find . -name *.bp | xargs -n1 -P1 -I {} rm -rf {}
find . -name *.bp.dir | xargs -n1 -P1 -I {} rm -rf {}
-ctest --test-dir build --output-on-failure
+ctest --output-on-failure
clang7_nopy_ompi_h5_ad2:
runs-on: ubuntu-20.04
@@ -108,7 +111,8 @@ jobs:
-DopenPMD_USE_INVASIVE_TESTS=ON \
-DCMAKE_VERBOSE_MAKEFILE=ON
cmake --build build --parallel 2
-ctest --test-dir build --output-on-failure
+cd build
+ctest --output-on-failure
# TODO
# clang7_py36_nompi_h5_ad2_libstdc++
@@ -179,7 +183,8 @@ jobs:
-DopenPMD_USE_ADIOS2=ON \
-DopenPMD_USE_INVASIVE_TESTS=ON
cmake --build build --parallel 2
-ctest --test-dir build --output-on-failure
+cd build
+ctest --output-on-failure
# TODO: (old Travis-CI coverage)
# clang10_py38_ompi_h5_1-10-6_ad2_release
@@ -229,7 +234,8 @@ jobs:
-DopenPMD_USE_ADIOS2=ON \
-DopenPMD_USE_INVASIVE_TESTS=ON
cmake --build build --parallel 2
-ctest --test-dir build --output-on-failure
+cd build
+ctest --output-on-failure
gcc9_py38_pd_nompi_h5_ad2_libcpp_julia:
runs-on: ubuntu-20.04
@@ -265,7 +271,8 @@ jobs:
cmake --build build --parallel 2
# Install the Julia side of CxxWrap
julia --eval 'using Pkg; Pkg.add("CxxWrap")'
-ctest --test-dir build --output-on-failure
+cd build
+ctest --output-on-failure
musllinux_py10:
runs-on: ubuntu-20.04
@@ -290,7 +297,8 @@ jobs:
-DopenPMD_USE_INVASIVE_TESTS=ON \
-DPython_EXECUTABLE=$(which python3.10)
cmake --build build --parallel 2
-ctest --test-dir build --output-on-failure
+cd build
+ctest --output-on-failure
conda_ompi_all:
runs-on: ubuntu-20.04
@@ -324,4 +332,5 @@ jobs:
-DopenPMD_USE_ADIOS2=ON \
-DopenPMD_USE_INVASIVE_TESTS=ON
cmake --build build --parallel 2
-ctest --test-dir build --output-on-failure
+cd build
+ctest --output-on-failure
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -49,7 +49,7 @@ repos:

# Changes tabs to spaces
- repo: https://github.com/Lucas-C/pre-commit-hooks
-rev: v1.5.3
+rev: v1.5.4
hooks:
- id: remove-tabs

@@ -80,7 +80,7 @@ repos:

# Autoremoves unused Python imports
- repo: https://github.com/hadialqattan/pycln
-rev: v2.2.1
+rev: v2.2.2
hooks:
- id: pycln
name: pycln (python)
19 changes: 13 additions & 6 deletions CMakeLists.txt
@@ -336,10 +336,16 @@ endif()
# HDF5 checks
string(CONCAT openPMD_HDF5_STATUS "")
# version: lower limit
-if(openPMD_HAVE_HDF5 AND HDF5_VERSION VERSION_LESS 1.8.13)
-string(CONCAT openPMD_HDF5_STATUS
-"Found HDF5 version ${HDF5_VERSION} is too old. At least "
-"version 1.8.13 is required.\n")
+if(openPMD_HAVE_HDF5)
+if(HDF5_VERSION STREQUAL "")
+message(WARNING "HDF5_VERSION is empty. Now assuming it is 1.8.13 or newer.")
+else()
+if(HDF5_VERSION VERSION_LESS 1.8.13)
+string(CONCAT openPMD_HDF5_STATUS
+"Found HDF5 version ${HDF5_VERSION} is too old. At least "
+"version 1.8.13 is required.\n")
+endif()
+endif()
endif()
# we imply support for parallel I/O if MPI variant is ON
if(openPMD_HAVE_MPI AND openPMD_HAVE_HDF5
@@ -415,7 +421,7 @@ if(CMAKE_VERSION VERSION_LESS 3.18.0)
set(_PY_DEV_MODULE Development)
endif()
if(openPMD_USE_PYTHON STREQUAL AUTO)
-find_package(Python 3.7.0 COMPONENTS Interpreter ${_PY_DEV_MODULE})
+find_package(Python 3.8.0 COMPONENTS Interpreter ${_PY_DEV_MODULE})
if(Python_FOUND)
if(openPMD_USE_INTERNAL_PYBIND11)
add_subdirectory("${openPMD_SOURCE_DIR}/share/openPMD/thirdParty/pybind11")
@@ -588,8 +594,9 @@ target_include_directories(openPMD SYSTEM PRIVATE
$<TARGET_PROPERTY:openPMD::thirdparty::toml11,INTERFACE_INCLUDE_DIRECTORIES>)

# HDF5 Backend
+# TODO: Once we require CMake 3.20+, simply link hdf5::hdf5 C lib target
if(openPMD_HAVE_HDF5)
-target_link_libraries(openPMD PRIVATE ${HDF5_LIBRARIES})
+target_link_libraries(openPMD PUBLIC ${HDF5_LIBRARIES})
target_include_directories(openPMD SYSTEM PRIVATE ${HDF5_INCLUDE_DIRS})
target_compile_definitions(openPMD PRIVATE ${HDF5_DEFINITIONS})
endif()
28 changes: 2 additions & 26 deletions Dockerfile
@@ -5,8 +5,8 @@ FROM quay.io/pypa/manylinux2010_x86_64 as build-env
# FROM quay.io/pypa/manylinux1_x86_64 as build-env
ENV DEBIAN_FRONTEND noninteractive

-# Python 3.7-3.11 via "37m 38 39 311"
-ARG PY_VERSIONS="37m 38 39 310 311"
+# Python 3.8-3.11 via "38 39 311"
+ARG PY_VERSIONS="38 39 310 311"

# static libs need relocatable symbols for linking to shared python lib
ENV CFLAGS="-fPIC ${CFLAGS}"
@@ -112,30 +112,6 @@ RUN for whl in /opt/src/dist/*.whl; do \
&& du -hs /opt/src/dist/* \
&& du -hs /wheelhouse/*

-# test in fresh env: Debian:Buster + Python 3.7
-FROM debian:buster
-ENV DEBIAN_FRONTEND noninteractive
-COPY --from=build-env /wheelhouse/openPMD_api-*-cp37-cp37m-manylinux2010_x86_64.whl .
-RUN apt-get update \
-&& apt-get install -y --no-install-recommends python3 python3-pip \
-&& rm -rf /var/lib/apt/lists/*
-# binutils
-RUN python3 --version \
-&& python3 -m pip install -U pip \
-&& python3 -m pip install openPMD_api-*-cp37-cp37m-manylinux2010_x86_64.whl
-RUN find / -name "openpmd*"
-RUN ls -hal /usr/local/lib/python3.7/dist-packages/
-RUN ls -hal /usr/local/lib/python3.7/dist-packages/openpmd_api/
-# RUN ls -hal /usr/local/lib/python3.7/dist-packages/.libsopenpmd_api
-# RUN objdump -x /usr/local/lib/python3.7/dist-packages/openpmd_api.cpython-37m-x86_64-linux-gnu.so | grep RPATH
-RUN ldd /usr/local/lib/python3.7/dist-packages/openpmd_api/openpmd_api_cxx.cpython-37m-x86_64-linux-gnu.so
-RUN python3 -c "import openpmd_api as io; print(io.__version__); print(io.variants)"
-RUN python3 -m openpmd_api.ls --help
-RUN openpmd-ls --help
-#RUN echo "* soft core 100000" >> /etc/security/limits.conf && \
-# python3 -c "import openpmd_api as io"; \
-# gdb -ex bt -c core
-
# test in fresh env: Debian:Sid + Python 3.8
FROM debian:sid
ENV DEBIAN_FRONTEND noninteractive
2 changes: 1 addition & 1 deletion README.md
@@ -121,7 +121,7 @@ Optional language bindings:
* Julia 1.7 - 1.10
* [libcxxwrap_julia](https://github.com/JuliaInterop/libcxxwrap-julia) 0.8.3 - 0.9.7
* Python:
-  * Python 3.7 - 3.11
+  * Python 3.8 - 3.11
* pybind11 2.11.1+
* numpy 1.15+
* mpi4py 2.1+ (optional, for MPI)
2 changes: 1 addition & 1 deletion conda.yml
@@ -36,7 +36,7 @@ dependencies:
- pre-commit
- pyarrow # for dask
# - pybind11 # shipped internally
-- python>=3.7
+- python>=3.8

# just a note for later hackery, we could install pip packages inside the env, too:
# - pip:
3 changes: 3 additions & 0 deletions docs/source/analysis/dask.rst
@@ -41,6 +41,9 @@ The central Python API calls to convert to DASK datatypes are the ``ParticleSpec
# note: no series.flush() needed
The ``to_dask_array`` method will automatically set Dask array chunking based on the available chunks in the read data set.
The default behavior can be overridden by passing an additional keyword argument ``chunks``; see the `dask.array.from_array documentation <https://docs.dask.org/en/stable/generated/dask.array.from_array.html>`__ for more details.
For example, to chunk only along the outermost axis in a 3D dataset using the default Dask array chunk size, call ``to_dask_array(chunks={0: 'auto', 1: -1, 2: -1})``.

Example
-------
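For illustration, a minimal sketch of the ``chunks`` pass-through described above (the file name and record path follow the openPMD ``git-sample`` layout and are assumptions)::

    import openpmd_api as io

    series = io.Series("../samples/git-sample/data%T.h5", io.Access.read_only)
    record = series.iterations[100].meshes["E"]["x"]  # assumed 3D record component

    # chunk only along the outermost axis; keep axes 1 and 2 whole
    data = record.to_dask_array(chunks={0: "auto", 1: -1, 2: -1})
    print(data.chunks)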
43 changes: 37 additions & 6 deletions docs/source/backends/json.rst
@@ -1,10 +1,19 @@
.. _backends-json:

-JSON
-====
+JSON/TOML
+=========

-openPMD supports writing to and reading from JSON files.
-The JSON backend is always available.
+openPMD supports writing to and reading from JSON and TOML files.
+The JSON and TOML backends are always available.

+.. note::
+
+   Both the JSON and the TOML backends are not intended for large-scale data I/O.
+
+The JSON backend is mainly intended for prototyping and learning, or similar workflows where setting up a large IO backend such as HDF5 or ADIOS2 is perceived as obstructive. It can also be used for small datasets that need to be stored in text format rather than binary.
+
+The TOML backend is intended for exchanging the *structure* of a data series without its "heavy" data fields.
+For instance, one can easily create and exchange human-readable, machine-actionable data configurations for experiments and simulations.


JSON File Format
@@ -43,9 +52,17 @@ Every such attribute is itself a JSON object with two keys:
* ``datatype``: A string describing the type of the value.
* ``value``: The actual value of type ``datatype``.

+TOML File Format
+----------------
+
+A TOML file uses the file ending ``.toml``. The TOML backend is chosen by creating a ``Series`` object with a filename that has this file ending.
+
+The TOML backend internally works with JSON datasets and converts to/from TOML during I/O.
+As a result, data layout and usage are equivalent to the JSON backend.


-Restrictions
-------------
+JSON Restrictions
+-----------------

For creation of JSON serializations (i.e. writing), the restrictions of the JSON backend are
equivalent to those of the `JSON library by Niels Lohmann <https://github.com/nlohmann/json>`_
@@ -77,6 +94,20 @@ The (keys) names ``"attributes"``, ``"data"`` and ``"datatype"`` are reserved and

A parallel (i.e. MPI) implementation is *not* available.

+TOML Restrictions
+-----------------
+
+Note that the JSON datatype-specific restrictions do not automatically hold for TOML, as those affect only the representation on disk, not the internal representation.
+
+TOML supports most numeric types, with support for long double and long integer types being platform-defined.
+Special floating point values such as NaN are also supported.
+
+TOML does not support null values.
+
+The key names ``"attributes"``, ``"data"`` and ``"datatype"`` are reserved and must not be used for base/mesh/particles paths, records and their components.
+
+A parallel (i.e. MPI) implementation is *not* available.


Example
-------
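For illustration, a minimal sketch of selecting the JSON and TOML backends by file ending (the file names and the attribute are hypothetical)::

    import openpmd_api as io

    # identical code, two backends: ".json" selects JSON, ".toml" selects TOML
    for filename in ("structure.json", "structure.toml"):
        series = io.Series(filename, io.Access.create)
        series.set_attribute("purpose", "structure exchange")
        series.iterations[0]
        series.flush()
        del series  # closing the series finalizes the file on disk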
2 changes: 1 addition & 1 deletion docs/source/dev/dependencies.rst
@@ -39,7 +39,7 @@ Optional: language bindings

* Python:

-  * Python 3.7 - 3.11
+  * Python 3.8 - 3.11
* pybind11 2.11.1+
* numpy 1.15+
* mpi4py 2.1+ (optional, for MPI)
6 changes: 6 additions & 0 deletions docs/source/usage/workflow.rst
@@ -98,3 +98,9 @@ Attributes are (currently) unaffected by this:
Some backends (e.g. the BP5 engine of ADIOS2) have multiple implementations for the openPMD-api-level guarantees of flush points.
For user-guided selection of such implementations, ``Series::flush`` and ``Attributable::seriesFlush()`` take an optional JSON/TOML string as a parameter.
See the section on :ref:`backend-specific configuration <backendconfig>` for details.

+Deferred Data API Contract
+--------------------------
+
+A verbose debug log can optionally be printed to the standard error output by specifying the environment variable ``OPENPMD_VERBOSE=1``.
+Note that this functionality is currently still fairly basic.
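As a usage sketch of the debug log described above (the file name is hypothetical; that the variable should be set before the first flush is an assumption)::

    import os

    os.environ["OPENPMD_VERBOSE"] = "1"  # enable the verbose flush log

    import openpmd_api as io

    series = io.Series("debug_me.json", io.Access.create)
    series.iterations[0]
    series.flush()  # queued I/O tasks are now reported on standard error
    del series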
11 changes: 10 additions & 1 deletion examples/10_streaming_read.cpp
@@ -19,7 +19,16 @@ int main()
return 0;
}

-Series series = Series("electrons.sst", Access::READ_LINEAR);
+Series series = Series("electrons.sst", Access::READ_LINEAR, R"(
+{
+  "adios2": {
+    "engine": {
+      "parameters": {
+        "DataTransport": "WAN"
+      }
+    }
+  }
+})");

// `Series::writeIterations()` and `Series::readIterations()` are
// intentionally restricted APIs that ensure a workflow which also works
3 changes: 2 additions & 1 deletion examples/10_streaming_read.py
@@ -7,7 +7,8 @@
# pass-through for ADIOS2 engine parameters
# https://adios2.readthedocs.io/en/latest/engines/engines.html
config = {'adios2': {'engine': {}, 'dataset': {}}}
-config['adios2']['engine'] = {'parameters': {'Threads': '4'}}
+config['adios2']['engine'] = {'parameters':
+                              {'Threads': '4', 'DataTransport': 'WAN'}}
config['adios2']['dataset'] = {'operators': [{'type': 'bzip2'}]}

if __name__ == "__main__":
11 changes: 10 additions & 1 deletion examples/10_streaming_write.cpp
@@ -20,7 +20,16 @@ int main()
}

// open file for writing
-Series series = Series("electrons.sst", Access::CREATE);
+Series series = Series("electrons.sst", Access::CREATE, R"(
+{
+  "adios2": {
+    "engine": {
+      "parameters": {
+        "DataTransport": "WAN"
+      }
+    }
+  }
+})");

Datatype datatype = determineDatatype<position_t>();
constexpr unsigned long length = 10ul;
3 changes: 2 additions & 1 deletion examples/10_streaming_write.py
@@ -8,7 +8,8 @@
# pass-through for ADIOS2 engine parameters
# https://adios2.readthedocs.io/en/latest/engines/engines.html
config = {'adios2': {'engine': {}, 'dataset': {}}}
-config['adios2']['engine'] = {'parameters': {'Threads': '4'}}
+config['adios2']['engine'] = {'parameters':
+                              {'Threads': '4', 'DataTransport': 'WAN'}}
config['adios2']['dataset'] = {'operators': [{'type': 'bzip2'}]}

if __name__ == "__main__":
16 changes: 9 additions & 7 deletions examples/9_particle_write_serial.py
@@ -16,7 +16,7 @@
if __name__ == "__main__":
# open file for writing
f = Series(
"../samples/7_particle_write_serial_py.h5",
"../samples/9_particle_write_serial_py.h5",
Access.create
)

@@ -35,27 +35,29 @@
"Electrons... the necessary evil for ion acceleration! ",
"Just kidding.")

+n_particles = 234

# let's set a weird user-defined record this time
electrons["displacement"].unit_dimension = {Unit_Dimension.M: 1}
electrons["displacement"][SCALAR].unit_SI = 1.e-6
-dset = Dataset(np.dtype("float64"), extent=[2])
+dset = Dataset(np.dtype("float64"), extent=[n_particles])
electrons["displacement"][SCALAR].reset_dataset(dset)
electrons["displacement"][SCALAR].make_constant(42.43)
# don't like it anymore? remove it with:
# del electrons["displacement"]

electrons["weighting"][SCALAR] \
-.reset_dataset(Dataset(np.dtype("float32"), extent=[1])) \
+.reset_dataset(Dataset(np.dtype("float32"), extent=[n_particles])) \
.make_constant(1.e-5)

-particlePos_x = np.random.rand(234).astype(np.float32)
-particlePos_y = np.random.rand(234).astype(np.float32)
+particlePos_x = np.random.rand(n_particles).astype(np.float32)
+particlePos_y = np.random.rand(n_particles).astype(np.float32)
d = Dataset(particlePos_x.dtype, extent=particlePos_x.shape)
electrons["position"]["x"].reset_dataset(d)
electrons["position"]["y"].reset_dataset(d)

-particleOff_x = np.arange(234, dtype=np.uint)
-particleOff_y = np.arange(234, dtype=np.uint)
+particleOff_x = np.arange(n_particles, dtype=np.uint)
+particleOff_y = np.arange(n_particles, dtype=np.uint)
d = Dataset(particleOff_x.dtype, particleOff_x.shape)
electrons["positionOffset"]["x"].reset_dataset(d)
electrons["positionOffset"]["y"].reset_dataset(d)