From 3519016a6041bf5f3b28d0923af04be5a79299b0 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 12 Aug 2024 14:59:37 -0600 Subject: [PATCH 1/8] assignment operator not needed. default constructible. --- spiner/regular_grid_1d.hpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/spiner/regular_grid_1d.hpp b/spiner/regular_grid_1d.hpp index 23fdfa495..d69076d65 100644 --- a/spiner/regular_grid_1d.hpp +++ b/spiner/regular_grid_1d.hpp @@ -62,21 +62,6 @@ class RegularGrid1D { PORTABLE_ALWAYS_REQUIRE(min_ < max_ && N_ > 0, "Valid grid"); } - // Assignment operator - /* - Default copy constructable - PORTABLE_INLINE_FUNCTION RegularGrid1D &operator=(const RegularGrid1D &src) { - if (this != &src) { - min_ = src.min_; - max_ = src.max_; - dx_ = src.dx_; - idx_ = src.idx_; - N_ = src.N_; - } - return *this; - } - */ - // Forces x in the interval PORTABLE_INLINE_FUNCTION int bound(int ix) const { #ifndef SPINER_DISABLE_BOUNDS_CHECKS From 06cf9d1cd9e6817680987bf9dec174507ecaab38 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 12 Aug 2024 17:09:53 -0600 Subject: [PATCH 2/8] implement databox serialization and de-serialization --- doc/sphinx/src/databox.rst | 74 +++++++++++++++++++++++++-- spiner/databox.hpp | 40 +++++++++++++++ test/test.cpp | 100 +++++++++++++++++++++++++++++++++---- 3 files changed, 198 insertions(+), 16 deletions(-) diff --git a/doc/sphinx/src/databox.rst b/doc/sphinx/src/databox.rst index a886e4375..cbc70e182 100644 --- a/doc/sphinx/src/databox.rst +++ b/doc/sphinx/src/databox.rst @@ -107,7 +107,7 @@ yourself. For example: You can also resize a ``DataBox``, which you can use to modify a ``DataBox`` in-place. For example: -.. code-block:: +.. code-block:: cpp Spiner::DataBox db; // empty // clears old memory, resizes the underlying array, @@ -124,7 +124,7 @@ If you want to change the stride without changing the underlying data, you can use ``reshape``, which modifies the dimensions of the array, without modifying the underlying memory. For example: -.. code-block:: +.. code-block:: cpp // allocate a 1D databox Spiner::DataBox db(nx3*nx2*nx1); @@ -170,7 +170,7 @@ Semantics and Memory Management ``DataBox`` has reference semantics---meaning that copying a ``DataBox`` does not copy the underlying data. In other words, -.. code-block:: +.. code-block:: cpp Spiner::DataBox db1(size); Spiner::DataBox db2 = db1; @@ -230,7 +230,7 @@ call ``free`` for you, so long as you use them with a custom deleter. Spiner provides the following deleter for use in this scenario: -.. code-block:: +.. code-block:: cpp struct DBDeleter { template @@ -242,7 +242,7 @@ scenario: It can be used, for example, with a ``std::unique_ptr`` via: -.. code-block:: +.. code-block:: cpp // needed for smart pointers #include @@ -259,6 +259,70 @@ It can be used, for example, with a ``std::unique_ptr`` via: // when you leave scope, the data box will be freed. +Serialization and de-serialization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Shared memory models, such as `MPI Windows`_, require allocation of +memory through an external API call (e.g., +``MPI_Win_allocate_shared``), which tabulated data must be written +to. ``Spiner`` supports this model through **serialization** and +**de-serialization**. The relevant methods are as follows. The +function + +.. cpp:function:: std::size_t DataBox::serializedSizeInBytes() const; + +reports how much memory a ``DataBox`` object requires to be externally +allocated. The function + +.. cpp:function:: std::size_t serialize(char *dst) const; + +takes a ``char*`` pointer, assumed to contain enough space for a +``DataBox``, and stores all information needed for the ``DataBox`` to +reconstruct itself. The return value is the amount of memory in bytes +used in the array by the serialized ``DataBox`` object. This method is +non-destructive; the original ``DataBox`` is unchanged. The function + +.. cpp:function:: std::size_t DataBox::deSerialize(char *src); + +initializes a ``DataBox`` to match the serialized ``DataBox`` +contained in the ``src`` pointer. + +.. note:: + + Note that the de-serialized ``DataBox`` has **unmanaged** memory, as + it is assumed that the ``src`` pointer manages its memory for + it. Therefore, one **cannot** ``free`` the ``src`` pointer until + everything you want to do with the de-serialized ``DataBox`` is + over. + +Putting this all together, an application of +serialization/de-serialization probably looks like this: + +.. code-block:: cpp + + // load a databox from, e.g., file + Spiner::DataBox db; + db.loadHDF(filename); + + // get size of databox + std::size_t allocate_size = db.serialSizeInBytes(); + + // Allocate the memory for the new databox. + // In practice this would be an API call for, e.g., shared memory + char *memory = (char*)malloc(allocate_size); + + // serialize the old databox + std::size_t write_size = db.serialize(memory); + + // make a new databox and de-serialize it + Spiner::DataBox db2; + std::size_t read_size = db2.deSerialize(memory); + + // read_size, write_size, and allocate_size should all be the same. + assert((read_size == write_size) && (write_size == allocate_size)); + +.. _`MPI Windows`: https://www.mpi-forum.org/docs/mpi-4.1/mpi41-report/node311.htm + Accessing Elements of a ``DataBox`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/spiner/databox.hpp b/spiner/databox.hpp index e984521f1..2daabddb0 100644 --- a/spiner/databox.hpp +++ b/spiner/databox.hpp @@ -305,6 +305,46 @@ class DataBox { return indices_[i]; } + // serialization routines + // ------------------------------------ + // this one reports size for serialize/deserialize + std::size_t serializedSizeInBytes() const { + return sizeBytes() + sizeof(*this); + } + // this one takes the pointer `dst`, which is assumed to have + // sufficient memory allocated, and fills it with the + // databox. Return value is the amount of bytes written to. + std::size_t serialize(char *dst) const { + PORTABLE_REQUIRE(status_ != DataStatus::AllocatedDevice, + "Serialization cannot be performed on device memory"); + memcpy(dst, this, sizeof(*this)); + std::size_t offst = sizeof(*this); + if (sizeBytes() > 0) { // could also do data_ != nullptr + memcpy(dst + offst, data_, sizeBytes()); + offst += sizeBytes(); + } + return offst; + } + // This one takes a src pointer, which is assumed to contain a + // databox and initializes the current databox. Note that the + // databox becomes unmananged, as the contents of the box are still + // the externally managed pointer. + std::size_t deSerialize(char *src) { + PORTABLE_REQUIRE( + (status_ == DataStatus::Empty || status_ == DataStatus::Unmanaged), + "Must not de-serialize into an active databox."); + memcpy(this, src, sizeof(*this)); + std::size_t offst = sizeof(*this); + // now sizeBytes is well defined after copying the "header" of the source. + if (sizeBytes() > 0) { // could also do data_ != nullptr + data_ = (double *)(src + offst); + status_ = DataStatus::Unmanaged; + offst += sizeBytes(); + } + return offst; + } + // ------------------------------------ + DataBox getOnDevice() const { // getOnDevice is always a deep copy if (size() == 0 || diff --git a/test/test.cpp b/test/test.cpp index 3cfa9940d..782ed20ed 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -53,8 +53,8 @@ PORTABLE_INLINE_FUNCTION Real linearFunction(Real b, Real a, Real z, Real y, return x + y + z + a + b; } -TEST_CASE("PortableMDArrays can be allocated from a pointer", - "[PortableMDArray]") { +SCENARIO("PortableMDArrays can be allocated from a pointer", + "[PortableMDArray]") { constexpr int N = 2; constexpr int M = 3; std::vector data(N * M); @@ -529,19 +529,22 @@ TEST_CASE("DataBox Interpolation with piecewise grids", WHEN("We construct and fill a 3D DataBox based on this grid") { constexpr int RANK = 3; - PiecewiseDB db(Spiner::AllocationTarget::Device, NCOARSE, NCOARSE, - NCOARSE); + PiecewiseDB dbh(Spiner::AllocationTarget::Host, NCOARSE, NCOARSE, + NCOARSE); for (int i = 0; i < RANK; ++i) { - db.setRange(i, g); + dbh.setRange(i, g); } - portableFor( - "Fill 3D Databox", 0, NCOARSE, 0, NCOARSE, 0, NCOARSE, - PORTABLE_LAMBDA(const int iz, const int iy, const int ix) { + for (int iz = 0; iz < NCOARSE; ++iz) { + for (int iy = 0; iy < NCOARSE; ++iy) { + for (int ix = 0; ix < NCOARSE; ++ix) { Real x = g.x(ix); Real y = g.x(iy); Real z = g.x(iz); - db(iz, iy, ix) = linearFunction(z, y, x); - }); + dbh(iz, iy, ix) = linearFunction(z, y, x); + } + } + } + auto db = dbh.getOnDevice(); THEN("We can interpolate it to a finer grid and get the right answer") { Real error = 0; @@ -561,8 +564,83 @@ TEST_CASE("DataBox Interpolation with piecewise grids", error); REQUIRE(error <= EPSTEST); } + // cleanup free(db); + free(dbh); + } + } +} + +SCENARIO("Serializing and deserializing a DataBox", + "[DataBox][PiecewiseGrid1D][Serialize]") { + GIVEN("A piecewise grid") { + constexpr int NGRIDS = 2; + constexpr Real xmin = 0; + constexpr Real xmax = 1; + + RegularGrid1D g1(xmin, 0.35 * (xmax - xmin), 3); + RegularGrid1D g2(0.35 * (xmax - xmin), xmax, 4); + PiecewiseGrid1D g = {{g1, g2}}; + + const int NCOARSE = g.nPoints(); + + THEN("The piecewise grid contains a number of points equal the sum of " + "the points of the individual grids") { + REQUIRE(g.nPoints() == g1.nPoints() + g2.nPoints()); + } + + WHEN("We construct and fill a 3D DataBox based on this grid") { + constexpr int RANK = 3; + PiecewiseDB dbh(Spiner::AllocationTarget::Host, NCOARSE, NCOARSE, + NCOARSE); + for (int i = 0; i < RANK; ++i) { + dbh.setRange(i, g); + } + for (int iz = 0; iz < NCOARSE; ++iz) { + for (int iy = 0; iy < NCOARSE; ++iy) { + for (int ix = 0; ix < NCOARSE; ++ix) { + Real x = g.x(ix); + Real y = g.x(iy); + Real z = g.x(iz); + dbh(iz, iy, ix) = linearFunction(z, y, x); + } + } + } + WHEN("We serialize the DataBox") { + std::size_t serial_size = dbh.serializedSizeInBytes(); + REQUIRE(serial_size == (sizeof(dbh) + dbh.sizeBytes())); + + char *db_serial = (char *)malloc(serial_size * sizeof(char)); + std::size_t write_offst = dbh.serialize(db_serial); + REQUIRE(write_offst == serial_size); + + THEN("We can initialize a new databox based on the serialized one") { + PiecewiseDB dbh2; + std::size_t read_offst = dbh2.deSerialize(db_serial); + REQUIRE(read_offst == write_offst); + + AND_THEN("The shape is correct") { + REQUIRE(dbh2.rank() == dbh.rank()); + REQUIRE(dbh2.size() == dbh.size()); + for (int d = 1; d <= 3; ++d) { + REQUIRE(dbh2.dim(d) == dbh.dim(d)); + } + } + + AND_THEN("The contents are correct") { + for (int i = 0; i < dbh.size(); ++i) { + REQUIRE(dbh(i) == dbh2(i)); + } + } + } + + // cleanup + free(db_serial); + } + + // cleanup + free(dbh); } } } @@ -702,7 +780,7 @@ SCENARIO("Using unique pointers to garbage collect DataBox", } #if SPINER_USE_HDF -TEST_CASE("PiecewiseGrid HDF5", "[PiecewiseGrid1D][HDF5]") { +SCENARIO("PiecewiseGrid HDF5", "[PiecewiseGrid1D][HDF5]") { GIVEN("A piecewise grid") { RegularGrid1D g1(0, 0.25, 3); RegularGrid1D g2(0.25, 0.75, 11); From 4e825a268619c97c829a0b2f2983a009bbb30860 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 12 Aug 2024 17:28:25 -0600 Subject: [PATCH 3/8] T not double --- spiner/databox.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spiner/databox.hpp b/spiner/databox.hpp index 2daabddb0..07eb6ab25 100644 --- a/spiner/databox.hpp +++ b/spiner/databox.hpp @@ -337,7 +337,7 @@ class DataBox { std::size_t offst = sizeof(*this); // now sizeBytes is well defined after copying the "header" of the source. if (sizeBytes() > 0) { // could also do data_ != nullptr - data_ = (double *)(src + offst); + data_ = (T *)(src + offst); status_ = DataStatus::Unmanaged; offst += sizeBytes(); } From 23b5da5f970ef7628a08814df438378ca67c794e Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 13 Aug 2024 09:45:34 -0600 Subject: [PATCH 4/8] fix bug related to underlying object. add test. add setPointer --- doc/sphinx/src/databox.rst | 12 ++++++++++++ spiner/databox.hpp | 24 +++++++++++++++++++----- test/test.cpp | 7 +++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/doc/sphinx/src/databox.rst b/doc/sphinx/src/databox.rst index cbc70e182..3da68fb43 100644 --- a/doc/sphinx/src/databox.rst +++ b/doc/sphinx/src/databox.rst @@ -282,6 +282,18 @@ reconstruct itself. The return value is the amount of memory in bytes used in the array by the serialized ``DataBox`` object. This method is non-destructive; the original ``DataBox`` is unchanged. The function +.. cpp:function:: std::size_t DataBox::setPointer(T *src); + +with the overload + +.. cpp:function:: std::size_t DataBox::setPointer(char *src); + +sets the underlying tabulated data from the src pointer, which is +assumed to be the right size and shape. This is useful for the +deSerialize function (described below) and for building your own +serialization/de-serialization routines in composite objects. The +function + .. cpp:function:: std::size_t DataBox::deSerialize(char *src); initializes a ``DataBox`` to match the serialized ``DataBox`` diff --git a/spiner/databox.hpp b/spiner/databox.hpp index 07eb6ab25..1f08873a4 100644 --- a/spiner/databox.hpp +++ b/spiner/databox.hpp @@ -325,6 +325,24 @@ class DataBox { } return offst; } + + // This sets the internal pointer based on a passed in src pointer, + // which is assumed to be the right size. Used below in deSerialize + // and may be used for serialization routines. Returns amount of src + // pointer used. + std::size_t setPointer(T *src) { + if (sizeBytes() > 0) { // could also do data_ != nullptr + data_ = src; + // TODO(JMM): If portable arrays ever change maximum rank, this + // line needs to change. + dataView_.NewPortableMDArray(data_, dim(6), dim(5), dim(4), dim(3), + dim(2), dim(1)); + makeShallow(); + } + return sizeBytes(); + } + std::size_t setPointer(char *src) { return setPointer((T *)src); } + // This one takes a src pointer, which is assumed to contain a // databox and initializes the current databox. Note that the // databox becomes unmananged, as the contents of the box are still @@ -336,11 +354,7 @@ class DataBox { memcpy(this, src, sizeof(*this)); std::size_t offst = sizeof(*this); // now sizeBytes is well defined after copying the "header" of the source. - if (sizeBytes() > 0) { // could also do data_ != nullptr - data_ = (T *)(src + offst); - status_ = DataStatus::Unmanaged; - offst += sizeBytes(); - } + offst += setPointer(src + offst); return offst; } // ------------------------------------ diff --git a/test/test.cpp b/test/test.cpp index 782ed20ed..89f8866b9 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -620,6 +620,13 @@ SCENARIO("Serializing and deserializing a DataBox", std::size_t read_offst = dbh2.deSerialize(db_serial); REQUIRE(read_offst == write_offst); + AND_THEN("They do not point ot the same memory") { + // checks DataBox pointer + REQUIRE(dbh2.data() != dbh.data()); + // checks accessor agrees + REQUIRE(&dbh2(0) != &dbh(0)); + } + AND_THEN("The shape is correct") { REQUIRE(dbh2.rank() == dbh.rank()); REQUIRE(dbh2.size() == dbh.size()); From 6c63804e5cd304c2767a68e9e8b85494c2158a42 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 13 Aug 2024 10:04:39 -0600 Subject: [PATCH 5/8] add warning about portability. --- doc/sphinx/src/databox.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/sphinx/src/databox.rst b/doc/sphinx/src/databox.rst index 3da68fb43..624e187cf 100644 --- a/doc/sphinx/src/databox.rst +++ b/doc/sphinx/src/databox.rst @@ -333,6 +333,16 @@ serialization/de-serialization probably looks like this: // read_size, write_size, and allocate_size should all be the same. assert((read_size == write_size) && (write_size == allocate_size)); +.. warning:: + + The serialization routines described here are **not** architecture + aware. Serializing and de-serializing on a single architecture + inside a single executable will work fine. However, do not use + serialization as a file I/O strategy, as there is no guarantee that + the serialized format for a ``DataBox`` on one architecture will be + the same as on another. This is due to, for example, + architecture-specific differences in endianness and padding. + .. _`MPI Windows`: https://www.mpi-forum.org/docs/mpi-4.1/mpi41-report/node311.htm Accessing Elements of a ``DataBox`` From 1ded37bd89856266a583f4adfd9cd39a63e22442 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 13 Aug 2024 10:26:29 -0600 Subject: [PATCH 6/8] typo --- test/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.cpp b/test/test.cpp index 89f8866b9..f40232b49 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -620,7 +620,7 @@ SCENARIO("Serializing and deserializing a DataBox", std::size_t read_offst = dbh2.deSerialize(db_serial); REQUIRE(read_offst == write_offst); - AND_THEN("They do not point ot the same memory") { + AND_THEN("They do not point to the same memory") { // checks DataBox pointer REQUIRE(dbh2.data() != dbh.data()); // checks accessor agrees From 493a2e5ddcffd9ba2a646e514f3bcb3a894356b2 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Thu, 15 Aug 2024 09:15:12 -0600 Subject: [PATCH 7/8] jhp comments --- spiner/databox.hpp | 11 +++++++++++ test/test.cpp | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/spiner/databox.hpp b/spiner/databox.hpp index 1f08873a4..793664364 100644 --- a/spiner/databox.hpp +++ b/spiner/databox.hpp @@ -352,6 +352,17 @@ class DataBox { (status_ == DataStatus::Empty || status_ == DataStatus::Unmanaged), "Must not de-serialize into an active databox."); memcpy(this, src, sizeof(*this)); + + // sanity check that de-serialization of trivially-copyable memory + // was reasonable + PORTABLE_REQUIRE(size() > 0, "All dimensions must be positive."); + PORTABLE_REQUIRE((size() > 0) || (rank_ == 0), + "Size > 0 for all non-empty databoxes"); + PORTABLE_REQUIRE((size() > 0) || status_ == DataStatus::Empty, + "Size > 0 for all non-empty databoxes"); + PORTABLE_REQUIRE(status_ != DataStatus::AllocatedDevice, + "Serialization cannot be performed on device memory"); + std::size_t offst = sizeof(*this); // now sizeBytes is well defined after copying the "header" of the source. offst += setPointer(src + offst); diff --git a/test/test.cpp b/test/test.cpp index f40232b49..7d6276354 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -627,6 +627,16 @@ SCENARIO("Serializing and deserializing a DataBox", REQUIRE(&dbh2(0) != &dbh(0)); } + WHEN("We initialize a THIRD databox on the serialized one") { + PiecewiseDB dbh3; + std::size_t read_offst3 = dbh3.deSerialize(db_serial); + REQUIRE(read_offst3 == write_offst); + THEN("The second and third databoxes DO point at the same memory") { + REQUIRE(dbh2.data() == dbh3.data()); + REQUIRE(&dbh3(0) == &dbh2(0)); + } + } + AND_THEN("The shape is correct") { REQUIRE(dbh2.rank() == dbh.rank()); REQUIRE(dbh2.size() == dbh.size()); From 9dee38297e5432f249d5272233c8f31d7cacdcb4 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Thu, 15 Aug 2024 09:17:25 -0600 Subject: [PATCH 8/8] add check for different objects --- test/test.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test.cpp b/test/test.cpp index 7d6276354..30671ff93 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -634,6 +634,9 @@ SCENARIO("Serializing and deserializing a DataBox", THEN("The second and third databoxes DO point at the same memory") { REQUIRE(dbh2.data() == dbh3.data()); REQUIRE(&dbh3(0) == &dbh2(0)); + AND_THEN("But they are separate objects") { + REQUIRE(&dbh2 != &dbh3); + } } }