diff --git a/Makefile.am b/Makefile.am index 2209dad7..fb02ee52 100644 --- a/Makefile.am +++ b/Makefile.am @@ -37,7 +37,7 @@ CLEANFILES = trexio.mod if HAVE_FORTRAN BUILT_SOURCES = trexio.mod else -BUILT_SOURCES = +BUILT_SOURCES = endif EXTRA_DIST = .git_hash @@ -116,6 +116,7 @@ TESTS_C += \ tests/io_dset_float_hdf5 \ tests/io_dset_int_hdf5 \ tests/io_dset_sparse_hdf5 \ + tests/io_dset_external_hdf5 \ tests/io_determinant_hdf5 \ tests/io_jastrow_hdf5 \ tests/io_safe_dset_float_hdf5 \ @@ -303,7 +304,7 @@ DEB_FILES = \ helpers-debian/libtrexio0.install \ helpers-debian/libtrexio-dev.install \ helpers-debian/source \ - helpers-debian/README.source + helpers-debian/README.source debian_from_dist: $(DEB_FILES) $(SOURCES) $(trexio_h) cp ../trexio-$(PACKAGE_VERSION).tar.gz ../libtrexio_$(PACKAGE_VERSION).orig.tar.gz diff --git a/python/test/benzene_data.py b/python/test/benzene_data.py index d64f9173..78ce47cb 100644 --- a/python/test/benzene_data.py +++ b/python/test/benzene_data.py @@ -47,3 +47,6 @@ orb_up_test = [0, 65, 128, 129] orb_dn_test = [1, 64, 128, 129] + +external_2Dfloat_name = "test external float matrix" +external_1Dint32_name = "test external int32 vector" diff --git a/python/test/test_api.py b/python/test/test_api.py index 4dff3f6e..818303a6 100644 --- a/python/test/test_api.py +++ b/python/test/test_api.py @@ -149,6 +149,19 @@ def test_array_2D(self): assert trexio.has_nucleus_coord(self.test_file) + def test_external_array(self): + """Write external arrays.""" + self.open() + + assert not trexio.has_external_array(self.test_file, external_2Dfloat_name) + trexio.write_external_array(self.test_file, nucleus_coord, external_2Dfloat_name) + assert trexio.has_external_array(self.test_file, external_2Dfloat_name) + + assert not trexio.has_external_array(self.test_file, external_1Dint32_name) + trexio.write_external_array(self.test_file, np.array(nucleus_charge,dtype=np.int32), external_1Dint32_name) + assert trexio.has_external_array(self.test_file, 
external_1Dint32_name) + + def test_indices(self): """Write array of indices.""" self.open() @@ -252,6 +265,21 @@ def test_read_array_2D(self): np.testing.assert_array_almost_equal(coords_np, np.array(nucleus_coord).reshape(nucleus_num,3), decimal=8) + def test_read_external_array(self): + """Read external arrays.""" + self.open(mode='r') + # read nuclear coordinates without providing optional argument dim + coords_external_np = trexio.read_external_array(self.test_file, name=external_2Dfloat_name, dtype="float64", size=nucleus_num*3) + assert coords_external_np.dtype is np.dtype(np.float64) + assert coords_external_np.size == nucleus_num * 3 + np.testing.assert_array_almost_equal(coords_external_np.reshape(nucleus_num,3), np.array(nucleus_coord).reshape(nucleus_num,3), decimal=8) + + charge_external_np = trexio.read_external_array(self.test_file, name=external_1Dint32_name, dtype="int32", size=nucleus_num) + assert charge_external_np.dtype is np.dtype(np.int32) + assert charge_external_np.size == nucleus_num + np.testing.assert_array_almost_equal(charge_external_np, np.array(nucleus_charge, dtype=np.int32)) + + def test_read_errors(self): """Test some reading errors.""" self.open(mode='r') diff --git a/src/pytrexio.i b/src/pytrexio.i index 5f3a4e18..3b701794 100644 --- a/src/pytrexio.i +++ b/src/pytrexio.i @@ -108,6 +108,9 @@ import_array(); /* For some reasons SWIG does not apply the proper bitfield_t typemap, so one has to manually specify int64_t* ARGOUT_ARRAY1 below */ %apply (int64_t* ARGOUT_ARRAY1, int32_t DIM1) {(bitfield_t* const bit_list, const int32_t N_int)}; +/* For passing dimensions of external arrays from Python front to C back */ +%apply (uint64_t* IN_ARRAY1, int32_t DIM1) {(const uint64_t* dims_in, const int32_t dims_dim_in)}; + /* This tells SWIG to treat char ** dset_in pattern as a special case Enables access to trexio_[...]_write_dset_str set of functions directly, i.e. 
by converting input list of strings from Python into char ** of C diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index 91bfdf8d..eb0fabeb 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -215,6 +215,7 @@ __trexio_path__ = None | ~TREXIO_INVALID_STATE~ | 35 | 'Inconsistent state of the file' | | ~TREXIO_VERSION_PARSING_ISSUE~ | 36 | 'Failed to parse package_version' | | ~TREXIO_PHASE_CHANGE~ | 37 | 'The function succeeded with a change of sign' | + | ~TREXIO_NOT_SUPPORTED~ | 38 | 'This functionality is not supported yet' | # We need to force Emacs not to indent the Python code: # -*- org-src-preserve-indentation: t @@ -225,8 +226,8 @@ __trexio_path__ = None and the corresponding message are not propagated to the source code. #+begin_src python :var table=table-exit-codes :results drawer -""" This script generates the C and Fortran constants for the error - codes from the org-mode table. +""" This script generates the C, Fortran and Python constants + for the error codes from the org-mode table. 
""" result = [ "#+begin_src c :tangle prefix_front.h :exports none" ] @@ -253,7 +254,6 @@ for (text, code,_) in table: result += [ "#+end_src" ] return '\n'.join(result) - #+end_src @@ -299,6 +299,7 @@ return '\n'.join(result) #define TREXIO_INVALID_STATE ((trexio_exit_code) 35) #define TREXIO_VERSION_PARSING_ISSUE ((trexio_exit_code) 36) #define TREXIO_PHASE_CHANGE ((trexio_exit_code) 37) + #define TREXIO_NOT_SUPPORTED ((trexio_exit_code) 38) #+end_src #+begin_src f90 :tangle prefix_fortran.f90 :exports none @@ -341,6 +342,7 @@ return '\n'.join(result) integer(trexio_exit_code), parameter :: TREXIO_INVALID_STATE = 35 integer(trexio_exit_code), parameter :: TREXIO_VERSION_PARSING_ISSUE = 36 integer(trexio_exit_code), parameter :: TREXIO_PHASE_CHANGE = 37 + integer(trexio_exit_code), parameter :: TREXIO_NOT_SUPPORTED = 38 #+end_src #+begin_src python :tangle prefix_python.py :exports none @@ -384,6 +386,7 @@ return '\n'.join(result) TREXIO_INVALID_STATE = 35 TREXIO_VERSION_PARSING_ISSUE = 36 TREXIO_PHASE_CHANGE = 37 + TREXIO_NOT_SUPPORTED = 38 #+end_src :end: @@ -540,6 +543,12 @@ return '\n'.join(result) case TREXIO_VERSION_PARSING_ISSUE: return "Failed to parse package_version"; break; + case TREXIO_PHASE_CHANGE: + return "The function succeeded with a change of sign"; + break; + case TREXIO_NOT_SUPPORTED: + return "This functionality is not supported yet"; + break; #+end_example **** C source code @@ -1912,6 +1921,742 @@ trexio_pre_close (trexio_t* file) } #+end_src +** External group (generic I/O of arbitrary data) + + ~trexio_[write|read|has]_external_[datatype]_array~ + ~trexio_[write|read|has]_external_[datatype]_attribute~ + write|read|check for existence of an arbitrary data block in a given TREXIO file. + + ~external~ here means that the data does not correspond to the TREXIO format definition. + Thus, it is not present in the =trex.json= specification and does not support any of the + advanced TREXIO capabilities (e.g. 
sparse I/O, verification of dimensions etc). + + **Input parameters:** + 1) ~trexio_file~ - ~trexio_t*~ pointer to the TREXIO file + 2) ~array~ - ~void*~ pointer to the flat array of data to be written + 3) ~rank~ - ~uint32_t~ value: rank (number of dimensions) of an array + 4) ~dimensions~ - ~uint64_t*~ pointer to the array with the number of elements per dimension + 5) ~datatype~ - ~char*~ string specifying the datatype (e.g. ~double/float~, ~int32/int64~) + 6) ~name~ - ~char*~ lowercase string with the name of the data block (e.g. how it will appear in the TREXIO file) + **important for ~trexio_has_external_[array|attribute]~ to work!** + + **Output:** + - ~trexio_exit_code~ + + **Note 1:** experimental functionality. Please report any issues that occur. + **Note 2:** I/O of strings is not supported yet. + + #+NAME: table-external-datatypes + | TREXIO suffix | C | Fortran | Python | + |---------------+-----------+---------+--------| + | ~int32~ | ~int32_t~ | | | + | ~float32~ | ~float~ | | | + | ~int64~ | ~int64_t~ | | | + | ~float64~ | ~double~ | | | + +*** C + +The Python code below will generate the C function headers and source code for I/O of different datatypes (see table above). +The C source code templates are inserted into Python code from the corresponding org-mode blocks before execution. + +**Note:** the source code for ~has~ functions is written only once (manually) at the end, it is not auto-generated. 
+ + #+NAME:template_write_func_c + #+begin_src c +trexio_exit_code +trexio_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; imode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_write_external_$suffix$_array(file, array, rank, dimensions, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_write_safe_external_$suffix$_array(trexio_t* const file, const $c_type$* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_in == NULL) return TREXIO_INVALID_ARG_2; + if (dim_in <= 0) return TREXIO_INVALID_ARG_3; + if (rank == 0) return TREXIO_INVALID_ARG_4; + if (dims_in == NULL) return TREXIO_INVALID_ARG_5; + if (dims_dim_in == 0) return TREXIO_INVALID_ARG_6; + if (name == NULL) return TREXIO_INVALID_ARG_7; + for (uint32_t i=0; iback_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_$suffix$_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_read_safe_external_$suffix$_array(trexio_t* const file, $c_type$* const dset_out, const int64_t dim_out, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_out == NULL) return 
TREXIO_INVALID_ARG_2; + if (dim_out <= 0) return TREXIO_INVALID_ARG_3; + if (name == NULL) return TREXIO_INVALID_ARG_4; + + return trexio_read_external_$suffix$_array(file, dset_out, name); +} + #+end_src + + #+begin_src python :var table=table-external-datatypes :results drawer :noweb yes +""" This script generates the C functions for generic I/O (external group) """ + +template_write_func_h = "trexio_exit_code trexio_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name);\n" +template_write_func_h += "trexio_exit_code trexio_write_safe_external_$suffix$_array(trexio_t* const file, const $c_type$* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name);" +template_read_func_h = "trexio_exit_code trexio_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name);\n" +template_read_func_h += "trexio_exit_code trexio_read_safe_external_$suffix$_array(trexio_t* const file, $c_type$* const dset_out, const int64_t dim_out, const char* name);" +template_write_func_c = """ +<> +""" +template_read_func_c = """ +<> +""" + +result_h = ["#+begin_src c :tangle prefix_front.h :exports none"] +result_c = ["#+begin_src c :tangle prefix_front.c"] +for (suffix, c_type, _, _) in table: + # populate C headers + result_h.append(template_write_func_h.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + result_h.append(template_read_func_h.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + # populate C source code + result_c.append(template_write_func_c.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + result_c.append(template_read_func_c.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + +# trexio_has functions do not require datatype and are thus 
unique +result_h.append("trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name);") +result_h.append("trexio_exit_code trexio_has_external(trexio_t* const file);") +result_h.append("#+end_src") + +result_c.append("#+end_src") + +return '\n'.join(result_h + ['\n'] + result_c) +#+end_src + +#+RESULTS: +:results: +#+begin_src c :tangle prefix_front.h :exports none +trexio_exit_code trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_write_safe_external_int32_array(trexio_t* const file, const int32_t* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name); +trexio_exit_code trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name); +trexio_exit_code trexio_read_safe_external_int32_array(trexio_t* const file, int32_t* const dset_out, const int64_t dim_out, const char* name); +trexio_exit_code trexio_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_write_safe_external_float32_array(trexio_t* const file, const float* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name); +trexio_exit_code trexio_read_external_float32_array(trexio_t* const file, float* const array, const char* name); +trexio_exit_code trexio_read_safe_external_float32_array(trexio_t* const file, float* const dset_out, const int64_t dim_out, const char* name); +trexio_exit_code trexio_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_write_safe_external_int64_array(trexio_t* const file, const int64_t* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* 
dims_in, const int32_t dims_dim_in, const char* name); +trexio_exit_code trexio_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name); +trexio_exit_code trexio_read_safe_external_int64_array(trexio_t* const file, int64_t* const dset_out, const int64_t dim_out, const char* name); +trexio_exit_code trexio_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_write_safe_external_float64_array(trexio_t* const file, const double* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name); +trexio_exit_code trexio_read_external_float64_array(trexio_t* const file, double* const array, const char* name); +trexio_exit_code trexio_read_safe_external_float64_array(trexio_t* const file, double* const dset_out, const int64_t dim_out, const char* name); +trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name); +trexio_exit_code trexio_has_external(trexio_t* const file); +#+end_src + + +#+begin_src c :tangle prefix_front.c + +trexio_exit_code +trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; imode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_write_external_int32_array(file, array, rank, dimensions, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + 
+trexio_exit_code +trexio_write_safe_external_int32_array(trexio_t* const file, const int32_t* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_in == NULL) return TREXIO_INVALID_ARG_2; + if (dim_in <= 0) return TREXIO_INVALID_ARG_3; + if (rank == 0) return TREXIO_INVALID_ARG_4; + if (dims_in == NULL) return TREXIO_INVALID_ARG_5; + if (dims_dim_in == 0) return TREXIO_INVALID_ARG_6; + if (name == NULL) return TREXIO_INVALID_ARG_7; + for (uint32_t i=0; iback_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_int32_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_read_safe_external_int32_array(trexio_t* const file, int32_t* const dset_out, const int64_t dim_out, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_out == NULL) return TREXIO_INVALID_ARG_2; + if (dim_out <= 0) return TREXIO_INVALID_ARG_3; + if (name == NULL) return TREXIO_INVALID_ARG_4; + + return trexio_read_external_int32_array(file, dset_out, name); +} + + +trexio_exit_code +trexio_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; imode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_write_external_float32_array(file, 
array, rank, dimensions, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_write_safe_external_float32_array(trexio_t* const file, const float* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_in == NULL) return TREXIO_INVALID_ARG_2; + if (dim_in <= 0) return TREXIO_INVALID_ARG_3; + if (rank == 0) return TREXIO_INVALID_ARG_4; + if (dims_in == NULL) return TREXIO_INVALID_ARG_5; + if (dims_dim_in == 0) return TREXIO_INVALID_ARG_6; + if (name == NULL) return TREXIO_INVALID_ARG_7; + for (uint32_t i=0; iback_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_float32_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_read_safe_external_float32_array(trexio_t* const file, float* const dset_out, const int64_t dim_out, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_out == NULL) return TREXIO_INVALID_ARG_2; + if (dim_out <= 0) return TREXIO_INVALID_ARG_3; + if (name == NULL) return TREXIO_INVALID_ARG_4; + + return trexio_read_external_float32_array(file, dset_out, name); +} + + +trexio_exit_code +trexio_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; imode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return 
TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_write_external_int64_array(file, array, rank, dimensions, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_write_safe_external_int64_array(trexio_t* const file, const int64_t* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_in == NULL) return TREXIO_INVALID_ARG_2; + if (dim_in <= 0) return TREXIO_INVALID_ARG_3; + if (rank == 0) return TREXIO_INVALID_ARG_4; + if (dims_in == NULL) return TREXIO_INVALID_ARG_5; + if (dims_dim_in == 0) return TREXIO_INVALID_ARG_6; + if (name == NULL) return TREXIO_INVALID_ARG_7; + for (uint32_t i=0; iback_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_int64_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_read_safe_external_int64_array(trexio_t* const file, int64_t* const dset_out, const int64_t dim_out, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_out == NULL) return TREXIO_INVALID_ARG_2; + if (dim_out <= 0) return TREXIO_INVALID_ARG_3; + if (name == NULL) return TREXIO_INVALID_ARG_4; + + return trexio_read_external_int64_array(file, dset_out, name); +} + + +trexio_exit_code +trexio_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; imode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc != 
TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_write_external_float64_array(file, array, rank, dimensions, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_write_safe_external_float64_array(trexio_t* const file, const double* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_in == NULL) return TREXIO_INVALID_ARG_2; + if (dim_in <= 0) return TREXIO_INVALID_ARG_3; + if (rank == 0) return TREXIO_INVALID_ARG_4; + if (dims_in == NULL) return TREXIO_INVALID_ARG_5; + if (dims_dim_in == 0) return TREXIO_INVALID_ARG_6; + if (name == NULL) return TREXIO_INVALID_ARG_7; + for (uint32_t i=0; iback_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_float64_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_read_safe_external_float64_array(trexio_t* const file, double* const dset_out, const int64_t dim_out, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_out == NULL) return TREXIO_INVALID_ARG_2; + if (dim_out <= 0) return TREXIO_INVALID_ARG_3; + if (name == NULL) return TREXIO_INVALID_ARG_4; + + return trexio_read_external_float64_array(file, dset_out, name); +} + +#+end_src +:end: + + #+begin_src c :tangle prefix_front.c +trexio_exit_code +trexio_has_external(trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + assert(file->back_end < TREXIO_INVALID_BACK_END); + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return 
trexio_hdf5_has_external(file); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_has_external_array(trexio_t* const file, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (name == NULL) return TREXIO_INVALID_ARG_2; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_has_external_array(file, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + #+end_src + +*** Python + + #+begin_src python :tangle basic_python.py +def write_external_array(trexio_file, dset_w, name) -> None: + """Write an arbitrary array of numbers in the TREXIO file. + + Parameters: + + trexio_file: + TREXIO File object. + + dset_w: list, tuple OR numpy.ndarray + Array of values to be written. + + name: string + Name of the array as it will be stored in the external group of TREXIO file + + Raises: + - trexio.Error if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message. + - Exception from some other error (e.g. RuntimeError). 
+ """ + + # get dimensions and rank from input array + if not isinstance(dset_w, (list, tuple)): + # if input array is not a list or tuple then it is probably a numpy array + rank = len(dset_w.shape) + dimensions = np.array(dset_w.shape, dtype=np.uint64) + else: + get_shape = lambda l: [len(l)] + get_shape(l[0]) if (type(l) == list or type(l) == tuple) else [] + get_type = lambda l: [type(l)] + get_type(l[0]) if (type(l) == list or type(l) == tuple) else [type(l)] + dset_shape = get_shape(dset_w) + dset_dtype = get_type(dset_w)[-1] + rank = len(dset_shape) + dimensions = np.array(dset_shape, dtype=np.uint64) + + # decide whether to flatten or not + doFlatten = False + if rank > 1: + doFlatten = True + + # handle list/typle + if isinstance(dset_w, (list, tuple)): + if dset_dtype is int: + if doFlatten: + rc = pytr.trexio_write_safe_external_int64_array(trexio_file.pytrexio_s, np.array(dset_w, dtype=np.int64).flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_int64_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + elif dset_dtype is float: + if doFlatten: + rc = pytr.trexio_write_safe_external_float64_array(trexio_file.pytrexio_s, np.array(dset_w, dtype=np.float64).flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_float64_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + else: + raise TypeError("Unsupported type of a list/tuple for generic I/O of arrays.") + + # handle numpy array + elif isinstance(dset_w, np.ndarray): + if dset_w.dtype==np.int64: + if doFlatten: + rc = pytr.trexio_write_safe_external_int64_array(trexio_file.pytrexio_s, dset_w.flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_int64_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + elif dset_w.dtype==np.int32: + if doFlatten: + rc = pytr.trexio_write_safe_external_int32_array(trexio_file.pytrexio_s, dset_w.flatten(), rank, dimensions, name) + else: + rc = 
pytr.trexio_write_safe_external_int32_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + elif dset_w.dtype==np.float64: + if doFlatten: + rc = pytr.trexio_write_safe_external_float64_array(trexio_file.pytrexio_s, dset_w.flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_float64_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + elif dset_w.dtype==np.float32: + if doFlatten: + rc = pytr.trexio_write_safe_external_float32_array(trexio_file.pytrexio_s, dset_w.flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_float32_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + else: + raise TypeError("Unsupported type of a NumPy array for generic I/O of arrays.") + else: + raise TypeError("Unsupported array type for generic I/O.") + + if rc != TREXIO_SUCCESS: + raise Error(rc) + #+end_src + + #+begin_src python :tangle basic_python.py +def read_external_array(trexio_file, name, size, dtype): + """Read an external array of numbers from the TREXIO file. + + Parameters: + + trexio_file: + TREXIO File object. + name: + string name of an array + size: + integer value corresponding to the total number of elements to read + dtype: + string indicating the datatype of the array (int/int32/int64/float/float32/float64/double) + + Returns: + ~dset_r~: 1D NumPy array with ~dim~ elements corresponding to of "name" array read from the TREXIO file. + + Raises: + - trexio.Error if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message. + - Exception from some other error (e.g. RuntimeError). 
+""" + + if dtype in ['int', 'int64']: + rc, dset_r = pytr.trexio_read_safe_external_int64_array(trexio_file.pytrexio_s, size, name) + elif dtype in ['int32']: + rc, dset_r = pytr.trexio_read_safe_external_int32_array(trexio_file.pytrexio_s, size, name) + elif dtype in ['float', 'float64', 'double']: + rc, dset_r = pytr.trexio_read_safe_external_float64_array(trexio_file.pytrexio_s, size, name) + elif dtype in ['float32']: + rc, dset_r = pytr.trexio_read_safe_external_float32_array(trexio_file.pytrexio_s, size, name) + else: + raise ValueError("Unsupported dtype passed to read_external_array.") + + if rc != TREXIO_SUCCESS: + raise Error(rc) + + return dset_r + #+end_src + + #+begin_src python :tangle basic_python.py +def has_external_array(trexio_file, name) -> bool: + """Check that external array exists in the TREXIO file. + + trexio_file: + Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. + name: + String name of the array from the TREXIO file + + Returns: + True if the variable exists, False otherwise + + Raises: + - trexio.Error if TREXIO return code ~rc~ is TREXIO_FAILURE and prints the error message using string_of_error. + - Exception from some other error (e.g. RuntimeError). 
+ """ + + rc = pytr.trexio_has_external_array(trexio_file.pytrexio_s, name) + if rc == TREXIO_FAILURE: + raise Error(rc) + + return rc == TREXIO_SUCCESS + #+end_src + * Templates for front end ** Description @@ -3335,7 +4080,7 @@ trexio_read_$group_dset$(trexio_t* const file, /* Find the maximal value along all dimensions to define the compression technique in the back end */ int64_t max_dim = unique_dims[0]; -#if (unique_rank != 1) +#if (unique_rank != 1) for (uint32_t i = 1; i < unique_rank; i++) { if (unique_dims[i] > max_dim) max_dim = unique_dims[i]; } @@ -3468,7 +4213,7 @@ trexio_write_$group_dset$(trexio_t* const file, /* Find the maximal value along all dimensions to define the compression technique in the back end */ int64_t max_dim = unique_dims[0]; -#if (unique_rank != 1) +#if (unique_rank != 1) for (uint32_t i = 1; i < unique_rank; i++) { if (unique_dims[i] > max_dim) max_dim = unique_dims[i]; } diff --git a/src/templates_hdf5/templator_hdf5.org b/src/templates_hdf5/templator_hdf5.org index ac684755..ed3b2dba 100644 --- a/src/templates_hdf5/templator_hdf5.org +++ b/src/templates_hdf5/templator_hdf5.org @@ -45,12 +45,12 @@ #+begin_src c :tangle prefix_hdf5.c :noweb yes <
> #include "trexio_hdf5.h" - #+end_src * Template for HDF5 definitions #+begin_src c :tangle def_hdf5.c +#define EXTERNAL_GROUP_NAME "external" #define $GROUP$_GROUP_NAME "$group$" #define $GROUP_NUM$_NAME "$group_num$" #define $GROUP_DSET$_NAME "$group_dset$" @@ -69,6 +69,7 @@ typedef struct trexio_hdf5_s { trexio_t parent ; hid_t file_id; hid_t $group$_group; + hid_t external_group; } trexio_hdf5_t; #+end_src @@ -158,18 +159,26 @@ trexio_hdf5_init (trexio_t* const file) case 'r': if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) > 0) f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT); if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) == 0) f->$group$_group = (hid_t) 0; + /* Manual addition for the "external" group */ + if (H5Lexists(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT) > 0) f->external_group = H5Gopen(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT); + if (H5Lexists(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT) == 0) f->external_group = (hid_t) 0; break; case 'u': case 'w': if (f_exists == 1) { if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) > 0) f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT); if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) == 0) f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + /* Manual addition for the "external" group */ + if (H5Lexists(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT) > 0) f->external_group = H5Gopen(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT); + if (H5Lexists(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT) == 0) f->external_group = H5Gcreate(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); } else { f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + f->external_group = H5Gcreate(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); } break; } if (f->$group$_group < (hid_t) 0) return 
TREXIO_INVALID_ID; + if (f->external_group < (hid_t) 0) return TREXIO_INVALID_ID; return TREXIO_SUCCESS; } @@ -185,6 +194,9 @@ trexio_hdf5_deinit (trexio_t* const file) if (f->$group$_group != (hid_t) 0) H5Gclose(f->$group$_group); f->$group$_group = 0; + if (f->external_group != (hid_t) 0) H5Gclose(f->external_group); + f->external_group = 0; + H5Fclose(f->file_id); f->file_id = 0; @@ -383,7 +395,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $ /* Read dataset */ herr_t status = H5Dread(dset_id, H5T_$GROUP_DSET_H5_DTYPE$, - H5S_ALL, H5S_ALL, H5P_DEFAULT, + H5S_ALL, H5S_ALL, H5P_DEFAULT, $group_dset$); H5Sclose(dspace_id); @@ -413,7 +425,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$* Consider using HDF5-native h5repack utility after deleting/overwriting big datasets. ,*/ - + if ((trexio_hdf5_has_$group_dset$(file) == TREXIO_SUCCESS) && (file->mode == 'u')) { herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); if (status_del < 0) return TREXIO_FAILURE; @@ -1270,6 +1282,446 @@ trexio_exit_code trexio_hdf5_has_determinant_list(trexio_t* const file) } #+end_src +* Source code for external group (generic I/O of arbitrary data) + + ~trexio_hdf5_[write|read|has]_external_[datatype]_array~ + ~trexio_hdf5_[write|read|has]_external_[datatype]_attribute~ + write|read|check for existence of an arbitrary data block in a given TREXIO file. + + ~external~ here means that the data does not correspond to the TREXIO format definition. + Thus, it is not present in the =trex.json= specification and does not support any of the + advanced TREXIO capabilities (e.g. sparse I/O, verification of dimensions etc). 
+ + **Input parameters:** + 1) ~trexio_file~ - ~trexio_t*~ pointer to the TREXIO file + 2) ~array~ - ~void*~ pointer to the flat array of data to be written + 3) ~rank~ - ~uint32_t~ value: rank (number of dimensions) of an array + 4) ~dimensions~ - ~uint64_t*~ pointer to the array with the number of elements per dimension + 5) ~datatype~ - ~char*~ string specifying the datatype (e.g. ~double/float~, ~int32/int64~) + 6) ~name~ - ~char*~ lowercase string with the name of the data block (e.g. how it will appear in the TREXIO file) + **important for ~trexio_has_external~ to work!** + + **Output:** + - ~trexio_exit_code~ + + **Note 1:** experimental functionality. Please report any issues that occur. + **Note 2:** I/O of strings is not supported yet. + + #+NAME: table-external-datatypes + | TREXIO suffix | C | HDF5 | Fortran | Python | + |---------------+-----------+----------+---------+--------| + | ~int32~ | ~int32_t~ | ~INT32~ | | | + | ~int64~ | ~int64_t~ | ~INT64~ | | | + | ~float32~ | ~float~ | ~FLOAT~ | | | + | ~float64~ | ~double~ | ~DOUBLE~ | | | + +*** C + +The Python code below will generate the C function headers and source code for I/O of different datatypes (see table above). +The C source code templates are inserted into Python code from the corresponding org-mode blocks before execution. + +**Note:** the source code for ~has~ functions is written only once (manually) at the end; it is not auto-generated. 
+ + #+NAME:template_write_hdf5_func_c + #+begin_src c +trexio_exit_code +trexio_hdf5_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_$HDF5_TYPE$, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_$HDF5_TYPE$, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + #+end_src + + #+NAME:template_read_hdf5_func_c + #+begin_src c +trexio_exit_code +trexio_hdf5_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name) +{ + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_$HDF5_TYPE$, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); + + H5Dclose(dset_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + #+end_src + + #+begin_src python :var table=table-external-datatypes :results drawer :noweb yes +""" This script generates the C functions for generic I/O (external group) """ + +template_write_func_h = "trexio_exit_code trexio_hdf5_write_external_$suffix$_array(trexio_t* const file, const $c_type$* 
array, const uint32_t rank, const uint64_t* dimensions, const char* name);" +template_read_func_h = "trexio_exit_code trexio_hdf5_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name);" +template_write_func_c = """ +<<template_write_hdf5_func_c>> +""" +template_read_func_c = """ +<<template_read_hdf5_func_c>> +""" + +result_h = ["#+begin_src c :tangle prefix_hdf5.h :exports none"] +result_c = ["#+begin_src c :tangle basic_hdf5.c"] +for (suffix, c_type, hdf5_type, _, _) in table: + # populate C headers + result_h.append(template_write_func_h.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + result_h.append(template_read_func_h.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + # populate C source code + result_c.append(template_write_func_c.replace("$HDF5_TYPE$", hdf5_type.replace("~","")).replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + result_c.append(template_read_func_c.replace("$HDF5_TYPE$", hdf5_type.replace("~","")).replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + +# trexio_has functions do not require datatype and are thus unique +result_h.append("trexio_exit_code trexio_hdf5_has_external_array(trexio_t* const file, const char* name);") +result_h.append("trexio_exit_code trexio_hdf5_has_external(trexio_t* const file);") +result_h.append("#+end_src") + +result_c.append("#+end_src") + +return '\n'.join(result_h + ['\n'] + result_c) +#+end_src + +#+RESULTS: +:results: +#+begin_src c :tangle prefix_hdf5.h :exports none +trexio_exit_code trexio_hdf5_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_hdf5_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name); +trexio_exit_code trexio_hdf5_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t 
rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_hdf5_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name); +trexio_exit_code trexio_hdf5_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_hdf5_read_external_float32_array(trexio_t* const file, float* const array, const char* name); +trexio_exit_code trexio_hdf5_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_hdf5_read_external_float64_array(trexio_t* const file, double* const array, const char* name); +trexio_exit_code trexio_hdf5_has_external_array(trexio_t* const file, const char* name); +trexio_exit_code trexio_hdf5_has_external(trexio_t* const file); +#+end_src + + +#+begin_src c :tangle basic_hdf5.c + +trexio_exit_code +trexio_hdf5_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_INT32, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_INT32, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code 
+trexio_hdf5_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name) +{ + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_INT32, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); + + H5Dclose(dset_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_INT64, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_INT64, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name) +{ + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_INT64, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); 
+ + H5Dclose(dset_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_FLOAT, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_FLOAT, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_read_external_float32_array(trexio_t* const file, float* const array, const char* name) +{ + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_FLOAT, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); + + H5Dclose(dset_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = 
H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_DOUBLE, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_DOUBLE, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_read_external_float64_array(trexio_t* const file, double* const array, const char* name) +{ + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_DOUBLE, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); + + H5Dclose(dset_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + +#+end_src +:end: + + +#+begin_src c :tangle basic_hdf5.c +trexio_exit_code +trexio_hdf5_has_external (trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + struct H5G_info_t group_info; + + /* H5Gget_info return info about the HDF5 group as a group_info struct */ + herr_t status = H5Gget_info(f->external_group, &group_info); + if (status < 0) return TREXIO_FAILURE; + + /* If nlinks==0 --> the group is empty, i.e. 
non-existent */ + if (group_info.nlinks == (hsize_t) 0) { + return TREXIO_HAS_NOT; + } else { + return TREXIO_SUCCESS; + } + +} + +trexio_exit_code +trexio_hdf5_has_external_array(trexio_t* const file, const char* name) +{ + + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + if (f->external_group == (hid_t) 0) return TREXIO_HAS_NOT; /* group handle is a hid_t; 0 marks a group that was not opened */ + + htri_t exists = H5Lexists(f->external_group, name, H5P_DEFAULT); + if (exists > 0) { + return TREXIO_SUCCESS; + } else if (exists < 0) { + return TREXIO_FAILURE; + } else { + return TREXIO_HAS_NOT; + } + +} +#+end_src + * Helper functions #+begin_src c :tangle helpers_hdf5.c diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d415c170..09cf7619 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -24,6 +24,7 @@ if(ENABLE_HDF5) io_dset_float_hdf5 io_dset_str_hdf5 io_dset_sparse_hdf5 + io_dset_external_hdf5 io_determinant_hdf5 io_safe_dset_float_hdf5 io_dset_int_hdf5 diff --git a/tests/io_dset_external_hdf5.c b/tests/io_dset_external_hdf5.c new file mode 100644 index 00000000..b89a05b3 --- /dev/null +++ b/tests/io_dset_external_hdf5.c @@ -0,0 +1,145 @@ +#include "trexio.h" +#include +#include +#include +#include + +#define TEST_BACKEND TREXIO_HDF5 +#define TREXIO_FILE "test_dset_external.h5" +#define RM_COMMAND "rm -f -- " TREXIO_FILE + +#define VECTOR_NAME "test external vector" +#define MATRIX_NAME "test external matrix" + +static int test_write_dset (const char* file_name, const back_end_t backend) { + +/* Try to write a dataset with numerical (int) values into the TREXIO file */ + + trexio_t* file = NULL; + trexio_exit_code rc; + + // parameters to be written + uint32_t rank_v = 1; + uint64_t dims_v[1] = {12}; + int32_t vector[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + + uint32_t rank_m = 2; + uint64_t dims_m[2] = {3, 4}; + int32_t matrix[12] = { + 0 , 1 , 2 , + 3 , 4 , 5 , + 6 , 7 , 8 , + 9 , 10 , 11 + }; + +/*================= START OF TEST ==================*/ + + // open file in 'write' mode + file = 
trexio_open(file_name, 'w', backend, &rc); + assert (file != NULL); + + // write numerical (int32) vector in an external group of the file + rc = trexio_write_external_int32_array(file, vector, rank_v, dims_v, VECTOR_NAME); + assert (rc == TREXIO_SUCCESS); + + // write numerical (int32) matrix in an external group of the file + rc = trexio_write_external_int32_array(file, matrix, rank_m, dims_m, MATRIX_NAME); + assert (rc == TREXIO_SUCCESS); + + // close current session + rc = trexio_close(file); + assert (rc == TREXIO_SUCCESS); + +/*================= END OF TEST ==================*/ + + return 0; +} + + +static int test_has_dset (const char* file_name, const back_end_t backend) { + +/* Try to check the existence of a dataset in the TREXIO file */ + + trexio_t* file = NULL; + trexio_exit_code rc; + +/*================= START OF TEST ==================*/ + + // open file in 'read' mode + file = trexio_open(file_name, 'r', backend, &rc); + assert (file != NULL); + + // check that the external group exists + rc = trexio_has_external(file); + assert(rc==TREXIO_SUCCESS); + + // check that the previously written vector exists + rc = trexio_has_external_array(file, VECTOR_NAME); + assert (rc == TREXIO_SUCCESS); + + // check that the previously written matrix exists + rc = trexio_has_external_array(file, MATRIX_NAME); + assert (rc == TREXIO_SUCCESS); + + // close current session + rc = trexio_close(file); + assert (rc == TREXIO_SUCCESS); + +/*================= END OF TEST ==================*/ + + return 0; +} + + +static int test_read_dset (const char* file_name, const back_end_t backend) { + +/* Try to read a dataset with numerical (int) values from the TREXIO file */ + + trexio_t* file = NULL; + trexio_exit_code rc; + + // parameters to be read + int32_t* vector; + +/*================= START OF TEST ==================*/ + + // open file in 'read' mode + file = trexio_open(file_name, 'r', backend, &rc); + assert (file != NULL); + + // read numerical dataset from the file + vector = (int32_t*) 
calloc(12, sizeof(int32_t)); + rc = trexio_read_external_int32_array(file, vector, VECTOR_NAME); + assert (rc == TREXIO_SUCCESS); + assert (vector[0] == 0); + assert (vector[11] == 11); + + free(vector); + + // close current session + rc = trexio_close(file); + assert (rc == TREXIO_SUCCESS); + +/*================= END OF TEST ==================*/ + + return 0; +} + + +int main(void) { + +/*============== Test launcher ================*/ + + int rc; + rc = system(RM_COMMAND); + assert (rc == 0); + + test_write_dset (TREXIO_FILE, TEST_BACKEND); + test_has_dset (TREXIO_FILE, TEST_BACKEND); + test_read_dset (TREXIO_FILE, TEST_BACKEND); + + rc = system(RM_COMMAND); + assert (rc == 0); + + return 0; +}