From dff995c13fd421b7dcbfd4c0d1843741d07478c0 Mon Sep 17 00:00:00 2001 From: q-posev Date: Sat, 6 May 2023 19:55:40 +0200 Subject: [PATCH 1/5] Front end prototype for external group I/O --- src/templates_front/templator_front.org | 168 +++++++++++++++++++++++- 1 file changed, 163 insertions(+), 5 deletions(-) diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index 91bfdf8d..e7e8bc3d 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -215,6 +215,7 @@ __trexio_path__ = None | ~TREXIO_INVALID_STATE~ | 35 | 'Inconsistent state of the file' | | ~TREXIO_VERSION_PARSING_ISSUE~ | 36 | 'Failed to parse package_version' | | ~TREXIO_PHASE_CHANGE~ | 37 | 'The function succeeded with a change of sign' | + | ~TREXIO_NOT_SUPPORTED~ | 38 | 'This functionality is not supported yet' | # We need to force Emacs not to indent the Python code: # -*- org-src-preserve-indentation: t @@ -225,8 +226,8 @@ __trexio_path__ = None and the corresponding message are not propagated to the source code. #+begin_src python :var table=table-exit-codes :results drawer -""" This script generates the C and Fortran constants for the error - codes from the org-mode table. +""" This script generates the C, Fortran and Python constants + for the error codes from the org-mode table. 
""" result = [ "#+begin_src c :tangle prefix_front.h :exports none" ] @@ -253,7 +254,6 @@ for (text, code,_) in table: result += [ "#+end_src" ] return '\n'.join(result) - #+end_src @@ -299,6 +299,7 @@ return '\n'.join(result) #define TREXIO_INVALID_STATE ((trexio_exit_code) 35) #define TREXIO_VERSION_PARSING_ISSUE ((trexio_exit_code) 36) #define TREXIO_PHASE_CHANGE ((trexio_exit_code) 37) + #define TREXIO_NOT_SUPPORTED ((trexio_exit_code) 38) #+end_src #+begin_src f90 :tangle prefix_fortran.f90 :exports none @@ -341,6 +342,7 @@ return '\n'.join(result) integer(trexio_exit_code), parameter :: TREXIO_INVALID_STATE = 35 integer(trexio_exit_code), parameter :: TREXIO_VERSION_PARSING_ISSUE = 36 integer(trexio_exit_code), parameter :: TREXIO_PHASE_CHANGE = 37 + integer(trexio_exit_code), parameter :: TREXIO_NOT_SUPPORTED = 38 #+end_src #+begin_src python :tangle prefix_python.py :exports none @@ -384,6 +386,7 @@ return '\n'.join(result) TREXIO_INVALID_STATE = 35 TREXIO_VERSION_PARSING_ISSUE = 36 TREXIO_PHASE_CHANGE = 37 + TREXIO_NOT_SUPPORTED = 38 #+end_src :end: @@ -540,6 +543,12 @@ return '\n'.join(result) case TREXIO_VERSION_PARSING_ISSUE: return "Failed to parse package_version"; break; + case TREXIO_PHASE_CHANGE: + return "The function succeeded with a change of sign"; + break; + case TREXIO_NOT_SUPPORTED: + return "This functionality is not supported yet"; + break; #+end_example **** C source code @@ -1912,6 +1921,155 @@ trexio_pre_close (trexio_t* file) } #+end_src +** External group (generic I/O of arbitrary data) + + ~trexio_[write|read|has]_external_[datatype]_array~ + ~trexio_[write|read|has]_external_[datatype]_attribute~ + write|read|check for existence of an arbitrary data block in a given TREXIO file. + + ~external~ here means that the data does not correspond to the TREXIO format definition. + Thus, it is not present in the =trex.json= specification and does not support any of the + advanced TREXIO capabilities (e.g. 
sparse I/O, verification of dimensions etc). + + **Input parameters:** + 1) ~trexio_file~ - ~trexio_t*~ pointer to the TREXIO file + 2) ~array~ - ~void*~ pointer to the flat array of data to be written + 3) ~rank~ - ~uint32_t~ value: rank (number of dimensions) of an array + 4) ~dimensions~ - ~uint64_t*~ pointer to the array with the number of elements per dimension + 4) ~datatype~ - ~char*~ string specifying the datatype (e.g. ~double/float~, ~int32/int64~) + 5) ~name~ - ~char*~ lowercase string with the name of the data block (e.g. how it will appear in the TREXIO file) + **important for ~trexio_has_external~ to work!** + + **Output:** + - ~trexio_exit_code~ + + **Note 1:** experimental functionality. Please report any issues that occur. + **Note 2:** I/O of strings is not supported yet. + + #+NAME: table-external-datatypes + | TREXIO suffix | C | Fortran | Python | + |---------------+-----------+---------+--------| + | ~int32~ | ~int32_t~ | | | + | ~int64~ | ~int64_t~ | | | + | ~float32~ | ~float~ | | | + | ~float64~ | ~double~ | | | + +*** C + + #+begin_src python :var table=table-external-datatypes :results drawer +""" This script generates the C and Fortran functions for generic I/O """ + +template_write_func = "trexio_exit_code trexio_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name);" +template_read_func = "trexio_exit_code trexio_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name);" + +result = [] +result.append("#+begin_src c :tangle prefix_front.h :exports none") +for (suffix, c_type, _, _) in table: + result.append(template_write_func.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + result.append(template_read_func.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + +# trexio_has function does not require datatype and is thus unique 
+result.append("trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name);") +result.append("#+end_src") +return '\n'.join(result) +#+end_src + +#+RESULTS: +:results: +#+begin_src c :tangle prefix_front.h :exports none +trexio_exit_code trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name); +trexio_exit_code trexio_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name); +trexio_exit_code trexio_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_read_external_float32_array(trexio_t* const file, float* const array, const char* name); +trexio_exit_code trexio_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_read_external_float64_array(trexio_t* const file, double* const array, const char* name); +trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name); +#+end_src +:end: + + + #+begin_src c :tangle prefix_front.c +trexio_exit_code +trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; i<rank; i++) { + if (dimensions[i] == 0) return TREXIO_INVALID_ARG_4; + } + + trexio_exit_code rc = trexio_has_external_array(file, name) + if (rc == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc 
!= TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_write_external_int32_array(file, array, rank, dimensions, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name); +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (name == NULL) return TREXIO_INVALID_ARG_3; + + trexio_exit_code rc = trexio_has_external_array(file, name) + if (rc == TREXIO_HAS_NOT) return TREXIO_DSET_MISSING; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_int32_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +trexio_exit_code +trexio_has_external_array(trexio_t* const file, const char* name); +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (name == NULL) return TREXIO_INVALID_ARG_2; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_has_external_array(file, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + #+end_src + * Templates for front end ** Description @@ -3335,7 +3493,7 @@ trexio_read_$group_dset$(trexio_t* const file, /* Find the maximal value along all dimensions to define the compression technique in the back end */ int64_t max_dim = unique_dims[0]; -#if (unique_rank != 1) +#if (unique_rank != 1) for (uint32_t i = 1; i < unique_rank; i++) { if (unique_dims[i] > max_dim) max_dim = unique_dims[i]; } @@ -3468,7 +3626,7 @@ trexio_write_$group_dset$(trexio_t* const file, /* 
Find the maximal value along all dimensions to define the compression technique in the back end */ int64_t max_dim = unique_dims[0]; -#if (unique_rank != 1) +#if (unique_rank != 1) for (uint32_t i = 1; i < unique_rank; i++) { if (unique_dims[i] > max_dim) max_dim = unique_dims[i]; } From 8da662772c076af93791e0e592391235bcad4c25 Mon Sep 17 00:00:00 2001 From: q-posev Date: Sun, 7 May 2023 16:02:41 +0200 Subject: [PATCH 2/5] Works for int32_t arrays --- src/templates_front/templator_front.org | 37 ++++- src/templates_hdf5/templator_hdf5.org | 196 +++++++++++++++++++++++- 2 files changed, 225 insertions(+), 8 deletions(-) diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index e7e8bc3d..43496658 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -1938,7 +1938,7 @@ trexio_pre_close (trexio_t* file) 4) ~dimensions~ - ~uint64_t*~ pointer to the array with the number of elements per dimension 4) ~datatype~ - ~char*~ string specifying the datatype (e.g. ~double/float~, ~int32/int64~) 5) ~name~ - ~char*~ lowercase string with the name of the data block (e.g. 
how it will appear in the TREXIO file) - **important for ~trexio_has_external~ to work!** + **important for ~trexio_has_external_[array|attribute]~ to work!** **Output:** - ~trexio_exit_code~ @@ -1970,6 +1970,7 @@ for (suffix, c_type, _, _) in table: # trexio_has function does not require datatype and is thus unique result.append("trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name);") +result.append("trexio_exit_code trexio_has_external(trexio_t* const file);") result.append("#+end_src") return '\n'.join(result) #+end_src @@ -1986,13 +1987,37 @@ trexio_exit_code trexio_read_external_float32_array(trexio_t* const file, float* trexio_exit_code trexio_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name); trexio_exit_code trexio_read_external_float64_array(trexio_t* const file, double* const array, const char* name); trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name); +trexio_exit_code trexio_has_external(trexio_t* const file); #+end_src :end: + #+begin_src c :tangle prefix_front.c +trexio_exit_code +trexio_has_external(trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + assert(file->back_end < TREXIO_INVALID_BACK_END); + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_has_external(file); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + #+end_src #+begin_src c :tangle prefix_front.c trexio_exit_code -trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (array == NULL) return 
TREXIO_INVALID_ARG_2; @@ -2003,7 +2028,7 @@ trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, co if (dimensions[i] == 0) return TREXIO_INVALID_ARG_4; } - trexio_exit_code rc = trexio_has_external_array(file, name) + trexio_exit_code rc = trexio_has_external_array(file, name); if (rc == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_DSET_ALREADY_EXISTS; if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; @@ -2023,13 +2048,13 @@ trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, co } trexio_exit_code -trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name); +trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (array == NULL) return TREXIO_INVALID_ARG_2; if (name == NULL) return TREXIO_INVALID_ARG_3; - trexio_exit_code rc = trexio_has_external_array(file, name) + trexio_exit_code rc = trexio_has_external_array(file, name); if (rc == TREXIO_HAS_NOT) return TREXIO_DSET_MISSING; if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; @@ -2049,7 +2074,7 @@ trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, con } trexio_exit_code -trexio_has_external_array(trexio_t* const file, const char* name); +trexio_has_external_array(trexio_t* const file, const char* name) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (name == NULL) return TREXIO_INVALID_ARG_2; diff --git a/src/templates_hdf5/templator_hdf5.org b/src/templates_hdf5/templator_hdf5.org index ac684755..0e57bb42 100644 --- a/src/templates_hdf5/templator_hdf5.org +++ b/src/templates_hdf5/templator_hdf5.org @@ -46,6 +46,7 @@ <
> #include "trexio_hdf5.h" +#define EXTERNAL_GROUP_NAME "external" #+end_src * Template for HDF5 definitions @@ -69,6 +70,7 @@ typedef struct trexio_hdf5_s { trexio_t parent ; hid_t file_id; hid_t $group$_group; + hid_t external_group; } trexio_hdf5_t; #+end_src @@ -158,18 +160,26 @@ trexio_hdf5_init (trexio_t* const file) case 'r': if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) > 0) f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT); if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) == 0) f->$group$_group = (hid_t) 0; + /* Manual addition for the "external" group */ + if (H5Lexists(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT) > 0) f->external_group = H5Gopen(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT); + if (H5Lexists(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT) == 0) f->external_group = (hid_t) 0; break; case 'u': case 'w': if (f_exists == 1) { if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) > 0) f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT); if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) == 0) f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + /* Manual addition for the "external" group */ + if (H5Lexists(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT) > 0) f->external_group = H5Gopen(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT); + if (H5Lexists(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT) == 0) f->external_group = H5Gcreate(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); } else { f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + f->external_group = H5Gcreate(f->file_id, EXTERNAL_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); } break; } if (f->$group$_group < (hid_t) 0) return TREXIO_INVALID_ID; + if (f->external_group < (hid_t) 0) return TREXIO_INVALID_ID; return TREXIO_SUCCESS; } @@ -185,6 +195,9 @@ trexio_hdf5_deinit (trexio_t* const 
file) if (f->$group$_group != (hid_t) 0) H5Gclose(f->$group$_group); f->$group$_group = 0; + if (f->external_group != (hid_t) 0) H5Gclose(f->external_group); + f->external_group = 0; + H5Fclose(f->file_id); f->file_id = 0; @@ -383,7 +396,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $ /* Read dataset */ herr_t status = H5Dread(dset_id, H5T_$GROUP_DSET_H5_DTYPE$, - H5S_ALL, H5S_ALL, H5P_DEFAULT, + H5S_ALL, H5S_ALL, H5P_DEFAULT, $group_dset$); H5Sclose(dspace_id); @@ -413,7 +426,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$* Consider using HDF5-native h5repack utility after deleting/overwriting big datasets. ,*/ - + if ((trexio_hdf5_has_$group_dset$(file) == TREXIO_SUCCESS) && (file->mode == 'u')) { herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); if (status_del < 0) return TREXIO_FAILURE; @@ -1270,6 +1283,185 @@ trexio_exit_code trexio_hdf5_has_determinant_list(trexio_t* const file) } #+end_src +* Source code for external group (generic I/O of arbitrary data) + + ~trexio_hdf5_[write|read|has]_external_[datatype]_array~ + ~trexio_hdf5_[write|read|has]_external_[datatype]_attribute~ + write|read|check for existence of an arbitrary data block in a given TREXIO file. + + ~external~ here means that the data does not correspond to the TREXIO format definition. + Thus, it is not present in the =trex.json= specification and does not support any of the + advanced TREXIO capabilities (e.g. sparse I/O, verification of dimensions etc). + + **Input parameters:** + 1) ~trexio_file~ - ~trexio_t*~ pointer to the TREXIO file + 2) ~array~ - ~void*~ pointer to the flat array of data to be written + 3) ~rank~ - ~uint32_t~ value: rank (number of dimensions) of an array + 4) ~dimensions~ - ~uint64_t*~ pointer to the array with the number of elements per dimension + 4) ~datatype~ - ~char*~ string specifying the datatype (e.g. 
~double/float~, ~int32/int64~) + 5) ~name~ - ~char*~ lowercase string with the name of the data block (e.g. how it will appear in the TREXIO file) + **important for ~trexio_has_external~ to work!** + + **Output:** + - ~trexio_exit_code~ + + **Note 1:** experimental functionality. Please report any issues that occur. + **Note 2:** I/O of strings is not supported yet. + + #+NAME: table-external-datatypes + | TREXIO suffix | C | Fortran | Python | + |---------------+-----------+---------+--------| + | ~int32~ | ~int32_t~ | | | + | ~int64~ | ~int64_t~ | | | + | ~float32~ | ~float~ | | | + | ~float64~ | ~double~ | | | + +*** C + + #+begin_src python :var table=table-external-datatypes :results drawer +""" This script generates the C and Fortran functions for generic I/O """ + +template_write_func = "trexio_exit_code trexio_hdf5_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name);" +template_read_func = "trexio_exit_code trexio_hdf5_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name);" + +result = [] +result.append("#+begin_src c :tangle prefix_hdf5.h :exports none") +for (suffix, c_type, _, _) in table: + result.append(template_write_func.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + result.append(template_read_func.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + +# trexio_has function does not require datatype and is thus unique +result.append("trexio_exit_code trexio_hdf5_has_external_array(trexio_t* const file, const char* name);") +result.append("trexio_exit_code trexio_hdf5_has_external(trexio_t* const file);") +result.append("#+end_src") +return '\n'.join(result) +#+end_src + +#+RESULTS: +:results: +#+begin_src c :tangle prefix_hdf5.h :exports none +trexio_exit_code trexio_hdf5_write_external_int32_array(trexio_t* const file, const int32_t* array, 
const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_hdf5_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name); +trexio_exit_code trexio_hdf5_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_hdf5_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name); +trexio_exit_code trexio_hdf5_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_hdf5_read_external_float32_array(trexio_t* const file, float* const array, const char* name); +trexio_exit_code trexio_hdf5_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_hdf5_read_external_float64_array(trexio_t* const file, double* const array, const char* name); +trexio_exit_code trexio_hdf5_has_external_array(trexio_t* const file, const char* name); +trexio_exit_code trexio_hdf5_has_external(trexio_t* const file); +#+end_src +:end: + +#+begin_src c :tangle basic_hdf5.c :exports none +trexio_exit_code +trexio_hdf5_has_external (trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + struct H5G_info_t group_info; + + /* H5Gget_info return info about the HDF5 group as a group_info struct */ + herr_t status = H5Gget_info(f->external_group, &group_info); + if (status < 0) return TREXIO_FAILURE; + + /* If nlinks==0 --> the group is empty, i.e. 
non-existent */ + if (group_info.nlinks == (hsize_t) 0) { + return TREXIO_HAS_NOT; + } else { + return TREXIO_SUCCESS; + } + +} +#+end_src + +#+begin_src c :tangle basic_hdf5.c :exports none +trexio_exit_code +trexio_hdf5_has_external_array(trexio_t* const file, const char* name) +{ + + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + if (f->external_group == (hsize_t) 0) return TREXIO_HAS_NOT; + + htri_t exists = H5Lexists(f->external_group, name, H5P_DEFAULT); + if (exists > 0) { + return TREXIO_SUCCESS; + } else if (exists < 0) { + return TREXIO_FAILURE; + } else { + return TREXIO_HAS_NOT; + } + +} + +trexio_exit_code +trexio_hdf5_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name) +{ + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_INT32, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); + + H5Dclose(dset_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + +trexio_exit_code +trexio_hdf5_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + /* + Try to delete an existing dataset by unlinking it from the group (UNSAFE mode). + NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it, + thus reducing the size of the HDF5 file. In practice, this is not always the case. + Consider using HDF5-native h5repack utility after deleting/overwriting big datasets. 
+ ,*/ + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_INT32, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_INT32, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} +#+end_src + * Helper functions #+begin_src c :tangle helpers_hdf5.c From 1b689d9d690e5a60f08f1ed46d5f9d7a2ff2236e Mon Sep 17 00:00:00 2001 From: q-posev Date: Sun, 7 May 2023 16:03:57 +0200 Subject: [PATCH 3/5] Add testing for for external arrays in HDF5 back end --- Makefile.am | 5 +- tests/CMakeLists.txt | 1 + tests/io_dset_external_hdf5.c | 145 ++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 tests/io_dset_external_hdf5.c diff --git a/Makefile.am b/Makefile.am index 2209dad7..fb02ee52 100644 --- a/Makefile.am +++ b/Makefile.am @@ -37,7 +37,7 @@ CLEANFILES = trexio.mod if HAVE_FORTRAN BUILT_SOURCES = trexio.mod else -BUILT_SOURCES = +BUILT_SOURCES = endif EXTRA_DIST = .git_hash @@ -116,6 +116,7 @@ TESTS_C += \ tests/io_dset_float_hdf5 \ tests/io_dset_int_hdf5 \ tests/io_dset_sparse_hdf5 \ + tests/io_dset_external_hdf5 \ tests/io_determinant_hdf5 \ tests/io_jastrow_hdf5 \ tests/io_safe_dset_float_hdf5 \ @@ -303,7 +304,7 @@ DEB_FILES = \ helpers-debian/libtrexio0.install \ helpers-debian/libtrexio-dev.install \ helpers-debian/source \ - helpers-debian/README.source + helpers-debian/README.source debian_from_dist: $(DEB_FILES) $(SOURCES) 
$(trexio_h) cp ../trexio-$(PACKAGE_VERSION).tar.gz ../libtrexio_$(PACKAGE_VERSION).orig.tar.gz diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d415c170..09cf7619 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -24,6 +24,7 @@ if(ENABLE_HDF5) io_dset_float_hdf5 io_dset_str_hdf5 io_dset_sparse_hdf5 + io_dset_external_hdf5 io_determinant_hdf5 io_safe_dset_float_hdf5 io_dset_int_hdf5 diff --git a/tests/io_dset_external_hdf5.c b/tests/io_dset_external_hdf5.c new file mode 100644 index 00000000..b89a05b3 --- /dev/null +++ b/tests/io_dset_external_hdf5.c @@ -0,0 +1,145 @@ +#include "trexio.h" +#include +#include +#include +#include + +#define TEST_BACKEND TREXIO_HDF5 +#define TREXIO_FILE "test_dset_external.h5" +#define RM_COMMAND "rm -f -- " TREXIO_FILE + +#define VECTOR_NAME "test external vector" +#define MATRIX_NAME "test external matrix" + +static int test_write_dset (const char* file_name, const back_end_t backend) { + +/* Try to write a dataset with numerical (int) values into the TREXIO file */ + + trexio_t* file = NULL; + trexio_exit_code rc; + + // parameters to be written + uint32_t rank_v = 1; + uint64_t dims_v[1] = {12}; + int32_t vector[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + + uint32_t rank_m = 2; + uint64_t dims_m[2] = {3, 4}; + int32_t matrix[12] = { + 0 , 1 , 2 , + 3 , 4 , 5 , + 6 , 7 , 8 , + 9 , 10 , 11 + }; + +/*================= START OF TEST ==================*/ + + // open file in 'write' mode + file = trexio_open(file_name, 'w', backend, &rc); + assert (file != NULL); + + // write numerical (int32) vector in an external group of the file + rc = trexio_write_external_int32_array(file, vector, rank_v, dims_v, VECTOR_NAME); + assert (rc == TREXIO_SUCCESS); + + // write numerical (int32) matrixin an external group of the file + rc = trexio_write_external_int32_array(file, matrix, rank_m, dims_m, MATRIX_NAME); + assert (rc == TREXIO_SUCCESS); + + // close current session + rc = trexio_close(file); + assert (rc == 
TREXIO_SUCCESS); + +/*================= END OF TEST ==================*/ + + return 0; +} + + +static int test_has_dset (const char* file_name, const back_end_t backend) { + +/* Try to check the existence of a dataset in the TREXIO file */ + + trexio_t* file = NULL; + trexio_exit_code rc; + +/*================= START OF TEST ==================*/ + + // open file + file = trexio_open(file_name, 'r', backend, &rc); + assert (file != NULL); + + // check that the group exists + rc = trexio_has_external(file); + assert(rc==TREXIO_SUCCESS); + + // check that the previously written datasets exist + rc = trexio_has_external_array(file, VECTOR_NAME); + assert (rc == TREXIO_SUCCESS); + + // check that the previously written datasets exist + rc = trexio_has_external_array(file, MATRIX_NAME); + assert (rc == TREXIO_SUCCESS); + + // close current session + rc = trexio_close(file); + assert (rc == TREXIO_SUCCESS); + +/*================= END OF TEST ==================*/ + + return 0; +} + + +static int test_read_dset (const char* file_name, const back_end_t backend) { + +/* Try to read a dataset with numerical (int) values from the TREXIO file */ + + trexio_t* file = NULL; + trexio_exit_code rc; + + // parameters to be read + int32_t* vector; + +/*================= START OF TEST ==================*/ + + // open file in 'read' mode + file = trexio_open(file_name, 'r', backend, &rc); + assert (file != NULL); + + // read numerical dataset from the file + vector = (int32_t*) calloc(12, sizeof(int32_t)); + rc = trexio_read_external_int32_array(file, vector, VECTOR_NAME); + assert (rc == TREXIO_SUCCESS); + assert (vector[0] == 0); + assert (vector[11] == 11); + + free(vector); + + // close current session + rc = trexio_close(file); + assert (rc == TREXIO_SUCCESS); + +/*================= END OF TEST ==================*/ + + return 0; +} + + +int main(void) { + +/*============== Test launcher ================*/ + + int rc; + rc = system(RM_COMMAND); + assert (rc == 0); + + 
test_write_dset (TREXIO_FILE, TEST_BACKEND); + test_has_dset (TREXIO_FILE, TEST_BACKEND); + test_read_dset (TREXIO_FILE, TEST_BACKEND); + + rc = system(RM_COMMAND); + assert (rc == 0); + + return 0; +} From 11bf772a34c24bb69a0b19efbe33f00e8d660fa6 Mon Sep 17 00:00:00 2001 From: q-posev Date: Sun, 7 May 2023 17:46:09 +0200 Subject: [PATCH 4/5] Code generation works for external numerical arrays --- src/templates_front/templator_front.org | 318 ++++++++++++++++++-- src/templates_hdf5/templator_hdf5.org | 374 ++++++++++++++++++++---- 2 files changed, 607 insertions(+), 85 deletions(-) diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index 43496658..a94476ed 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -1950,29 +1950,110 @@ trexio_pre_close (trexio_t* file) | TREXIO suffix | C | Fortran | Python | |---------------+-----------+---------+--------| | ~int32~ | ~int32_t~ | | | - | ~int64~ | ~int64_t~ | | | | ~float32~ | ~float~ | | | + | ~int64~ | ~int64_t~ | | | | ~float64~ | ~double~ | | | *** C - #+begin_src python :var table=table-external-datatypes :results drawer -""" This script generates the C and Fortran functions for generic I/O """ +The Python code below will generate the C function headers and source code for I/O of different datatypes (see table above). +The C source code templates are inserted into Python code from the corresponding org-mode blocks before execution. -template_write_func = "trexio_exit_code trexio_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name);" -template_read_func = "trexio_exit_code trexio_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name);" +**Note:** the source code for ~has~ functions is written only once (manually) in the end, it is not auto-generated. 
-result = [] -result.append("#+begin_src c :tangle prefix_front.h :exports none") + #+NAME:template_write_func_c + #+begin_src c +trexio_exit_code +trexio_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; imode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_write_external_$suffix$_array(file, array, rank, dimensions, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + #+end_src + + #+NAME:template_read_func_c + #+begin_src c +trexio_exit_code +trexio_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (name == NULL) return TREXIO_INVALID_ARG_3; + + trexio_exit_code rc = trexio_has_external_array(file, name); + if (rc == TREXIO_HAS_NOT) return TREXIO_DSET_MISSING; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_$suffix$_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + #+end_src + + #+begin_src python :var table=table-external-datatypes :results drawer :noweb yes +""" This script generates the C functions for generic I/O (external group) """ + +template_write_func_h = "trexio_exit_code 
trexio_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name);" +template_read_func_h = "trexio_exit_code trexio_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name);" +template_write_func_c = """ +<> +""" +template_read_func_c = """ +<> +""" + +result_h = ["#+begin_src c :tangle prefix_front.h :exports none"] +result_c = ["#+begin_src c :tangle prefix_front.c"] for (suffix, c_type, _, _) in table: - result.append(template_write_func.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) - result.append(template_read_func.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + # populate C headers + result_h.append(template_write_func_h.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + result_h.append(template_read_func_h.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + # populate C source code + result_c.append(template_write_func_c.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + result_c.append(template_read_func_c.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) -# trexio_has function does not require datatype and is thus unique -result.append("trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name);") -result.append("trexio_exit_code trexio_has_external(trexio_t* const file);") -result.append("#+end_src") -return '\n'.join(result) +# trexio_has functions do not require datatype and are thus unique +result_h.append("trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name);") +result_h.append("trexio_exit_code trexio_has_external(trexio_t* const file);") +result_h.append("#+end_src") + +result_c.append("#+end_src") + +return '\n'.join(result_h + ['\n'] + 
result_c) #+end_src #+RESULTS: @@ -1980,24 +2061,34 @@ return '\n'.join(result) #+begin_src c :tangle prefix_front.h :exports none trexio_exit_code trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); trexio_exit_code trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name); -trexio_exit_code trexio_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); -trexio_exit_code trexio_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name); trexio_exit_code trexio_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const uint64_t* dimensions, const char* name); trexio_exit_code trexio_read_external_float32_array(trexio_t* const file, float* const array, const char* name); +trexio_exit_code trexio_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name); trexio_exit_code trexio_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name); trexio_exit_code trexio_read_external_float64_array(trexio_t* const file, double* const array, const char* name); trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name); trexio_exit_code trexio_has_external(trexio_t* const file); #+end_src -:end: - #+begin_src c :tangle prefix_front.c + +#+begin_src c :tangle prefix_front.c + trexio_exit_code -trexio_has_external(trexio_t* const file) +trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) { + if (file == NULL) return 
TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; iback_end < TREXIO_INVALID_BACK_END); + trexio_exit_code rc = trexio_has_external_array(file, name); + if (rc == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; switch (file->back_end) { @@ -2005,7 +2096,7 @@ trexio_has_external(trexio_t* const file) return TREXIO_NOT_SUPPORTED; case TREXIO_HDF5: #ifdef HAVE_HDF5 - return trexio_hdf5_has_external(file); + return trexio_hdf5_write_external_int32_array(file, array, rank, dimensions, name); #else return TREXIO_BACK_END_MISSING; #endif @@ -2013,11 +2104,37 @@ trexio_has_external(trexio_t* const file) return TREXIO_FAILURE; } - #+end_src - #+begin_src c :tangle prefix_front.c + trexio_exit_code -trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (name == NULL) return TREXIO_INVALID_ARG_3; + + trexio_exit_code rc = trexio_has_external_array(file, name); + if (rc == TREXIO_HAS_NOT) return TREXIO_DSET_MISSING; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_int32_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + + +trexio_exit_code +trexio_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const uint64_t* dimensions, const char* name) { if 
(file == NULL) return TREXIO_INVALID_ARG_1; if (array == NULL) return TREXIO_INVALID_ARG_2; @@ -2038,7 +2155,7 @@ trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, co return TREXIO_NOT_SUPPORTED; case TREXIO_HDF5: #ifdef HAVE_HDF5 - return trexio_hdf5_write_external_int32_array(file, array, rank, dimensions, name); + return trexio_hdf5_write_external_float32_array(file, array, rank, dimensions, name); #else return TREXIO_BACK_END_MISSING; #endif @@ -2047,8 +2164,9 @@ trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, co return TREXIO_FAILURE; } + trexio_exit_code -trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name) +trexio_read_external_float32_array(trexio_t* const file, float* const array, const char* name) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (array == NULL) return TREXIO_INVALID_ARG_2; @@ -2064,7 +2182,151 @@ trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, con return TREXIO_NOT_SUPPORTED; case TREXIO_HDF5: #ifdef HAVE_HDF5 - return trexio_hdf5_read_external_int32_array(file, array, name); + return trexio_hdf5_read_external_float32_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + + +trexio_exit_code +trexio_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; imode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return 
trexio_hdf5_write_external_int64_array(file, array, rank, dimensions, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + + +trexio_exit_code +trexio_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (name == NULL) return TREXIO_INVALID_ARG_3; + + trexio_exit_code rc = trexio_has_external_array(file, name); + if (rc == TREXIO_HAS_NOT) return TREXIO_DSET_MISSING; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_int64_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + + +trexio_exit_code +trexio_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (rank == 0) return TREXIO_INVALID_ARG_3; + if (dimensions == NULL) return TREXIO_INVALID_ARG_4; + if (name == NULL) return TREXIO_INVALID_ARG_5; + for (uint32_t i=0; imode != 'u') return TREXIO_DSET_ALREADY_EXISTS; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_write_external_float64_array(file, array, rank, dimensions, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + + +trexio_exit_code +trexio_read_external_float64_array(trexio_t* const file, double* const array, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (array == NULL) return TREXIO_INVALID_ARG_2; + if (name == NULL) return TREXIO_INVALID_ARG_3; 
+ + trexio_exit_code rc = trexio_has_external_array(file, name); + if (rc == TREXIO_HAS_NOT) return TREXIO_DSET_MISSING; + if (rc != TREXIO_HAS_NOT && rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_external_float64_array(file, array, name); +#else + return TREXIO_BACK_END_MISSING; +#endif + } + + return TREXIO_FAILURE; +} + +#+end_src +:end: + + #+begin_src c :tangle prefix_front.c +trexio_exit_code +trexio_has_external(trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + assert(file->back_end < TREXIO_INVALID_BACK_END); + + switch (file->back_end) { + + case TREXIO_TEXT: + return TREXIO_NOT_SUPPORTED; + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_has_external(file); #else return TREXIO_BACK_END_MISSING; #endif diff --git a/src/templates_hdf5/templator_hdf5.org b/src/templates_hdf5/templator_hdf5.org index 0e57bb42..ed3b2dba 100644 --- a/src/templates_hdf5/templator_hdf5.org +++ b/src/templates_hdf5/templator_hdf5.org @@ -45,13 +45,12 @@ #+begin_src c :tangle prefix_hdf5.c :noweb yes <
> #include "trexio_hdf5.h" - -#define EXTERNAL_GROUP_NAME "external" #+end_src * Template for HDF5 definitions #+begin_src c :tangle def_hdf5.c +#define EXTERNAL_GROUP_NAME "external" #define $GROUP$_GROUP_NAME "$group$" #define $GROUP_NUM$_NAME "$group_num$" #define $GROUP_DSET$_NAME "$group_dset$" @@ -1309,32 +1308,114 @@ trexio_exit_code trexio_hdf5_has_determinant_list(trexio_t* const file) **Note 2:** I/O of strings is not supported yet. #+NAME: table-external-datatypes - | TREXIO suffix | C | Fortran | Python | - |---------------+-----------+---------+--------| - | ~int32~ | ~int32_t~ | | | - | ~int64~ | ~int64_t~ | | | - | ~float32~ | ~float~ | | | - | ~float64~ | ~double~ | | | + | TREXIO suffix | C | HDF5 | Fortran | Python | + |---------------+-----------+----------+---------+--------| + | ~int32~ | ~int32_t~ | ~INT32~ | | | + | ~int64~ | ~int64_t~ | ~INT64~ | | | + | ~float32~ | ~float~ | ~FLOAT~ | | | + | ~float64~ | ~double~ | ~DOUBLE~ | | | *** C - #+begin_src python :var table=table-external-datatypes :results drawer -""" This script generates the C and Fortran functions for generic I/O """ +The Python code below will generate the C function headers and source code for I/O of different datatypes (see table above). +The C source code templates are inserted into Python code from the corresponding org-mode blocks before execution. + +**Note:** the source code for ~has~ functions is written only once (manually) in the end, it is not auto-generated. 
+ + #+NAME:template_write_hdf5_func_c + #+begin_src c +trexio_exit_code +trexio_hdf5_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_$HDF5_TYPE$, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_$HDF5_TYPE$, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + #+end_src + + #+NAME:template_read_hdf5_func_c + #+begin_src c +trexio_exit_code +trexio_hdf5_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name) +{ + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_$HDF5_TYPE$, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); -template_write_func = "trexio_exit_code trexio_hdf5_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name);" -template_read_func = "trexio_exit_code trexio_hdf5_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name);" + H5Dclose(dset_id); + if (status < 0) return 
TREXIO_FAILURE; -result = [] -result.append("#+begin_src c :tangle prefix_hdf5.h :exports none") -for (suffix, c_type, _, _) in table: - result.append(template_write_func.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) - result.append(template_read_func.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + return TREXIO_SUCCESS; +} + #+end_src -# trexio_has function does not require datatype and is thus unique -result.append("trexio_exit_code trexio_hdf5_has_external_array(trexio_t* const file, const char* name);") -result.append("trexio_exit_code trexio_hdf5_has_external(trexio_t* const file);") -result.append("#+end_src") -return '\n'.join(result) + #+begin_src python :var table=table-external-datatypes :results drawer :noweb yes +""" This script generates the C functions for generic I/O (external group) """ + +template_write_func_h = "trexio_exit_code trexio_hdf5_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array, const uint32_t rank, const uint64_t* dimensions, const char* name);" +template_read_func_h = "trexio_exit_code trexio_hdf5_read_external_$suffix$_array(trexio_t* const file, $c_type$* const array, const char* name);" +template_write_func_c = """ +<> +""" +template_read_func_c = """ +<> +""" + +result_h = ["#+begin_src c :tangle prefix_hdf5.h :exports none"] +result_c = ["#+begin_src c :tangle basic_hdf5.c"] +for (suffix, c_type, hdf5_type, _, _) in table: + # populate C headers + result_h.append(template_write_func_h.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + result_h.append(template_read_func_h.replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + # populate C source code + result_c.append(template_write_func_c.replace("$HDF5_TYPE$", hdf5_type.replace("~","")).replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + 
result_c.append(template_read_func_c.replace("$HDF5_TYPE$", hdf5_type.replace("~","")).replace("$suffix$", suffix.replace("~","")).replace("$c_type$", c_type.replace("~",""))) + +# trexio_has functions do not require datatype and are thus unique +result_h.append("trexio_exit_code trexio_hdf5_has_external_array(trexio_t* const file, const char* name);") +result_h.append("trexio_exit_code trexio_hdf5_has_external(trexio_t* const file);") +result_h.append("#+end_src") + +result_c.append("#+end_src") + +return '\n'.join(result_h + ['\n'] + result_c) #+end_src #+RESULTS: @@ -1351,54 +1432,169 @@ trexio_exit_code trexio_hdf5_read_external_float64_array(trexio_t* const file, d trexio_exit_code trexio_hdf5_has_external_array(trexio_t* const file, const char* name); trexio_exit_code trexio_hdf5_has_external(trexio_t* const file); #+end_src -:end: -#+begin_src c :tangle basic_hdf5.c :exports none + +#+begin_src c :tangle basic_hdf5.c + trexio_exit_code -trexio_hdf5_has_external (trexio_t* const file) +trexio_hdf5_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) { + trexio_hdf5_t* f = (trexio_hdf5_t*) file; - if (file == NULL) return TREXIO_INVALID_ARG_1; + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_INT32, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_INT32, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return 
TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name) +{ const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; - struct H5G_info_t group_info; + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; - /* H5Gget_info return info about the HDF5 group as a group_info struct */ - herr_t status = H5Gget_info(f->external_group, &group_info); + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_INT32, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); + + H5Dclose(dset_id); if (status < 0) return TREXIO_FAILURE; - /* If nlinks==0 --> the group is empty, i.e. non-existent */ - if (group_info.nlinks == (hsize_t) 0) { - return TREXIO_HAS_NOT; - } else { - return TREXIO_SUCCESS; + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_INT64, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; } + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_INT64, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; } -#+end_src -#+begin_src c :tangle basic_hdf5.c :exports none + 
trexio_exit_code -trexio_hdf5_has_external_array(trexio_t* const file, const char* name) +trexio_hdf5_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name) { + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_INT64, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); + + H5Dclose(dset_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +{ trexio_hdf5_t* f = (trexio_hdf5_t*) file; - if (f->external_group == (hsize_t) 0) return TREXIO_HAS_NOT; - htri_t exists = H5Lexists(f->external_group, name, H5P_DEFAULT); - if (exists > 0) { - return TREXIO_SUCCESS; - } else if (exists < 0) { - return TREXIO_FAILURE; - } else { - return TREXIO_HAS_NOT; + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { + herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dimensions, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate(f->external_group, + name, + H5T_NATIVE_FLOAT, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; } + herr_t status = H5Dwrite(dset_id, + H5T_NATIVE_FLOAT, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + array); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; } + trexio_exit_code -trexio_hdf5_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name) 
+trexio_hdf5_read_external_float32_array(trexio_t* const file, float* const array, const char* name) { const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; @@ -1409,7 +1605,7 @@ trexio_hdf5_read_external_int32_array(trexio_t* const file, int32_t* const array /* Read dataset */ herr_t status = H5Dread(dset_id, - H5T_NATIVE_INT32, + H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, array); @@ -1419,17 +1615,12 @@ trexio_hdf5_read_external_int32_array(trexio_t* const file, int32_t* const array return TREXIO_SUCCESS; } + trexio_exit_code -trexio_hdf5_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name) +trexio_hdf5_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name) { - trexio_hdf5_t* f = (trexio_hdf5_t*) file; - /* - Try to delete an existing dataset by unlinking it from the group (UNSAFE mode). - NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it, - thus reducing the size of the HDF5 file. In practic, this is not always the case. - Consider using HDF5-native h5repack utility after deleting/overwriting big datasets. 
- ,*/ + if ((trexio_hdf5_has_external_array(file, name) == TREXIO_SUCCESS) && (file->mode == 'u')) { herr_t status_del = H5Ldelete(f->external_group, name, H5P_DEFAULT); if (status_del < 0) return TREXIO_FAILURE; @@ -1440,7 +1631,7 @@ trexio_hdf5_write_external_int32_array(trexio_t* const file, const int32_t* arra hid_t dset_id = H5Dcreate(f->external_group, name, - H5T_NATIVE_INT32, + H5T_NATIVE_DOUBLE, dspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (dset_id <= 0) { @@ -1449,7 +1640,7 @@ trexio_hdf5_write_external_int32_array(trexio_t* const file, const int32_t* arra } herr_t status = H5Dwrite(dset_id, - H5T_NATIVE_INT32, + H5T_NATIVE_DOUBLE, H5S_ALL, dspace_id, H5P_DEFAULT, @@ -1460,6 +1651,75 @@ trexio_hdf5_write_external_int32_array(trexio_t* const file, const int32_t* arra return TREXIO_SUCCESS; } + + +trexio_exit_code +trexio_hdf5_read_external_float64_array(trexio_t* const file, double* const array, const char* name) +{ + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + // open the dataset to get its dimensions + hid_t dset_id = H5Dopen(f->external_group, name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + /* Read dataset */ + herr_t status = H5Dread(dset_id, + H5T_NATIVE_DOUBLE, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + array); + + H5Dclose(dset_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + +#+end_src +:end: + + +#+begin_src c :tangle basic_hdf5.c +trexio_exit_code +trexio_hdf5_has_external (trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + struct H5G_info_t group_info; + + /* H5Gget_info return info about the HDF5 group as a group_info struct */ + herr_t status = H5Gget_info(f->external_group, &group_info); + if (status < 0) return TREXIO_FAILURE; + + /* If nlinks==0 --> the group is empty, i.e. 
non-existent */ + if (group_info.nlinks == (hsize_t) 0) { + return TREXIO_HAS_NOT; + } else { + return TREXIO_SUCCESS; + } + +} + +trexio_exit_code +trexio_hdf5_has_external_array(trexio_t* const file, const char* name) +{ + + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + if (f->external_group == (hsize_t) 0) return TREXIO_HAS_NOT; + + htri_t exists = H5Lexists(f->external_group, name, H5P_DEFAULT); + if (exists > 0) { + return TREXIO_SUCCESS; + } else if (exists < 0) { + return TREXIO_FAILURE; + } else { + return TREXIO_HAS_NOT; + } + +} #+end_src * Helper functions From 8ac64c2a750cc997dd96afb2efe7035ba89043f1 Mon Sep 17 00:00:00 2001 From: q-posev Date: Mon, 8 May 2023 14:52:50 +0200 Subject: [PATCH 5/5] Add Python API for external array I/O --- python/test/benzene_data.py | 3 + python/test/test_api.py | 28 +++ src/pytrexio.i | 3 + src/templates_front/templator_front.org | 304 +++++++++++++++++++++++- 4 files changed, 336 insertions(+), 2 deletions(-) diff --git a/python/test/benzene_data.py b/python/test/benzene_data.py index d64f9173..78ce47cb 100644 --- a/python/test/benzene_data.py +++ b/python/test/benzene_data.py @@ -47,3 +47,6 @@ orb_up_test = [0, 65, 128, 129] orb_dn_test = [1, 64, 128, 129] + +external_2Dfloat_name = "test external float matrix" +external_1Dint32_name = "test external int32 vector" diff --git a/python/test/test_api.py b/python/test/test_api.py index 4dff3f6e..818303a6 100644 --- a/python/test/test_api.py +++ b/python/test/test_api.py @@ -149,6 +149,19 @@ def test_array_2D(self): assert trexio.has_nucleus_coord(self.test_file) + def test_external_array(self): + """Write external arrays.""" + self.open() + + assert not trexio.has_external_array(self.test_file, external_2Dfloat_name) + trexio.write_external_array(self.test_file, nucleus_coord, external_2Dfloat_name) + assert trexio.has_external_array(self.test_file, external_2Dfloat_name) + + assert not trexio.has_external_array(self.test_file, external_1Dint32_name) + 
trexio.write_external_array(self.test_file, np.array(nucleus_charge,dtype=np.int32), external_1Dint32_name) + assert trexio.has_external_array(self.test_file, external_1Dint32_name) + + def test_indices(self): """Write array of indices.""" self.open() @@ -252,6 +265,21 @@ def test_read_array_2D(self): np.testing.assert_array_almost_equal(coords_np, np.array(nucleus_coord).reshape(nucleus_num,3), decimal=8) + def test_read_external_array(self): + """Read external arrays.""" + self.open(mode='r') + # read nuclear coordinates without providing optional argument dim + coords_external_np = trexio.read_external_array(self.test_file, name=external_2Dfloat_name, dtype="float64", size=nucleus_num*3) + assert coords_external_np.dtype is np.dtype(np.float64) + assert coords_external_np.size == nucleus_num * 3 + np.testing.assert_array_almost_equal(coords_external_np.reshape(nucleus_num,3), np.array(nucleus_coord).reshape(nucleus_num,3), decimal=8) + + charge_external_np = trexio.read_external_array(self.test_file, name=external_1Dint32_name, dtype="int32", size=nucleus_num) + assert charge_external_np.dtype is np.dtype(np.int32) + assert charge_external_np.size == nucleus_num + np.testing.assert_array_almost_equal(charge_external_np, np.array(nucleus_charge, dtype=np.int32)) + + def test_read_errors(self): """Test some reading errors.""" self.open(mode='r') diff --git a/src/pytrexio.i b/src/pytrexio.i index 5f3a4e18..3b701794 100644 --- a/src/pytrexio.i +++ b/src/pytrexio.i @@ -108,6 +108,9 @@ import_array(); /* For some reasons SWIG does not apply the proper bitfield_t typemap, so one has to manually specify int64_t* ARGOUT_ARRAY1 below */ %apply (int64_t* ARGOUT_ARRAY1, int32_t DIM1) {(bitfield_t* const bit_list, const int32_t N_int)}; +/* For passing dimensions of external arrays fron Python front to C back */ +%apply (uint64_t* IN_ARRAY1, int32_t DIM1) {(const uint64_t* dims_in, const int32_t dims_dim_in)}; + /* This tells SWIG to treat char ** dset_in pattern as a 
special case Enables access to trexio_[...]_write_dset_str set of functions directly, i.e. by converting input list of strings from Python into char ** of C diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index a94476ed..eb0fabeb 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -1993,6 +1993,23 @@ trexio_write_external_$suffix$_array(trexio_t* const file, const $c_type$* array return TREXIO_FAILURE; } + +trexio_exit_code +trexio_write_safe_external_$suffix$_array(trexio_t* const file, const $c_type$* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_in == NULL) return TREXIO_INVALID_ARG_2; + if (dim_in <= 0) return TREXIO_INVALID_ARG_3; + if (rank == 0) return TREXIO_INVALID_ARG_4; + if (dims_in == NULL) return TREXIO_INVALID_ARG_5; + if (dims_dim_in == 0) return TREXIO_INVALID_ARG_6; + if (name == NULL) return TREXIO_INVALID_ARG_7; + for (uint32_t i=0; i> """ @@ -2060,13 +2090,21 @@ return '\n'.join(result_h + ['\n'] + result_c) :results: #+begin_src c :tangle prefix_front.h :exports none trexio_exit_code trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_write_safe_external_int32_array(trexio_t* const file, const int32_t* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name); trexio_exit_code trexio_read_external_int32_array(trexio_t* const file, int32_t* const array, const char* name); +trexio_exit_code trexio_read_safe_external_int32_array(trexio_t* const file, int32_t* const dset_out, const int64_t dim_out, const char* name); trexio_exit_code trexio_write_external_float32_array(trexio_t* const file, const float* array, const uint32_t rank, const 
uint64_t* dimensions, const char* name); +trexio_exit_code trexio_write_safe_external_float32_array(trexio_t* const file, const float* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name); trexio_exit_code trexio_read_external_float32_array(trexio_t* const file, float* const array, const char* name); +trexio_exit_code trexio_read_safe_external_float32_array(trexio_t* const file, float* const dset_out, const int64_t dim_out, const char* name); trexio_exit_code trexio_write_external_int64_array(trexio_t* const file, const int64_t* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_write_safe_external_int64_array(trexio_t* const file, const int64_t* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name); trexio_exit_code trexio_read_external_int64_array(trexio_t* const file, int64_t* const array, const char* name); +trexio_exit_code trexio_read_safe_external_int64_array(trexio_t* const file, int64_t* const dset_out, const int64_t dim_out, const char* name); trexio_exit_code trexio_write_external_float64_array(trexio_t* const file, const double* array, const uint32_t rank, const uint64_t* dimensions, const char* name); +trexio_exit_code trexio_write_safe_external_float64_array(trexio_t* const file, const double* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name); trexio_exit_code trexio_read_external_float64_array(trexio_t* const file, double* const array, const char* name); +trexio_exit_code trexio_read_safe_external_float64_array(trexio_t* const file, double* const dset_out, const int64_t dim_out, const char* name); trexio_exit_code trexio_has_external_array(trexio_t* const file, const char* name); trexio_exit_code trexio_has_external(trexio_t* const file); #+end_src @@ -2105,6 +2143,23 @@ 
trexio_write_external_int32_array(trexio_t* const file, const int32_t* array, co return TREXIO_FAILURE; } +trexio_exit_code +trexio_write_safe_external_int32_array(trexio_t* const file, const int32_t* dset_in, const int64_t dim_in, const uint32_t rank, const uint64_t* dims_in, const int32_t dims_dim_in, const char* name) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset_in == NULL) return TREXIO_INVALID_ARG_2; + if (dim_in <= 0) return TREXIO_INVALID_ARG_3; + if (rank == 0) return TREXIO_INVALID_ARG_4; + if (dims_in == NULL) return TREXIO_INVALID_ARG_5; + if (dims_dim_in == 0) return TREXIO_INVALID_ARG_6; + if (name == NULL) return TREXIO_INVALID_ARG_7; + for (uint32_t i=0; i None: + """Write an arbitrary array of numbers in the TREXIO file. + + Parameters: + + trexio_file: + TREXIO File object. + + dset_w: list, tuple OR numpy.ndarray + Array of values to be written. + + name: string + Name of the array as it will be stored in the external group of TREXIO file + + Raises: + - trexio.Error if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message. + - Exception from some other error (e.g. RuntimeError). 
+ """ + + # get dimensions and rank from input array + if not isinstance(dset_w, (list, tuple)): + # if input array is not a list or tuple then it is probably a numpy array + rank = len(dset_w.shape) + dimensions = np.array(dset_w.shape, dtype=np.uint64) + else: + get_shape = lambda l: [len(l)] + get_shape(l[0]) if (type(l) == list or type(l) == tuple) else [] + get_type = lambda l: [type(l)] + get_type(l[0]) if (type(l) == list or type(l) == tuple) else [type(l)] + dset_shape = get_shape(dset_w) + dset_dtype = get_type(dset_w)[-1] + rank = len(dset_shape) + dimensions = np.array(dset_shape, dtype=np.uint64) + + # decide whether to flatten or not + doFlatten = False + if rank > 1: + doFlatten = True + + # handle list/tuple + if isinstance(dset_w, (list, tuple)): + if dset_dtype is int: + if doFlatten: + rc = pytr.trexio_write_safe_external_int64_array(trexio_file.pytrexio_s, np.array(dset_w, dtype=np.int64).flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_int64_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + elif dset_dtype is float: + if doFlatten: + rc = pytr.trexio_write_safe_external_float64_array(trexio_file.pytrexio_s, np.array(dset_w, dtype=np.float64).flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_float64_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + else: + raise TypeError("Unsupported type of a list/tuple for generic I/O of arrays.") + + # handle numpy array + elif isinstance(dset_w, np.ndarray): + if dset_w.dtype==np.int64: + if doFlatten: + rc = pytr.trexio_write_safe_external_int64_array(trexio_file.pytrexio_s, dset_w.flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_int64_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + elif dset_w.dtype==np.int32: + if doFlatten: + rc = pytr.trexio_write_safe_external_int32_array(trexio_file.pytrexio_s, dset_w.flatten(), rank, dimensions, name) + else: + rc =
pytr.trexio_write_safe_external_int32_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + elif dset_w.dtype==np.float64: + if doFlatten: + rc = pytr.trexio_write_safe_external_float64_array(trexio_file.pytrexio_s, dset_w.flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_float64_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + elif dset_w.dtype==np.float32: + if doFlatten: + rc = pytr.trexio_write_safe_external_float32_array(trexio_file.pytrexio_s, dset_w.flatten(), rank, dimensions, name) + else: + rc = pytr.trexio_write_safe_external_float32_array(trexio_file.pytrexio_s, dset_w, rank, dimensions, name) + else: + raise TypeError("Unsupported type of a NumPy array for generic I/O of arrays.") + else: + raise TypeError("Unsupported array type for generic I/O.") + + if rc != TREXIO_SUCCESS: + raise Error(rc) + #+end_src + + #+begin_src python :tangle basic_python.py +def read_external_array(trexio_file, name, size, dtype): + """Read an external array of numbers from the TREXIO file. + + Parameters: + + trexio_file: + TREXIO File object. + name: + string name of an array + size: + integer value corresponding to the total number of elements to read + dtype: + string indicating the datatype of the array (int/int32/int64/float/float32/float64/double) + + Returns: + ~dset_r~: 1D NumPy array with ~size~ elements of the "name" array read from the TREXIO file. + + Raises: + - trexio.Error if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message. + - ValueError if ~dtype~ is not one of the supported datatype strings listed above. + - Exception from some other error (e.g. RuntimeError). 
+""" + + if dtype in ['int', 'int64']: + rc, dset_r = pytr.trexio_read_safe_external_int64_array(trexio_file.pytrexio_s, size, name) + elif dtype in ['int32']: + rc, dset_r = pytr.trexio_read_safe_external_int32_array(trexio_file.pytrexio_s, size, name) + elif dtype in ['float', 'float64', 'double']: + rc, dset_r = pytr.trexio_read_safe_external_float64_array(trexio_file.pytrexio_s, size, name) + elif dtype in ['float32']: + rc, dset_r = pytr.trexio_read_safe_external_float32_array(trexio_file.pytrexio_s, size, name) + else: + raise ValueError("Unsupported dtype passed to read_external_array.") + + if rc != TREXIO_SUCCESS: + raise Error(rc) + + return dset_r + #+end_src + + #+begin_src python :tangle basic_python.py +def has_external_array(trexio_file, name) -> bool: + """Check whether an external array exists in the TREXIO file. + + Parameters: + + trexio_file: + Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. + name: + String name of the array from the TREXIO file + + Returns: + True if the array exists (return code ~rc~ is TREXIO_SUCCESS), False otherwise + + Raises: + - trexio.Error if TREXIO return code ~rc~ is TREXIO_FAILURE and prints the error message using string_of_error. + - Exception from some other error (e.g. RuntimeError). + """ + + rc = pytr.trexio_has_external_array(trexio_file.pytrexio_s, name) + if rc == TREXIO_FAILURE: + raise Error(rc) + + return rc == TREXIO_SUCCESS + #+end_src + * Templates for front end ** Description