diff --git a/CMakeLists.txt b/CMakeLists.txt index bf0b960188..7bf8d78fa8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1737,6 +1737,7 @@ endif() # The Unidata testing S3 bucket # WARNING: this must match the value in configure.ac +set(S3ENDPOINT "s3.us-east-1.amazonaws.com" CACHE STRING "S3 endpoint") set(S3TESTBUCKET "unidata-zarr-test-data" CACHE STRING "S3 test bucket") # The working S3 path tree within the Unidata bucket. diff --git a/config.h.cmake.in b/config.h.cmake.in index 26a5b4773b..ae45fcdb57 100644 --- a/config.h.cmake.in +++ b/config.h.cmake.in @@ -181,6 +181,9 @@ are set when opening a binary file on Windows. */ /* if true, enable S3 testing*/ #cmakedefine WITH_S3_TESTING "${WITH_S3_TESTING}" +/* S3 Test endpoint */ +#define S3ENDPOINT "${S3ENDPOINT}" + /* S3 Test Bucket */ #define S3TESTBUCKET "${S3TESTBUCKET}" diff --git a/configure.ac b/configure.ac index 2f4b4a22ed..ffe24c644e 100644 --- a/configure.ac +++ b/configure.ac @@ -2114,6 +2114,11 @@ if test "x$enable_s3_aws" = xno && test "x$enable_s3_internal" = xno; then AC_SUBST(WHICH_S3_SDK,[none]) fi +# The S3 endpoint used for testing +# WARNING: this must match the value in CMakeLists.txt +AC_DEFINE([S3ENDPOINT], ["s3.us-east-1.amazonaws.com"], [S3 test endpoint]) +AC_SUBST([S3ENDPOINT],["s3.us-east-1.amazonaws.com"]) + # The Unidata testing S3 bucket # WARNING: this must match the value in CMakeLists.txt AC_DEFINE([S3TESTBUCKET], ["unidata-zarr-test-data"], [S3 test bucket]) diff --git a/include/netcdf.h b/include/netcdf.h index 234a7e4ce4..2b100563a7 100644 --- a/include/netcdf.h +++ b/include/netcdf.h @@ -531,8 +531,9 @@ by the desired type. 
*/ #define NC_EOBJECT (-140) /**< Some object exists when it should not */ #define NC_ENOOBJECT (-141) /**< Some object not found */ #define NC_EPLUGIN (-142) /**< Unclassified failure in accessing a dynamically loaded plugin> */ +#define NC_ENOTZARR (-143) /**< Malformed (NC)Zarr file */ -#define NC4_LAST_ERROR (-142) /**< @internal All netCDF errors > this. */ +#define NC4_LAST_ERROR (-143) /**< @internal All netCDF errors > this. */ /* Errors for all remote access methods(e.g. DAP and CDMREMOTE)*/ #define NC_EURL (NC_EDAPURL) /**< Malformed URL */ diff --git a/libdispatch/ncjson.c b/libdispatch/ncjson.c index 6a9b46cc6d..14c6ff33cb 100644 --- a/libdispatch/ncjson.c +++ b/libdispatch/ncjson.c @@ -898,15 +898,26 @@ NCJaddstring(NCjson* json, int sort, const char* s) OPTSTATIC int NCJinsert(NCjson* object, const char* key, NCjson* jvalue) { - int stat = NCJ_OK; - NCjson* jkey = NULL; - if(object == NULL || object->sort != NCJ_DICT || key == NULL || jvalue == NULL) - {stat = NCJTHROW(NCJ_ERR); goto done;} - if((stat = NCJnewstring(NCJ_STRING,key,&jkey))==NCJ_ERR) goto done; - if((stat = NCJappend(object,jkey))==NCJ_ERR) goto done; - if((stat = NCJappend(object,jvalue))==NCJ_ERR) goto done; + int stat = NCJ_OK; /* Insert (key,jvalue) into the dict `object`; if the key is already present its value is replaced. On success the dict holds jvalue itself (no copy is made). */ + NCjson *jkey = NULL; + if (object == NULL || object->sort != NCJ_DICT || key == NULL || jvalue == NULL) { + stat = NCJTHROW(NCJ_ERR); + goto done; + } + for (size_t i = 0; i + 1 < NCJlength(object); i += 2) { + jkey = NCJith(object, i); + if (jkey != NULL && jkey->string != NULL && strcmp(jkey->string, key) == 0) { + /* Key already present: replace its value instead of appending a duplicate pair */ + if (object->list.contents[i + 1] != jvalue) NCJreclaim(object->list.contents[i + 1]); /* free old value, unless the caller re-inserts that same node */ + object->list.contents[i + 1] = jvalue; + goto done; + } + } + if ((stat = NCJnewstring(NCJ_STRING, key, &jkey)) == NCJ_ERR) goto done; + if ((stat = NCJappend(object, jkey)) == NCJ_ERR) goto done; + if ((stat = NCJappend(object, jvalue)) == NCJ_ERR) goto done; done: - return NCJTHROW(stat); + return NCJTHROW(stat); } /* Insert key-value pair as strings into a dict 
object. diff --git a/libnczarr/CMakeLists.txt b/libnczarr/CMakeLists.txt index c460e1d76e..4e2f572002 100644 --- a/libnczarr/CMakeLists.txt +++ b/libnczarr/CMakeLists.txt @@ -23,6 +23,8 @@ zgrp.c zinternal.c zmap.c zmap_file.c +zmetadata2.c +zmetadata.c zodom.c zopen.c zprov.c @@ -39,6 +41,7 @@ zdispatch.h zincludes.h zinternal.h zmap.h +zmetadata.h zodom.h zprovenance.h zplugins.h diff --git a/libnczarr/Makefile.am b/libnczarr/Makefile.am index 4227516267..f3e166de69 100644 --- a/libnczarr/Makefile.am +++ b/libnczarr/Makefile.am @@ -43,6 +43,8 @@ zgrp.c \ zinternal.c \ zmap.c \ zmap_file.c \ +zmetadata2.c \ +zmetadata.c \ zodom.c \ zopen.c \ zprov.c \ @@ -59,6 +61,7 @@ zdispatch.h \ zincludes.h \ zinternal.h \ zmap.h \ +zmetadata.h \ zodom.h \ zprovenance.h \ zplugins.h \ diff --git a/libnczarr/zarr.c b/libnczarr/zarr.c index 9ff7893a7f..1a1467d1b4 100644 --- a/libnczarr/zarr.c +++ b/libnczarr/zarr.c @@ -79,6 +79,10 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, NClist* controls) if((stat = nczmap_create(zinfo->controls.mapimpl,nc->path,nc->mode,zinfo->controls.flags,NULL,&zinfo->map))) goto done; + /* Initialize metadata handle */ + assert(zinfo->map != NULL); + if((stat = NCZMD_set_metadata_handler(zinfo,(const NCZ_Metadata**)&zinfo->metadata_handler))) goto done; + done: ncurifree(uri); NCJreclaim(json); @@ -143,6 +147,10 @@ ncz_open_dataset(NC_FILE_INFO_T* file, NClist* controls) if((stat = nczmap_open(zinfo->controls.mapimpl,nc->path,mode,zinfo->controls.flags,NULL,&zinfo->map))) goto done; + /* Initialize metadata handle */ + assert(zinfo->map != NULL); + if((stat = NCZMD_set_metadata_handler(zinfo,(const NCZ_Metadata**)&zinfo->metadata_handler))) goto done; + /* Ok, try to read superblock */ if((stat = ncz_read_superblock(file,&nczarr_version,&zarr_format))) goto done; diff --git a/libnczarr/zarr.h b/libnczarr/zarr.h index 714fb3bcea..6704ead9a0 100644 --- a/libnczarr/zarr.h +++ b/libnczarr/zarr.h @@ -69,7 +69,7 @@ EXTERNL int 
NCZ_inferattrtype(const NCjson* value, nc_type typehint, nc_type* ty EXTERNL int NCZ_inferinttype(unsigned long long u64, int negative); EXTERNL int ncz_fill_value_sort(nc_type nctype, int*); EXTERNL int NCZ_createobject(NCZMAP* zmap, const char* key, size64_t size); -EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json); +EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, const NCjson* json); EXTERNL int NCZ_downloadjson(NCZMAP* zmap, const char* key, NCjson** jsonp); EXTERNL int NCZ_isLittleEndian(void); EXTERNL int NCZ_subobjects(NCZMAP* map, const char* prefix, const char* tag, char dimsep, NClist* objlist); diff --git a/libnczarr/zclose.c b/libnczarr/zclose.c index 3dbba0d6be..f1bbb725d6 100644 --- a/libnczarr/zclose.c +++ b/libnczarr/zclose.c @@ -51,6 +51,7 @@ ncz_close_file(NC_FILE_INFO_T* file, int abort) goto done; nclistfreeall(zinfo->controllist); NC_authfree(zinfo->auth); + NCZMD_free_metadata_handler(zinfo->metadata_handler); nullfree(zinfo); done: diff --git a/libnczarr/zincludes.h b/libnczarr/zincludes.h index 3fdae6c6fd..4280a2ff8a 100644 --- a/libnczarr/zincludes.h +++ b/libnczarr/zincludes.h @@ -48,6 +48,7 @@ extern "C" { #include "ncjson.h" #include "zmap.h" +#include "zmetadata.h" #include "zinternal.h" #include "zdispatch.h" #include "zprovenance.h" diff --git a/libnczarr/zinternal.h b/libnczarr/zinternal.h index 2548ad54ba..8acd6407f4 100644 --- a/libnczarr/zinternal.h +++ b/libnczarr/zinternal.h @@ -13,6 +13,7 @@ #define ZINTERNAL_H #define ZARRVERSION "2" +#define ZARRFORMAT2 2 /* NCZARRVERSION is independent of Zarr version, but NCZARRVERSION => ZARRVERSION */ @@ -38,11 +39,12 @@ # endif #endif -#define ZMETAROOT "/.zgroup" -#define ZMETAATTR "/.zattrs" -#define ZGROUP ".zgroup" -#define ZATTRS ".zattrs" -#define ZARRAY ".zarray" +/* V2 Reserved Objects */ +#define Z2METAROOT "/.zgroup" +#define Z2ATTSROOT "/.zattrs" +#define Z2GROUP ".zgroup" +#define Z2ATTRS ".zattrs" +#define Z2ARRAY ".zarray" /* V2 Reserved 
Attributes */ /* @@ -143,6 +145,7 @@ typedef struct NCZ_FILE_INFO { # define FLAG_NCZARR_KEY 16 /* _nczarr_xxx keys are stored in object and not in _nczarr_attrs */ NCZM_IMPL mapimpl; } controls; + struct NCZ_Metadata * metadata_handler; int default_maxstrlen; /* default max str size for variables of type string */ } NCZ_FILE_INFO_T; diff --git a/libnczarr/zmap_s3sdk.c b/libnczarr/zmap_s3sdk.c index 552a73473d..4f4fd7a35a 100644 --- a/libnczarr/zmap_s3sdk.c +++ b/libnczarr/zmap_s3sdk.c @@ -223,14 +223,14 @@ zs3open(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** m z3map->s3client = NC_s3sdkcreateclient(&z3map->s3); /* Search the root for content */ - content = nclistnew(); - if((stat = NC_s3sdkgetkeys(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,&nkeys,NULL,&z3map->errmsg))) - goto done; - if(nkeys == 0) { - /* dataset does not actually exist; we choose to return ENOOBJECT instead of EEMPTY */ - stat = NC_ENOOBJECT; - goto done; - } + // NOTE(review): the probe below is disabled, so this path no longer sets NC_ENOOBJECT when the root key has no content -- confirm this is intended. Was: content = nclistnew(); + // if((stat = NC_s3sdkgetkeys(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,&nkeys,NULL,&z3map->errmsg))) + // goto done; + // if(nkeys == 0) { + // /* dataset does not actually exist; we choose to return ENOOBJECT instead of EEMPTY */ + // stat = NC_ENOOBJECT; + // goto done; + // } if(mapp) *mapp = (NCZMAP*)z3map; done: diff --git a/libnczarr/zmetadata.c b/libnczarr/zmetadata.c new file mode 100644 index 0000000000..3c0c881c73 --- /dev/null +++ b/libnczarr/zmetadata.c @@ -0,0 +1,259 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. 
+ *********************************************************************/ + +#include "zmetadata.h" + +/**************************************************/ + +extern int NCZMD2_initialize(void); +extern int NCZMD2_finalize(void); + +/**************************************************/ +//////////////////////////////////////////////////// + +int NCZMD_initialize(void) +{ + int stat = NC_NOERR; + if((stat=NCZMD2_initialize())) goto done; +done: + return THROW(stat); +} + +int NCZMD_finalize(void) +{ + + int stat = NC_NOERR; + if((stat=NCZMD2_finalize())) goto done; +done: + return THROW(stat); +} + +// Collects the names of the subgroups of grp into subgrpnames (delegates to the active metadata handler) +int NCZMD_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames) +{ + return zfile->metadata_handler->dispatcher->list_groups(zfile, grp, subgrpnames); +} + +// Collects the names of the variables of grp into varnames (delegates to the active metadata handler) +int NCZMD_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames) +{ + return zfile->metadata_handler->dispatcher->list_variables(zfile, grp, varnames); +} + + +///////////////////////////////////////////////////////////////////// +// Fetch JSON content from .zmetadata or storage +///////////////////////////////////////////////////////////////////// + +int NCZMD_fetch_json_group(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jgroup) +{ + int stat = NC_NOERR; + char *group= NULL; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + if ((stat = nczm_concat(group, name, &key))) + goto done; + + stat = zfile->metadata_handler->dispatcher->fetch_json_content(zfile, NCZMD_GROUP, key, jgroup); +done: + nullfree(group); + nullfree(key); + return stat; +} + +int NCZMD_fetch_json_attrs(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jattrs) +{ + int stat = NC_NOERR; + char *group= NULL; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + if ((stat = 
nczm_concat(group, name, &key))) + goto done; + + stat = zfile->metadata_handler->dispatcher->fetch_json_content(zfile, NCZMD_ATTRS, key , jattrs); +done: + nullfree(group); + nullfree(key); + return stat; +} + +int NCZMD_fetch_json_array(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jarray) +{ + int stat = NC_NOERR; + char *group= NULL; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + + if ((stat = nczm_concat(group, name, &key))) + goto done; + + + stat = zfile->metadata_handler->dispatcher->fetch_json_content(zfile, NCZMD_ARRAY, key, jarray); +done: + nullfree(group); + nullfree(key); + return stat; +} + +//////////////////////////////////////////////////////////////////////////////// +// Update in-memory + storage JSON content +//////////////////////////////////////////////////////////////////////////////// + +int NCZMD_update_json_group(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, const NCjson *jgroup) +{ + int stat = NC_NOERR; + char *group= NULL; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + if ((stat = nczm_concat(group, name, &key))) + goto done; + + stat = zfile->metadata_handler->dispatcher->update_json_content(zfile, NCZMD_GROUP, key, jgroup); +done: + nullfree(group); + nullfree(key); + return stat; +} + +int NCZMD_update_json_attrs(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, const NCjson *jattrs) +{ + int stat = NC_NOERR; + char *group= NULL; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + if ((stat = nczm_concat(group, name, &key))) + goto done; + + stat = zfile->metadata_handler->dispatcher->update_json_content(zfile, NCZMD_ATTRS, key , jattrs); +done: + nullfree(group); + nullfree(key); + return stat; +} + +int NCZMD_update_json_array(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, const NCjson *jarray) +{ + int stat = 
NC_NOERR; + char *group= NULL; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + + if ((stat = nczm_concat(group, name, &key))) + goto done; + + + stat = zfile->metadata_handler->dispatcher->update_json_content(zfile, NCZMD_ARRAY, key, jarray); +done: + nullfree(group); + nullfree(key); + return stat; +} + +//////////////////////////////////////////////////////////////////////////// +// Uploads the consolidated view to the .zmetadata object; only done while creating and when a consolidated view exists +int NCZMD_consolidate(NCZ_FILE_INFO_T *zfile) { + int stat = NC_NOERR; + if (zfile->creating == 1 && zfile->metadata_handler != NULL && zfile->metadata_handler->jcsl !=NULL){ + stat = NCZ_uploadjson(zfile->map, Z2METADATA ,zfile->metadata_handler->jcsl); + } + return stat; +} +//////////////////////////////////////////////////////////////////////////// + +int NCZMD_is_metadata_consolidated(NCZ_FILE_INFO_T *zfile) +{ /* NC_NOERR when a consolidated dict is loaded and the handler is flagged ZARR_CONSOLIDATED, NC_ENOOBJECT otherwise */ + NCZ_Metadata *zmd = NULL; + zmd = zfile->metadata_handler; + if (zmd == NULL || + zmd->jcsl == NULL || + NCJsort(zmd->jcsl) != NCJ_DICT || + zmd->dispatcher == NULL || !(zmd->dispatcher->flags & ZARR_CONSOLIDATED)) + { + return NC_ENOOBJECT; + } + return NC_NOERR; +} + +int NCZMD_get_metadata_format(NCZ_FILE_INFO_T *zfile, int *zarrformat) +{ // Only pure Zarr is determined + + NCZ_Metadata *zmd = zfile->metadata_handler; + if ( !zmd || !zmd->dispatcher ) { + return NC_EFILEMETA; + } + + + if (zmd->dispatcher->zarr_format >= ZARRFORMAT2) + { + *zarrformat = zmd->dispatcher->zarr_format; + return NC_NOERR; + } + + // Last thing to do is to look for: + // .zattrs, .zgroup or .zarray (nczmap_exists yields NC_NOERR when the key exists; fail only when none of them does) + + if (nczmap_exists(zfile->map, "/" Z2ATTRS) != NC_NOERR && nczmap_exists(zfile->map, "/" Z2GROUP) != NC_NOERR && nczmap_exists(zfile->map, "/" Z2ARRAY) != NC_NOERR) + { + return NC_ENOTZARR; + } + + *zarrformat = ZARRFORMAT2; + return NC_NOERR; +} + +// Infers the metadata handler (consolidated when a usable .zmetadata dict is available, per-object otherwise) and stores a newly allocated handle in *mdhandlerp +int NCZMD_set_metadata_handler(NCZ_FILE_INFO_T *zfile, const NCZ_Metadata **mdhandlerp) +{ + int stat = NC_NOERR; + const NCZ_Metadata_Dispatcher *zmd_dispatcher = NULL; + NCjson 
*jcsl = NULL; + NCZ_Metadata *zmdh = NULL; + if (zfile->metadata_handler != NULL) + { + stat = NC_EOBJECT; + goto done; + } + + if ((zfile->creating || (stat = NCZ_downloadjson(zfile->map, Z2METADATA, &jcsl)) == NC_NOERR) + && jcsl != NULL && NCJsort(jcsl) == NCJ_DICT) /* NOTE(review): when creating, jcsl is still NULL here, so the consolidated handler is never selected for new datasets -- confirm this is intended */ + { + zmd_dispatcher = NCZ_csl_metadata_handler2; + }else{ + zmd_dispatcher = NCZ_metadata_handler2; + NCJreclaim(jcsl); + jcsl = NULL; + stat = NC_NOERR; /* an unusable/unreadable .zmetadata is not fatal: fall back to per-object metadata */ + } + + if ((zmdh = (NCZ_Metadata *)calloc(1, sizeof(NCZ_Metadata))) == NULL) + { + NCJreclaim(jcsl); /* do not leak the downloaded view */ + stat = NC_ENOMEM; + goto done; + } + zmdh->jcsl = jcsl; + zmdh->dispatcher = zmd_dispatcher; + *mdhandlerp = (const NCZ_Metadata *)zmdh; +done: + return stat; +} + +void NCZMD_free_metadata_handler(NCZ_Metadata * zmd){ + if (zmd == NULL) return; + NCJreclaim(zmd->jcsl); + nullfree(zmd); +} \ No newline at end of file diff --git a/libnczarr/zmetadata.h b/libnczarr/zmetadata.h new file mode 100644 index 0000000000..afb894b209 --- /dev/null +++ b/libnczarr/zmetadata.h @@ -0,0 +1,107 @@ +/* Copyright 2018-2018 University Corporation for Atmospheric + Research/Unidata. */ + +/* +Zarr Metadata Handling + +Encapsulates Zarr metadata operations across versions, supporting both +consolidated access and per-file access. Provides a common interface +for metadata operations. + +The dispatcher is defined by the type NCZ_Metadata_Dispatcher. +It offers 2 types of operations that decouple/abstract +filesystem access from the reading of the JSON metadata files +1. Listings: (involves either listing or parsing consolidated view) + - variables within a group + - groups within a group +2. Retrieve JSON representation of (sub)groups, arrays and attributes. 
+ Directly read from filesystem/objectstore or retrieve the JSON + object from the consolidated view respective to the group or variable + +Note: This will also be the case of zarr v3 +(the elements will be extracted from zarr.json instead) +*/ + +#ifndef ZMETADATA_H +#define ZMETADATA_H +#include "zincludes.h" +#include "ncjson.h" +#include "zinternal.h" + + +#if defined(__cplusplus) +extern "C" +{ +#endif +/* This is the version of the metadata table. It should be changed + * when new functions are added to the metadata table. */ +#ifndef NCZ_METADATA_VERSION +#define NCZ_METADATA_VERSION 1 +#endif /*NCZ_METADATA_VERSION*/ + +#define Z2METADATA "/.zmetadata" /* key of the Zarr v2 consolidated metadata object */ +#define Z3METADATA "/zarr.json" + +#define ZARR_NOT_CONSOLIDATED 0 /* values stored in NCZ_Metadata_Dispatcher.flags */ +#define ZARR_CONSOLIDATED 1 + +typedef enum { + NCZMD_NULL, /* not a valid object type */ + NCZMD_GROUP, /* group object (.zgroup in v2) */ + NCZMD_ATTRS, /* attributes object (.zattrs in v2) */ + NCZMD_ARRAY /* array object (.zarray in v2) */ +} NCZMD_MetadataType; + +typedef struct NCZ_Metadata_Dispatcher +{ + int zarr_format; /* Zarr format version */ + int dispatch_version; /* Dispatch table version*/ + size64_t flags; /* Metadata handling flags */ + + int (*list_groups)(NCZ_FILE_INFO_T *, NC_GRP_INFO_T *, NClist *subgrpnames); /* collect subgroup names of a group */ + int (*list_variables)(NCZ_FILE_INFO_T *, NC_GRP_INFO_T *, NClist *varnames); /* collect variable names of a group */ + int (*fetch_json_content)(NCZ_FILE_INFO_T *, NCZMD_MetadataType, const char *name, NCjson **jobj); /* read the JSON object of the given type stored under name */ + int (*update_json_content)(NCZ_FILE_INFO_T *, NCZMD_MetadataType, const char *name, const NCjson *jobj); /* write the JSON object of the given type under name */ +} NCZ_Metadata_Dispatcher; + +typedef struct NCZ_Metadata +{ + NCjson *jcsl; // Consolidated JSON view or NULL + const NCZ_Metadata_Dispatcher *dispatcher; +} NCZ_Metadata; + +// regular handler +extern const NCZ_Metadata_Dispatcher *NCZ_metadata_handler2; +// consolidated metadata handler +extern const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler2; + + +/* Called by nc_initialize and nc_finalize respectively */ +extern int NCZMD_initialize(void); +extern int NCZMD_finalize(void); + +extern int NCZMD_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist 
*subgrpnames); +extern int NCZMD_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames); + +extern int NCZMD_fetch_json_group(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jgroup); +extern int NCZMD_fetch_json_attrs(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jattrs); +extern int NCZMD_fetch_json_array(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jarrays); + +/* Write operations (each one goes through the update_json_content entry of the active handler) */ +extern int NCZMD_update_json_group(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, const NCjson *jgroup); +extern int NCZMD_update_json_attrs(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, const NCjson *jattrs); +extern int NCZMD_update_json_array(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, const NCjson *jarrays); +extern int NCZMD_consolidate(NCZ_FILE_INFO_T *zfile); /* uploads the consolidated view to Z2METADATA when creating and a view exists */ +/**************************************************/ + +/* Inference for the Metadata handler */ +extern int NCZMD_is_metadata_consolidated(NCZ_FILE_INFO_T *zfile); /* NC_NOERR if consolidated, NC_ENOOBJECT otherwise */ +extern int NCZMD_get_metadata_format(NCZ_FILE_INFO_T *zfile, int *zarrformat); // Only pure Zarr is determined +extern int NCZMD_set_metadata_handler(NCZ_FILE_INFO_T *zfile, const NCZ_Metadata **mdhandlerp); +extern void NCZMD_free_metadata_handler(NCZ_Metadata * zmd); + +#if defined(__cplusplus) +} +#endif + +#endif /* ZMETADATA_H */ diff --git a/libnczarr/zmetadata2.c b/libnczarr/zmetadata2.c new file mode 100644 index 0000000000..f8eec609e8 --- /dev/null +++ b/libnczarr/zmetadata2.c @@ -0,0 +1,364 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. 
+ *********************************************************************/ + +#include "zmetadata.h" + +/**************************************************/ + +extern int NCZF2_initialize(void); +extern int NCZF2_finalize(void); + +#define MINIMIM_CSL_REP_RAW "{\"metadata\":{},\"zarr_consolidated_format\":1}" + +int NCZMD_v2_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); +int NCZMD_v2_csl_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); + +int NCZMD_v2_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); +int NCZMD_v2_csl_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); + +int fetch_json_content_v2(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zarr_obj_type, const char *key, NCjson **jobj); +int fetch_csl_json_content_v2(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zarr_obj_type, const char *key, NCjson **jobj); + +int update_csl_json_content_v2(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, const NCjson *jobj); +int update_json_content_v2(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, const NCjson *jobj); + +/**************************************************/ + +static const NCZ_Metadata_Dispatcher NCZ_md2_table = { + ZARRFORMAT2, + NCZ_METADATA_VERSION, /* Version of the dispatch table */ + ZARR_NOT_CONSOLIDATED, /* Flags*/ + + .list_groups = NCZMD_v2_list_groups, + .list_variables = NCZMD_v2_list_variables, + + .fetch_json_content = fetch_json_content_v2, + .update_json_content = update_json_content_v2, +}; + +static const NCZ_Metadata_Dispatcher NCZ_csl_md2_table = { + ZARRFORMAT2, + NCZ_METADATA_VERSION, /* Version of the dispatch table */ + ZARR_CONSOLIDATED, /* Flags*/ + + .list_groups = NCZMD_v2_csl_list_groups, + .list_variables = NCZMD_v2_csl_list_variables, + + .fetch_json_content = fetch_csl_json_content_v2, + .update_json_content = update_csl_json_content_v2, +}; + +const 
NCZ_Metadata_Dispatcher *NCZ_metadata_handler2 = &NCZ_md2_table; +const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler2 = &NCZ_csl_md2_table; + +/******************************************************/ + +int +NCZMD2_initialize(void) +{ + return NC_NOERR; +} + +int +NCZMD2_finalize(void) +{ + return NC_NOERR; +} + +//////////////////////////////////////////////////// + +int NCZMD_v2_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames) +{ + size_t i; + int stat = NC_NOERR; + char *grpkey = NULL; + char *subkey = NULL; + char *zgroup = NULL; + NClist *matches = nclistnew(); + + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + /* Get the map and search group */ + if ((stat = nczmap_search(zfile->map, grpkey, matches))) + goto done; + for (i = 0; i < nclistlength(matches); i++) + { + const char *name = nclistget(matches, i); + if (name[0] == NCZM_DOT) + continue; /* zarr/nczarr specific */ + /* See if name/.zgroup exists */ + if ((stat = nczm_concat(grpkey, name, &subkey))) + goto done; + if ((stat = nczm_concat(subkey, Z2GROUP, &zgroup))) + goto done; + if ((stat = nczmap_exists(zfile->map, zgroup)) == NC_NOERR) + nclistpush(subgrpnames, strdup(name)); + stat = NC_NOERR; + nullfree(subkey); + subkey = NULL; + nullfree(zgroup); + zgroup = NULL; + } + +done: + nullfree(grpkey); + nullfree(subkey); + nullfree(zgroup); + nclistfreeall(matches); + return stat; +} + +int NCZMD_v2_csl_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames) +{ + size_t i; + int stat = NC_NOERR; + char *grpkey = NULL; + char *subkey = NULL; + char *zgroup = NULL; + NClist *matches = nclistnew(); + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + const char *group = grpkey + (grpkey[0] == '/'); + size_t lgroup = strlen(group); + + const NCjson *jmetadata = NULL; + if (NCJdictget(zfile->metadata_handler->jcsl, "metadata", &jmetadata) || jmetadata == NULL || NCJsort(jmetadata) != NCJ_DICT) goto done; /* no usable "metadata" dict: nothing to list */ + for (i = 0; i < 
NCJlength(jmetadata); i += 2) + { + NCjson *jname = NCJith(jmetadata, i); + const char *fullname = NCJstring(jname); + if (fullname == NULL) continue; /* malformed key: skip it instead of passing NULL to strlen */ + size_t lfullname = strlen(fullname); + if (lfullname < lgroup || strncmp(fullname, group, lgroup) || + (lgroup > 0 && fullname[lgroup] != NCZM_SEP[0])) + { + continue; + } + const char *start = fullname + lgroup + (lgroup > 0); + const char *end = strchr(start, NCZM_SEP[0]); + if (end == NULL || end <= start) + continue; + size_t lname = (size_t)(end - start); + // Keep only GROUP/NAME/.zgroup keys: what follows NAME must be exactly "/.zgroup" + if (strncmp(Z2METAROOT, end, sizeof(Z2METAROOT)) == 0) + { + nclistpush(subgrpnames, strndup(start, lname)); + } + } +done: + nullfree(grpkey); + nullfree(subkey); + nullfree(zgroup); + nclistfreeall(matches); + return stat; +} + +int NCZMD_v2_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames) +{ + size_t i; + int stat = NC_NOERR; + char *grpkey = NULL; + char *varkey = NULL; + char *zarray = NULL; + NClist *matches = nclistnew(); + + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + /* Get the map and search group */ + if ((stat = nczmap_search(zfile->map, grpkey, matches))) + goto done; + for (i = 0; i < nclistlength(matches); i++) + { + const char *name = nclistget(matches, i); + if (name[0] == NCZM_DOT) + continue; /* zarr/nczarr specific */ + /* See if name/.zarray exists */ + if ((stat = nczm_concat(grpkey, name, &varkey))) + goto done; + if ((stat = nczm_concat(varkey, Z2ARRAY, &zarray))) + goto done; + if ((stat = nczmap_exists(zfile->map, zarray)) == NC_NOERR) + nclistpush(varnames, strdup(name)); + stat = NC_NOERR; + nullfree(varkey); + varkey = NULL; + nullfree(zarray); + zarray = NULL; + } + +done: + nullfree(grpkey); + nullfree(varkey); + nullfree(zarray); + nclistfreeall(matches); + return stat; +} + +int NCZMD_v2_csl_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames) +{ + size_t i; + int stat = NC_NOERR; + char *grpkey = NULL; + char *varkey = NULL; + char 
*zarray = NULL; + NClist *matches = nclistnew(); + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + const char *group = grpkey + (grpkey[0] == '/'); + size_t lgroup = strlen(group); + + const NCjson *jmetadata = NULL; + if (NCJdictget(zfile->metadata_handler->jcsl, "metadata", &jmetadata) || jmetadata == NULL || NCJsort(jmetadata) != NCJ_DICT) goto done; /* no usable "metadata" dict: nothing to list */ + for (i = 0; i < NCJlength(jmetadata); i += 2) + { + NCjson *jname = NCJith(jmetadata, i); + const char *fullname = NCJstring(jname); + if (fullname == NULL) continue; /* malformed key: skip it instead of passing NULL to strlen */ + size_t lfullname = strlen(fullname); + if (lfullname < lgroup || strncmp(fullname, group, lgroup) || + (lgroup > 0 && fullname[lgroup] != NCZM_SEP[0])) + { + continue; + } + const char *start = fullname + lgroup + (lgroup > 0); + const char *end = strchr(start, NCZM_SEP[0]); + if (end == NULL || end <= start) + continue; + size_t lname = (size_t)(end - start); + // Keep only GROUP/NAME/.zarray keys: what follows NAME must be exactly "/.zarray" + if (strncmp("/" Z2ARRAY, end, sizeof("/" Z2ARRAY)) == 0) + { + nclistpush(varnames, strndup(start, lname)); + } + } +done: + nullfree(grpkey); + nullfree(varkey); + nullfree(zarray); + nclistfreeall(matches); + return stat; +} + +// Static function only valid for V2! 
+static int zarr_obj_type2suffix(NCZMD_MetadataType zarr_obj_type, const char **suffix){ + switch (zarr_obj_type) + { + case NCZMD_GROUP: + *suffix = Z2GROUP; + break; + case NCZMD_ATTRS: + *suffix = Z2ATTRS; + break; + case NCZMD_ARRAY: + *suffix = Z2ARRAY; + break; + default: + return NC_EINVAL; // Invalid type + } + return NC_NOERR; +} + +int fetch_csl_json_content_v2(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, NCjson **jobj) +{ + int stat = NC_NOERR; + const NCjson *jtmp = NULL; + const char *suffix; + char * key = NULL; + if ( (stat = zarr_obj_type2suffix(zobj_t, &suffix)) + ||(stat = nczm_concat(prefix, suffix, &key))){ + return stat; + } + + if (NCJdictget(zfile->metadata_handler->jcsl, "metadata", &jtmp) == 0 + && jtmp && NCJsort(jtmp) == NCJ_DICT) + { + NCjson *tmp = NULL; + if ((stat = NCJdictget(jtmp, key + (key[0] == '/'), (const NCjson**)&tmp))) + goto done; + if (tmp) + NCJclone(tmp, jobj); + } +done: + nullfree(key); + return stat; + +} + +int fetch_json_content_v2(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, NCjson **jobj) +{ + int stat = NC_NOERR; + const char *suffix; + char * key = NULL; + if ((stat = zarr_obj_type2suffix(zobj_t, &suffix)) + || (stat = nczm_concat(prefix, suffix, &key))){ + goto done; + } + + stat = NCZ_downloadjson(zfile->map, key, jobj); +done: + nullfree(key); + return stat; +} + +//////////////////////////////////////////////////////////////////////////// +// Write to internal JSON pointer and/or directly to storage +///////////////////////////// +int update_csl_json_content_v2(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, const NCjson *jobj) +{ + int stat = NC_NOERR; + const char *suffix = NULL; + char *key = NULL; /* initialised before the first goto: the cleanup at done: frees it */ + NCjson *jrep = NULL; /* "metadata" dict inside the consolidated view (not owned here) */ + NCjson *jval = NULL; /* private copy of jobj, owned here until it is inserted into jrep */ + const char *mdkey = NULL; + + // uses normal implementation to write all the .z files + if ((stat=update_json_content_v2(zfile,zobj_t,prefix,jobj))){ + goto done; + } + // Allocating representation if doesn't exist + if (zfile->metadata_handler->jcsl == NULL && + (stat = 
NCJparse(MINIMIM_CSL_REP_RAW,0,&zfile->metadata_handler->jcsl))){ + goto done; + } + // Updating the internal JSON representation to be synced later + if ((stat = NCJdictget(zfile->metadata_handler->jcsl,"metadata", (const NCjson**)&jrep)) || jrep == NULL) { + goto done; + } + if ((stat = zarr_obj_type2suffix(zobj_t, &suffix)) + || (stat = nczm_concat(prefix, suffix, &key))){ + goto done; + } + // Concatenate will add separator as prefix if prefix NULL + mdkey = key[0] == '/'?key+1:key; + if ((stat = NCJclone(jobj,&jval))<0) {stat = NC_EINVAL; goto done;} + // We overwrite existing values if key is the same (nothing is recorded when the clone is NULL) + if (jval != NULL && (stat = NCJinsert(jrep, mdkey, jval))<0) {stat = NC_EINVAL; goto done;} + jval = NULL; /* ownership moved into jrep */ +done: + NCJreclaim(jval); /* only non-NULL when the insert failed */ + nullfree(key); + return stat; +} + +int update_json_content_v2(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, const NCjson *jobj) +{ + int stat = NC_NOERR; + const char *suffix; + char * key = NULL; + if ((stat = zarr_obj_type2suffix(zobj_t, &suffix)) + || (stat = nczm_concat(prefix, suffix, &key))){ + goto done; + } + + stat = NCZ_uploadjson(zfile->map, key, jobj); +done: + nullfree(key); + return stat; +} \ No newline at end of file diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index c9d55ee751..46072cfae6 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -32,8 +32,6 @@ static int define_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* diminfo static int define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames); static int define_var1(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const char* varname); static int define_subgrps(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* subgrpnames); -static int searchvars(NCZ_FILE_INFO_T*, NC_GRP_INFO_T*, NClist*); -static int searchsubgrps(NCZ_FILE_INFO_T*, NC_GRP_INFO_T*, NClist*); static int locategroup(NC_FILE_INFO_T* file, size_t nsegs, NClist* segments, NC_GRP_INFO_T** grpp); static int createdim(NC_FILE_INFO_T* file, const char* name, 
size64_t dimlen, NC_DIM_INFO_T** dimp); static int parsedimrefs(NC_FILE_INFO_T*, NClist* dimnames, size64_t* shape, NC_DIM_INFO_T** dims, int create); @@ -44,7 +42,6 @@ static int json_convention_read(const NCjson* jdict, NCjson** jtextp); static int ncz_validate(NC_FILE_INFO_T* file); static int insert_attr(NCjson* jatts, NCjson* jtypes, const char* aname, NCjson* javalue, const char* atype); static int insert_nczarr_attr(NCjson* jatts, NCjson* jtypes); -static int upload_attrs(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson* jatts); static int getnczarrkey(NC_OBJ* container, const char* name, const NCjson** jncxxxp); static int downloadzarrobj(NC_FILE_INFO_T*, struct ZARROBJ* zobj, const char* fullpath, const char* objname); static int dictgetalt(const NCjson* jdict, const char* name, const char* alt, const NCjson** jvaluep); @@ -82,6 +79,7 @@ ncz_sync_file(NC_FILE_INFO_T* file, int isclose) if((stat = ncz_sync_grp(file, file->root_grp, isclose))) goto done; + stat = NCZMD_consolidate((NCZ_FILE_INFO_T*)file->format_file_info); done: NCJreclaim(json); return ZUNTRACE(stat); @@ -156,9 +154,6 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) char version[1024]; int purezarr = 0; NCZMAP* map = NULL; - char* fullpath = NULL; - char* key = NULL; - NCjson* json = NULL; NCjson* jgroup = NULL; NCjson* jdims = NULL; NCjson* jvars = NULL; @@ -169,7 +164,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) NCjson* jatts = NULL; NCjson* jtypes = NULL; - LOG((3, "%s: dims: %s", __func__, key)); + LOG((3, "%s: dims", __func__)); ZTRACE(3,"file=%s grp=%s isclose=%d",file->controller->path,grp->hdr.name,isclose); zinfo = file->format_file_info; @@ -177,21 +172,12 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; - /* Construct grp key */ - if((stat = NCZ_grpkey(grp,&fullpath))) - goto done; - - /* build ZGROUP contents */ NCJnew(NCJ_DICT,&jgroup); 
snprintf(version,sizeof(version),"%d",zinfo->zarr.zarr_version); if((stat = NCJaddstring(jgroup,NCJ_STRING,"zarr_format"))<0) {stat = NC_EINVAL; goto done;} if((stat = NCJaddstring(jgroup,NCJ_INT,version))<0) {stat = NC_EINVAL; goto done;} - /* build ZGROUP path */ - if((stat = nczm_concat(fullpath,ZGROUP,&key))) - goto done; /* Write to map */ - if((stat=NCZ_uploadjson(map,key,jgroup))) goto done; - nullfree(key); key = NULL; + if((stat=NCZMD_update_json_group(zinfo,grp,NULL,(const NCjson*)jgroup))) goto done; if(!purezarr) { if(grp->parent == NULL) { /* Root group */ @@ -255,7 +241,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) } /* Write out the .zattrs */ - if((stat = upload_attrs(file,(NC_OBJ*)grp,jatts))) goto done; + if((stat = NCZMD_update_json_attrs(zinfo, grp, NULL, (const NCjson *)jatts))) goto done; /* Now synchronize all the variables */ for(i=0; ivars); i++) { @@ -269,10 +255,10 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) if((stat = ncz_sync_grp(file,g,isclose))) goto done; } + // Last step try to write consolidated file done: NCJreclaim(jtmp); NCJreclaim(jsuper); - NCJreclaim(json); NCJreclaim(jgroup); NCJreclaim(jdims); NCJreclaim(jvars); @@ -280,8 +266,6 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) NCJreclaim(jnczgrp); NCJreclaim(jtypes); NCJreclaim(jatts); - nullfree(fullpath); - nullfree(key); return ZUNTRACE(THROW(stat)); } @@ -303,8 +287,6 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) NCZ_FILE_INFO_T* zinfo = NULL; char number[1024]; NCZMAP* map = NULL; - char* fullpath = NULL; - char* key = NULL; char* dimpath = NULL; NClist* dimrefs = NULL; NCjson* jvar = NULL; @@ -343,10 +325,6 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) if((stat = NCZ_filter_setup(var))) goto done; #endif - /* Construct var path */ - if((stat = NCZ_varkey(var,&fullpath))) - goto done; - /* Create the zarray json object */ 
NCJnew(NCJ_DICT,&jvar); @@ -484,15 +462,10 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) if((stat = NCJinsert(jvar,"dimension_separator",jtmp))<0) {stat = NC_EINVAL; goto done;} jtmp = NULL; } - - /* build .zarray path */ - if((stat = nczm_concat(fullpath,ZARRAY,&key))) - goto done; - + /* Write to map */ - if((stat=NCZ_uploadjson(map,key,jvar))) + if((stat=NCZMD_update_json_array(zinfo,var->container,var->hdr.name,(const NCjson*)jvar))) goto done; - nullfree(key); key = NULL; /* Capture dimref names as FQNs */ if(var->ndims > 0) { @@ -550,15 +523,13 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) jtypes = NULL; } - /* Write out the .zattrs */ - if((stat = upload_attrs(file,(NC_OBJ*)var,jatts))) goto done; + /* Write out the //.zattrs */ + if((stat = NCZMD_update_json_attrs(zinfo,var->container, var->hdr.name, jatts))) goto done; var->created = 1; done: nclistfreeall(dimrefs); - nullfree(fullpath); - nullfree(key); nullfree(dtypename); nullfree(dimpath); NCJreclaim(jvar); @@ -1140,9 +1111,10 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) /* Construct grp path */ if((stat = NCZ_grpkey(grp,&fullpath))) goto done; - + /* Download .zgroup and .zattrs */ - if((stat = downloadzarrobj(file,&zgrp->zgroup,fullpath,ZGROUP))) goto done; + stat = NCZMD_fetch_json_group(zinfo, grp, NULL, &zgrp->zgroup.obj); + stat = NCZMD_fetch_json_attrs(zinfo, grp, NULL, &zgrp->zgroup.atts); jgroup = zgrp->zgroup.obj; jattrs = zgrp->zgroup.atts; @@ -1404,7 +1376,6 @@ define_var1(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const char* varname) const NCjson* jncvar = NULL; const NCjson* jdimrefs = NULL; const NCjson* jvalue = NULL; - char* varpath = NULL; char* key = NULL; size64_t* shapes = NULL; NClist* dimnames = NULL; @@ -1443,12 +1414,10 @@ define_var1(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const char* varname) /* Indicate we do not have quantizer yet */ var->quantize_mode = -1; - /* Construct var path */ - if((stat = 
NCZ_varkey(var,&varpath))) - goto done; - /* Download */ - if((stat = downloadzarrobj(file,&zvar->zarray,varpath,ZARRAY))) goto done; + if(stat = NCZMD_fetch_json_array(zinfo, grp, varname, &zvar->zarray.obj) + || NCZMD_fetch_json_attrs(zinfo, grp, varname, &zvar->zarray.atts)) goto done; + jvar = zvar->zarray.obj; jatts = zvar->zarray.atts; assert(jvar == NULL || NCJsort(jvar) == NCJ_DICT); @@ -1711,7 +1680,6 @@ define_var1(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const char* varname) done: nclistfreeall(dimnames); dimnames = NULL; - nullfree(varpath); varpath = NULL; nullfree(shapes); shapes = NULL; nullfree(key); key = NULL; return THROW(stat); @@ -1815,11 +1783,11 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; zroot = (NCZ_GRP_INFO_T*)root->format_grp_info; - /* Construct grp key */ - if((stat = NCZ_grpkey(root,&fullpath))) goto done; - /* Download the root group .zgroup and associated .zattrs */ - if((stat = downloadzarrobj(file, &zroot->zgroup, fullpath, ZGROUP))) goto done; + /* Download */ + if(stat = NCZMD_fetch_json_group(zinfo, root, NULL, &zroot->zgroup.obj) + || NCZMD_fetch_json_attrs(zinfo, root, NULL, &zroot->zgroup.atts)) goto done; + jzgroup = zroot->zgroup.obj; /* Look for superblock; first in .zattrs and then in .zgroup */ @@ -1837,12 +1805,20 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) if(jsuper == NULL) { /* See if this is looks like a NCZarr/Zarr dataset at all by looking for anything here of the form ".z*" */ - if((stat = ncz_validate(file))) goto done; + if(!NCZMD_is_metadata_consolidated(zinfo) || (stat = ncz_validate(file))) goto done; /* ok, assume pure zarr with no groups */ zinfo->controls.flags |= FLAG_PUREZARR; if(zarr_format == NULL) zarr_format = strdup("2"); } + int tformat = 0; + if(!NCZMD_get_metadata_format(zinfo, &tformat)){ + if (zarr_format == NULL) { + zarr_format = strdup("0"); + } + sprintf(zarr_format, 
"%d",tformat); + } + /* Look for _nczarr_group */ if((stat = getnczarrkey((NC_OBJ*)root,NCZ_V2_GROUP,&jnczgroup))) goto done; @@ -1868,7 +1844,8 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) if(nczarrvp) {*nczarrvp = nczarr_version; nczarr_version = NULL;} if(zarrfp) {*zarrfp = zarr_format; zarr_format = NULL;} done: - nullfree(fullpath); + NCJreclaim(zroot->zgroup.obj); + NCJreclaim(zroot->zgroup.atts); nullfree(zarr_format); nullfree(nczarr_version); return ZUNTRACE(THROW(stat)); @@ -1957,85 +1934,14 @@ parse_group_content_pure(NCZ_FILE_INFO_T* zinfo, NC_GRP_INFO_T* grp, NClist* va ZTRACE(3,"zinfo=%s grp=%s |varnames|=%u |subgrps|=%u",zinfo->common.file->controller->path,grp->hdr.name,(unsigned)nclistlength(varnames),(unsigned)nclistlength(subgrps)); nclistclear(varnames); - if((stat = searchvars(zinfo,grp,varnames))) goto done; + if((stat = NCZMD_list_variables(zinfo, grp,varnames))) goto done; nclistclear(subgrps); - if((stat = searchsubgrps(zinfo,grp,subgrps))) goto done; + if((stat = NCZMD_list_groups(zinfo, grp,subgrps))) goto done; done: return ZUNTRACE(THROW(stat)); } - -static int -searchvars(NCZ_FILE_INFO_T* zfile, NC_GRP_INFO_T* grp, NClist* varnames) -{ - size_t i; - int stat = NC_NOERR; - char* grpkey = NULL; - char* varkey = NULL; - char* zarray = NULL; - NClist* matches = nclistnew(); - - /* Compute the key for the grp */ - if((stat = NCZ_grpkey(grp,&grpkey))) goto done; - /* Get the map and search group */ - if((stat = nczmap_search(zfile->map,grpkey,matches))) goto done; - for(i=0;imap,zarray)) == NC_NOERR) - nclistpush(varnames,strdup(name)); - stat = NC_NOERR; - nullfree(varkey); varkey = NULL; - nullfree(zarray); zarray = NULL; - } - -done: - nullfree(grpkey); - nullfree(varkey); - nullfree(zarray); - nclistfreeall(matches); - return stat; -} - -static int -searchsubgrps(NCZ_FILE_INFO_T* zfile, NC_GRP_INFO_T* grp, NClist* subgrpnames) -{ - size_t i; - int stat = NC_NOERR; - char* grpkey = NULL; - char* subkey = 
NULL; - char* zgroup = NULL; - NClist* matches = nclistnew(); - - /* Compute the key for the grp */ - if((stat = NCZ_grpkey(grp,&grpkey))) goto done; - /* Get the map and search group */ - if((stat = nczmap_search(zfile->map,grpkey,matches))) goto done; - for(i=0;imap,zgroup)) == NC_NOERR) - nclistpush(subgrpnames,strdup(name)); - stat = NC_NOERR; - nullfree(subkey); subkey = NULL; - nullfree(zgroup); zgroup = NULL; - } - -done: - nullfree(grpkey); - nullfree(subkey); - nullfree(zgroup); - nclistfreeall(matches); - return stat; -} - /* Convert a list of integer strings to 64 bit dimension sizes (shapes) */ static int decodeints(const NCjson* jshape, size64_t* shapes) @@ -2459,55 +2365,6 @@ insert_nczarr_attr(NCjson* jatts, NCjson* jtypes) return NC_NOERR; } -/** -Upload a .zattrs object -Optionally take control of jatts and jtypes -@param file -@param container -@param jattsp -@param jtypesp -*/ -static int -upload_attrs(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson* jatts) -{ - int stat = NC_NOERR; - NCZ_FILE_INFO_T* zinfo = NULL; - NC_VAR_INFO_T* var = NULL; - NC_GRP_INFO_T* grp = NULL; - NCZMAP* map = NULL; - char* fullpath = NULL; - char* key = NULL; - - ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); - - if(jatts == NULL) goto done; - - zinfo = file->format_file_info; - map = zinfo->map; - - if(container->sort == NCVAR) { - var = (NC_VAR_INFO_T*)container; - } else if(container->sort == NCGRP) { - grp = (NC_GRP_INFO_T*)container; - } - - /* Construct container path */ - if(container->sort == NCGRP) - stat = NCZ_grpkey(grp,&fullpath); - else - stat = NCZ_varkey(var,&fullpath); - if(stat) goto done; - - /* write .zattrs*/ - if((stat = nczm_concat(fullpath,ZATTRS,&key))) goto done; - if((stat=NCZ_uploadjson(map,key,jatts))) goto done; - nullfree(key); key = NULL; - -done: - nullfree(fullpath); - return ZUNTRACE(THROW(stat)); -} - #if 0 /** @internal Get contents of a meta object; fail it it does not exist @@ -2600,7 +2457,7 @@ 
downloadzarrobj(NC_FILE_INFO_T* file, struct ZARROBJ* zobj, const char* fullpath if((stat = nczm_concat(fullpath,objname,&key))) goto done; if((stat=NCZ_downloadjson(map,key,&zobj->obj))) goto done; nullfree(key); key = NULL; - if((stat = nczm_concat(fullpath,ZATTRS,&key))) goto done; + if((stat = nczm_concat(fullpath,Z2ATTRS,&key))) goto done; if((stat=NCZ_downloadjson(map,key,&zobj->atts))) goto done; done: nullfree(key); diff --git a/libnczarr/zutil.c b/libnczarr/zutil.c index 8ca4602b24..c8d920e961 100644 --- a/libnczarr/zutil.c +++ b/libnczarr/zutil.c @@ -272,7 +272,7 @@ NCZ_downloadjson(NCZMAP* zmap, const char* key, NCjson** jsonp) @author Dennis Heimbigner */ int -NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json) +NCZ_uploadjson(NCZMAP* zmap, const char* key, const NCjson* json) { int stat = NC_NOERR; char* content = NULL; diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt index ad6fde8c8b..e7ffbb19f4 100644 --- a/nczarr_test/CMakeLists.txt +++ b/nczarr_test/CMakeLists.txt @@ -181,6 +181,7 @@ IF(NETCDF_ENABLE_TESTS) ENDIF() add_sh_test(nczarr_test run_purezarr) + add_sh_test(nczarr_test run_consolidated_zarr) add_sh_test(nczarr_test run_misc) add_sh_test(nczarr_test run_nczarr_fill) add_sh_test(nczarr_test run_jsonconvention) diff --git a/nczarr_test/Makefile.am b/nczarr_test/Makefile.am index 7054eeb42b..71aad41c38 100644 --- a/nczarr_test/Makefile.am +++ b/nczarr_test/Makefile.am @@ -68,6 +68,7 @@ TESTS += run_ncgen4.sh TESTS += run_quantize.sh TESTS += run_purezarr.sh +TESTS += run_consolidated_zarr.sh TESTS += run_interop.sh TESTS += run_misc.sh TESTS += run_nczarr_fill.sh @@ -205,7 +206,7 @@ endif EXTRA_DIST = CMakeLists.txt \ run_ut_map.sh run_ut_mapapi.sh run_ut_misc.sh run_ncgen4.sh \ run_nccopyz.sh run_fillonlyz.sh run_chunkcases.sh test_nczarr.sh run_perf_chunks1.sh \ -run_purezarr.sh run_interop.sh run_misc.sh \ +run_purezarr.sh run_consolidated_zarr.sh run_interop.sh run_misc.sh \ run_filter.sh \ run_newformat.sh 
run_nczarr_fill.sh run_quantize.sh \ run_jsonconvention.sh run_nczfilter.sh run_unknown.sh \ diff --git a/nczarr_test/ref_consolidated_zarr.cdl b/nczarr_test/ref_consolidated_zarr.cdl new file mode 100644 index 0000000000..f37bd3946f --- /dev/null +++ b/nczarr_test/ref_consolidated_zarr.cdl @@ -0,0 +1,12 @@ +netcdf tmp_consolidated_zarr { +dimensions: + _Anonymous_Dim_2 = 2 ; + _Anonymous_Dim_5 = 5 ; +variables: + int i(_Anonymous_Dim_2, _Anonymous_Dim_5) ; +data: + + i = + _, _, _, _, _, + _, _, _, _, _ ; +} diff --git a/nczarr_test/ref_consolidated_zarr_2.18.2_python.zarr/.zattrs b/nczarr_test/ref_consolidated_zarr_2.18.2_python.zarr/.zattrs new file mode 100644 index 0000000000..89c77849b2 --- /dev/null +++ b/nczarr_test/ref_consolidated_zarr_2.18.2_python.zarr/.zattrs @@ -0,0 +1,3 @@ +{ + "Description": "Consolidated zarr test" +} \ No newline at end of file diff --git a/nczarr_test/ref_consolidated_zarr_2.18.2_python.zarr/.zgroup b/nczarr_test/ref_consolidated_zarr_2.18.2_python.zarr/.zgroup new file mode 100644 index 0000000000..3b7daf227c --- /dev/null +++ b/nczarr_test/ref_consolidated_zarr_2.18.2_python.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/nczarr_test/ref_consolidated_zarr_2.18.2_python.zarr/.zmetadata b/nczarr_test/ref_consolidated_zarr_2.18.2_python.zarr/.zmetadata new file mode 100644 index 0000000000..f77dab187e --- /dev/null +++ b/nczarr_test/ref_consolidated_zarr_2.18.2_python.zarr/.zmetadata @@ -0,0 +1,67 @@ +{ + "metadata": { + ".zattrs": { + "Description": "Consolidated zarr test" + }, + ".zgroup": { + "zarr_format": 2 + }, + "G1/.zattrs": { + "Details": "Varibles are chunked" + }, + "G1/.zgroup": { + "zarr_format": 2 + }, + "G1/subg1/.zgroup": { + "zarr_format": 2 + }, + "G1/subg1/myarray/.zarray": { + "chunks": [ + 6, + 15 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " tmp_consolidated_zarr_${zext}.cdl + diff -b 
${srcdir}/ref_consolidated_zarr.cdl tmp_consolidated_zarr_${zext}.cdl + + echo "*** Test: xarray zarr write then read; format=$zext" + fileargs tmp_xarray "mode=zarr,$zext" + #deletemap $zext $file + ${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_consolidated_zarr_base.cdl + ${NCDUMP} $fileurl > tmp_xarray_${zext}.cdl + diff -b ${srcdir}/ref_xarray.cdl tmp_xarray_${zext}.cdl + + echo "*** Test: consolidated zarr reading nczarr; format=$zext" + fileargs tmp_nczarr "mode=nczarr,noxarray,$zext" + deletemap $zext $file + ${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_whole.cdl + fileargs tmp_nczarr "mode=zarr,$zext" + ${NCDUMP} -n nczarr2zarr $fileurl > tmp_nczarr_${zext}.cdl + diff -b ${srcdir}/ref_nczarr2zarr.cdl tmp_nczarr_${zext}.cdl +} + +testcase_csl_vs_no(){ + zext=$1 + + set -x + echo "*** Test: consolidated pure python zarr read; format=$zext" + deletemap $zext $file + cp -r $srcdir/ref_consolidated_zarr_2.18.2_python.zarr ref_consolidated_zarr_2.18.2_python.zarr.$zext + cp -r $srcdir/ref_consolidated_zarr_2.18.2_python.zarr ref_zarr_2.18.2_python.zarr.$zext + rm -f ref_zarr_2.18.2_python.zarr.$zext/.zmetadata + fileargs ref_consolidated_zarr_2.18.2_python.zarr "mode=zarr" + echo "${NCDUMP} $fileurl > tmp_consolidated_python_zarr_${zext}.cdl" + ${NCDUMP} -n same_name $fileurl > tmp_consolidated_python_zarr_${zext}.cdl + fileargs ref_zarr_2.18.2_python.zarr "mode=zarr,$zext" + ${NCDUMP} -n same_name $fileurl > tmp_python_zarr_${zext}.cdl + diff -b tmp_consolidated_python_zarr_${zext}.cdl tmp_python_zarr_${zext}.cdl + set +x +} + +testcase file +testcase_csl_vs_no file +if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi +if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi + diff --git a/nczarr_test/run_newformat.sh b/nczarr_test/run_newformat.sh index fde66c9314..195a3e227b 100755 --- a/nczarr_test/run_newformat.sh +++ b/nczarr_test/run_newformat.sh @@ -22,6 +22,14 @@ ${NCDUMP} -n ref_oldformat "$fileurl" > ./tmp_oldformat.cdl diff -w 
${srcdir}/ref_oldformat.cdl ./tmp_oldformat.cdl } +testcaseoldconsolidated() { +echo "*** Test old format support on consolidated zarr" +zext=$1 +fileargs ${srcdir}/ref_oldformat_only_consolidated +${NCDUMP} -n ref_oldformat "$fileurl" > ./tmp_oldformat_consolidated.cdl +diff -w ${srcdir}/ref_oldformat.cdl ./tmp_oldformat_consolidated.cdl +} + testcasecvt() { echo "*** Test old format to new format nczarr copy" zext=$1 @@ -40,9 +48,32 @@ ${NCDUMP} -n ref_oldformat "file://tmp_newformat.file#mode=zarr,file" > ./tmp_ne diff -w ${srcdir}/ref_newformatpure.cdl ./tmp_newpure.cdl } +testcaseconsolidated() { +echo "*** Test old format to new format consolidated nczarr copy" +zext=$1 +fileargs ${srcdir}/ref_oldformat_only_consolidated +${NCCOPY} "$fileurl" "file://tmp_newformat_consolidated.file#mode=nczarr,file" +${NCDUMP} -n ref_oldformat "file://tmp_newformat_consolidated.file#mode=nczarr,file" > ./tmp_oldformat_consolidated.cdl +diff -w ${srcdir}/ref_oldformat.cdl ./tmp_oldformat_consolidated.cdl +} + +testcasepureconsolidated() { +echo "*** Test old format to new format consolidated pure zarr copy" +zext=$1 +fileargs ${srcdir}/ref_oldformat_only_consolidated +# For the moment is not possible to write consolidated datasets +#${NCCOPY} "$fileurl" "file://tmp_newformat_consolidated.file#mode=nczarr,file" +# so unziping will act like nccopy: +unzip -qq ${srcdir}/ref_oldformat_only_consolidated.zip -d tmp_newformat_consolidated.file +${NCDUMP} -n ref_oldformat "file://tmp_newformat_consolidated.file#mode=zarr,file" > ./tmp_new_consolidated.cdl +diff -w ${srcdir}/ref_newformatpure.cdl ./tmp_newpure.cdl +} # Do zip tests only if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcaseold zip + testcaseoldconsolidated zip testcasecvt zip testcasepure zip + testcaseconsolidated zip + testcasepureconsolidated zip fi diff --git a/nczarr_test/test_nczarr.sh b/nczarr_test/test_nczarr.sh index 85e4794cd0..90faa7c89d 100755 --- a/nczarr_test/test_nczarr.sh +++ b/nczarr_test/test_nczarr.sh 
@@ -9,7 +9,7 @@ if test "x$SETX" != x; then set -x; fi # Figure out which cloud repo to use if test "x$NCZARR_S3_TEST_HOST" = x ; then # export NCZARR_S3_TEST_HOST=stratus.ucar.edu - export NCZARR_S3_TEST_HOST=s3.us-east-1.amazonaws.com + export NCZARR_S3_TEST_HOST="${S3ENDPOINT:-s3.us-east-1.amazonaws.com}" fi if test "x$NCZARR_S3_TEST_BUCKET" = x ; then export NCZARR_S3_TEST_BUCKET="${S3TESTBUCKET}" diff --git a/nczarr_test/ut_map.c b/nczarr_test/ut_map.c index b769ce643a..72d418a992 100644 --- a/nczarr_test/ut_map.c +++ b/nczarr_test/ut_map.c @@ -95,7 +95,7 @@ simplecreate(void) if((stat = nczmap_create(impl,url,0,0,NULL,&map))) goto done; - if((stat=nczm_concat(NULL,ZMETAROOT,&path))) + if((stat=nczm_concat(NULL,Z2METAROOT,&path))) goto done; /* Write empty metadata content */ @@ -135,7 +135,7 @@ writemeta(void) if((stat = nczmap_open(impl,url,NC_WRITE,0,NULL,&map))) goto done; - if((stat=nczm_concat(META1,ZARRAY,&path))) + if((stat=nczm_concat(META1,Z2ARRAY,&path))) goto done; if((stat = nczmap_write(map, path, strlen(metadata1), metadata1))) goto done; @@ -158,7 +158,7 @@ writemeta2(void) if((stat = nczmap_open(impl,url,NC_WRITE,0,NULL,&map))) goto done; - if((stat=nczm_concat(META2,ZARRAY,&path))) + if((stat=nczm_concat(META2,Z2ARRAY,&path))) goto done; if((stat = nczmap_write(map, path, strlen(metadata2), metadata2))) goto done; @@ -212,7 +212,7 @@ readmeta(void) if((stat = nczmap_open(impl,url,0,0,NULL,&map))) goto done; - if((stat = readkey(map,META1,ZARRAY))) goto done; + if((stat = readkey(map,META1,Z2ARRAY))) goto done; done: (void)nczmap_close(map,0); @@ -228,7 +228,7 @@ readmeta2(void) if((stat = nczmap_open(impl,url,0,0,NULL,&map))) goto done; - if((stat = readkey(map,META2,ZARRAY))) + if((stat = readkey(map,META2,Z2ARRAY))) goto done; done: diff --git a/nczarr_test/ut_mapapi.c b/nczarr_test/ut_mapapi.c index be0787cc60..0ccc4bf883 100644 --- a/nczarr_test/ut_mapapi.c +++ b/nczarr_test/ut_mapapi.c @@ -99,7 +99,7 @@ simplecreate(void) printf("Pass: 
create: create: %s\n",url); - truekey = makekey(ZMETAROOT); + truekey = makekey(Z2METAROOT); if((stat = nczmap_write(map, truekey, 0, NULL))) goto done; printf("Pass: create: defineobj: %s\n",truekey); @@ -184,13 +184,13 @@ simplemeta(void) report(PASS,"open",map); /* Make sure .nczarr exists (from simplecreate) */ - truekey = makekey(ZMETAROOT); + truekey = makekey(Z2METAROOT); if((stat = nczmap_exists(map,truekey))) goto done; report(PASS,".nczarr: exists",map); free(truekey); truekey = NULL; - if((stat=nczm_concat(META1,ZARRAY,&key))) + if((stat=nczm_concat(META1,Z2ARRAY,&key))) goto done; truekey = makekey(key); nullfree(key); key = NULL; @@ -199,13 +199,13 @@ simplemeta(void) report(PASS,".zarray: def",map); free(truekey); truekey = NULL; - truekey = makekey(ZMETAROOT); + truekey = makekey(Z2METAROOT); if((stat = nczmap_write(map, truekey, strlen(metadata1), metadata1))) goto done; report(PASS,".nczarr: writemetadata",map); free(truekey); truekey = NULL; - if((stat=nczm_concat(META1,ZARRAY,&key))) + if((stat=nczm_concat(META1,Z2ARRAY,&key))) goto done; truekey = makekey(key); free(key); key = NULL; @@ -225,7 +225,7 @@ simplemeta(void) report(PASS,"re-open",map); /* Read previously written */ - truekey = makekey(ZMETAROOT); + truekey = makekey(Z2METAROOT); if((stat = nczmap_exists(map, truekey))) goto done; report(PASS,".nczarr: exists",map); @@ -245,7 +245,7 @@ simplemeta(void) else report(PASS,".nczarr: content verify",map); nullfree(content); content = NULL; - if((stat=nczm_concat(META1,ZARRAY,&key))) + if((stat=nczm_concat(META1,Z2ARRAY,&key))) goto done; truekey = makekey(key); nullfree(key); key = NULL; diff --git a/test_common.in b/test_common.in index 8786568768..460445680c 100644 --- a/test_common.in +++ b/test_common.in @@ -52,6 +52,9 @@ FEATURE_PLUGIN_SEARCH_PATH="@NETCDF_PLUGIN_SEARCH_PATH@" # Thredds-test server is currently disabled #FEATURE_THREDDSTEST=1 +# This is the S3 endpoint to be used during tests +S3ENDPOINT=@S3ENDPOINT@ + # This is the 
Unidata S3 test bucket # All S3 tests should use this to store intermediate results. S3TESTBUCKET=@S3TESTBUCKET@ diff --git a/unit_test/run_s3sdk.sh b/unit_test/run_s3sdk.sh index 18073e4570..3d3f9eead0 100755 --- a/unit_test/run_s3sdk.sh +++ b/unit_test/run_s3sdk.sh @@ -7,7 +7,7 @@ set -e #CMD="valgrind --leak-check=full" -URL="https://s3.us-east-1.amazonaws.com/${S3TESTBUCKET}" +URL="https://${S3ENDPOINT}/${S3TESTBUCKET}" isolate "testdir_uts3sdk"