diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index cdac66eadf..882e0c6df9 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,7 +7,9 @@ This file contains a high-level description of this package's evolution. Release ## 4.9.1 - T.B.D. -* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????). + +* [Enhancement] Add experimental support for fixed length string type in nczarr. See [Github #2467](https://github.com/Unidata/netcdf-c/pull/2467). +* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #2466](https://github.com/Unidata/netcdf-c/pull/2466). * [Bug Fix] Provide a default enum const when fill value does not match any enum constant for the value zero. See [Github #2462](https://github.com/Unidata/netcdf-c/pull/2462). * [Bug Fix] Fix the json submodule symbol conflicts between libnetcdf and the plugin specific netcdf_json.h. See [Github #2448](https://github.com/Unidata/netcdf-c/pull/2448). * [Bug Fix] Fix quantize with CLASSIC_MODEL files. See [Github #2405](https://github.com/Unidata/netcdf-c/pull/2445). diff --git a/include/nc4internal.h b/include/nc4internal.h index 9b76c135a8..2833e0c5a1 100644 --- a/include/nc4internal.h +++ b/include/nc4internal.h @@ -106,9 +106,6 @@ typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFIL} NC_SORT; /** Subset of readonly flags; readable by name only thru the API. */ #define NAMEONLYFLAG 4 -/** Subset of readonly flags; Value is actually in file. 
*/ -#define MATERIALIZEDFLAG 8 - /** Per-variable attribute, as opposed to global */ #define VARFLAG 16 @@ -492,9 +489,13 @@ extern void NC_freeglobalstate(void); #define NC_ATT_COORDINATES "_Netcdf4Coordinates" /*see hdf5internal.h:COORDINATES*/ #define NC_ATT_FORMAT "_Format" #define NC_ATT_DIMID_NAME "_Netcdf4Dimid" +#define NC_ATT_FILLVALUE "_FillValue" #define NC_ATT_NC3_STRICT_NAME "_nc3_strict" #define NC_XARRAY_DIMS "_ARRAY_DIMENSIONS" #define NC_ATT_CODECS "_Codecs" -#define NC_NCZARR_ATTR "_NCZARR_ATTR" +#define NC_NCZARR_ATTR "_nczarr_attr" +#define NC_NCZARR_ATTR_UC "_NCZARR_ATTR" +#define NC_NCZARR_MAXSTRLEN_ATTR "_nczarr_maxstrlen" +#define NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR "_nczarr_default_maxstrlen" #endif /* _NC4INTERNAL_ */ diff --git a/include/ncconfigure.h b/include/ncconfigure.h index c0679bc304..9d8c0c7037 100644 --- a/include/ncconfigure.h +++ b/include/ncconfigure.h @@ -53,6 +53,7 @@ extern "C" { #ifndef HAVE_STRDUP #ifndef strdup char* strdup(const char*); +#define HAVE_STRDUP #endif #endif @@ -120,11 +121,7 @@ unsigned long long int strtoull(const char*, char**, int); /* handle null arguments */ #ifndef nulldup -#ifdef HAVE_STRDUP #define nulldup(s) ((s)==NULL?NULL:strdup(s)) -#else -extern char *nulldup(const char* s); -#endif #endif #ifndef nulllen diff --git a/include/ncjson.h b/include/ncjson.h index c4974cfb58..379633e9cb 100644 --- a/include/ncjson.h +++ b/include/ncjson.h @@ -64,6 +64,7 @@ typedef struct NCjson { don't use union so we can know when to reclaim sval */ struct NCJconst {int bval; long long ival; double dval; char* sval;}; +#define NCJconst_empty {0,0,0.0,NULL} /**************************************************/ /* Extended API */ diff --git a/libdap4/ncd4dispatch.c b/libdap4/ncd4dispatch.c index f875c641e0..616f9c9051 100644 --- a/libdap4/ncd4dispatch.c +++ b/libdap4/ncd4dispatch.c @@ -43,7 +43,7 @@ static const NC_reservedatt NCD4_reserved[] = { {D4CHECKSUMATTR, READONLYFLAG|NAMEONLYFLAG}, /*_DAP4_Checksum_CRC32*/ 
{D4LEATTR, READONLYFLAG|NAMEONLYFLAG}, /*_DAP4_Little_Endian*/ /* Also need to include the provenance attributes */ - {NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG}, /*_NCProperties*/ + {NCPROPS, READONLYFLAG|NAMEONLYFLAG}, /*_NCProperties*/ {NULL, 0} }; diff --git a/libnczarr/zarr.c b/libnczarr/zarr.c index 671b0c2ace..05961c7e0f 100644 --- a/libnczarr/zarr.c +++ b/libnczarr/zarr.c @@ -62,6 +62,8 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, const char** contr &zinfo->zarr.nczarr_version.minor, &zinfo->zarr.nczarr_version.release); + zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT; + /* Apply client controls */ if((stat = applycontrols(zinfo))) goto done; @@ -126,6 +128,7 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls) zinfo->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG); if((zinfo->envv_controls = NCZ_clonestringvec(0,controls))==NULL) /*0=>envv style*/ {stat = NC_ENOMEM; goto done;} + zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT; /* Add struct to hold NCZ-specific group info. 
*/ if (!(root->format_grp_info = calloc(1, sizeof(NCZ_GRP_INFO_T)))) @@ -288,6 +291,64 @@ ncz_open_rootgroup(NC_FILE_INFO_T* dataset) } #endif + +static const char* +controllookup(const char** envv_controls, const char* key) +{ + const char** p; + for(p=envv_controls;*p;p+=2) { + if(strcasecmp(key,*p)==0) { + return p[1]; + } + } + return NULL; +} + + +static int +applycontrols(NCZ_FILE_INFO_T* zinfo) +{ + int i,stat = NC_NOERR; + const char* value = NULL; + NClist* modelist = nclistnew(); + int noflags = 0; /* track non-default negative flags */ + + if((value = controllookup((const char**)zinfo->envv_controls,"mode")) != NULL) { + if((stat = NCZ_comma_parse(value,modelist))) goto done; + } + /* Process the modelist first */ + zinfo->controls.mapimpl = NCZM_DEFAULT; + zinfo->controls.flags |= FLAG_XARRAYDIMS; /* Always support XArray convention where possible */ + for(i=0;icontrols.flags |= (FLAG_PUREZARR); + else if(strcasecmp(p,XARRAYCONTROL)==0) + zinfo->controls.flags |= FLAG_PUREZARR; + else if(strcasecmp(p,NOXARRAYCONTROL)==0) + noflags |= FLAG_XARRAYDIMS; + else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP; + else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE; + else if(strcasecmp(p,"s3")==0) zinfo->controls.mapimpl = NCZM_S3; + } + /* Apply negative controls by turning off negative flags */ + /* This is necessary to avoid order dependence of mode flags when both positive and negative flags are defined */ + zinfo->controls.flags &= (~noflags); + + /* Process other controls */ + if((value = controllookup((const char**)zinfo->envv_controls,"log")) != NULL) { + zinfo->controls.flags |= FLAG_LOGGING; + ncsetlogging(1); + } + if((value = controllookup((const char**)zinfo->envv_controls,"show")) != NULL) { + if(strcasecmp(value,"fetch")==0) + zinfo->controls.flags |= FLAG_SHOWFETCH; + } +done: + nclistfreeall(modelist); + return stat; +} + #if 0 /** @internal Rewrite attributes into a group or var @@ -332,7 +393,7 @@ 
ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjs NCjson* k = NULL; NCjson* v = NULL; /* remove any previous version */ - if(!NCJremove(jattrs,NCZ_V2_ATTRS,&k,&v)) { + if(!NCJremove(jattrs,NCZ_V2_ATTRS,1,&k,&v)) { NCJreclaim(k); NCJreclaim(v); } } @@ -358,60 +419,5 @@ ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjs } #endif -static const char* -controllookup(const char** envv_controls, const char* key) -{ - const char** p; - for(p=envv_controls;*p;p+=2) { - if(strcasecmp(key,*p)==0) { - return p[1]; - } - } - return NULL; -} - -static int -applycontrols(NCZ_FILE_INFO_T* zinfo) -{ - int i,stat = NC_NOERR; - const char* value = NULL; - NClist* modelist = nclistnew(); - int noflags = 0; /* track non-default negative flags */ - - if((value = controllookup((const char**)zinfo->envv_controls,"mode")) != NULL) { - if((stat = NCZ_comma_parse(value,modelist))) goto done; - } - /* Process the modelist first */ - zinfo->controls.mapimpl = NCZM_DEFAULT; - zinfo->controls.flags |= FLAG_XARRAYDIMS; /* Always support XArray convention where possible */ - for(i=0;icontrols.flags |= (FLAG_PUREZARR); - else if(strcasecmp(p,XARRAYCONTROL)==0) - zinfo->controls.flags |= FLAG_PUREZARR; - else if(strcasecmp(p,NOXARRAYCONTROL)==0) - noflags |= FLAG_XARRAYDIMS; - else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP; - else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE; - else if(strcasecmp(p,"s3")==0) zinfo->controls.mapimpl = NCZM_S3; - } - /* Apply negative controls by turning off negative flags */ - /* This is necessary to avoid order dependence of mode flags when both positive and negative flags are defined */ - zinfo->controls.flags &= (~noflags); - - /* Process other controls */ - if((value = controllookup((const char**)zinfo->envv_controls,"log")) != NULL) { - zinfo->controls.flags |= FLAG_LOGGING; - ncsetlogging(1); - } - if((value = controllookup((const 
char**)zinfo->envv_controls,"show")) != NULL) { - if(strcasecmp(value,"fetch")==0) - zinfo->controls.flags |= FLAG_SHOWFETCH; - } -done: - nclistfreeall(modelist); - return stat; -} diff --git a/libnczarr/zarr.h b/libnczarr/zarr.h index 9f78e19297..402aabe3df 100644 --- a/libnczarr/zarr.h +++ b/libnczarr/zarr.h @@ -31,8 +31,8 @@ EXTERNL int ncz_unload_jatts(NCZ_FILE_INFO_T*, NC_OBJ* container, NCjson* jattrs EXTERNL int ncz_close_file(NC_FILE_INFO_T* file, int abort); /* zcvt.c */ -EXTERNL int NCZ_convert1(NCjson* jsrc, nc_type, unsigned char* memory0); -EXTERNL int NCZ_stringconvert1(nc_type typid, size_t len, char* src, NCjson* jvalue); +EXTERNL int NCZ_convert1(NCjson* jsrc, nc_type, unsigned char* memory0, size_t* len); +EXTERNL int NCZ_stringconvert1(nc_type typid, char* src, NCjson* jvalue); EXTERNL int NCZ_stringconvert(nc_type typid, size_t len, void* data0, NCjson** jdatap); /* zsync.c */ @@ -53,9 +53,11 @@ EXTERNL int NCZ_dimkey(const NC_DIM_INFO_T* dim, char** pathp); EXTERNL int ncz_splitkey(const char* path, NClist* segments); EXTERNL int NCZ_readdict(NCZMAP* zmap, const char* key, NCjson** jsonp); EXTERNL int NCZ_readarray(NCZMAP* zmap, const char* key, NCjson** jsonp); -EXTERNL int ncz_zarr_type_name(nc_type nctype, int little, const char** znamep); -EXTERNL int ncz_nctype2typeinfo(const char* snctype, nc_type* nctypep); -EXTERNL int ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianness); +EXTERNL int ncz_nctypedecode(const char* snctype, nc_type* nctypep); +EXTERNL int ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr,int len, char** dnamep); +EXTERNL int ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp); +EXTERNL int NCZ_inferattrtype(NCjson* value, nc_type typehint, nc_type* typeidp); +EXTERNL int NCZ_inferinttype(unsigned long long u64, int negative); EXTERNL int ncz_fill_value_sort(nc_type nctype, int*); EXTERNL int NCZ_createobject(NCZMAP* zmap, const 
char* key, size64_t size); EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json); @@ -73,6 +75,10 @@ EXTERNL int NCZ_ischunkname(const char* name,char dimsep); EXTERNL char* NCZ_chunkpath(struct ChunkKey key); EXTERNL int NCZ_reclaim_fill_value(NC_VAR_INFO_T* var); EXTERNL int NCZ_copy_fill_value(NC_VAR_INFO_T* var, void** dstp); +EXTERNL int NCZ_get_maxstrlen(NC_OBJ* obj); +EXTERNL int NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen); +EXTERNL int NCZ_char2fixed(const char** charp, void* fixed, size_t count, int maxstrlen); +EXTERNL int NCZ_copy_data(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* xtype, const void* memory, size_t count, int nofill, void* copy); /* zwalk.c */ EXTERNL int NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata); diff --git a/libnczarr/zattr.c b/libnczarr/zattr.c index 8cd4cfd7a4..ece3550ecd 100644 --- a/libnczarr/zattr.c +++ b/libnczarr/zattr.c @@ -68,9 +68,11 @@ ncz_getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp, NCindex **at } /** - * @internal Get one of three special attributes, NCPROPS, - * ISNETCDF4ATT, and SUPERBLOCKATT. These atts are not all really in - * the file, they are constructed on the fly. + * @internal Get one of the special attributes: + * See the reserved attribute table in libsrc4/nc4internal.c. + * The special attributes are the ones marked with NAMEONLYFLAG. + * For example: NCPROPS, ISNETCDF4ATT, and SUPERBLOCKATT, and CODECS. + * These atts are not all really in the file, they are constructed on the fly. * * @param h5 Pointer to ZARR file info struct. * @param var Pointer to var info struct; NULL signals global. 
@@ -426,7 +428,7 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, size_t len, const void *data, nc_type mem_type, int force) { NC* nc; - NC_FILE_INFO_T *h5; + NC_FILE_INFO_T *h5 = NULL; NC_VAR_INFO_T *var = NULL; NCindex* attlist = NULL; NC_ATT_INFO_T* att; @@ -575,7 +577,7 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, } /* If this is the _FillValue attribute, then we will also have to - * copy the value to the fill_vlue pointer of the NC_VAR_INFO_T + * copy the value to the fill_value pointer of the NC_VAR_INFO_T * struct for this var. (But ignore a global _FillValue * attribute). Also kill the cache fillchunk as no longer valid */ if (!strcmp(att->hdr.name, _FillValue) && varid != NC_GLOBAL) @@ -670,6 +672,23 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, att->data = copy; copy = NULL; } } + + /* If this is a maxstrlen attribute, then we will also have to + * sync the value to NCZ_VAR_INFO_T or NCZ_FILE_INFO_T structure */ + { + if(strcmp(att->hdr.name,NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR)==0 && varid == NC_GLOBAL && len == 1) { + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)h5->format_file_info; + if((retval = nc4_convert_type(att->data, &zfile->default_maxstrlen, file_type, NC_INT, + len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0))) + BAIL(retval); + } else if(strcmp(att->hdr.name,NC_NCZARR_MAXSTRLEN_ATTR)==0 && varid != NC_GLOBAL && len == 1) { + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + if((retval = nc4_convert_type(att->data, &zvar->maxstrlen, file_type, NC_INT, + len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0))) + BAIL(retval); + } + } + att->dirty = NC_TRUE; att->created = NC_FALSE; att->len = len; diff --git a/libnczarr/zcache.h b/libnczarr/zcache.h index f5f7362dd2..2ef0fe8ad9 100644 --- a/libnczarr/zcache.h +++ b/libnczarr/zcache.h @@ -32,6 +32,7 @@ typedef struct NCZCacheEntry { } key; size64_t hashkey; 
int isfiltered; /* 1=>data contains filtered data else real data */ + int isfixedstring; /* 1 => data contains the fixed strings, 0 => data contains pointers to strings */ size64_t size; /* |data| */ void* data; /* contains either filtered or real data */ } NCZCacheEntry; diff --git a/libnczarr/zclose.c b/libnczarr/zclose.c index f1f3354b1d..cc8b4d0064 100644 --- a/libnczarr/zclose.c +++ b/libnczarr/zclose.c @@ -172,10 +172,10 @@ zclose_vars(NC_GRP_INFO_T* grp) var->filters = NULL; #endif /* Reclaim the type */ - (void)zclose_type(var->type_info); - NCZ_free_chunk_cache(zvar->cache); + if(var->type_info) (void)zclose_type(var->type_info); + if(zvar->cache) NCZ_free_chunk_cache(zvar->cache); /* reclaim xarray */ - nclistfreeall(zvar->xarray); + if(zvar->xarray) nclistfreeall(zvar->xarray); nullfree(zvar); var->format_var_info = NULL; /* avoid memory errors */ } @@ -223,13 +223,9 @@ static int zclose_type(NC_TYPE_INFO_T* type) { int stat = NC_NOERR; - NCZ_TYPE_INFO_T* ztype; assert(type && type->format_type_info != NULL); - /* Get Zarr-specific type info. 
*/ - ztype = type->format_type_info; - nullfree(ztype); - type->format_type_info = NULL; /* avoid memory errors */ + nullfree(type->format_type_info); return stat; } diff --git a/libnczarr/zcvt.c b/libnczarr/zcvt.c index 4b59b881d0..478159035a 100644 --- a/libnczarr/zcvt.c +++ b/libnczarr/zcvt.c @@ -20,19 +20,43 @@ struct ZCVT { signed long long int64v; unsigned long long uint64v; double float64v; + char* strv; /* null terminated utf-8 */ +}; + +static const int ncz_type_size[NC_MAX_ATOMIC_TYPE+1] = { +0, /*NC_NAT*/ +sizeof(char), /*NC_BYTE*/ +sizeof(char), /*NC_CHAR*/ +sizeof(short), /*NC_SHORT*/ +sizeof(int), /*NC_INT*/ +sizeof(float), /*NC_FLOAT*/ +sizeof(double), /*NC_DOUBLE*/ +sizeof(unsigned char), /*NC_UBYTE*/ +sizeof(unsigned short), /*NC_USHORT*/ +sizeof(unsigned int), /*NC_UINT*/ +sizeof(long long), /*NC_INT64*/ +sizeof(unsigned long long), /*NC_UINT64*/ +sizeof(char *), /*NC_STRING*/ }; /* Forward */ static int typeid2jtype(nc_type typeid); -/* Convert an NCJ_STRING to a memory equivalent value of specified dsttype */ +/* Convert an NCJ_STRING to a memory equivalent value of specified dsttype; + return # of bytes written +*/ int -NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) +NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory, size_t* lenp) { int stat = NC_NOERR; nc_type srctype; struct ZCVT zcvt; int outofrange = 0; + size_t len = 0; + + assert(dsttype != NC_NAT && dsttype <= NC_MAX_ATOMIC_TYPE); + + len = ncz_type_size[dsttype]; /* Convert the incoming jsrc string to a restricted set of values */ switch (NCJsort(jsrc)) { @@ -47,7 +71,6 @@ NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) srctype = NC_UINT64; } break; - case NCJ_STRING: case NCJ_DOUBLE: /* Capture nan and infinity values */ if(strcasecmp(NCJstring(jsrc),"nan")==0) @@ -71,6 +94,10 @@ NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) else zcvt.uint64v = 1; break; + case NCJ_STRING: + srctype = NC_STRING; + zcvt.strv = 
NCJstring(jsrc); + break; default: stat = NC_EINTERNAL; goto done; } @@ -230,9 +257,21 @@ NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) break; } } break; + case NC_STRING: { + char** p = (char**)memory; + if(srctype != NC_STRING) {stat = NC_EINVAL; goto done;} + *p = nulldup(zcvt.strv); + } break; + case NC_CHAR: { + if(srctype != NC_STRING) {stat = NC_EINVAL; goto done;} + len = strlen(zcvt.strv); + memcpy(memory,zcvt.strv,len); + } break; default: stat = NC_EINTERNAL; goto done; } + if(lenp) *lenp = len; + done: if(stat == NC_NOERR && outofrange) stat = NC_ERANGE; return stat; @@ -240,14 +279,15 @@ NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) /* Convert a memory value to a JSON string value */ int -NCZ_stringconvert1(nc_type srctype, size_t len, char* src, NCjson* jvalue) +NCZ_stringconvert1(nc_type srctype, char* src, NCjson* jvalue) { int stat = NC_NOERR; struct ZCVT zcvt; nc_type dsttype = NC_NAT; char s[1024]; + char* p = NULL; - assert(srctype >= NC_NAT && srctype != NC_CHAR && srctype < NC_STRING); + assert(srctype >= NC_NAT && srctype != NC_CHAR && srctype <= NC_STRING); /* Convert to a restricted set of values */ switch (srctype) { case NC_BYTE: { @@ -290,6 +330,10 @@ NCZ_stringconvert1(nc_type srctype, size_t len, char* src, NCjson* jvalue) dsttype = NC_DOUBLE; zcvt.float64v= (double)(*((double*)src)); } break; + case NC_STRING: { + dsttype = NC_STRING; + zcvt.strv= *((char**)src); + } break; default: stat = NC_EINTERNAL; goto done; } @@ -326,10 +370,17 @@ NCZ_stringconvert1(nc_type srctype, size_t len, char* src, NCjson* jvalue) } #endif } break; + case NC_STRING: { + p = nulldup(zcvt.strv); + } break; default: stat = NC_EINTERNAL; goto done; } - NCJsetstring(jvalue,strdup(s)); + if(p == NULL) + p = strdup(s); + NCJsetstring(jvalue,p); + p = NULL; done: + nullfree(p); return stat; } @@ -357,12 +408,12 @@ NCZ_stringconvert(nc_type typeid, size_t len, void* data0, NCjson** jdatap) goto done; } else if(len == 1) { 
/* create singleton */ if((stat = NCJnew(jtype,&jdata))) goto done; - if((stat = NCZ_stringconvert1(typeid, len, src, jdata))) goto done; + if((stat = NCZ_stringconvert1(typeid, src, jdata))) goto done; } else { /* len > 1 create array of values */ if((stat = NCJnew(NCJ_ARRAY,&jdata))) goto done; for(i=0;iwritten_to = NC_TRUE; /* mark it written */ var->created = 1; -#if 0 - /* set the fill value and _FillValue attribute */ - if((stat = NCZ_ensure_fill_value(var))) goto done; /* ensure var->fill_value is set */ - assert(var->no_fill || var->fill_value != NULL); - /* rebuild the fill chunk */ - if((stat = NCZ_adjust_var_cache(var))) goto done; -#ifdef ENABLE_NCZARR_FILTERS - /* Build the filter working parameters for any filters */ - if((stat = NCZ_filter_setup(var))) goto done; -#endif -#endif /*0|1*/ } } if((stat = ncz_enddef_netcdf4_file(h5))) goto done; diff --git a/libnczarr/zinternal.h b/libnczarr/zinternal.h index 0e3cec55a4..3672c340f4 100644 --- a/libnczarr/zinternal.h +++ b/libnczarr/zinternal.h @@ -22,9 +22,6 @@ #define NCZ_CHUNKSIZE_FACTOR (10) #define NCZ_MIN_CHUNK_SIZE (2) -/* An attribute in the ZARR root group of this name means that the - * file must follow strict netCDF classic format rules. */ -#define NCZ_NC3_STRICT_ATT_NAME "_nc3_strict" /**************************************************/ /* Constants */ @@ -62,43 +59,54 @@ /* V2 Reserved Attributes */ /* Inserted into /.zgroup -_NCZARR_SUPERBLOCK: {"version": "2.0.0"} +_nczarr_superblock: {"version": "2.0.0"} Inserted into any .zgroup -"_NCZARR_GROUP": "{ +"_nczarr_group": "{ \"dimensions\": {\"d1\": \"1\", \"d2\": \"1\",...} \"variables\": [\"v1\", \"v2\", ...] \"groups\": [\"g1\", \"g2\", ...] }" Inserted into any .zarray -"_NCZARR_ARRAY": "{ +"_nczarr_array": "{ \"dimensions\": [\"/g1/g2/d1\", \"/d2\",...] \"storage\": \"scalar\"|\"contiguous\"|\"compact\"|\"chunked\" }" Inserted into any .zattrs ? or should it go into the container? 
-"_NCZARR_ATTRS": "{ +"_nczarr_attrs": "{ \"types\": {\"attr1\": \" NC_CHAR. ++ */ -#define NCZ_V2_SUPERBLOCK "_NCZARR_SUPERBLOCK" -#define NCZ_V2_GROUP "_NCZARR_GROUP" -#define NCZ_V2_ARRAY "_NCZARR_ARRAY" +#define NCZ_V2_SUPERBLOCK "_nczarr_superblock" +#define NCZ_V2_GROUP "_nczarr_group" +#define NCZ_V2_ARRAY "_nczarr_array" #define NCZ_V2_ATTR NC_NCZARR_ATTR +#define NCZ_V2_SUPERBLOCK_UC "_NCZARR_SUPERBLOCK" +#define NCZ_V2_GROUP_UC "_NCZARR_GROUP" +#define NCZ_V2_ARRAY_UC "_NCZARR_ARRAY" +#define NCZ_V2_ATTR_UC NC_NCZARR_ATTR_UC + +#define NCZARRCONTROL "nczarr" #define PUREZARRCONTROL "zarr" #define XARRAYCONTROL "xarray" #define NOXARRAYCONTROL "noxarray" +#define XARRAYSCALAR "_scalar_" #define LEGAL_DIM_SEPARATORS "./" #define DFALT_DIM_SEPARATOR '.' -#define islegaldimsep(c) ((c) != '\0' && strchr(LEGAL_DIM_SEPARATORS,(c)) != NULL) +/* Default max string length for fixed length strings */ +#define NCZ_MAXSTR_DEFAULT 64 -/* Mnemonics */ -#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ +#define islegaldimsep(c) ((c) != '\0' && strchr(LEGAL_DIM_SEPARATORS,(c)) != NULL) /* Mnemonics */ -#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ +#define ZCLEAR 0 /* For NCZ_copy_data */ +#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ /* Useful macro */ #define ncidforx(file,grpid) ((file)->controller->ext_ncid | (grpid)) @@ -146,6 +154,7 @@ typedef struct NCZ_FILE_INFO { # define FLAG_NCZARR_V1 16 NCZM_IMPL mapimpl; } controls; + int default_maxstrlen; /* default max str size for variables of type string */ } NCZ_FILE_INFO_T; /* This is a struct to handle the dim metadata. */ @@ -186,6 +195,7 @@ typedef struct NCZ_VAR_INFO { struct NClist* xarray; /* names from _ARRAY_DIMENSIONS */ char dimension_separator; /* '.' | '/' */ NClist* incompletefilters; + int maxstrlen; /* max length of strings for this variable */ } NCZ_VAR_INFO_T; /* Struct to hold ZARR-specific info for a field. 
*/ diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index d5cbee966a..52f356dae6 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -22,8 +22,8 @@ static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose); static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp); static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypes); -static int zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst); -static int computeattrinfo(const char* name, NClist* atypes, NCjson* values, +static int zconvert(nc_type typeid, size_t typelen, size_t count, void* dst, NCjson* src); +static int computeattrinfo(const char* name, NClist* atypes, nc_type typehint, int purezarr, NCjson* values, nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap); static int parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps); static int parse_group_content_pure(NCZ_FILE_INFO_T* zinfo, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrps); @@ -37,9 +37,7 @@ static int locategroup(NC_FILE_INFO_T* file, size_t nsegs, NClist* segments, NC_ static int createdim(NC_FILE_INFO_T* file, const char* name, size64_t dimlen, NC_DIM_INFO_T** dimp); static int parsedimrefs(NC_FILE_INFO_T*, NClist* dimnames, size64_t* shape, NC_DIM_INFO_T** dims, int create); static int decodeints(NCjson* jshape, size64_t* shapes); -static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap); -static int inferattrtype(NCjson* values, nc_type* typeidp); -static int mininttype(unsigned long long u64, int negative); +static int computeattrdata(nc_type typehint, nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap); static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims); static int read_dict(NCjson* jdict, NCjson** jtextp); static 
int write_dict(size_t len, const void* data, NCjson** jsonp); @@ -169,7 +167,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) NC_GRP_INFO_T* g = (NC_GRP_INFO_T*)ncindexith(grp->children,i); if((stat = NCJaddstring(jsubgrps,NCJ_STRING,g->hdr.name))) goto done; } - /* Create the "_NCZARR_GROUP" dict */ + /* Create the "_nczarr_group" dict */ if((stat = NCJnew(NCJ_DICT,&json))) goto done; /* Insert the various dicts and arrays */ @@ -270,6 +268,8 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) NCjson* jdimrefs = NULL; NCjson* jtmp = NULL; NCjson* jfill = NULL; + char* dtypename = NULL; + int purezarr = 0; size64_t shape[NC_MAX_VAR_DIMS]; NCZ_VAR_INFO_T* zvar = var->format_var_info; #ifdef ENABLE_NCZARR_FILTERS @@ -280,7 +280,8 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) zinfo = file->format_file_info; map = zinfo->map; -#if 1 + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + /* Make sure that everything is established */ /* ensure the fill value */ if((stat = NCZ_ensure_fill_value(var))) goto done; /* ensure var->fill_value is set */ @@ -293,7 +294,6 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) /* Build the filter working parameters for any filters */ if((stat = NCZ_filter_setup(var))) goto done; #endif -#endif /*0|1*/ /* Construct var path */ if((stat = NCZ_varkey(var,&fullpath))) @@ -321,7 +321,9 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) /* Integer list defining the length of each dimension of the array.*/ /* Create the list */ if((stat = NCJnew(NCJ_ARRAY,&jtmp))) goto done; - for(i=0;indims+zvar->scalar;i++) { + if(zvar->scalar) { + NCJaddstring(jtmp,NCJ_INT,"1"); + } else for(i=0;indims;i++) { snprintf(number,sizeof(number),"%llu",shape[i]); NCJaddstring(jtmp,NCJ_INT,number); } @@ -332,18 +334,12 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) /* A string or list defining a valid 
data type for the array. */ if((stat = NCJaddstring(jvar,NCJ_STRING,"dtype"))) goto done; { /* Add the type name */ - const char* dtypename; int endianness = var->type_info->endianness; - int islittle; - switch (endianness) { - case NC_ENDIAN_LITTLE: islittle = 1; break; - case NC_ENDIAN_BIG: islittle = 0; break; - case NC_ENDIAN_NATIVE: abort(); /* should never happen */ - } int atomictype = var->type_info->hdr.id; - assert(atomictype > 0 && atomictype <= NC_MAX_ATOMIC_TYPE && atomictype != NC_STRING); - if((stat = ncz_zarr_type_name(atomictype,islittle,&dtypename))) goto done; + assert(atomictype > 0 && atomictype <= NC_MAX_ATOMIC_TYPE); + if((stat = ncz_nctype2dtype(atomictype,endianness,purezarr,NCZ_get_maxstrlen((NC_OBJ*)var),&dtypename))) goto done; if((stat = NCJaddstring(jvar,NCJ_STRING,dtypename))) goto done; + nullfree(dtypename); dtypename = NULL; } /* chunks key */ @@ -356,7 +352,9 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) if((stat = NCJaddstring(jvar,NCJ_STRING,"chunks"))) goto done; /* Create the list */ if((stat = NCJnew(NCJ_ARRAY,&jtmp))) goto done; - for(i=0;i<(var->ndims+zvar->scalar);i++) { + if(zvar->scalar) { + NCJaddstring(jtmp,NCJ_INT,"1"); /* one chunk of size 1 */ + } else for(i=0;indims;i++) { size64_t len = (var->storage == NC_CONTIGUOUS ? 
shape[i] : var->chunksizes[i]); snprintf(number,sizeof(number),"%lld",len); NCJaddstring(jtmp,NCJ_INT,number); @@ -511,6 +509,7 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) nclistfreeall(dimrefs); nullfree(fullpath); nullfree(key); + nullfree(dtypename); nullfree(dimpath); NCJreclaim(jvar); NCJreclaim(jncvar); @@ -590,6 +589,7 @@ ncz_write_var(NC_VAR_INFO_T* var) } } + { /* Iterate over all the chunks to create missing ones */ if((chunkodom = nczodom_new(var->ndims+zvar->scalar,start,stop,stride,stop))==NULL) {stat = NC_ENOMEM; goto done;} @@ -609,6 +609,7 @@ ncz_write_var(NC_VAR_INFO_T* var) nullfree(key); key = NULL; } + } nczodom_free(chunkodom); nullfree(key); } @@ -647,7 +648,9 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc int isrootgroup = 0; NC_VAR_INFO_T* var = NULL; NC_GRP_INFO_T* grp = NULL; - + char* tname = NULL; + int purezarr = 0; + LOG((3, "%s", __func__)); if(container->sort == NCVAR) @@ -658,13 +661,14 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc zinfo = file->format_file_info; map = zinfo->map; - if(zinfo->controls.flags & FLAG_XARRAYDIMS) isxarray = 1; - if(container->sort == NCVAR) { if(var->container && var->container->parent == NULL) isrootgroup = 1; } + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + if(zinfo->controls.flags & FLAG_XARRAYDIMS) isxarray = 1; + if(ncindexsize(attlist) > 0) { /* Create the jncattr.types object */ if((stat = NCJnew(NCJ_DICT,&jtypes))) @@ -672,15 +676,22 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc /* Walk all the attributes and collect the types by attribute name */ for(i=0;ihdr.name); + size_t typesize = 0; + int endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); + /* If reserved and hidden, then ignore */ if(ra && (ra->flags & HIDDENATTRFLAG)) continue; - if(a->nc_typeid >= NC_STRING) + if(a->nc_typeid > NC_MAX_ATOMIC_TYPE) {stat = 
THROW(NC_ENCZARR); goto done;} - if((stat = ncz_zarr_type_name(a->nc_typeid,1,&tname))) goto done; + if(a->nc_typeid == NC_STRING) + typesize = NCZ_get_maxstrlen(container); + else + {if((stat = NC4_inq_atomic_type(a->nc_typeid,NULL,&typesize))) goto done;} + if((stat = ncz_nctype2dtype(a->nc_typeid,endianness,purezarr,typesize,&tname))) goto done; if((stat = NCJnewstring(NCJ_STRING,tname,&jtype))) goto done; + nullfree(tname); tname = NULL; if((stat = NCJinsert(jtypes,a->hdr.name,jtype))) /* add {name: type} */ goto done; jtype = NULL; @@ -705,7 +716,10 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc /* Insert the XARRAY _ARRAY_ATTRIBUTE attribute */ if((stat = NCJnew(NCJ_ARRAY,&jdimrefs))) goto done; - /* Walk the dimensions to check in root group */ + /* Fake the scalar case */ + if(var->ndims == 0) { + NCJaddstring(jdimrefs,NCJ_STRING,XARRAYSCALAR); + } else /* Walk the dimensions and capture the names */ for(i=0;indims;i++) { NC_DIM_INFO_T* dim = var->dim[i]; /* Verify that the dimension is in the root group */ @@ -780,6 +794,7 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc nullfree(key); nullfree(content); nullfree(dimpath); + nullfree(tname); NCJreclaim(jatts); NCJreclaim(jtypes); NCJreclaim(jtype); @@ -903,8 +918,10 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis if((stat = nczm_concat(fullpath,NCZATTRDEP,&key))) goto done; stat=NCZ_downloadjson(map,key,&jncattr); } - } else {/* Get _NCZARR_ATTRS from .zattrs */ - stat = NCJdictget(jattrs,NCZ_V2_ATTR,&jncattr); + } else {/* Get _nczarr_attrs from .zattrs */ + stat = NCJdictget(jattrs,NCZ_V2_ATTR,&jncattr); + if(!stat && jncattr == NULL) + {stat = NCJdictget(jattrs,NCZ_V2_ATTR_UC,&jncattr);} } nullfree(key); key = NULL; switch (stat) { @@ -947,36 +964,71 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis return THROW(stat); } +/* Compute the NC_CHAR size of an array of 
json values */ +static int +zcharify(NCjson* src, size_t* lenp, char** sp) +{ + int i, stat = NC_NOERR; + struct NCJconst jstr = NCJconst_empty; + NCbytes* buf = ncbytesnew(); + + assert(NCJsort(src) != NCJ_DICT); + if(NCJsort(src) != NCJ_ARRAY) { /* singleton */ + if((stat = NCJcvt(src, NCJ_STRING, &jstr))) goto done; + ncbytescat(buf,jstr.sval); + } else for(i=0;i= NC_STRING) + if(typeid > NC_MAX_ATOMIC_TYPE) {stat = NC_EINTERNAL; goto done;} - if((stat = computeattrdata(&typeid, values, &typelen, &len, &data))) goto done; + /* Use the hint if given one */ + if(typeid == NC_NAT) + typeid = typehint; + + if((stat = computeattrdata(typehint, &typeid, values, &typelen, &len, &data))) goto done; if(typeidp) *typeidp = typeid; if(lenp) *lenp = len; @@ -1028,10 +1084,10 @@ computeattrinfo(const char* name, NClist* atypes, NCjson* values, Extract data for an attribute */ static int -computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap) +computeattrdata(nc_type typehint, nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap) { int stat = NC_NOERR; - size_t count; + size_t count = 0; void* data = NULL; size_t typelen; nc_type typeid = NC_NAT; @@ -1040,7 +1096,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp /* Get assumed type */ if(typeidp) typeid = *typeidp; - if(typeid == NC_NAT) if((stat = inferattrtype(values,&typeid))) goto done; + if(typeid == NC_NAT) if((stat = NCZ_inferattrtype(values,typehint, &typeid))) goto done; if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;} if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) @@ -1049,7 +1105,10 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp /* Collect the length of the attribute; might be a singleton */ switch (NCJsort(values)) { case NCJ_ARRAY: - count = NCJlength(values); + if(typeid == NC_CHAR) { + if((stat = zcharify(values,&count,NULL))) goto done; + } else + count = 
NCJlength(values); break; case NCJ_DICT: /* Apply the JSON dictionary convention and convert to string */ @@ -1059,26 +1118,27 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp /* fall thru */ case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */ if(typeid == NC_CHAR) { - count = strlen(NCJstring(values)); - if(count == 0) count = 1; /* Actually a single nul char, probably default fill value ugh!*/ + if((stat = zcharify(values,&count,NULL))) goto done; + } else if(typeid == NC_STRING) { + count = 1; } else count = 1; break; default: - count = 1; /* singleton */ + count = 1; break; } if(count > 0 && data == NULL) { /* Allocate data space */ - if(typeid == NC_CHAR) - data = malloc(typelen*(count+1)); - else + if(typeid == NC_CHAR) { + data = malloc(count+1); + } else data = malloc(typelen*count); if(data == NULL) {stat = NC_ENOMEM; goto done;} /* convert to target type */ - if((stat = zconvert(typeid, typelen, values, data))) + if((stat = zconvert(typeid, typelen, count, data, values))) goto done; } if(lenp) *lenp = count; @@ -1092,87 +1152,6 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp return THROW(stat); } -static int -inferattrtype(NCjson* value, nc_type* typeidp) -{ - int stat = NC_NOERR; - nc_type typeid; - NCjson* j = NULL; - unsigned long long u64; - long long i64; - int negative = 0; - - if(NCJsort(value) == NCJ_ARRAY && NCJlength(value) == 0) - {typeid = NC_NAT; goto done;} - - if(NCJsort(value) == NCJ_NULL) - {typeid = NC_NAT; goto done;} - - if(value->sort == NCJ_ARRAY) { - j=NCJith(value,0); - return inferattrtype(j,typeidp); - } - - switch (NCJsort(value)) { - case NCJ_NULL: - typeid = NC_CHAR; - return NC_NOERR; - case NCJ_DICT: - typeid = NC_CHAR; - goto done; - case NCJ_UNDEF: - return NC_EINVAL; - default: /* atomic */ - break; - } - if(NCJstring(value) != NULL) - negative = (NCJstring(value)[0] == '-'); - switch (value->sort) 
{ - case NCJ_INT: - if(negative) { - sscanf(NCJstring(value),"%lld",&i64); - u64 = (unsigned long long)i64; - } else - sscanf(NCJstring(value),"%llu",&u64); - typeid = mininttype(u64,negative); - break; - case NCJ_DOUBLE: - typeid = NC_DOUBLE; - break; - case NCJ_BOOLEAN: - typeid = NC_UBYTE; - break; - case NCJ_STRING: /* requires special handling as an array of characters */ - typeid = NC_CHAR; - break; - default: - stat = NC_ENCZARR; - } -done: - if(typeidp) *typeidp = typeid; - return stat; -} - -static int -mininttype(unsigned long long u64, int negative) -{ - long long i64 = (long long)u64; /* keep bit pattern */ - if(!negative && u64 >= NC_MAX_INT64) return NC_UINT64; - if(i64 < 0) { - if(i64 >= NC_MIN_BYTE) return NC_BYTE; - if(i64 >= NC_MIN_SHORT) return NC_SHORT; - if(i64 >= NC_MIN_INT) return NC_INT; - return NC_INT64; - } - if(i64 <= NC_MAX_BYTE) return NC_BYTE; - if(i64 <= NC_MAX_UBYTE) return NC_UBYTE; - if(i64 <= NC_MAX_SHORT) return NC_SHORT; - if(i64 <= NC_MAX_USHORT) return NC_USHORT; - if(i64 <= NC_MAX_INT) return NC_INT; - if(i64 <= NC_MAX_UINT) return NC_UINT; - return NC_INT64; -} - /** * @internal Read file data from map to memory. 
@@ -1255,9 +1234,10 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) goto done; /* Read */ switch (stat=NCZ_downloadjson(map,key,&jgroup)) { - case NC_NOERR: /* we read it */ - /* Extract the NCZ_V2_GROUP dict */ + case NC_NOERR: /* Extract the NCZ_V2_GROUP dict */ if((stat = NCJdictget(jgroup,NCZ_V2_GROUP,&jdict))) goto done; + if(!stat && jdict == NULL) + {if((stat = NCJdictget(jgroup,NCZ_V2_GROUP_UC,&jdict))) goto done;} break; case NC_EEMPTY: /* does not exist, use search */ if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps))) @@ -1316,6 +1296,9 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) char* fullpath = NULL; char* key = NULL; NCZ_FILE_INFO_T* zinfo = NULL; + NC_VAR_INFO_T* var = NULL; + NCZ_VAR_INFO_T* zvar = NULL; + NC_GRP_INFO_T* grp = NULL; NCZMAP* map = NULL; NC_ATT_INFO_T* att = NULL; NCindex* attlist = NULL; @@ -1325,14 +1308,22 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) size_t len, typelen; void* data = NULL; NC_ATT_INFO_T* fillvalueatt = NULL; + nc_type typehint = NC_NAT; + int purezarr; zinfo = file->format_file_info; map = zinfo->map; - if(container->sort == NCGRP) - attlist = ((NC_GRP_INFO_T*)container)->att; - else - attlist = ((NC_VAR_INFO_T*)container)->att; + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + + if(container->sort == NCGRP) { + grp = ((NC_GRP_INFO_T*)container); + attlist = grp->att; + } else { + var = ((NC_VAR_INFO_T*)container); + zvar = (NCZ_VAR_INFO_T*)(var->format_var_info); + attlist = var->att; + } switch ((stat = load_jatts(map, container, (zinfo->controls.flags & FLAG_NCZARR_V1), &jattrs, &atypes))) { case NC_NOERR: break; @@ -1344,31 +1335,37 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) if(jattrs != NULL) { /* Iterate over the attributes to create the in-memory attributes */ - /* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray) */ + /* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray), etc. 
*/ for(i=0;iparent == NULL && strcmp(aname,NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR)==0) + isdfaltmaxstrlen = 1; + if(var != NULL && strcmp(aname,NC_NCZARR_MAXSTRLEN_ATTR)==0) + ismaxstrlen = 1; /* See if this is reserved attribute */ - ra = NC_findreserved(NCJstring(key)); + ra = NC_findreserved(aname); if(ra != NULL) { - /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL, flags & READONLYFLAG */ - if(strcmp(NCJstring(key),NCPROPS)==0 - && container->sort == NCGRP - && file->root_grp == (NC_GRP_INFO_T*)container) { + /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL */ + if(strcmp(aname,NCPROPS)==0 && grp != NULL && file->root_grp == grp) { /* Setup provenance */ if(NCJsort(value) != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;} /*malformed*/ - if((stat = NCZ_read_provenance(file,NCJstring(key),NCJstring(value)))) + if((stat = NCZ_read_provenance(file,aname,NCJstring(value)))) goto done; } /* case 2: name = _ARRAY_DIMENSIONS, sort==NCVAR, flags & HIDDENATTRFLAG */ - if(strcmp(NCJstring(key),NC_XARRAY_DIMS)==0 - && container->sort == NCVAR - && (ra->flags & HIDDENATTRFLAG)) { - /* store for later */ - NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)((NC_VAR_INFO_T*)container)->format_var_info; + if(strcmp(aname,NC_XARRAY_DIMS)==0 && var != NULL && (ra->flags & HIDDENATTRFLAG)) { + /* store for later */ int i; assert(NCJsort(value) == NCJ_ARRAY); if((zvar->xarray = nclistnew())==NULL) @@ -1379,19 +1376,25 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) nclistpush(zvar->xarray,strdup(NCJstring(k))); } } - /* else ignore */ - continue; + /* case other: if attribute is hidden */ + if(ra->flags & HIDDENATTRFLAG) continue; /* ignore it */ } + if(isfillvalue) + typehint = var->type_info->hdr.id ; /* if unknown use the var's type for _FillValue */ /* Create the attribute */ /* Collect the attribute's type and value */ - if((stat = computeattrinfo(NCJstring(key),atypes,value, + if((stat = computeattrinfo(aname,atypes,typehint,purezarr,value, 
&typeid,&typelen,&len,&data))) goto done; - if((stat = ncz_makeattr(container,attlist,NCJstring(key),typeid,len,data,&att))) + if((stat = ncz_makeattr(container,attlist,aname,typeid,len,data,&att))) goto done; + if(isfillvalue) + fillvalueatt = att; + if(ismaxstrlen && att->nc_typeid == NC_INT) + zvar->maxstrlen = ((int*)att->data)[0]; + if(isdfaltmaxstrlen && att->nc_typeid == NC_INT) + zinfo->default_maxstrlen = ((int*)att->data)[0]; nullfree(data); data = NULL; /* passed to the attribute */ - /* Is this _FillValue ? */ - if(strcmp(att->hdr.name,_FillValue)==0) fillvalueatt = att; } } /* If we have not read a _FillValue, then go ahead and create it */ @@ -1480,9 +1483,11 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) int purezarr = 0; int xarray = 0; int formatv1 = 0; - nc_type typeid; + nc_type vtype; + int vtypelen; size64_t* shapes = NULL; int rank = 0; + int zarr_rank = 1; /* Need to watch out for scalars */ NClist* dimnames = nclistnew(); #ifdef ENABLE_NCZARR_FILTERS NCjson* jfilter = NULL; @@ -1501,7 +1506,6 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) NC_VAR_INFO_T* var; const char* varname = nclistget(varnames,i); - /* Create the NC_VAR_INFO_T object */ if((stat = nc4_var_list_add2(grp, varname, &var))) goto done; @@ -1546,37 +1550,106 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) } /* Set the type and endianness of the variable */ { - nc_type vtype; int endianness; if((stat = NCJdictget(jvar,"dtype",&jvalue))) goto done; /* Convert dtype to nc_type + endianness */ - if((stat = ncz_dtype2typeinfo(NCJstring(jvalue),&vtype,&endianness))) + if((stat = ncz_dtype2nctype(NCJstring(jvalue),NC_NAT,purezarr,&vtype,&endianness,&vtypelen))) goto done; - if(vtype > NC_NAT && vtype < NC_STRING) { + if(vtype > NC_NAT && vtype <= NC_MAX_ATOMIC_TYPE) { /* Locate the NC_TYPE_INFO_T object */ if((stat = ncz_gettype(file,grp,vtype,&var->type_info))) goto done; } else {stat = NC_EBADTYPE; 
goto done;} if(endianness == NC_ENDIAN_NATIVE) endianness = zinfo->native_endianness; + if(endianness == NC_ENDIAN_NATIVE) + endianness = (NCZ_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); if(endianness == NC_ENDIAN_LITTLE || endianness == NC_ENDIAN_BIG) { var->endianness = endianness; } else {stat = NC_EBADTYPE; goto done;} var->type_info->endianness = var->endianness; /* Propagate */ + if(vtype == NC_STRING) { + zvar->maxstrlen = vtypelen; + vtypelen = sizeof(char*); /* in-memory len */ + if(zvar->maxstrlen <= 0) zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var); + } + } + + if(!purezarr) { + /* Extract the _NCZARR_ARRAY values */ + /* Do this first so we know about storage esp. scalar */ + if(formatv1) { + /* Construct the path to the zarray object */ + if((stat = nczm_concat(varpath,NCZARRAY,&key))) + goto done; + /* Download the nczarray object */ + if((stat=NCZ_readdict(map,key,&jncvar))) + goto done; + nullfree(key); key = NULL; + } else {/* format v2 */ + /* Extract the NCZ_V2_ARRAY dict */ + if((stat = NCJdictget(jvar,NCZ_V2_ARRAY,&jncvar))) goto done; + if(!stat && jncvar == NULL) + {if((stat = NCJdictget(jvar,NCZ_V2_ARRAY_UC,&jncvar))) goto done;} + } + if(jncvar == NULL) {stat = NC_ENCZARR; goto done;} + assert((NCJsort(jncvar) == NCJ_DICT)); + /* Extract storage flag */ + if((stat = NCJdictget(jncvar,"storage",&jvalue))) + goto done; + if(jvalue != NULL) { + if(strcmp(NCJstring(jvalue),"chunked") == 0) { + var->storage = NC_CHUNKED; + } else if(strcmp(NCJstring(jvalue),"compact") == 0) { + var->storage = NC_COMPACT; + } else if(strcmp(NCJstring(jvalue),"scalar") == 0) { + var->storage = NC_CONTIGUOUS; + zvar->scalar = 1; + } else { /*storage = NC_CONTIGUOUS;*/ + var->storage = NC_CONTIGUOUS; + } + } + /* Extract dimrefs list */ + switch ((stat = NCJdictget(jncvar,"dimrefs",&jdimrefs))) { + case NC_NOERR: /* Extract the dimref names */ + assert((NCJsort(jdimrefs) == NCJ_ARRAY)); + if(zvar->scalar) { + assert(NCJlength(jdimrefs) == 0); + } else { + 
rank = NCJlength(jdimrefs); + for(j=0;jscalar) rank = 0; else rank = NCJlength(jvalue); - /* Set the rank of the variable */ + if(zvar->scalar) { + rank = 0; + zarr_rank = 1; /* Zarr does not support scalars */ + } else + rank = (zarr_rank = NCJlength(jvalue)); + /* Save the rank of the variable */ if((stat = nc4_var_set_ndims(var, rank))) goto done; /* extract the shapes */ - if((shapes = (size64_t*)malloc(sizeof(size64_t)*rank)) == NULL) + if((shapes = (size64_t*)malloc(sizeof(size64_t)*zarr_rank)) == NULL) {stat = THROW(NC_ENOMEM); goto done;} if((stat = decodeints(jvalue, shapes))) goto done; } + /* Capture dimension_separator (must precede chunk cache creation) */ { NCglobalstate* ngs = NC_getglobalstate(); @@ -1593,6 +1666,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) zvar->dimension_separator = ngs->zarr.dimension_separator; /* use global value */ assert(islegaldimsep(zvar->dimension_separator)); /* we are hosed */ } + /* fill_value; must precede calls to adjust cache */ { if((stat = NCJdictget(jvar,"fill_value",&jvalue))) goto done; @@ -1600,30 +1674,31 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) var->no_fill = 1; else { size_t fvlen; - typeid = var->type_info->hdr.id; + nc_type atypeid = vtype; var->no_fill = 0; - if((stat = computeattrdata(&typeid, jvalue, NULL, &fvlen, &var->fill_value))) + if((stat = computeattrdata(var->type_info->hdr.id, &atypeid, jvalue, NULL, &fvlen, &var->fill_value))) goto done; - assert(typeid == var->type_info->hdr.id); + assert(atypeid == vtype); /* Note that we do not create the _FillValue attribute here to avoid having to read all the attributes and thus foiling lazy read.*/ } } + /* chunks */ { - int rank; size64_t chunks[NC_MAX_VAR_DIMS]; if((stat = NCJdictget(jvar,"chunks",&jvalue))) goto done; if(jvalue != NULL && NCJsort(jvalue) != NCJ_ARRAY) {stat = THROW(NC_ENCZARR); goto done;} /* Verify the rank */ - rank = NCJlength(jvalue); - if(rank > 0) { + assert 
(zarr_rank == NCJlength(jvalue)); + if(!zvar->scalar) { + if(zarr_rank == 0) {stat = NC_ENCZARR; goto done;} var->storage = NC_CHUNKED; - if(var->ndims+zvar->scalar != rank) + if(var->ndims != rank) {stat = THROW(NC_ENCZARR); goto done;} - if((var->chunksizes = malloc(sizeof(size_t)*rank)) == NULL) + if((var->chunksizes = malloc(sizeof(size_t)*zarr_rank)) == NULL) {stat = NC_ENOMEM; goto done;} if((stat = decodeints(jvalue, chunks))) goto done; /* validate the chunk sizes */ @@ -1690,62 +1765,14 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) #endif } - if(!purezarr) { - if(formatv1) { - /* Construct the path to the zarray object */ - if((stat = nczm_concat(varpath,NCZARRAY,&key))) - goto done; - /* Download the nczarray object */ - if((stat=NCZ_readdict(map,key,&jncvar))) - goto done; - nullfree(key); key = NULL; - } else {/* format v2 */ - /* Extract the NCZ_V2_ARRAY dict */ - if((stat = NCJdictget(jvar,NCZ_V2_ARRAY,&jncvar))) goto done; - } - if(jncvar == NULL) {stat = NC_ENCZARR; goto done;} - assert((NCJsort(jncvar) == NCJ_DICT)); - /* Extract storage flag */ - if((stat = NCJdictget(jncvar,"storage",&jvalue))) - goto done; - if(jvalue != NULL) { - if(strcmp(NCJstring(jvalue),"chunked") == 0) { - var->storage = NC_CHUNKED; - } else if(strcmp(NCJstring(jvalue),"compact") == 0) { - var->storage = NC_COMPACT; - } else if(strcmp(NCJstring(jvalue),"scalar") == 0) { - var->storage = NC_CONTIGUOUS; - zvar->scalar = 1; - } else { /*storage = NC_CONTIGUOUS;*/ - var->storage = NC_CONTIGUOUS; - } - } - /* Extract dimnames list */ - switch ((stat = NCJdictget(jncvar,"dimrefs",&jdimrefs))) { - case NC_NOERR: /* Extract the dimref names */ - assert((NCJsort(jdimrefs) == NCJ_ARRAY)); - assert(NCJlength(jdimrefs) == rank); - for(j=0;jdim))) goto done; - /* Extract the dimids */ - for(j=0;jdimids[j] = var->dim[j]->hdr.id; + if(!zvar->scalar) { + /* Extract the dimids */ + for(j=0;jdimids[j] = var->dim[j]->hdr.id; + } #ifdef ENABLE_NCZARR_FILTERS /* At 
this point, we can finalize the filters */ @@ -1846,7 +1873,9 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) if(jzgroup != NULL) { /* See if this NCZarr V2 */ if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK,&jsuper))) goto done; - if(jsuper != NULL) { + if(!stat && jsuper == NULL) + {if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK_UC,&jsuper))) goto done;} + if(jsuper != NULL) { /* Extract the equivalent attribute */ if(jsuper->sort != NCJ_DICT) {stat = NC_ENCZARR; goto done;} @@ -2062,7 +2091,7 @@ decodeints(NCjson* jshape, size64_t* shapes) for(i=0;ii1", /*NC_BYTE*/ ">U1", /*NC_CHAR*/ ">i2", /*NC_SHORT*/ ">i4", /*NC_INT*/ -">f4", /*NC_FLOAT*/ ">f8", /*NC_DOUBLE*/ ">u1", /*NC_UBYTE*/ -">u2", /*NC_USHORT*/ ">u4", /*NC_UINT*/ ">i8", /*NC_INT64*/ ">u8", /*NC_UINT64*/ -NULL, /*NC_STRING*/ +/* Table of nc_type X {Zarr,NCZarr} X endianness +Issue: Need to distinguish NC_STRING && MAXSTRLEN==1 from NC_CHAR +in a way that allows other Zarr implementations to read the data. + +Available info: +Write: we have the netcdf type, so there is no ambiguity. +Read: we have the variable type and also any attribute dtype, +but those types are ambiguous. +We also have the attribute vs variable type problem. +For pure zarr, we have to infer the type of an attribute, +so if we have "var:strattr = \"abcdef\"", then we need +to decide how to infer the type: NC_STRING vs NC_CHAR. + +Solution: +For variables and for NCZarr type attributes, distinguish by using: +* "|S1" for NC_CHAR. +* ">S1" for NC_STRING && MAXSTRLEN==1 +It is a bit of a hack to use endianness, but it should be ok since for +string/char, the endianness has no meaning. +Note that we could use "|U1", but since this is utf-16 or utf-32 +in python, it may cause problems when reading what amounts to utf-8. + +For attributes, we infer: +* NC_CHAR if the hint is 0 + - e.g. var:strattr = 'abcdef'" => NC_CHAR +* NC_STRING if hint is NC_STRING. + - e.g.
string var:strattr = \"abc\", \"def\"" => NC_STRING + +Note also that if we read a pure zarr file we will probably always +see "|S1", so we will never see a variable of type NC_STRING with length == 1. +We might however see an attribute of type string. +*/ +static const struct ZTYPES { + char* zarr[3]; + char* nczarr[3]; +} znames[NUM_ATOMIC_TYPES] = { +/* nc_type Pure Zarr NCZarr + NE LE BE NE LE BE*/ +/*NC_NAT*/ {{NULL,NULL,NULL}, {NULL,NULL,NULL}}, +/*NC_BYTE*/ {{"|i1","i1"},{"|i1","i1"}}, +/*NC_CHAR*/ {{"|S1","|S1","|S1"},{"|S1","|S1","|S1"}}, +/*NC_SHORT*/ {{"|i2","i2"},{"|i2","i2"}}, +/*NC_INT*/ {{"|i4","i4"},{"|i4","i4"}}, +/*NC_FLOAT*/ {{"|f4","f4"},{"|f4","f4"}}, +/*NC_DOUBLE*/ {{"|f8","f8"},{"|f8","f8"}}, +/*NC_UBYTE*/ {{"|u1","u1"},{"|u1","u1"}}, +/*NC_USHORT*/ {{"|u2","u2"},{"|u2","u2"}}, +/*NC_UINT*/ {{"|u4","u4"},{"|u4","u4"}}, +/*NC_INT64*/ {{"|i8","i8"},{"|i8","i8"}}, +/*NC_UINT64*/ {{"|u8","u8"},{"|u8","u8"}}, +/*NC_STRING*/ {{">S%d",">S%d",">S%d"},{">S%d",">S%d",">S%d"}}, }; #if 0 @@ -47,6 +84,7 @@ NULL, /*NC_NAT*/ "4294967295", /*NC_UINT*/ "-9223372036854775806", /*NC_INT64*/ "18446744073709551614", /*NC_UINT64*/ +"", /*NC_STRING*/ }; #endif @@ -64,6 +102,7 @@ NCJ_INT, /*NC_USHORT*/ NCJ_INT, /*NC_UINT*/ NCJ_INT, /*NC_INT64*/ NCJ_INT, /*NC_UINT64*/ +NCJ_STRING, /*NC_STRING*/ }; /* Forward */ @@ -370,28 +409,6 @@ NCZ_readarray(NCZMAP* zmap, const char* key, NCjson** jsonp) return stat; } -/** -@internal Given an nc_type+other, produce the corresponding -zarr type name. 
-@param nctype - [in] nc_type -@param little - [in] 1=>little, 0 => big -@param namep - [out] pointer to hold pointer to the name -@return NC_NOERR -@author Dennis Heimbigner -*/ - -int -ncz_zarr_type_name(nc_type nctype, int little, const char** znamep) -{ - if(nctype <= 0 || nctype >= NC_STRING) return NC_EINVAL; - if(little) { - if(znamep) *znamep = znames_little[nctype]; - } else { - if(znamep) *znamep = znames_big[nctype]; - } - return NC_NOERR; -} - #if 0 /** @internal Given an nc_type, produce the corresponding @@ -405,7 +422,7 @@ default fill value as a string. int ncz_default_fill_value(nc_type nctype, const char** dfaltp) { - if(nctype <= 0 || nctype >= NC_STRING) return NC_EINVAL; + if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; if(dfaltp) *dfaltp = zfillvalue[nctype]; return NC_NOERR; } @@ -423,7 +440,7 @@ fill value JSON type int ncz_fill_value_sort(nc_type nctype, int* sortp) { - if(nctype <= 0 || nctype >= NC_STRING) return NC_EINVAL; + if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; if(sortp) *sortp = zjsonsort[nctype]; return NC_NOERR; } @@ -486,8 +503,9 @@ NCZ_subobjects(NCZMAP* map, const char* prefix, const char* tag, char dimsep, NC return stat; } +/* Convert a netcdf-4 type integer */ int -ncz_nctype2typeinfo(const char* snctype, nc_type* nctypep) +ncz_nctypedecode(const char* snctype, nc_type* nctypep) { unsigned nctype = 0; if(sscanf(snctype,"%u",&nctype)!=1) return NC_EINVAL; @@ -495,48 +513,111 @@ ncz_nctype2typeinfo(const char* snctype, nc_type* nctypep) return NC_NOERR; } +/** +@internal Given an nc_type+other, produce the corresponding dtype string. 
+@param nctype - [in] nc_type +@param endianness - [in] endianness +@param purezarr - [in] 1=>pure zarr, 0 => nczarr +@param strlen - [in] max string length +@param dnamep - [out] pointer to hold pointer to the dtype; user frees +@return NC_NOERR +@return NC_EINVAL +@author Dennis Heimbigner +*/ + +int +ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr, int strlen, char** dnamep) +{ + char dname[64]; + char* format = NULL; + + if(nctype <= NC_NAT || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; + if(purezarr) + format = znames[nctype].zarr[endianness]; + else + format = znames[nctype].nczarr[endianness]; + snprintf(dname,sizeof(dname),format,strlen); + if(dnamep) *dnamep = strdup(dname); + return NC_NOERR; +} + +/* +@internal Convert a numcodecs dtype spec to a corresponding nc_type. +@param dtype - [in] the dtype to convert +@param typehint - [in] help disambiguate char vs string +@param purezarr - [in] 1=>pure zarr, 0 => nczarr +@param nctypep - [out] hold corresponding type +@param endianp - [out] hold corresponding endianness +@param typelenp - [out] hold corresponding type size (for fixed length strings) +@return NC_NOERR +@return NC_EINVAL +@author Dennis Heimbigner +*/ + int -ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) +ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp) { int stat = NC_NOERR; int typelen = 0; int count; char tchar; nc_type nctype = NC_NAT; - int endianness = 0; + int endianness = -1; + const char* p; + int n; - if(endianness) *endianp = NC_ENDIAN_NATIVE; + if(endianp) *endianp = NC_ENDIAN_NATIVE; if(nctypep) *nctypep = NC_NAT; if(dtype == NULL) goto zerr; - if(strlen(dtype) < 3) goto zerr; - switch (dtype[0]) { + p = dtype; + switch (*p++) { case '<': endianness = NC_ENDIAN_LITTLE; break; case '>': endianness = NC_ENDIAN_BIG; break; + case '=': endianness = NC_ENDIAN_NATIVE; break; case '|': endianness = NC_ENDIAN_NATIVE;
break; - default: goto zerr; + default: p--; endianness = NC_ENDIAN_NATIVE; break; } + tchar = *p++; /* get the base type */ /* Decode the type length */ - count = sscanf(dtype+2,"%d",&typelen); - if(count != 1) goto zerr; - tchar = dtype[1]; - switch(typelen) { - case 1: - switch (tchar) { - case 'i': nctype = NC_BYTE; break; - case 'u': nctype = NC_UBYTE; break; - case 'U': nctype = NC_CHAR; break; - default: goto zerr; + count = sscanf(p,"%d%n",&typelen,&n); + if(count == 0) goto zerr; + p += n; + + /* Short circuit fixed length strings */ + if(tchar == 'S') { + /* Fixed length string */ + switch (typehint) { + case NC_CHAR: nctype = NC_CHAR; typelen = 1; break; + case NC_STRING: nctype = NC_STRING; break; + default: + if(typelen == 1) {/* so |S1 => NC_CHAR */ + if(purezarr || endianness == NC_ENDIAN_NATIVE) nctype = NC_CHAR; + } else + nctype = NC_STRING; } - break; - case 2: +#if 0 + } else if(tchar == 'U') {/*back compatibility*/ + if(purezarr || typelen != 1) goto zerr; + nctype = NC_CHAR; +#endif + } else { + switch(typelen) { + case 1: + switch (tchar) { + case 'i': nctype = NC_BYTE; break; + case 'u': nctype = NC_UBYTE; break; + default: goto zerr; + } + break; + case 2: switch (tchar) { case 'i': nctype = NC_SHORT; break; case 'u': nctype = NC_USHORT; break; default: goto zerr; } break; - case 4: + case 4: switch (tchar) { case 'i': nctype = NC_INT; break; case 'u': nctype = NC_UINT; break; @@ -544,7 +625,7 @@ ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) default: goto zerr; } break; - case 8: + case 8: switch (tchar) { case 'i': nctype = NC_INT64; break; case 'u': nctype = NC_UINT64; break; @@ -552,10 +633,16 @@ ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) default: goto zerr; } break; - default: goto zerr; + default: goto zerr; + } } + /* Convert NC_ENDIAN_NATIVE and NC_ENDIAN_NA */ + if(endianness == NC_ENDIAN_NATIVE) + endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); + if(nctypep) 
*nctypep = nctype; + if(typelenp) *typelenp = typelen; if(endianp) *endianp = endianness; done: @@ -565,7 +652,87 @@ ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) goto done; } +int +NCZ_inferattrtype(NCjson* value, nc_type typehint, nc_type* typeidp) +{ + int stat = NC_NOERR; + nc_type typeid; + NCjson* j = NULL; + unsigned long long u64; + long long i64; + int negative = 0; + + if(NCJsort(value) == NCJ_ARRAY && NCJlength(value) == 0) + {typeid = NC_NAT; goto done;} + + if(NCJsort(value) == NCJ_NULL) + {typeid = NC_NAT; goto done;} + + if(value->sort == NCJ_ARRAY) { + j=NCJith(value,0); + return NCZ_inferattrtype(j,typehint,typeidp); + } + + /* At this point, value is a primitive JSON Value */ + + switch (NCJsort(value)) { + case NCJ_NULL: + typeid = NC_NAT; + return NC_NOERR; + case NCJ_DICT: + typeid = NC_CHAR; + goto done; + case NCJ_UNDEF: + return NC_EINVAL; + default: /* atomic */ + break; + } + + if(NCJstring(value) != NULL) + negative = (NCJstring(value)[0] == '-'); + switch (value->sort) { + case NCJ_INT: + if(negative) { + sscanf(NCJstring(value),"%lld",&i64); + u64 = (unsigned long long)i64; + } else + sscanf(NCJstring(value),"%llu",&u64); + typeid = NCZ_inferinttype(u64,negative); + break; + case NCJ_DOUBLE: + typeid = NC_DOUBLE; + break; + case NCJ_BOOLEAN: + typeid = NC_UBYTE; + break; + case NCJ_STRING: /* requires special handling as an array of characters */ + typeid = NC_CHAR; + break; + default: + stat = NC_ENCZARR; + } +done: + if(typeidp) *typeidp = typeid; + return stat; +} +/* Infer the int type from the value; + minimum type will be int. 
+*/ +int +NCZ_inferinttype(unsigned long long u64, int negative) +{ + long long i64 = (long long)u64; /* keep bit pattern */ + if(!negative && u64 >= NC_MAX_INT64) return NC_UINT64; + if(i64 < 0) { + if(i64 >= NC_MIN_INT) return NC_INT; + return NC_INT64; + } + if(i64 <= NC_MAX_INT) return NC_INT; + if(i64 <= NC_MAX_UINT) return NC_UINT; + return NC_INT64; +} + /** @internal Similar to NCZ_grppath, but using group ids. @param gid - [in] group id @@ -788,3 +955,88 @@ NCZ_copy_fill_value(NC_VAR_INFO_T* var, void** dstp) if(dst) (void)nc_reclaim_data_all(ncid,tid,dst,1); return stat; } + + +/* Get max str len for a variable or grp */ +/* Has side effect of setting values in the + internal data structures */ +int +NCZ_get_maxstrlen(NC_OBJ* obj) +{ + int maxstrlen = 0; + assert(obj->sort == NCGRP || obj->sort == NCVAR); + if(obj->sort == NCGRP) { + NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)obj; + NC_FILE_INFO_T* file = grp->nc4_info; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + if(zfile->default_maxstrlen == 0) + zfile->default_maxstrlen = NCZ_MAXSTR_DEFAULT; + maxstrlen = zfile->default_maxstrlen; + } else { /*(obj->sort == NCVAR)*/ + NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)obj; + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + if(zvar->maxstrlen == 0) + zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var->container); + maxstrlen = zvar->maxstrlen; + } + return maxstrlen; +} + +int +NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen) +{ + size_t i; + unsigned char* sp = NULL; + const unsigned char* p = fixed; + memset((void*)charp,0,sizeof(char*)*count); + for(i=0;i maxstrlen) len = maxstrlen; + memcpy(p,charp[i],len); + } else { + memset(p,'\0',maxstrlen); + } + } + return NC_NOERR; +} + +/* +Wrap NC_copy_data, but take string value into account when overwriting +*/ +int +NCZ_copy_data(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* xtype, const void* memory, size_t count, int noclear, void* copy) +{ + if(xtype->hdr.id == 
NC_STRING && !noclear) { + size_t i; + char** scopy = (char**)copy; + /* Reclaim any string fill values in copy */ + for(i=0;icontroller->ext_ncid,xtype->hdr.id,memory,count,copy); +} diff --git a/libnczarr/zvar.c b/libnczarr/zvar.c index da4ebba6f4..dd6311346b 100644 --- a/libnczarr/zvar.c +++ b/libnczarr/zvar.c @@ -12,6 +12,11 @@ #include "zincludes.h" #include /* For pow() used below. */ +/* Mnemonics */ +#define CREATE 0 +#define NOCREATE 1 + + #ifdef LOGGING static void reportchunking(const char* title, NC_VAR_INFO_T* var) @@ -107,10 +112,7 @@ ncz_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var) double total_chunk_size; #endif - if (var->type_info->nc_type_class == NC_STRING) - type_size = sizeof(char *); - else - type_size = var->type_info->size; + type_size = var->type_info->size; #ifdef LOGGING /* Later this will become the total number of bytes in the default @@ -406,7 +408,7 @@ var->type_info->rc++; /* Set variables no_fill to match the database default unless the * variable type is variable length (NC_STRING or NC_VLEN) or is * user-defined type. */ - if (var->type_info->nc_type_class < NC_STRING) + if (var->type_info->nc_type_class <= NC_STRING) var->no_fill = (h5->fill_mode == NC_NOFILL); /* Assign dimensions to the variable. At the same time, check to @@ -473,7 +475,6 @@ var->type_info->rc++; if (type) if ((retval = nc4_type_free(type))) BAILLOG(retval); - return ZUNTRACE(retval); } diff --git a/libnczarr/zwalk.c b/libnczarr/zwalk.c index 87f490c592..70d879a493 100644 --- a/libnczarr/zwalk.c +++ b/libnczarr/zwalk.c @@ -128,10 +128,16 @@ NCZ_transferslice(NC_VAR_INFO_T* var, int reading, common.swap = (zfile->native_endianness == var->endianness ? 
0 : 1); common.chunkcount = 1; - for(r=0;rchunksizes[0]; + slices[0].start = 0; + slices[0].stride = 1; + slices[0].stop = 0; + slices[0].len = 1; + common.chunkcount = 1; + memshape[0] = 1; + } else for(r=0;rdim[r]->len; chunklens[r] = var->chunksizes[r]; slices[r].start = start[r]; @@ -221,7 +227,7 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) if((stat=wholechunk_indices(common,slices,chunkindices))) goto done; if(wdebug >= 1) fprintf(stderr,"case: wholechunk: chunkindices: %s\n",nczprint_vector(common->rank,chunkindices)); - /* Read the chunk */ + /* Read the chunk; handles fixed vs char* strings*/ switch ((stat = common->reader.read(common->reader.source, chunkindices, &chunkdata))) { case NC_EEMPTY: /* cache created the chunk */ break; @@ -232,9 +238,9 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) memptr = ((unsigned char*)common->memory); slpptr = ((unsigned char*)chunkdata); if(common->reading) { - memcpy(memptr,slpptr,common->chunkcount*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,slpptr,common->chunkcount,!ZCLEAR,memptr))) goto done; } else { - memcpy(slpptr,memptr,common->chunkcount*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,memptr,common->chunkcount,ZCLEAR,slpptr))) goto done; } // transfern(common,slpptr,memptr,common->chunkcount,1,chunkdata); if(zutest && zutest->tests & UTEST_WHOLECHUNK) @@ -410,15 +416,16 @@ NCZ_walk(NCZProjection** projv, NCZOdometer* chunkodom, NCZOdometer* slpodom, NC if(slpavail > 0) { if(wdebug > 0) wdebug2(common,slpptr0,memptr0,slpavail,laststride,chunkdata); if(common->reading) { - memcpy(memptr0,slpptr0,slpavail*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,slpptr0,slpavail,!ZCLEAR,memptr0))) goto done; } else { - memcpy(slpptr0,memptr0,slpavail*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,memptr0,slpavail,ZCLEAR,slpptr0))) goto done; } } // if((stat = 
transfern(common,slpptr0,memptr0,avail,nczodom_laststride(slpodom),chunkdata)))goto done; nczodom_next(memodom); nczodom_next(slpodom); } +done: return stat; } @@ -734,10 +741,11 @@ NCZ_transferscalar(struct Common* common) /* Figure out memory address */ memptr = ((unsigned char*)common->memory); slpptr = ((unsigned char*)chunkdata); - if(common->reading) - memcpy(memptr,slpptr,common->chunkcount*common->typesize); - else - memcpy(slpptr,memptr,common->chunkcount*common->typesize); + if(common->reading) { + if((stat=NCZ_copy_data(common->file,common->var->type_info,slpptr,common->chunkcount,!ZCLEAR,memptr))) goto done; + } else { + if((stat=NCZ_copy_data(common->file,common->var->type_info,memptr,common->chunkcount,ZCLEAR,slpptr))) goto done; + } done: return stat; @@ -749,7 +757,7 @@ NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata) { int stat = NC_NOERR; NC_VAR_INFO_T* var = NULL; - NCZ_VAR_INFO_T* zvar; + NCZ_VAR_INFO_T* zvar = NULL; struct NCZChunkCache* cache = NULL; void* cachedata = NULL; @@ -759,8 +767,9 @@ NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata) cache = zvar->cache; if((stat = NCZ_read_cache_chunk(cache,zindices,&cachedata))) goto done; - if(chunkdata) - memcpy(chunkdata,cachedata,cache->chunksize); + if(chunkdata) { + if((stat = nc_copy_data(ncid,var->type_info->hdr.id,cachedata,cache->chunkcount,chunkdata))) goto done; + } done: return stat; diff --git a/libnczarr/zxcache.c b/libnczarr/zxcache.c index 52f8abf2ef..374890be68 100644 --- a/libnczarr/zxcache.c +++ b/libnczarr/zxcache.c @@ -202,9 +202,14 @@ NCZ_create_chunk_cache(NC_VAR_INFO_T* var, size64_t chunksize, char dimsep, NCZC } static void -free_cache_entry(NCZCacheEntry* entry) +free_cache_entry(NCZChunkCache* cache, NCZCacheEntry* entry) { if(entry) { + int tid = cache->var->type_info->hdr.id; + if(tid == NC_STRING && !entry->isfixedstring) { + int ncid = cache->var->container->nc4_info->controller->ext_ncid; + 
nc_reclaim_data(ncid,tid,entry->data,cache->chunkcount); + } nullfree(entry->data); nullfree(entry->key.varkey); nullfree(entry->key.chunkkey); @@ -225,7 +230,7 @@ NCZ_free_chunk_cache(NCZChunkCache* cache) NCZCacheEntry* entry = nclistremove(cache->mru,0); (void)ncxcacheremove(cache->xcache,entry->hashkey,&ptr); assert(ptr == entry); - free_cache_entry(entry); + free_cache_entry(cache,entry); } #ifdef DEBUG fprintf(stderr,"|cache.free|=%ld\n",nclistlength(cache->mru)); @@ -303,7 +308,7 @@ fprintf(stderr,"|cache.read.lru|=%ld\n",nclistlength(cache->mru)); done: if(created && stat == NC_NOERR) stat = NC_EEMPTY; /* tell upper layers */ - if(entry) free_cache_entry(entry); + if(entry) free_cache_entry(cache,entry); return THROW(stat); } @@ -343,7 +348,7 @@ fprintf(stderr,"|cache.write|=%ld\n",nclistlength(cache->mru)); if((stat=makeroom(cache))) goto done; done: - if(entry) free_cache_entry(entry); + if(entry) free_cache_entry(cache,entry); return THROW(stat); } #endif @@ -449,6 +454,7 @@ NCZ_ensure_fill_chunk(NCZChunkCache* cache) { int i, stat = NC_NOERR; NC_VAR_INFO_T* var = cache->var; + nc_type typeid = var->type_info->hdr.id; size_t typesize = var->type_info->size; if(cache->fillchunk) goto done; @@ -461,6 +467,11 @@ NCZ_ensure_fill_chunk(NCZChunkCache* cache) goto done; } if((stat = NCZ_ensure_fill_value(var))) goto done; + if(typeid == NC_STRING) { + char* src = *((char**)(var->fill_value)); + char** dst = (char**)(cache->fillchunk); + for(i=0;ichunkcount;i++) dst[i] = strdup(src); + } else switch (typesize) { case 1: { unsigned char c = *((unsigned char*)var->fill_value); @@ -597,6 +608,9 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) NCZ_FILE_INFO_T* zfile = NULL; NCZMAP* map = NULL; char* path = NULL; + nc_type tid = NC_NAT; + void* strchunk = NULL; + int ncid = 0; ZTRACE(5,"cache.var=%s entry.key=%s",cache->var->hdr.name,entry->key); LOG((3, "%s: var: %p", __func__, cache->var)); @@ -605,6 +619,26 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* 
entry) zfile = file->format_file_info; map = zfile->map; + /* Collect some info */ + ncid = file->controller->ext_ncid; + tid = cache->var->type_info->hdr.id; + + if(tid == NC_STRING && !entry->isfixedstring) { + /* Convert from char* to char[strlen] format */ + int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); + assert(maxstrlen > 0); + if((strchunk = malloc(cache->chunkcount*maxstrlen))==NULL) {stat = NC_ENOMEM; goto done;} + /* copy char* to char[] format */ + if((stat = NCZ_char2fixed((const char**)entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done; + /* Reclaim the old chunk */ + if((stat = nc_reclaim_data_all(ncid,tid,entry->data,cache->chunkcount))) goto done; + entry->data = NULL; + entry->data = strchunk; strchunk = NULL; + entry->size = cache->chunkcount * maxstrlen; + entry->isfixedstring = 1; + } + + #ifdef ENABLE_NCZARR_FILTERS /* Make sure the entry is in filtered state */ if(!entry->isfiltered) { @@ -636,6 +670,7 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) default: goto done; } done: + nullfree(strchunk); nullfree(path); return ZUNTRACE(stat); } @@ -657,9 +692,12 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) NCZMAP* map = NULL; NC_FILE_INFO_T* file = NULL; NCZ_FILE_INFO_T* zfile = NULL; + NC_TYPE_INFO_T* xtype = NULL; + char** strchunk = NULL; size64_t size; int empty = 0; char* path = NULL; + int tid; ZTRACE(5,"cache.var=%s entry.key=%s sep=%d",cache->var->hdr.name,entry->key,cache->dimension_separator); @@ -670,22 +708,24 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) map = zfile->map; assert(map); + /* Collect some info */ + xtype = cache->var->type_info; + tid = xtype->hdr.id; + /* get size of the "raw" data on "disk" */ path = NCZ_chunkpath(entry->key); stat = nczmap_len(map,path,&size); nullfree(path); path = NULL; switch(stat) { - case NC_NOERR: break; + case NC_NOERR: entry->size = size; break; case NC_EEMPTY: empty = 1; stat = NC_NOERR; break; default: goto done; } if(!empty) { /* Make sure 
we have a place to read it */ - entry->size = size; - entry->isfiltered = FILTERED(cache); /* Is the data being read filtered? */ - if((entry->data = (void*)malloc(entry->size)) == NULL) - {stat = NC_ENOMEM; goto done;} + if((entry->data = (void*)calloc(1,entry->size)) == NULL) + {stat = NC_ENOMEM; goto done;} /* Read the raw data */ path = NCZ_chunkpath(entry->key); stat = nczmap_read(map,path,0,entry->size,(char*)entry->data); @@ -695,27 +735,32 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) case NC_EEMPTY: empty = 1; stat = NC_NOERR;break; default: goto done; } + entry->isfiltered = FILTERED(cache); /* Is the data being read filtered? */ + if(tid == NC_STRING) + entry->isfixedstring = 1; /* fill cache is in char[maxstrlen] format */ } if(empty) { /* fake the chunk */ entry->modified = (file->no_write?0:1); entry->size = cache->chunksize; - if((entry->data = (void*)malloc(entry->size)) == NULL) - {stat = NC_ENOMEM; goto done;} + entry->data = NULL; + entry->isfixedstring = 0; + entry->isfiltered = 0; /* apply fill value */ if(cache->fillchunk == NULL) {if((stat = NCZ_ensure_fill_chunk(cache))) goto done;} - memcpy(entry->data,cache->fillchunk,entry->size); - entry->isfiltered = 0; + if((entry->data = calloc(1,entry->size))==NULL) {stat = NC_ENOMEM; goto done;} + if((stat = NCZ_copy_data(file,xtype,cache->fillchunk,cache->chunkcount,!ZCLEAR,entry->data))) goto done; stat = NC_NOERR; } #ifdef ENABLE_NCZARR_FILTERS /* Make sure the entry is in unfiltered state */ - if(entry->isfiltered) { + if(!empty && entry->isfiltered) { NC_VAR_INFO_T* var = cache->var; void* unfiltered = NULL; /* pointer to the unfiltered data */ void* filtered = NULL; /* pointer to the filtered data */ size_t unflen; /* length of unfiltered data */ + assert(tid != NC_STRING || entry->isfixedstring); /* Get the filter chain to apply */ NClist* filterchain = (NClist*)var->filters; if(nclistlength(filterchain) == 0) {stat = NC_EFILTER; goto done;} @@ -730,7 +775,24 @@ 
get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) } #endif + if(tid == NC_STRING && entry->isfixedstring) { + /* Convert from char[strlen] to char* format */ + int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); + assert(maxstrlen > 0); + /* copy char[] to char* format */ + if((strchunk = (char**)malloc(sizeof(char*)*cache->chunkcount))==NULL) + {stat = NC_ENOMEM; goto done;} + if((stat = NCZ_fixed2char(entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done; + /* Reclaim the old chunk */ + nullfree(entry->data); + entry->data = NULL; + entry->data = strchunk; strchunk = NULL; + entry->size = cache->chunkcount * sizeof(char*); + entry->isfixedstring = 0; + } + done: + nullfree(strchunk); nullfree(path); return ZUNTRACE(stat); } diff --git a/libsrc4/nc4internal.c b/libsrc4/nc4internal.c index a4ead8f80f..5677fe340f 100644 --- a/libsrc4/nc4internal.c +++ b/libsrc4/nc4internal.c @@ -35,22 +35,24 @@ */ /** @internal List of reserved attributes. - WARNING: This list must be in sorted order for binary search. */ + WARNING: This list must be in (strcmp) sorted order for binary search. 
*/ static const NC_reservedatt NC_reserved[] = { {NC_ATT_CLASS, READONLYFLAG|HIDDENATTRFLAG}, /*CLASS*/ {NC_ATT_DIMENSION_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*DIMENSION_LIST*/ {NC_ATT_NAME, READONLYFLAG|HIDDENATTRFLAG}, /*NAME*/ {NC_ATT_REFERENCE_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*REFERENCE_LIST*/ {NC_XARRAY_DIMS, READONLYFLAG|HIDDENATTRFLAG}, /*_ARRAY_DIMENSIONS*/ - {NC_ATT_CODECS, VARFLAG|READONLYFLAG|NAMEONLYFLAG|HIDDENATTRFLAG}, /*_Codecs*/ + {NC_ATT_CODECS, VARFLAG|READONLYFLAG|NAMEONLYFLAG}, /*_Codecs*/ {NC_ATT_FORMAT, READONLYFLAG}, /*_Format*/ {ISNETCDF4ATT, READONLYFLAG|NAMEONLYFLAG}, /*_IsNetcdf4*/ - {NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG}, /*_NCProperties*/ - {NC_NCZARR_ATTR, READONLYFLAG|HIDDENATTRFLAG}, /*_NCZARR_ATTR*/ - {NC_ATT_COORDINATES, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Coordinates*/ - {NC_ATT_DIMID_NAME, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Dimid*/ + {NCPROPS,READONLYFLAG|NAMEONLYFLAG|HIDDENATTRFLAG}, /*_NCProperties*/ + {NC_NCZARR_ATTR_UC, READONLYFLAG|HIDDENATTRFLAG}, /*_NCZARR_ATTR */ + {NC_ATT_COORDINATES, READONLYFLAG|HIDDENATTRFLAG}, /*_Netcdf4Coordinates*/ + {NC_ATT_DIMID_NAME, READONLYFLAG|HIDDENATTRFLAG}, /*_Netcdf4Dimid*/ {SUPERBLOCKATT, READONLYFLAG|NAMEONLYFLAG}, /*_SuperblockVersion*/ - {NC_ATT_NC3_STRICT_NAME, READONLYFLAG|MATERIALIZEDFLAG}, /*_nc3_strict*/ + {NC_ATT_NC3_STRICT_NAME, READONLYFLAG}, /*_nc3_strict*/ + {NC_ATT_NC3_STRICT_NAME, READONLYFLAG}, /*_nc3_strict*/ + {NC_NCZARR_ATTR, READONLYFLAG|HIDDENATTRFLAG}, /*_nczarr_attr */ }; #define NRESERVED (sizeof(NC_reserved) / sizeof(NC_reservedatt)) /*|NC_reservedatt|*/ diff --git a/nc_test4/tst_filter.sh b/nc_test4/tst_filter.sh index e777956d0b..099e040d58 100755 --- a/nc_test4/tst_filter.sh +++ b/nc_test4/tst_filter.sh @@ -3,6 +3,7 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . 
../test_common.sh +set -x set -e # Which test cases to exercise diff --git a/ncgen/bindata.c b/ncgen/bindata.c index 4ed143f3a5..49ead4dfca 100644 --- a/ncgen/bindata.c +++ b/ncgen/bindata.c @@ -422,6 +422,7 @@ bin_generate_data_r(NCConstant* instance, Symbol* tsym, Datalist* fillvalue, Byt return stat; } +#if 0 /** Internal equivalent of ncaux_reclaim_data. */ @@ -591,5 +592,8 @@ bin_reclaim_compound(Symbol* tsym, Reclaim* reclaimer) } #endif /*USE_NETCDF4*/ +#endif /*0*/ + + #endif /*ENABLE_BINARY*/ diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt index ed41e4be3c..8e79c88735 100644 --- a/nczarr_test/CMakeLists.txt +++ b/nczarr_test/CMakeLists.txt @@ -116,6 +116,7 @@ IF(ENABLE_TESTS) add_sh_test(nczarr_test run_misc) add_sh_test(nczarr_test run_nczarr_fill) add_sh_test(nczarr_test run_jsonconvention) + add_sh_test(nczarr_test run_strings) BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC}) add_sh_test(nczarr_test run_quantize) diff --git a/nczarr_test/Makefile.am b/nczarr_test/Makefile.am index ca43d28b35..b462c89e40 100644 --- a/nczarr_test/Makefile.am +++ b/nczarr_test/Makefile.am @@ -62,6 +62,7 @@ TESTS += run_interop.sh TESTS += run_misc.sh TESTS += run_nczarr_fill.sh TESTS += run_jsonconvention.sh +TESTS += run_strings.sh endif @@ -149,7 +150,8 @@ ref_xarray.cdl ref_purezarr.cdl ref_purezarr_base.cdl ref_nczarr2zarr.cdl \ ref_bzip2.cdl ref_filtered.cdl ref_multi.cdl \ ref_any.cdl ref_oldformat.cdl ref_oldformat.zip ref_newformatpure.cdl \ ref_groups.h5 ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip \ -ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl ref_jsonconvention.cdl +ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl ref_jsonconvention.cdl \ +ref_string.cdl # Interoperability files EXTRA_DIST += ref_power_901_constants_orig.zip ref_power_901_constants.cdl ref_quotes_orig.zip ref_quotes.cdl diff --git a/nczarr_test/ref_quotes.cdl b/nczarr_test/ref_quotes.cdl index 97802cd76e..9caeae45e2 100644 --- 
a/nczarr_test/ref_quotes.cdl +++ b/nczarr_test/ref_quotes.cdl @@ -5,7 +5,7 @@ dimensions: lon = 30 ; variables: float fractional_snow_cover(time, lat, lon) ; - fractional_snow_cover:ID = 68b ; + fractional_snow_cover:ID = 68 ; fractional_snow_cover:esa_cci_path = NaN ; fractional_snow_cover:long_name = "Surface Fraction Covered by Snow" ; fractional_snow_cover:orig_attrs = "{\'comment\': \'Grid cell fractional snow cover based on the Globsnow CCI product.\', \'long_name\': \'Surface fraction covered by snow.\', \'project_name\': \'GlobSnow\', \'references\': \'Luojus, Kari, et al. \"ESA DUE Globsnow-Global Snow Database for Climate Research.\" ESA Special Publication. Vol. 686. 2010.\', \'source_name\': \'MFSC\', \'standard_name\': \'surface_snow_area_fraction\', \'units\': \'percent\', \'url\': \'http://www.globsnow.info/\'}" ; diff --git a/nczarr_test/ref_string.cdl b/nczarr_test/ref_string.cdl new file mode 100644 index 0000000000..f8895ff2a9 --- /dev/null +++ b/nczarr_test/ref_string.cdl @@ -0,0 +1,17 @@ +netcdf ref_string { +dimensions: + d = 2 ; +variables: + char c(d); + string v(d) ; + +// global attributes: + string :stringattr = "abc", "def" ; + :charattr = "ghi", "jkl" ; + :_nczarr_default_maxstrlen = 6 ; +data: + + c = "a", "b" ; + + v = "uvw", "xyz" ; +} diff --git a/nczarr_test/run_strings.sh b/nczarr_test/run_strings.sh new file mode 100755 index 0000000000..4a90d84248 --- /dev/null +++ b/nczarr_test/run_strings.sh @@ -0,0 +1,80 @@ +#!/bin/sh + +if test "x$srcdir" = x ; then srcdir=`pwd`; fi +. ../test_common.sh + +. 
"$srcdir/test_nczarr.sh" + +# This shell script tests support for the NC_STRING type + +#set -e + +# Cvt stringattr to single char string +stringfixsa() { +rm -f $2 +sed -e '/:stringattr/ s|string :|:|' -e '/:stringattr/ s|", "||g' < $1 > $2 +} + +# Cvt v var data to single char string +stringfixv() { +rm -f $2 +sed -e '/v = / s|", "||g' < $1 > $2 +} + +# Cvt charattr to single char string +stringfixca() { +rm -f $2 +sed -e '/:charattr/ s|", "||g' <$1 > $2 +} + +# Cvt c var data to single char string +stringfixc() { +rm -f $2 +sed -e '/c = / s|", "||g' < $1 > $2 +} + +testcase() { +zext=$1 + +echo "*** Test: nczarr string write then read; format=$zext" +# Get pure zarr args +fileargs tmp_string_zarr "mode=zarr,$zext" +zarrurl="$fileurl" +zarrfile="$file" +# Get nczarr args +fileargs tmp_string_nczarr "mode=nczarr,$zext" +nczarrurl="$fileurl" +nczarrfile="$file" + +# setup +deletemap $zext $zarrfile +deletemap $zext $nczarrfile + +# Create alternate ref files +echo "*** create pure zarr file" +${NCGEN} -4 -b -o "$zarrurl" $srcdir/ref_string.cdl +echo "*** create nczarr file" +${NCGEN} -4 -b -o "$nczarrurl" $srcdir/ref_string.cdl + +echo "*** read purezarr" +${NCDUMP} -n ref_string $zarrurl > tmp_string_zarr_${zext}.cdl +echo "*** read nczarr" +${NCDUMP} -n ref_string $nczarrurl > tmp_string_nczarr_${zext}.cdl + +echo "*** convert for nczarr comparison" +stringfixca ${srcdir}/ref_string.cdl tmp_ref_string_ca.cdl +stringfixc tmp_ref_string_ca.cdl tmp_ref_string_cac.cdl + +echo "*** convert for zarr comparison" +stringfixsa tmp_ref_string_cac.cdl tmp_ref_string_cacsa.cdl + +echo "*** verify" +diff -bw tmp_ref_string_cac.cdl tmp_string_nczarr_${zext}.cdl +diff -bw tmp_ref_string_cacsa.cdl tmp_string_zarr_${zext}.cdl +} + +testcase file +if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi +if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi + +exit 0 diff --git a/nczarr_test/zmapio.c b/nczarr_test/zmapio.c index c9c982c881..3131793e38 100644 --- 
a/nczarr_test/zmapio.c +++ b/nczarr_test/zmapio.c @@ -55,18 +55,21 @@ static struct Type { const char* typename; nc_type nctype; int typesize; + const char format[16]; } types[] = { -{"ubyte",NC_UBYTE,1}, -{"byte",NC_BYTE,1}, -{"ushort",NC_USHORT,2}, -{"short",NC_SHORT,2}, -{"uint",NC_UINT,4}, -{"int",NC_INT,4}, -{"uint64",NC_UINT64,8}, -{"int64",NC_INT64,8}, -{"float",NC_FLOAT,4}, -{"double",NC_DOUBLE,8}, -{NULL,NC_NAT,0} +{"ubyte",NC_UBYTE,1,"%u"}, +{"byte",NC_BYTE,1,"%d"}, +{"ushort",NC_USHORT,2,"%u"}, +{"short",NC_SHORT,2,"%d"}, +{"uint",NC_UINT,4,"%u"}, +{"int",NC_INT,4,"%d"}, +{"uint64",NC_UINT64,8,"%llu"}, +{"int64",NC_INT64,8,"%lld"}, +{"float",NC_FLOAT,4,"%f"}, +{"double",NC_DOUBLE,8,"%lf"}, +{"char",NC_CHAR,1,"'%c'"}, +{"string",NC_STRING,sizeof(char*),"%*s"}, +{NULL,NC_NAT,0,""} }; /* Command line options */ @@ -78,8 +81,10 @@ struct Dumpptions { NCZM_IMPL impl; char* rootpath; const struct Type* nctype; + char format[16]; int xflags; # define XNOZMETADATA 1 + int strlen; } dumpoptions; /* Forward */ @@ -120,9 +125,15 @@ decodeop(const char* name) } static const struct Type* -decodetype(const char* name) +decodetype(const char* name, int* strlenp) { struct Type* p = types; + + if(strncmp(name,"string/",strlen("string/"))==0) { + *strlenp = atoi(name+strlen("string/")); + name = "string"; + } + for(;p->typename != NULL;p++) { if(strcasecmp(p->typename,name)==0) return p; } @@ -138,9 +149,10 @@ main(int argc, char** argv) nc_initialize(); + /* Init options */ memset((void*)&dumpoptions,0,sizeof(dumpoptions)); - while ((c = getopt(argc, argv, "dhvx:t:T:X:")) != EOF) { + while ((c = getopt(argc, argv, "dhvx:t:F:T:X:")) != EOF) { switch(c) { case 'd': dumpoptions.debug = 1; @@ -148,17 +160,20 @@ main(int argc, char** argv) case 'h': dumpoptions.meta_only = 1; break; - case 'v': - zmapusage(); - goto done; case 't': - dumpoptions.nctype = decodetype(optarg); + dumpoptions.nctype = decodetype(optarg,&dumpoptions.strlen); if(dumpoptions.nctype == NULL) 
zmapusage(); break; case 'x': dumpoptions.mop = decodeop(optarg); if(dumpoptions.mop == MOP_NONE) zmapusage(); break; + case 'v': + zmapusage(); + goto done; + case 'F': + strcpy(dumpoptions.format,optarg); + break; case 'T': nctracelevel(atoi(optarg)); break; @@ -344,7 +359,7 @@ objdump(void) len = ceildiv(len,dumpoptions.nctype->typesize); } printf("[%d] %s : (%llu)",depth,obj,len); - if(kind == OK_CHUNK) + if(kind == OK_CHUNK && dumpoptions.nctype->nctype != NC_STRING) printf(" (%s)",dumpoptions.nctype->typename); printf(" |"); switch(kind) { @@ -434,25 +449,40 @@ static char hex[16] = "0123456789abcdef"; static void printcontent(size64_t len, const char* content, OBJKIND kind) { - size64_t i; + size64_t i, count; unsigned int c0,c1; - for(i=0;iformat; + if(dumpoptions.format[0] != '\0') + format = dumpoptions.format; + + if(dumpoptions.strlen > 0) { + strlen = dumpoptions.strlen; + count = ((len+strlen)-1)/strlen; + } else + count = len; + + for(i=0;i 0) printf(", "); switch(dumpoptions.nctype->nctype) { - case NC_BYTE: printf("%d",((char*)content)[i]); break; - case NC_SHORT: printf("%d",((short*)content)[i]); break; - case NC_INT: printf("%d",((int*)content)[i]); break; - case NC_INT64: printf("%lld",((long long*)content)[i]); break; - case NC_UBYTE: printf("%u",((unsigned char*)content)[i]); break; - case NC_USHORT: printf("%u",((unsigned short*)content)[i]); break; - case NC_UINT: printf("%u",((unsigned int*)content)[i]); break; - case NC_UINT64: printf("%llu",((unsigned long long*)content)[i]); break; - case NC_FLOAT: printf("%f",((float*)content)[i]); break; - case NC_DOUBLE: printf("%lf",((double*)content)[i]); break; + case NC_BYTE: printf(format,((char*)content)[i]); break; + case NC_SHORT: printf(format,((short*)content)[i]); break; + case NC_INT: printf(format,((int*)content)[i]); break; + case NC_INT64: printf(format,((long long*)content)[i]); break; + case NC_UBYTE: printf(format,((unsigned char*)content)[i]); break; + case NC_USHORT: 
printf(format,((unsigned short*)content)[i]); break; + case NC_UINT: printf(format,((unsigned int*)content)[i]); break; + case NC_UINT64: printf(format,((unsigned long long*)content)[i]); break; + case NC_FLOAT: printf(format,((float*)content)[i]); break; + case NC_DOUBLE: printf(format,((double*)content)[i]); break; + case NC_CHAR: printf(format,((char*)content)[i]); break; + case NC_STRING: printf(format,(int)strlen,((char*)(&content[i*strlen]))); break; default: abort(); } break;