Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add experimental support for fixed length string type to nczarr. #2467

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ This file contains a high-level description of this package's evolution. Release

## 4.9.1 - T.B.D.

* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????).

* [Enhancement] Add experimental support for fixed length string type in nczarr. See [Github #2467](https://github.com/Unidata/netcdf-c/pull/2467).
* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #2466](https://github.com/Unidata/netcdf-c/pull/2466).
* [Bug Fix] Provide a default enum const when fill value does not match any enum constant for the value zero. See [Github #2462](https://github.com/Unidata/netcdf-c/pull/2462).
* [Bug Fix] Fix the json submodule symbol conflicts between libnetcdf and the plugin specific netcdf_json.h. See [Github #2448](https://github.com/Unidata/netcdf-c/pull/2448).
* [Bug Fix] Fix quantize with CLASSIC_MODEL files. See [Github #2405](https://github.com/Unidata/netcdf-c/pull/2445).
Expand Down
9 changes: 5 additions & 4 deletions include/nc4internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,6 @@ typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFIL} NC_SORT;
/** Subset of readonly flags; readable by name only thru the API. */
#define NAMEONLYFLAG 4

/** Subset of readonly flags; Value is actually in file. */
#define MATERIALIZEDFLAG 8

/** Per-variable attribute, as opposed to global */
#define VARFLAG 16

Expand Down Expand Up @@ -492,9 +489,13 @@ extern void NC_freeglobalstate(void);
#define NC_ATT_COORDINATES "_Netcdf4Coordinates" /*see hdf5internal.h:COORDINATES*/
#define NC_ATT_FORMAT "_Format"
#define NC_ATT_DIMID_NAME "_Netcdf4Dimid"
#define NC_ATT_FILLVALUE "_FillValue"
#define NC_ATT_NC3_STRICT_NAME "_nc3_strict"
#define NC_XARRAY_DIMS "_ARRAY_DIMENSIONS"
#define NC_ATT_CODECS "_Codecs"
#define NC_NCZARR_ATTR "_NCZARR_ATTR"
#define NC_NCZARR_ATTR "_nczarr_attr"
#define NC_NCZARR_ATTR_UC "_NCZARR_ATTR"
#define NC_NCZARR_MAXSTRLEN_ATTR "_nczarr_maxstrlen"
#define NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR "_nczarr_default_maxstrlen"

#endif /* _NC4INTERNAL_ */
5 changes: 1 addition & 4 deletions include/ncconfigure.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ extern "C" {
#ifndef HAVE_STRDUP
#ifndef strdup
char* strdup(const char*);
#define HAVE_STRDUP
#endif
#endif

Expand Down Expand Up @@ -120,11 +121,7 @@ unsigned long long int strtoull(const char*, char**, int);

/* handle null arguments */
#ifndef nulldup
#ifdef HAVE_STRDUP
#define nulldup(s) ((s)==NULL?NULL:strdup(s))
#else
extern char *nulldup(const char* s);
#endif
#endif

#ifndef nulllen
Expand Down
1 change: 1 addition & 0 deletions include/ncjson.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ typedef struct NCjson {
don't use union so we can know when to reclaim sval
*/
struct NCJconst {int bval; long long ival; double dval; char* sval;};
#define NCJconst_empty {0,0,0.0,NULL}

/**************************************************/
/* Extended API */
Expand Down
2 changes: 1 addition & 1 deletion libdap4/ncd4dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ static const NC_reservedatt NCD4_reserved[] = {
{D4CHECKSUMATTR, READONLYFLAG|NAMEONLYFLAG}, /*_DAP4_Checksum_CRC32*/
{D4LEATTR, READONLYFLAG|NAMEONLYFLAG}, /*_DAP4_Little_Endian*/
/* Also need to include the provenance attributes */
{NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG}, /*_NCProperties*/
{NCPROPS, READONLYFLAG|NAMEONLYFLAG}, /*_NCProperties*/
{NULL, 0}
};

Expand Down
118 changes: 62 additions & 56 deletions libnczarr/zarr.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, const char** contr
&zinfo->zarr.nczarr_version.minor,
&zinfo->zarr.nczarr_version.release);

zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT;

/* Apply client controls */
if((stat = applycontrols(zinfo))) goto done;

Expand Down Expand Up @@ -126,6 +128,7 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls)
zinfo->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG);
if((zinfo->envv_controls = NCZ_clonestringvec(0,controls))==NULL) /*0=>envv style*/
{stat = NC_ENOMEM; goto done;}
zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT;

/* Add struct to hold NCZ-specific group info. */
if (!(root->format_grp_info = calloc(1, sizeof(NCZ_GRP_INFO_T))))
Expand Down Expand Up @@ -288,6 +291,64 @@ ncz_open_rootgroup(NC_FILE_INFO_T* dataset)
}
#endif


static const char*
controllookup(const char** envv_controls, const char* key)
{
const char** p;
for(p=envv_controls;*p;p+=2) {
if(strcasecmp(key,*p)==0) {
return p[1];
}
}
return NULL;
}


static int
applycontrols(NCZ_FILE_INFO_T* zinfo)
{
int i,stat = NC_NOERR;
const char* value = NULL;
NClist* modelist = nclistnew();
int noflags = 0; /* track non-default negative flags */

if((value = controllookup((const char**)zinfo->envv_controls,"mode")) != NULL) {
if((stat = NCZ_comma_parse(value,modelist))) goto done;
}
/* Process the modelist first */
zinfo->controls.mapimpl = NCZM_DEFAULT;
zinfo->controls.flags |= FLAG_XARRAYDIMS; /* Always support XArray convention where possible */
for(i=0;i<nclistlength(modelist);i++) {
const char* p = nclistget(modelist,i);
if(strcasecmp(p,PUREZARRCONTROL)==0)
zinfo->controls.flags |= (FLAG_PUREZARR);
else if(strcasecmp(p,XARRAYCONTROL)==0)
zinfo->controls.flags |= FLAG_PUREZARR;
else if(strcasecmp(p,NOXARRAYCONTROL)==0)
noflags |= FLAG_XARRAYDIMS;
else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP;
else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE;
else if(strcasecmp(p,"s3")==0) zinfo->controls.mapimpl = NCZM_S3;
}
/* Apply negative controls by turning off negative flags */
/* This is necessary to avoid order dependence of mode flags when both positive and negative flags are defined */
zinfo->controls.flags &= (~noflags);

/* Process other controls */
if((value = controllookup((const char**)zinfo->envv_controls,"log")) != NULL) {
zinfo->controls.flags |= FLAG_LOGGING;
ncsetlogging(1);
}
if((value = controllookup((const char**)zinfo->envv_controls,"show")) != NULL) {
if(strcasecmp(value,"fetch")==0)
zinfo->controls.flags |= FLAG_SHOWFETCH;
}
done:
nclistfreeall(modelist);
return stat;
}

#if 0
/**
@internal Rewrite attributes into a group or var
Expand Down Expand Up @@ -332,7 +393,7 @@ ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjs
NCjson* k = NULL;
NCjson* v = NULL;
/* remove any previous version */
if(!NCJremove(jattrs,NCZ_V2_ATTRS,&k,&v)) {
if(!NCJremove(jattrs,NCZ_V2_ATTRS,1,&k,&v)) {
NCJreclaim(k); NCJreclaim(v);
}
}
Expand All @@ -358,60 +419,5 @@ ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjs
}
#endif

static const char*
controllookup(const char** envv_controls, const char* key)
{
const char** p;
for(p=envv_controls;*p;p+=2) {
if(strcasecmp(key,*p)==0) {
return p[1];
}
}
return NULL;
}


static int
applycontrols(NCZ_FILE_INFO_T* zinfo)
{
int i,stat = NC_NOERR;
const char* value = NULL;
NClist* modelist = nclistnew();
int noflags = 0; /* track non-default negative flags */

if((value = controllookup((const char**)zinfo->envv_controls,"mode")) != NULL) {
if((stat = NCZ_comma_parse(value,modelist))) goto done;
}
/* Process the modelist first */
zinfo->controls.mapimpl = NCZM_DEFAULT;
zinfo->controls.flags |= FLAG_XARRAYDIMS; /* Always support XArray convention where possible */
for(i=0;i<nclistlength(modelist);i++) {
const char* p = nclistget(modelist,i);
if(strcasecmp(p,PUREZARRCONTROL)==0)
zinfo->controls.flags |= (FLAG_PUREZARR);
else if(strcasecmp(p,XARRAYCONTROL)==0)
zinfo->controls.flags |= FLAG_PUREZARR;
else if(strcasecmp(p,NOXARRAYCONTROL)==0)
noflags |= FLAG_XARRAYDIMS;
else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP;
else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE;
else if(strcasecmp(p,"s3")==0) zinfo->controls.mapimpl = NCZM_S3;
}
/* Apply negative controls by turning off negative flags */
/* This is necessary to avoid order dependence of mode flags when both positive and negative flags are defined */
zinfo->controls.flags &= (~noflags);

/* Process other controls */
if((value = controllookup((const char**)zinfo->envv_controls,"log")) != NULL) {
zinfo->controls.flags |= FLAG_LOGGING;
ncsetlogging(1);
}
if((value = controllookup((const char**)zinfo->envv_controls,"show")) != NULL) {
if(strcasecmp(value,"fetch")==0)
zinfo->controls.flags |= FLAG_SHOWFETCH;
}
done:
nclistfreeall(modelist);
return stat;
}

16 changes: 11 additions & 5 deletions libnczarr/zarr.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ EXTERNL int ncz_unload_jatts(NCZ_FILE_INFO_T*, NC_OBJ* container, NCjson* jattrs
EXTERNL int ncz_close_file(NC_FILE_INFO_T* file, int abort);

/* zcvt.c */
EXTERNL int NCZ_convert1(NCjson* jsrc, nc_type, unsigned char* memory0);
EXTERNL int NCZ_stringconvert1(nc_type typid, size_t len, char* src, NCjson* jvalue);
EXTERNL int NCZ_convert1(NCjson* jsrc, nc_type, unsigned char* memory0, size_t* len);
EXTERNL int NCZ_stringconvert1(nc_type typid, char* src, NCjson* jvalue);
EXTERNL int NCZ_stringconvert(nc_type typid, size_t len, void* data0, NCjson** jdatap);

/* zsync.c */
Expand All @@ -53,9 +53,11 @@ EXTERNL int NCZ_dimkey(const NC_DIM_INFO_T* dim, char** pathp);
EXTERNL int ncz_splitkey(const char* path, NClist* segments);
EXTERNL int NCZ_readdict(NCZMAP* zmap, const char* key, NCjson** jsonp);
EXTERNL int NCZ_readarray(NCZMAP* zmap, const char* key, NCjson** jsonp);
EXTERNL int ncz_zarr_type_name(nc_type nctype, int little, const char** znamep);
EXTERNL int ncz_nctype2typeinfo(const char* snctype, nc_type* nctypep);
EXTERNL int ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianness);
EXTERNL int ncz_nctypedecode(const char* snctype, nc_type* nctypep);
EXTERNL int ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr,int len, char** dnamep);
EXTERNL int ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp);
EXTERNL int NCZ_inferattrtype(NCjson* value, nc_type typehint, nc_type* typeidp);
EXTERNL int NCZ_inferinttype(unsigned long long u64, int negative);
EXTERNL int ncz_fill_value_sort(nc_type nctype, int*);
EXTERNL int NCZ_createobject(NCZMAP* zmap, const char* key, size64_t size);
EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json);
Expand All @@ -73,6 +75,10 @@ EXTERNL int NCZ_ischunkname(const char* name,char dimsep);
EXTERNL char* NCZ_chunkpath(struct ChunkKey key);
EXTERNL int NCZ_reclaim_fill_value(NC_VAR_INFO_T* var);
EXTERNL int NCZ_copy_fill_value(NC_VAR_INFO_T* var, void** dstp);
EXTERNL int NCZ_get_maxstrlen(NC_OBJ* obj);
EXTERNL int NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen);
EXTERNL int NCZ_char2fixed(const char** charp, void* fixed, size_t count, int maxstrlen);
EXTERNL int NCZ_copy_data(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* xtype, const void* memory, size_t count, int nofill, void* copy);

/* zwalk.c */
EXTERNL int NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata);
Expand Down
29 changes: 24 additions & 5 deletions libnczarr/zattr.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,11 @@ ncz_getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp, NCindex **at
}

/**
* @internal Get one of three special attributes, NCPROPS,
* ISNETCDF4ATT, and SUPERBLOCKATT. These atts are not all really in
* the file, they are constructed on the fly.
* @internal Get one of the special attributes:
* See the reserved attribute table in libsrc4/nc4internal.c.
* The special attributes are the ones marked with NAMEONLYFLAG.
* For example: NCPROPS, ISNETCDF4ATT, and SUPERBLOCKATT, and CODECS.
* These atts are not all really in the file, they are constructed on the fly.
*
* @param h5 Pointer to ZARR file info struct.
* @param var Pointer to var info struct; NULL signals global.
Expand Down Expand Up @@ -426,7 +428,7 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type,
size_t len, const void *data, nc_type mem_type, int force)
{
NC* nc;
NC_FILE_INFO_T *h5;
NC_FILE_INFO_T *h5 = NULL;
NC_VAR_INFO_T *var = NULL;
NCindex* attlist = NULL;
NC_ATT_INFO_T* att;
Expand Down Expand Up @@ -575,7 +577,7 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type,
}

/* If this is the _FillValue attribute, then we will also have to
* copy the value to the fill_vlue pointer of the NC_VAR_INFO_T
* copy the value to the fill_value pointer of the NC_VAR_INFO_T
* struct for this var. (But ignore a global _FillValue
* attribute). Also kill the cache fillchunk as no longer valid */
if (!strcmp(att->hdr.name, _FillValue) && varid != NC_GLOBAL)
Expand Down Expand Up @@ -670,6 +672,23 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type,
att->data = copy; copy = NULL;
}
}

/* If this is a maxstrlen attribute, then we will also have to
* sync the value to NCZ_VAR_INFO_T or NCZ_FILE_INFO_T structure */
{
if(strcmp(att->hdr.name,NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR)==0 && varid == NC_GLOBAL && len == 1) {
NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)h5->format_file_info;
if((retval = nc4_convert_type(att->data, &zfile->default_maxstrlen, file_type, NC_INT,
len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0)))
BAIL(retval);
} else if(strcmp(att->hdr.name,NC_NCZARR_MAXSTRLEN_ATTR)==0 && varid != NC_GLOBAL && len == 1) {
NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info;
if((retval = nc4_convert_type(att->data, &zvar->maxstrlen, file_type, NC_INT,
len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0)))
BAIL(retval);
}
}

att->dirty = NC_TRUE;
att->created = NC_FALSE;
att->len = len;
Expand Down
1 change: 1 addition & 0 deletions libnczarr/zcache.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ typedef struct NCZCacheEntry {
} key;
size64_t hashkey;
int isfiltered; /* 1=>data contains filtered data else real data */
int isfixedstring; /* 1 => data contains the fixed strings, 0 => data contains pointers to strings */
size64_t size; /* |data| */
void* data; /* contains either filtered or real data */
} NCZCacheEntry;
Expand Down
12 changes: 4 additions & 8 deletions libnczarr/zclose.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,10 @@ zclose_vars(NC_GRP_INFO_T* grp)
var->filters = NULL;
#endif
/* Reclaim the type */
(void)zclose_type(var->type_info);
NCZ_free_chunk_cache(zvar->cache);
if(var->type_info) (void)zclose_type(var->type_info);
if(zvar->cache) NCZ_free_chunk_cache(zvar->cache);
/* reclaim xarray */
nclistfreeall(zvar->xarray);
if(zvar->xarray) nclistfreeall(zvar->xarray);
nullfree(zvar);
var->format_var_info = NULL; /* avoid memory errors */
}
Expand Down Expand Up @@ -223,13 +223,9 @@ static int
zclose_type(NC_TYPE_INFO_T* type)
{
int stat = NC_NOERR;
NCZ_TYPE_INFO_T* ztype;

assert(type && type->format_type_info != NULL);
/* Get Zarr-specific type info. */
ztype = type->format_type_info;
nullfree(ztype);
type->format_type_info = NULL; /* avoid memory errors */
nullfree(type->format_type_info);
return stat;
}

Expand Down
Loading