Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read + Write Zarr with consolidated metadata #3066

Draft
wants to merge 25 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
0260f9c
Add dispatch layer for consolidated ZARR access
mannreis Nov 18, 2024
0832d45
Skip s3 listing until actually necessary
mannreis Nov 22, 2024
5061334
Resolving rebase issues
mannreis Nov 22, 2024
19046fc
Use zmetadata layer
mannreis Nov 22, 2024
32a98a1
Ensure no s3 listing with using consolidated metadata
mannreis Nov 22, 2024
802cb06
Remove draft consolidated zarr3
mannreis Nov 25, 2024
29fae5d
Free zarr metadata pointer
mannreis Nov 25, 2024
a2d9fb1
Fix memory leaks
mannreis Dec 10, 2024
50e4fa0
Housekeeping
mannreis Dec 10, 2024
6346e91
Add consolidated [nc]zarr tests with mode=file|zip
mannreis Dec 12, 2024
3ac802a
Allow to define S3 endpoint for tests during build
mannreis Dec 12, 2024
7ddd7c9
Setup initial Zarr consolidated tests
mannreis Dec 12, 2024
c20aec1
Add more tests
mannreis Dec 17, 2024
2b9f305
Fix warnings
mannreis Dec 17, 2024
42ff051
Adjusting DEFINEs
mannreis Dec 18, 2024
e2eef20
Removing dead code
mannreis Dec 18, 2024
19a23d8
NCjson make uploads take const pointer
mannreis Dec 13, 2024
3ffe288
Add Zarr metadata write layer
mannreis Dec 13, 2024
4064baa
Add zarr consolidated write
mannreis Dec 13, 2024
57bf0b9
Ensure no repeated keys in JSON dictionaries
mannreis Dec 17, 2024
f3d8dfb
Write .zmetadata when syncing file
mannreis Dec 17, 2024
207a8a6
Fix warnings
mannreis Dec 17, 2024
40e2e85
Rebase issues
mannreis Dec 18, 2024
eaa936b
Dead code
mannreis Dec 18, 2024
82b3458
Merge branch 'main' of https://github.com/Unidata/netcdf-c into zarr-csl
WardF Dec 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1737,6 +1737,7 @@ endif()

# The Unidata testing S3 bucket
# WARNING: this must match the value in configure.ac
set(S3ENDPOINT "s3.us-east-1.amazonaws.com" CACHE STRING "S3 endpoint")
set(S3TESTBUCKET "unidata-zarr-test-data" CACHE STRING "S3 test bucket")

# The working S3 path tree within the Unidata bucket.
Expand Down
3 changes: 3 additions & 0 deletions config.h.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ are set when opening a binary file on Windows. */
/* if true, enable S3 testing*/
#cmakedefine WITH_S3_TESTING "${WITH_S3_TESTING}"

/* S3 Test endpoint */
#define S3ENDPOINT "${S3ENDPOINT}"

/* S3 Test Bucket */
#define S3TESTBUCKET "${S3TESTBUCKET}"

Expand Down
5 changes: 5 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2114,6 +2114,11 @@ if test "x$enable_s3_aws" = xno && test "x$enable_s3_internal" = xno; then
AC_SUBST(WHICH_S3_SDK,[none])
fi

# The S3 endpoint used for testing
# WARNING: this must match the value in CMakeLists.txt
AC_DEFINE([S3ENDPOINT], ["s3.us-east-1.amazonaws.com"], [S3 test endpoint])
AC_SUBST([S3ENDPOINT],["s3.us-east-1.amazonaws.com"])

# The Unidata testing S3 bucket
# WARNING: this must match the value in CMakeLists.txt
AC_DEFINE([S3TESTBUCKET], ["unidata-zarr-test-data"], [S3 test bucket])
Expand Down
3 changes: 2 additions & 1 deletion include/netcdf.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,8 +531,9 @@ by the desired type. */
#define NC_EOBJECT (-140) /**< Some object exists when it should not */
#define NC_ENOOBJECT (-141) /**< Some object not found */
#define NC_EPLUGIN (-142) /**< Unclassified failure in accessing a dynamically loaded plugin */
#define NC_ENOTZARR (-143) /**< Malformed (NC)Zarr file */

#define NC4_LAST_ERROR (-142) /**< @internal All netCDF errors > this. */
#define NC4_LAST_ERROR (-143) /**< @internal All netCDF errors > this. */

/* Errors for all remote access methods(e.g. DAP and CDMREMOTE)*/
#define NC_EURL (NC_EDAPURL) /**< Malformed URL */
Expand Down
27 changes: 19 additions & 8 deletions libdispatch/ncjson.c
Original file line number Diff line number Diff line change
Expand Up @@ -898,15 +898,26 @@ NCJaddstring(NCjson* json, int sort, const char* s)
/**
 * Insert a key/value pair into a JSON dict object, keeping keys unique.
 *
 * A dict stores its members as a flat list of alternating entries:
 * key at even index i, the matching value at index i+1.
 *
 * If `key` is already present, the value stored for it is reclaimed and
 * replaced by `jvalue`; otherwise a new key string node is created and the
 * key/value pair is appended. In both cases `jvalue` itself (not a copy)
 * is stored in `object`.
 *
 * @param object the dict to modify; must be non-NULL with sort NCJ_DICT
 * @param key    the member name; must be non-NULL
 * @param jvalue the value to store; must be non-NULL
 * @return NCJ_OK on success, NCJ_ERR on invalid arguments or when creating
 *         or appending an entry fails.
 */
OPTSTATIC int
NCJinsert(NCjson* object, const char* key, NCjson* jvalue)
{
    int stat = NCJ_OK;
    NCjson* jkey = NULL;

    if(object == NULL || object->sort != NCJ_DICT || key == NULL || jvalue == NULL)
        {stat = NCJTHROW(NCJ_ERR); goto done;}

    /* Look for an existing member with this key. The i+1 bound makes sure
       a value slot really exists before it is touched. */
    for(size_t i = 0; i + 1 < NCJlength(object); i += 2) {
        NCjson* existing = NCJith(object, i);
        if(existing == NULL || existing->string == NULL) continue;
        if(strcmp(existing->string, key) != 0) continue;
        /* Re-inserting the value that is already stored must not free it,
           otherwise the dict would be left holding a dangling pointer. */
        if(object->list.contents[i + 1] != jvalue) {
            NCJreclaim(object->list.contents[i + 1]); /* free old value */
            object->list.contents[i + 1] = jvalue;
        }
        goto done;
    }

    /* Key not present: append a fresh key node followed by the value. */
    /* NOTE(review): if NCJappend fails, jkey is not released here because
       it is not visible from this function whether NCJappend may have
       retained the node -- confirm and reclaim it if that is safe. */
    if((stat = NCJnewstring(NCJ_STRING,key,&jkey))==NCJ_ERR) goto done;
    if((stat = NCJappend(object,jkey))==NCJ_ERR) goto done;
    if((stat = NCJappend(object,jvalue))==NCJ_ERR) goto done;

done:
    return NCJTHROW(stat);
}

/* Insert key-value pair as strings into a dict object.
Expand Down
3 changes: 3 additions & 0 deletions libnczarr/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ zgrp.c
zinternal.c
zmap.c
zmap_file.c
zmetadata2.c
zmetadata.c
zodom.c
zopen.c
zprov.c
Expand All @@ -39,6 +41,7 @@ zdispatch.h
zincludes.h
zinternal.h
zmap.h
zmetadata.h
zodom.h
zprovenance.h
zplugins.h
Expand Down
3 changes: 3 additions & 0 deletions libnczarr/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ zgrp.c \
zinternal.c \
zmap.c \
zmap_file.c \
zmetadata2.c \
zmetadata.c \
zodom.c \
zopen.c \
zprov.c \
Expand All @@ -59,6 +61,7 @@ zdispatch.h \
zincludes.h \
zinternal.h \
zmap.h \
zmetadata.h \
zodom.h \
zprovenance.h \
zplugins.h \
Expand Down
8 changes: 8 additions & 0 deletions libnczarr/zarr.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, NClist* controls)
if((stat = nczmap_create(zinfo->controls.mapimpl,nc->path,nc->mode,zinfo->controls.flags,NULL,&zinfo->map)))
goto done;

/* Initialize metadata handle */
assert(zinfo->map != NULL);
if((stat = NCZMD_set_metadata_handler(zinfo,(const NCZ_Metadata**)&zinfo->metadata_handler))) goto done;

done:
ncurifree(uri);
NCJreclaim(json);
Expand Down Expand Up @@ -143,6 +147,10 @@ ncz_open_dataset(NC_FILE_INFO_T* file, NClist* controls)
if((stat = nczmap_open(zinfo->controls.mapimpl,nc->path,mode,zinfo->controls.flags,NULL,&zinfo->map)))
goto done;

/* Initialize metadata handle */
assert(zinfo->map != NULL);
if((stat = NCZMD_set_metadata_handler(zinfo,(const NCZ_Metadata**)&zinfo->metadata_handler))) goto done;

/* Ok, try to read superblock */
if((stat = ncz_read_superblock(file,&nczarr_version,&zarr_format))) goto done;

Expand Down
2 changes: 1 addition & 1 deletion libnczarr/zarr.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ EXTERNL int NCZ_inferattrtype(const NCjson* value, nc_type typehint, nc_type* ty
EXTERNL int NCZ_inferinttype(unsigned long long u64, int negative);
EXTERNL int ncz_fill_value_sort(nc_type nctype, int*);
EXTERNL int NCZ_createobject(NCZMAP* zmap, const char* key, size64_t size);
EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json);
EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, const NCjson* json);
EXTERNL int NCZ_downloadjson(NCZMAP* zmap, const char* key, NCjson** jsonp);
EXTERNL int NCZ_isLittleEndian(void);
EXTERNL int NCZ_subobjects(NCZMAP* map, const char* prefix, const char* tag, char dimsep, NClist* objlist);
Expand Down
1 change: 1 addition & 0 deletions libnczarr/zclose.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ ncz_close_file(NC_FILE_INFO_T* file, int abort)
goto done;
nclistfreeall(zinfo->controllist);
NC_authfree(zinfo->auth);
NCZMD_free_metadata_handler(zinfo->metadata_handler);
nullfree(zinfo);

done:
Expand Down
1 change: 1 addition & 0 deletions libnczarr/zincludes.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ extern "C" {
#include "ncjson.h"

#include "zmap.h"
#include "zmetadata.h"
#include "zinternal.h"
#include "zdispatch.h"
#include "zprovenance.h"
Expand Down
13 changes: 8 additions & 5 deletions libnczarr/zinternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#define ZINTERNAL_H

#define ZARRVERSION "2"
#define ZARRFORMAT2 2

/* NCZARRVERSION is independent of Zarr version,
but NCZARRVERSION => ZARRVERSION */
Expand All @@ -38,11 +39,12 @@
# endif
#endif

#define ZMETAROOT "/.zgroup"
#define ZMETAATTR "/.zattrs"
#define ZGROUP ".zgroup"
#define ZATTRS ".zattrs"
#define ZARRAY ".zarray"
/* V2 Reserved Objects */
#define Z2METAROOT "/.zgroup"
#define Z2ATTSROOT "/.zattrs"
#define Z2GROUP ".zgroup"
#define Z2ATTRS ".zattrs"
#define Z2ARRAY ".zarray"

/* V2 Reserved Attributes */
/*
Expand Down Expand Up @@ -143,6 +145,7 @@ typedef struct NCZ_FILE_INFO {
# define FLAG_NCZARR_KEY 16 /* _nczarr_xxx keys are stored in object and not in _nczarr_attrs */
NCZM_IMPL mapimpl;
} controls;
struct NCZ_Metadata * metadata_handler;
int default_maxstrlen; /* default max str size for variables of type string */
} NCZ_FILE_INFO_T;

Expand Down
16 changes: 8 additions & 8 deletions libnczarr/zmap_s3sdk.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,14 @@ zs3open(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** m
z3map->s3client = NC_s3sdkcreateclient(&z3map->s3);

/* Search the root for content */
content = nclistnew();
if((stat = NC_s3sdkgetkeys(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,&nkeys,NULL,&z3map->errmsg)))
goto done;
if(nkeys == 0) {
/* dataset does not actually exist; we choose to return ENOOBJECT instead of EEMPTY */
stat = NC_ENOOBJECT;
goto done;
}
// content = nclistnew();
// if((stat = NC_s3sdkgetkeys(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,&nkeys,NULL,&z3map->errmsg)))
// goto done;
// if(nkeys == 0) {
// /* dataset does not actually exist; we choose to return ENOOBJECT instead of EEMPTY */
// stat = NC_ENOOBJECT;
// goto done;
// }
if(mapp) *mapp = (NCZMAP*)z3map;

done:
Expand Down
Loading
Loading