
Merge branch 'v3plug.dmh' into clean1.dmh
DennisHeimbigner committed Jan 17, 2025
2 parents d23adb7 + e9fad85 commit 400134c
Showing 22 changed files with 271 additions and 111 deletions.
12 changes: 11 additions & 1 deletion .github/workflows/run_tests_win_cygwin.yml
@@ -87,13 +87,23 @@ jobs:
timeout-minutes: 30
run: |
make check -j$(nproc) SHELL=/bin/dash
- name: Upload autoconf test results
uses: actions/upload-artifact@v4
with:
name: cygwin-autotools-test-logs-${{ matrix.plugin_dir_option }}
path: |
*/*.log
*/*.trs
build-and-test-cmake:
name: Cygwin-based CMake tests
runs-on: windows-latest
defaults:
run:
shell: C:/cygwin/bin/bash.exe -eo pipefail -o igncr "{0}"
shell: /usr/bin/bash.exe -eo pipefail -o igncr "{0}"

# shell: C:/cygwin/bin/bash.exe -eo pipefail -o igncr "{0}"

steps:

3 changes: 1 addition & 2 deletions .github/workflows/run_tests_win_mingw.yml
@@ -70,8 +70,7 @@ jobs:
if: ${{ success() }}
id: tests

- name: Upload test failures
if: ${{ failure() && steps.tests.conclusion == 'failure' }}
- name: Upload autoconf test results
uses: actions/upload-artifact@v4
with:
name: mingw-autotools-test-logs-${{ matrix.msystem }}
6 changes: 1 addition & 5 deletions CMakeLists.txt
@@ -357,7 +357,7 @@ endif()

# Default Cache variables.
set(DEFAULT_CHUNK_SIZE 16777216 CACHE STRING "Default Chunk Cache Size.")
set(DEFAULT_CHUNK_CACHE_SIZE 16777216U CACHE STRING "Default Chunk Cache Size.")
set(DEFAULT_CHUNK_CACHE_SIZE 67108864U CACHE STRING "Default Chunk Cache Size.")
set(DEFAULT_CHUNKS_IN_CACHE 1000 CACHE STRING "Default number of chunks in cache.")
set(DEFAULT_CHUNK_CACHE_PREEMPTION 0.75 CACHE STRING "Default file chunk cache preemption policy (a number between 0 and 1, inclusive).")

@@ -1672,10 +1672,6 @@ install(FILES ${netCDF_BINARY_DIR}/netcdf.pc
##
print_conf_summary()

# Enable Makedist files.
ADD_MAKEDIST()
ENABLE_MAKEDIST(README.md COPYRIGHT RELEASE_NOTES.md INSTALL INSTALL.cmake test_prog.c lib_flags.am cmake CMakeLists.txt COMPILE.cmake.txt config.h.cmake.in cmake_uninstall.cmake.in netcdf-config-version.cmake.in netcdf-config.cmake.in FixBundle.cmake.in nc-config.cmake.in configure configure.ac install-sh config.h.in config.sub CTestConfig.cmake.in)

#####
# Configure and print the libnetcdf.settings file.
#####
2 changes: 1 addition & 1 deletion configure.ac
@@ -422,7 +422,7 @@ AC_MSG_CHECKING([whether a default cache size was specified])
AC_ARG_WITH([default-chunk-cache-size],
[AS_HELP_STRING([--with-default-chunk-cache-size=<integer>],
[Specify default size (in bytes) for chunk cache.])],
[DEFAULT_CHUNK_CACHE_SIZE=$with_default_chunk_cache_size], [DEFAULT_CHUNK_CACHE_SIZE=16777216U])
[DEFAULT_CHUNK_CACHE_SIZE=$with_default_chunk_cache_size], [DEFAULT_CHUNK_CACHE_SIZE=67108864U])
AC_MSG_RESULT([$DEFAULT_CHUNK_CACHE_SIZE])
AC_DEFINE_UNQUOTED([DEFAULT_CHUNK_CACHE_SIZE], [$DEFAULT_CHUNK_CACHE_SIZE], [default size of the chunk cache.])

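Both build systems above raise the default chunk cache from 16777216 bytes (16 MiB) to 67108864 bytes (64 MiB). The compile-time default can also be overridden per process through the public chunk-cache API; a minimal sketch, with values mirroring the cache defaults above:
```c
#include <stdio.h>
#include <netcdf.h>

int main(void) {
    /* 67108864 bytes == 64 MiB, matching the new DEFAULT_CHUNK_CACHE_SIZE. */
    size_t size = 67108864;   /* total cache size in bytes */
    size_t nelems = 1000;     /* slots, cf. DEFAULT_CHUNKS_IN_CACHE */
    float preemption = 0.75f; /* cf. DEFAULT_CHUNK_CACHE_PREEMPTION */
    int stat = nc_set_chunk_cache(size, nelems, preemption);
    if (stat != NC_NOERR) {
        fprintf(stderr, "nc_set_chunk_cache: %s\n", nc_strerror(stat));
        return 1;
    }
    return 0;
}
```
Note that nc_set_chunk_cache affects only files opened after the call.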
9 changes: 9 additions & 0 deletions docs/all-error-codes.md
@@ -116,6 +116,15 @@ were added for new errors unique to netCDF-4.
#define NC_ERCFILE (-133) // RC file failure
#define NC_ENULLPAD (-134) // Header Bytes not Null-Byte padded
#define NC_EINMEMORY (-135) // In-memory file error
#define NC_ENOFILTER (-136) // Filter not defined on variable.
#define NC_ENCZARR (-137) // Error at NCZarr layer.
#define NC_ES3 (-138) // Generic S3 error
#define NC_EEMPTY (-139) // Attempt to read empty NCZarr map key
#define NC_EOBJECT (-140) // Some object exists when it should not
#define NC_ENOOBJECT (-141) // Some object not found
#define NC_EPLUGIN (-142) // Unclassified failure in accessing a dynamically loaded plugin
~~~~
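
Since these codes are public, callers can branch on them directly; a minimal sketch that reports the newer NCZarr-related codes by name (the helper and messages are illustrative, not part of the library):
~~~~
#include <stdio.h>
#include <netcdf.h>

/* Illustrative helper: open a path and name the newer error codes. */
static int open_verbose(const char *path, int *ncidp) {
    int stat = nc_open(path, NC_NOWRITE, ncidp);
    switch (stat) {
    case NC_NOERR:     break;
    case NC_ENOFILTER: fprintf(stderr, "filter not defined on variable\n"); break;
    case NC_ENCZARR:   fprintf(stderr, "error at NCZarr layer\n"); break;
    case NC_EEMPTY:    fprintf(stderr, "empty NCZarr map key\n"); break;
    case NC_EPLUGIN:   fprintf(stderr, "plugin access failure\n"); break;
    default:           fprintf(stderr, "%s\n", nc_strerror(stat)); break;
    }
    return stat;
}
~~~~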

# PnetCDF Error Codes {#pnetcdf-error-codes}
51 changes: 32 additions & 19 deletions docs/nczarr.md
@@ -108,9 +108,14 @@ See the document "quickstart_paths" for details about
using URLs.

There are, however, some details that are important.
- Protocol: this should be _https_ or _s3_,or _file_.
The _s3_ scheme is equivalent to "https" plus setting "mode=s3".
Specifying "file" is for directory tree or zipfile format storage, and also for testing.
Several URL protocols are semantically meaningful for the NCZarr implementation.
* _http_ or _https_ -- this just signals that we have a URL; the actual storage type is inferred from the _mode_ flag or by probing the object to which the URL refers.
* _file_ -- The _file_ scheme is equivalent to "mode=...,file,..."
* _s3_ -- The _s3_ scheme is equivalent to "https" plus setting "mode=...,s3,..." plus using an elided host.
* _gs3_ -- The _gs3_ scheme is equivalent to "https" plus setting "mode=...,gs3,...", and using the Google-specific host.
* _zoh_ -- The _zoh_ scheme is equivalent to "http" plus setting "mode=...,zoh,...", plus using a host that leads to a server supporting the ZoH REST API.

Note that currently there is no "zip:" protocol, so it must be inferred or specified by a _mode_ tag.
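
As a concrete illustration of the protocol equivalences above, the following sketch opens one dataset through two equivalent spellings; the bucket name and region are hypothetical placeholders:
```c
#include <netcdf.h>

int main(void) {
    int ncid;
    /* "s3://" form: host elided, region resolved by the library. */
    if (nc_open("s3://datasetbucket/dataset", NC_NOWRITE, &ncid) == NC_NOERR)
        nc_close(ncid);
    /* Equivalent "https" form with an explicit "mode=...,s3,..." fragment. */
    if (nc_open("https://s3.us-east-1.amazonaws.com/datasetbucket/dataset#mode=nczarr,s3",
                NC_NOWRITE, &ncid) == NC_NOERR)
        nc_close(ncid);
    return 0;
}
```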

## Client Parameters

@@ -127,8 +132,8 @@ Additional pairs are provided to specify the Zarr version.

Obviously, _v2_ implies using the Zarr Version 2 format; similarly for _v3_.

Additional pairs are provided to specify the storage medium: Amazon S3 vs File tree vs Zip file.
- mode=file|zip|s3
Additional pairs are provided to specify the storage medium: Amazon S3 vs. file tree vs. zip file, etc.
- mode=file|zip|s3|gs3|zoh

The modes imply use of a specific driver:
* The _s3_ driver stores data using Amazon S3 or some equivalent.
@@ -345,16 +350,16 @@ The reason for this is that the bucket name forms the initial segment in the keys.
The NCZarr storage format is almost identical to that of the standard Zarr format.
The data model differs as follows.

1. Zarr only supports anonymous dimensions -- NCZarr supports only shared (named) dimensions, but can read anonymous dimensions by assigning special names to the anonymous dimensions.
1. Zarr only supports anonymous dimensions (plus a limited set of names via _\_ARRAY_DIMENSIONS_) -- NCZarr supports only shared (named) dimensions, but can read anonymous dimensions by assigning special names to the anonymous dimensions.
2. Zarr attributes are untyped -- or perhaps more correctly characterized as of type string (in "JSON" format). NCZarr supports typing of attributes.
3. Zarr does not explicitly support unlimited dimensions -- NCZarr does support them.
3. Zarr might not explicitly support unlimited dimensions (the documentation is unclear) -- NCZarr does support them.

## Storage Medium

Consider both NCZarr and Zarr, and assume S3 notions of bucket and object.
In both systems, Groups and Variables (aka Arrays in Zarr) map to S3 objects.
Containment is modeled using the fact that the dataset's key is a prefix of the variable's key.
So for example, if variable _v1_ is contained in top level group g1 -- _/g1 -- then the key for _v1_ is _/g1/v_.
So for example, if variable _v1_ is contained in top level group _g1_ (i.e. _/g1_), then the key for _v1_ is _/g1/v1_.
Additional meta-data information is stored in special objects whose name start with ".z" (V2) or "zarr.json" (V3).

In Zarr Version 2, the following special objects exist.
@@ -365,6 +370,7 @@ so for example the object _/g1/v1/.zarray_.
3. Group-level attributes and variable-level attributes are stored in a special object named _.zattr_;
so for example the objects _/g1/.zattr_ and _/g1/v1/.zattr_.
4. Chunk data is stored in objects named "\<n1\>.\<n2\>...\<nr\>" where the ni are positive integers representing the chunk index for the ith dimension.
Note that the character '/' can substitute for the '.' character in the chunk name.

The first three contain meta-data objects in the form of a string representing a JSON-formatted dictionary.
The NCZarr format uses the same objects as Zarr, but inserts NCZarr
@@ -411,10 +417,12 @@ zarr attributes.

The former case -- nczarr reading zarr -- is possible if the nczarr code can simulate or infer the contents of the missing _\_nczarr\_xxx_ attributes.
As a rule this can be done as follows.
1. _\_nczarr_group\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group.
For V2, the search looks for occurrences of _.zgroup_, _.zattr_, _.zarray_ to infer the keys for the contained groups, attribute sets, and arrays (variables).
For V2, the search looks for occurrences of _zarr.jsson_.
Constructing the set of "shared dimensions" is carried out
1. _\_nczarr_group\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group. Alternatively, the root group may contain a _.zmetadata_ object that can be used to determine the lists of variables and subgroups.

For V2, the search looks for occurrences of _.zmetadata_, _.zgroup_, _.zattr_, _.zarray_ to infer the keys for the contained groups, attribute sets, and arrays (variables).
For V3, the search looks for occurrences of _zarr.json_.

Constructing the set of "shared dimensions" is carried out
by walking all the variables in the whole dataset and collecting
the set of unique integer shapes for the variables.
For each such dimension length, a dimension is created in the root group
@@ -493,11 +501,11 @@ Here are a couple of examples using the _ncgen_ and _ncdump_ utilities.
```
3. Create an nczarr file using S3 as storage.
```
ncgen -4 -lb -o "s3://s3.us-west-1.amazonaws.com/datasetbucket" dataset.cdl
ncgen -4 -lb -o "s3://datasetbucket/" dataset.cdl
```
4. Create an nczarr file using S3 as storage and keeping to the pure zarr format.
```
ncgen -4 -lb -o 's3://s3.uswest-1.amazonaws.com/datasetbucket\#mode=zarr dataset.cdl
ncgen -4 -lb -o 'https://s3.us-west-1.amazonaws.com/datasetbucket\#mode=zarr,s3' dataset.cdl
```
5. Create an nczarr file using the s3 protocol with a specific profile
```
@@ -553,6 +561,7 @@ The relevant ./configure options are as follows.
The relevant CMake flags are as follows.
1. *-DNETCDF_ENABLE_NCZARR=off* -- equivalent to the Automake *--disable-nczarr* option.
## Testing NCZarr S3 Support {#nczarr_testing_S3_support}
The relevant tests for S3 support are in the _nczarr_test_ directory.
@@ -743,12 +752,14 @@ would be handled by Zarr as a string of length 1.
For Zarr version 3, the added NCZarr specific metadata is stored
as attributes pretty much the same as for Version 2.
Specifically, the following Netcdf-4 meta-data information needs to be captured by NCZarr:
1. Shared dimensions: name and size.
2. Unlimited dimensions: which dimensions are unlimited.
3. Attribute types.
4. Netcdf types not included in Zarr: currently "char" and "string".
5. Zarr types not included in Netcdf: currently only "complex(32|64)"
This extra netcdfd-4 meta-data is stored as attributes with special names so as to not interfere with existing implementations.
This extra netcdf-4 meta-data is stored as attributes with special names so as to not interfere with existing implementations.
## Supported Types
Zarr version 3 supports the following "atomic" types:
@@ -806,7 +817,7 @@ The relevant attribute has the following format:
````
Its purpose is two-fold:
1. record the objects immediately within that group
2. define netcdf-4 dimenension objects within that group.
2. define netcdf-4 dimension objects within that group.
## Array Annotations
In order to support Netcdf concepts in Zarr, it may be necessary
@@ -878,8 +889,7 @@ There is one entry for every attribute (including itself) giving the type
of that attribute.
It should be noted that Zarr allows the value of an attribute to be an arbitrary
JSON-encoded structure. In order to support this in netcdf-4, if such a structure
is encountered as an attribute value, then it typed as *json* (see previously
described table).
is encountered as an attribute value, then it has the type alias *json* (see previously described table).
## Codec Specification
The Zarr version 3 representation of codecs is slightly different
@@ -901,6 +911,9 @@ intended to be a detailed chronology. Rather, it provides highlights
that will be of interest to NCZarr users. In order to see exact changes,
it is necessary to use the 'git diff' command.
## 01/16/2025
1. Document the addition of .zmetadata support.
## 03/31/2024
1. Document the change to V2 to using attributes to hold NCZarr metadata.
@@ -926,4 +939,4 @@ include arbitrary JSON expressions; see Appendix D for more details.
__Author__: Dennis Heimbigner<br>
__Email__: dmh at ucar dot edu<br>
__Initial Version__: 4/10/2020<br>
__Last Revised__: 6/07/2024
__Last Revised__: 1/16/2025
33 changes: 24 additions & 9 deletions docs/quickstart_paths.md
@@ -71,8 +71,8 @@ This appendix lists known keys, although it may be somewhat out-of-date.
The current set of keys used in the netcdf-c library is as follows.
* _mode_ -- A special key that is used to provide single values for controlling the netcdf-c library. It consists of a comma separated sequence of values
primarily used to control the file format.
The *mode* key supports the following values
- _dap2_ -- Specifies that the URL accesses a resource using the DAP2 protocol
The *mode* key currently supports the following values
- _dap2_ -- Specifies that the URL accesses a resource using the DAP2 protocol (default if no mode is specified)
- _dap4_ -- Specifies that the URL accesses a resource using the DAP4 protocol
- _netcdf-3_ -- Specifies that a file is a netcdf-classic file
- _classic_ -- Alias for _netcdf-3_
Expand All @@ -88,6 +88,7 @@ The *mode* key supports the following values
- _file_ --Specifies that the file is an NCZarr/Zarr file stored as a file tree
- _zip_ --Specifies that the file is an NCZarr/Zarr file stored as a zip file
- _bytes_ -- Specifies that the file is remote and is to be read using byte-range support
- _zoh_ --Specifies that the file is remote and supports the [GWDG ZoH](https://pad.gwdg.de/DtHGRP38Sw2YQDAAjPuP2Q) (Zarr-over-HTTP) protocol
in NCZarr format
* _dap2_ -- equivalent to "mode=dap2"
* _dap4_ -- equivalent to "mode=dap4"
@@ -99,20 +100,18 @@ The *mode* key supports the following values
A URL path is required for accessing datasets on the Amazon S3 storage cloud.
Unfortunately S3 URLs are complicated.
They can have the following forms:
* _Virtual_: the host starts with the bucket name; e.g. __bucket.s3.&lt;region&gt;.amazonaws.com__ | __bucket.s3.amazonaws.com__
* _Path_: the host does not include the bucket name, but rather the bucket name is the first segment of the path. For example __s3.&lt;region&gt;.amazonaws.com/bucket__ or __s3.amazonaws.com/bucket__
* _Virtual_: the protocol is "http:" or "https:", the mode specifies "s3", and the host starts with the bucket name; e.g. __bucket.s3.&lt;region&gt;.amazonaws.com__ or __bucket.s3.amazonaws.com__
* _Path_: the protocol is "http:" or "https:", the mode specifies "s3", and the host does not include the bucket name, but rather the bucket name is the first segment of the path. For example __s3.&lt;region&gt;.amazonaws.com/bucket__ or __s3.amazonaws.com/bucket__
* _Protocol_: the protocol is "s3:" and if the host is a single name, then it is interpreted as the bucket. The region is determined using an algorithm defined in the nczarr documentation.

For all of the above URL forms, there are two additional pieces.
* Query: currently not used.
* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation.
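
For a concrete (hypothetical) bucket, the three forms above spell the same dataset as follows; bucket name and region are placeholders:
```c
/* Hypothetical bucket "datasetbucket" in region us-east-1. */
const char *virtual_form  = "https://datasetbucket.s3.us-east-1.amazonaws.com/dataset#mode=nczarr,s3";
const char *path_form     = "https://s3.us-east-1.amazonaws.com/datasetbucket/dataset#mode=nczarr,s3";
const char *protocol_form = "s3://datasetbucket/dataset";
```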

## Addendum C. Google Storage Specific URLS {#nc_paths_google_urls}
Google provides an interface to its storage that is compatible with
the Amazon S3 REST API.
Google provides an interface to its storage that is compatible with the Amazon S3 REST API.
A URL path is required for accessing datasets on the Google storage cloud.
Note that the Google host is always "storage.googleapis.com"
and has no concept of region.
Note that the Google host is always "storage.googleapis.com" and has no concept of region.
It has the following forms.
* _Path_: the bucket name is the first segment of the path.
For example __storage.googleapis.com/bucket__.
@@ -123,9 +122,25 @@ For all of the above URL forms, there are two additional pieces.
* Query: currently not used.
* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation.

## Addendum D. Zarr-Over-HTTP (ZoH) Protocol Specific URLS {#nc_paths_zoh_urls}
The [GWDG ZoH](https://pad.gwdg.de/DtHGRP38Sw2YQDAAjPuP2Q) (Zarr-over-HTTP) protocol provides an interface to any server supporting the ZoH REST API.
The URLs for this API are very similar to the S3 or Google URLs.
Note the _virtual_ URL format is not currently supported.
A ZoH URL has one of the following forms.
* _Path_: the protocol is "http:" or "https:", the host is any standard host (including an optional port number), and the bucket name is the first segment of the path.
For example __http://zoh.gwdg.de/&lt;bucket&gt;/&lt;key&gt;__.
* _Protocol_: the protocol is "zoh:" and a complete host must be specified.
The URL path part is the key to be interpreted by the ZoH server
as it wishes.

For all of the above URL forms, there are two additional pieces.
* Query: currently not used.
* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation.
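
A concrete (hypothetical) pair of equivalent spellings, with the server host and bucket as placeholders:
```c
/* Hypothetical ZoH server and bucket. */
const char *path_form     = "https://zoh.gwdg.de/datasetbucket/dataset#mode=nczarr,zoh";
const char *protocol_form = "zoh://zoh.gwdg.de/datasetbucket/dataset";
```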

## Point of Contact {#nc_paths_poc}

__Author__: Dennis Heimbigner<br>
__Email__: dmh at ucar dot edu<br>
__Initial Version__: 4/10/2020<br>
__Last Revised__: 11/01/2023
__Last Revised__: 1/16/2025

5 changes: 3 additions & 2 deletions include/ncs3sdk.h
@@ -14,8 +14,9 @@

/* Track the server type, if known */
typedef enum NCS3SVC {NCS3UNK=0, /* unknown */
NCS3=1, /* s3.amazon.aws */
NCS3GS=2 /* storage.googleapis.com */
NCS3=1, /* s3.amazonaws.com */
NCS3GS=2, /* storage.googleapis.com */
NCS3ZOH=4, /* ZoH Server */
} NCS3SVC;

/* Opaque Handles */
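The enumerators are powers of two (1, 2, 4), which suggests the server type can be tested as a bitmask; a hypothetical check, assuming the NCS3SVC enum above:
```c
/* Hypothetical helper; NCS3SVC comes from ncs3sdk.h above. */
static int is_zoh(NCS3SVC svc) {
    return (svc & NCS3ZOH) != 0;
}
```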
5 changes: 5 additions & 0 deletions include/netcdf.h
@@ -545,6 +545,11 @@ by the desired type. */

#define NC4_LAST_ERROR (-144) /**< @internal All netCDF errors > this. */

/*
* Don't forget to update docs/all-error-codes.md if adding new error codes here!
*
*/

/* Errors for all remote access methods(e.g. DAP and CDMREMOTE)*/
#define NC_EURL (NC_EDAPURL) /**< Malformed URL */
#define NC_ECONSTRAINT (NC_EDAPCONSTRAINT) /**< Malformed Constraint*/
4 changes: 2 additions & 2 deletions libdap4/d4meta.c
@@ -893,7 +893,7 @@ decodeEconst(NCD4meta* builder, NCD4node* enumtype, const char* nameorval, union
{
size_t i;
int ret = NC_NOERR;
union ATOMICS number;
union ATOMICS number = {0};
NCD4node* match = NULL;

/* First, see if the value is an econst name */
@@ -902,7 +902,7 @@
if(strcmp(ec->name,nameorval)==0) {match = ec; break;}
}
/* If no match, try to invert as a number to see if there is a matching econst */
if(!match) {
if(!match && enumtype->en.econsts) {
/* get the incoming value as number */
if((ret=convertString(&number,enumtype->basetype,nameorval)))
goto done;

