Skip to content

Commit

Permalink
JSON Escape group and dataset names
Browse files Browse the repository at this point in the history
  • Loading branch information
mattjala committed Jan 9, 2024
1 parent 218f579 commit 4275450
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 9 deletions.
84 changes: 84 additions & 0 deletions src/rest_vol.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
/* NOTE(review): presumably the multiplier applied to the retry delay after each failed attempt - confirm
 * against the code that uses it */
#define BACKOFF_SCALE_FACTOR 1.5
/* 3,000,000,000 ns = 3 sec.
 * NOTE(review): this comment previously read "30,000,000,000 ns -> 30 sec", which does not match the
 * literal below - confirm whether a 3 second or a 30 second limit is intended. */
#define BACKOFF_MAX_BEFORE_FAIL 3000000000

/* Number of distinct characters given a two-character backslash escape by RV_JSON_escape_string:
 * backspace, form feed, newline, carriage return, tab, double quote and backslash */
#define NUM_JSON_ESCAPE_CHARS 7
/*
* The VOL connector identification number.
*/
Expand Down Expand Up @@ -2233,6 +2235,7 @@ RV_find_object_by_path(RV_object_t *parent_obj, const char *obj_path, H5I_type_t
if (CURLE_OK != curl_easy_setopt(curl, CURLOPT_HTTPGET, 1))
FUNC_GOTO_ERROR(H5E_LINK, H5E_CANTSET, FAIL, "can't set up cURL to make HTTP GET request: %s",
curl_err_buf);

if (CURLE_OK != curl_easy_setopt(curl, CURLOPT_URL, request_url))
FUNC_GOTO_ERROR(H5E_LINK, H5E_CANTSET, FAIL, "can't set cURL request URL: %s", curl_err_buf);

Expand Down Expand Up @@ -3826,3 +3829,84 @@ RV_free_visited_link_hash_table_key(rv_hash_table_key_t value)
RV_free(value);
value = NULL;
} /* end RV_free_visited_link_hash_table_key() */

/*-------------------------------------------------------------------------
 * Function:    RV_JSON_escape_string
 *
 * Purpose:     Helper function to escape a string so that it can be
 *              embedded between double quotes in a JSON document.
 *
 *              Backspace, form feed, newline, carriage return, tab,
 *              double quote and backslash are written as their
 *              two-character JSON escapes (\b \f \n \r \t \" \\).
 *              Any other byte below 0x20 is written as a six-character
 *              \u00XX escape. All other bytes are copied unchanged.
 *
 *              If 'out' is NULL, *out_size is set to the buffer size
 *              (including the NUL terminator) needed for the escaped
 *              version of 'in'.
 *
 *              If 'out' is non-NULL, it must be a buffer of at least
 *              *out_size bytes, which is populated with the
 *              NUL-terminated escaped version of 'in'. If *out_size is
 *              too small the call fails and 'out' is left untouched.
 *
 * Return:      Non-negative on success/Negative on failure
 *              (NULL 'in', NULL 'out_size', or a buffer that is too
 *              small)
 *
 * Programmer:  Matthew Larson
 *              January, 2024
 */

/* Return the character that follows the backslash in the two-character
 * JSON escape for 'c', or '\0' when 'c' has no two-character escape. */
static char
RV_JSON_short_escape_code(char c)
{
    static const char escape_characters[NUM_JSON_ESCAPE_CHARS] = {'\b', '\f', '\n', '\r', '\t', '\"', '\\'};
    static const char escape_codes[NUM_JSON_ESCAPE_CHARS]      = {'b', 'f', 'n', 'r', 't', '\"', '\\'};

    for (size_t i = 0; i < NUM_JSON_ESCAPE_CHARS; i++)
        if (c == escape_characters[i])
            return escape_codes[i];

    return '\0';
}

herr_t
RV_JSON_escape_string(const char *in, char *out, size_t *out_size)
{
    static const char hex_digits[] = "0123456789abcdef";
    size_t            in_size      = 0;
    size_t            escaped_size = 1; /* Room for the NUL terminator */
    char             *out_ptr      = NULL;
    herr_t            ret_value    = SUCCEED;

    if (in == NULL)
        FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "string to escape is NULL");

    if (out_size == NULL)
        FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "output size pointer is NULL");

    in_size = strlen(in);

    /* Determine necessary buffer size. This is done for both modes so that a
     * too-small buffer is rejected before anything is written to it. */
    for (size_t i = 0; i < in_size; i++) {
        if (RV_JSON_short_escape_code(in[i]) != '\0')
            escaped_size += 2; /* '\' plus one escape character */
        else if ((unsigned char)in[i] < 0x20)
            escaped_size += 6; /* "\u00XX" */
        else
            escaped_size += 1;
    }

    if (out == NULL) {
        *out_size = escaped_size;
    }
    else {
        if (*out_size < escaped_size)
            FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "buffer too small for encoded string");

        /* Populate provided buffer; the size check above guarantees every write is in bounds */
        out_ptr = out;

        for (size_t i = 0; i < in_size; i++) {
            const char c    = in[i];
            const char code = RV_JSON_short_escape_code(c);

            if (code != '\0') {
                *out_ptr++ = '\\';
                *out_ptr++ = code;
            }
            else if ((unsigned char)c < 0x20) {
                /* JSON forbids raw control characters inside strings */
                *out_ptr++ = '\\';
                *out_ptr++ = 'u';
                *out_ptr++ = '0';
                *out_ptr++ = '0';
                *out_ptr++ = hex_digits[((unsigned char)c >> 4) & 0x0F];
                *out_ptr++ = hex_digits[(unsigned char)c & 0x0F];
            }
            else {
                *out_ptr++ = c;
            }
        }

        *out_ptr = '\0';
    }

done:

    return ret_value;
}
3 changes: 3 additions & 0 deletions src/rest_vol.h
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,9 @@ herr_t RV_tconv_init(hid_t src_type_id, size_t *src_type_size, hid_t dst_type_id
herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested,
server_api_version server_version);

/* Helper function to escape control characters for JSON strings.
 *
 * Intended to be called twice: first with 'out' == NULL, which stores the buffer
 * size required for the escaped copy of 'in' (including the NUL terminator) in
 * *out_size; then with 'out' pointing to a buffer of *out_size bytes, which is
 * filled with the escaped string.
 *
 * Returns non-negative on success, negative on failure. */
herr_t RV_JSON_escape_string(const char *in, char *out, size_t *out_size);

/* Evaluates to non-zero when 'version' (a struct value with 'major', 'minor' and
 * 'patch' members) is greater than or equal to major_needed.minor_needed.patch_needed,
 * and to zero otherwise.
 *
 * Each argument and the whole expansion are parenthesized so that the macro can be
 * negated or combined with other operators (e.g. !MACRO(...) or x && MACRO(...))
 * without the operator binding to only the first comparison. 'version' is evaluated
 * more than once, so it must not have side effects. */
#define SERVER_VERSION_MATCHES_OR_EXCEEDS(version, major_needed, minor_needed, patch_needed)                 \
    (((version).major > (major_needed)) ||                                                                   \
     ((version).major == (major_needed) && (version).minor > (minor_needed)) ||                              \
     ((version).major == (major_needed) && (version).minor == (minor_needed) &&                              \
      (version).patch >= (patch_needed)))
Expand Down
28 changes: 21 additions & 7 deletions src/rest_vol_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -3568,6 +3568,7 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
char *creation_properties_body = NULL;
char *link_body = NULL;
char *path_dirname = NULL;
char *escaped_link_name = NULL;
int create_request_len = 0;
int link_body_len = 0;
herr_t ret_value = SUCCEED;
Expand Down Expand Up @@ -3612,11 +3613,12 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
if (name) {
hbool_t empty_dirname;
char target_URI[URI_MAX_LENGTH];
const char *const link_basename = H5_rest_basename(name);
const char *const link_body_format = "\"link\": {"
"\"id\": \"%s\", "
"\"name\": \"%s\""
"}";
const char *const link_basename = H5_rest_basename(name);
const char *const link_body_format = "\"link\": {"
"\"id\": \"%s\", "
"\"name\": \"%s\""
"}";
size_t escaped_name_size = 0;

#ifdef RV_CONNECTOR_DEBUG
printf("-> Creating JSON link for dataset\n\n");
Expand All @@ -3643,15 +3645,25 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
FUNC_GOTO_ERROR(H5E_DATASET, H5E_PATH, FAIL, "can't locate target for dataset link");
} /* end if */

link_body_nalloc = strlen(link_body_format) + strlen(link_basename) +
/* JSON-escape link name */
if (RV_JSON_escape_string(link_basename, escaped_link_name, &escaped_name_size) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "can't get length of JSON escaped link name");

if ((escaped_link_name = RV_malloc(escaped_name_size)) == NULL)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for escaped link name");

if (RV_JSON_escape_string(link_basename, escaped_link_name, &escaped_name_size) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "can't JSON escape link name");

link_body_nalloc = strlen(link_body_format) + strlen(escaped_link_name) +
(empty_dirname ? strlen(pobj->URI) : strlen(target_URI)) + 1;
if (NULL == (link_body = (char *)RV_malloc(link_body_nalloc)))
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for dataset link body");

/* Form the Dataset Creation Link portion of the Dataset create request using the above format
* specifier and the corresponding arguments */
if ((link_body_len = snprintf(link_body, link_body_nalloc, link_body_format,
empty_dirname ? pobj->URI : target_URI, link_basename)) < 0)
empty_dirname ? pobj->URI : target_URI, escaped_link_name)) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_SYSERRSTR, FAIL, "snprintf error");

if ((size_t)link_body_len >= link_body_nalloc)
Expand Down Expand Up @@ -3721,6 +3733,8 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
RV_free(shape_body);
if (datatype_body)
RV_free(datatype_body);
if (escaped_link_name)
RV_free(escaped_link_name);

return ret_value;
} /* end RV_setup_dataset_create_request_body() */
Expand Down
19 changes: 17 additions & 2 deletions src/rest_vol_group.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name
char *base64_plist_buffer = NULL;
char target_URI[URI_MAX_LENGTH];
char request_url[URL_MAX_LENGTH];
char *escaped_group_name = NULL;
int create_request_body_len = 0;
int url_len = 0;
void *binary_plist_buffer = NULL;
Expand Down Expand Up @@ -128,6 +129,7 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name
if (name) {
const char *path_basename = H5_rest_basename(name);
hbool_t empty_dirname;
size_t escaped_name_size = 0;

#ifdef RV_CONNECTOR_DEBUG
printf("-> Creating JSON link for group\n\n");
Expand Down Expand Up @@ -204,15 +206,25 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name
if (RV_base64_encode(binary_plist_buffer, plist_nalloc, &base64_plist_buffer, &base64_buf_size) < 0)
FUNC_GOTO_ERROR(H5E_PLIST, H5E_CANTENCODE, NULL, "failed to base64 encode plist binary");

create_request_nalloc = strlen(fmt_string) + strlen(path_basename) +
/* Escape group name to be sent as JSON */
if (RV_JSON_escape_string(path_basename, escaped_group_name, &escaped_name_size) < 0)
FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTENCODE, NULL, "can't get size of JSON escaped group name");

if ((escaped_group_name = RV_malloc(escaped_name_size)) == NULL)
FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTALLOC, NULL, "can't allocate space for escaped group name");

if (RV_JSON_escape_string(path_basename, escaped_group_name, &escaped_name_size) < 0)
FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTENCODE, NULL, "can't JSON escape group name");

create_request_nalloc = strlen(fmt_string) + strlen(escaped_group_name) +
(empty_dirname ? strlen(parent->URI) : strlen(target_URI)) + base64_buf_size +
1;
if (NULL == (create_request_body = (char *)RV_malloc(create_request_nalloc)))
FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTALLOC, NULL,
"can't allocate space for group create request body");

if ((create_request_body_len = snprintf(create_request_body, create_request_nalloc, fmt_string,
empty_dirname ? parent->URI : target_URI, path_basename,
empty_dirname ? parent->URI : target_URI, escaped_group_name,
(char *)base64_plist_buffer)) < 0)
FUNC_GOTO_ERROR(H5E_SYM, H5E_SYSERRSTR, NULL, "snprintf error");

Expand Down Expand Up @@ -323,6 +335,9 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name
curl_headers = NULL;
} /* end if */

if (escaped_group_name)
RV_free(escaped_group_name);

PRINT_ERROR_STACK;

return ret_value;
Expand Down

0 comments on commit 4275450

Please sign in to comment.