diff --git a/src/H5FDs3comms.c b/src/H5FDs3comms.c index 191f946ea7e..1691688702a 100644 --- a/src/H5FDs3comms.c +++ b/src/H5FDs3comms.c @@ -865,10 +865,12 @@ s3r_t * H5FD_s3comms_s3r_open(const char *url, const char *region, const char *id, const unsigned char *signing_key, const char *token) { - size_t tmplen = 0; - CURL *curlh = NULL; - s3r_t *handle = NULL; - parsed_url_t *purl = NULL; + size_t tmplen = 0; + CURL *curlh = NULL; + s3r_t *handle = NULL; + parsed_url_t *purl = NULL; + CURLU *curlurl = NULL; + CURLUcode rc; s3r_t *ret_value = NULL; FUNC_ENTER_NOAPI_NOINIT @@ -876,14 +878,40 @@ H5FD_s3comms_s3r_open(const char *url, const char *region, const char *id, const if (url == NULL || url[0] == '\0') HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, "url cannot be NULL"); - if (FAIL == H5FD_s3comms_parse_url(url, &purl)) - /* probably a malformed url, but could be internal error */ - HGOTO_ERROR(H5E_VFL, H5E_CANTCREATE, NULL, "unable to create parsed url structure"); - - assert(purl != NULL); /* if above passes, this must be true */ - - handle = (s3r_t *)H5MM_malloc(sizeof(s3r_t)); - if (handle == NULL) + /* Parse URL */ + + if (NULL == (curlurl = curl_url())) + HGOTO_ERROR(H5E_VFL, H5E_CANTCREATE, NULL, "unable to curl url"); + if (CURLUE_OK != curl_url_set(curlurl, CURLUPART_URL, url, 0)) + HGOTO_ERROR(H5E_VFL, H5E_CANTCREATE, NULL, "unable to parse url"); + + if (NULL == (purl = (parsed_url_t *)H5MM_calloc(sizeof(parsed_url_t)))) + HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, NULL, "can't allocate space for parsed_url_t"); + + /* scheme */ + rc = curl_url_get(curlurl, CURLUPART_SCHEME, &(purl->scheme), 0); + if (CURLUE_OK != rc) + HGOTO_ERROR(H5E_VFL, H5E_CANTCREATE, NULL, "unable to get url scheme"); + /* host */ + rc = curl_url_get(curlurl, CURLUPART_HOST, &(purl->host), 0); + if (CURLUE_OK != rc) + HGOTO_ERROR(H5E_VFL, H5E_CANTCREATE, NULL, "unable to get url host"); + /* port - okay to not exist */ + rc = curl_url_get(curlurl, CURLUPART_PORT, &(purl->port), 0); + if (CURLUE_OK != rc && CURLUE_NO_PORT != rc) + HGOTO_ERROR(H5E_VFL, H5E_CANTCREATE, NULL, "unable to get url port"); + /* path */ + rc = curl_url_get(curlurl, CURLUPART_PATH, &(purl->path), 0); + if (CURLUE_OK != rc) + HGOTO_ERROR(H5E_VFL, H5E_CANTCREATE, NULL, "unable to get url path"); + /* query - okay to not exist */ + rc = curl_url_get(curlurl, CURLUPART_QUERY, &(purl->query), 0); + if (CURLUE_OK != rc && CURLUE_NO_QUERY != rc) + HGOTO_ERROR(H5E_VFL, H5E_CANTCREATE, NULL, "unable to get url query"); + + /* Create handle and set fields */ + + if (NULL == (handle = (s3r_t *)H5MM_malloc(sizeof(s3r_t)))) HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, NULL, "could not malloc space for handle"); handle->purl = purl; @@ -935,14 +963,13 @@ H5FD_s3comms_s3r_open(const char *url, const char *region, const char *id, const if (handle->token == NULL) HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "could not malloc space for handle token copy"); H5MM_memcpy(handle->token, token, tmplen); - } /* if authentication information provided */ + } /************************ * INITIATE CURL HANDLE * ************************/ - curlh = curl_easy_init(); - if (curlh == NULL) + if (NULL == (curlh = curl_easy_init())) HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "problem creating curl easy handle!"); if (CURLE_OK != curl_easy_setopt(curlh, CURLOPT_HTTPGET, 1L)) @@ -987,8 +1014,10 @@ H5FD_s3comms_s3r_open(const char *url, const char *region, const char *id, const done: if (ret_value == NULL) { - if (curlh != NULL) - curl_easy_cleanup(curlh); + /* Can't fail, returns void */ + curl_url_cleanup(curlurl); + curl_easy_cleanup(curlh); + if (FAIL == H5FD_s3comms_free_purl(purl)) HDONE_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "unable to free parsed url structure"); if (handle != NULL) { @@ -996,8 +1025,7 @@ H5FD_s3comms_s3r_open(const char *url, const char *region, const char *id, const H5MM_xfree(handle->secret_id); H5MM_xfree(handle->signing_key); H5MM_xfree(handle->token); - if (handle->httpverb != NULL) - H5MM_xfree(handle->httpverb); + H5MM_xfree(handle->httpverb); H5MM_xfree(handle); } } @@ -1597,40 +1625,31 @@ H5FD_s3comms_bytes_to_hex(char *dest, const unsigned char *msg, size_t msg_len, /*---------------------------------------------------------------------------- * - * Function: H5FD_s3comms_free_purl() - * - * Purpose: - * - * Release resources from a parsed_url_t pointer. - * - * If pointer is NULL, nothing happens. - * - * Return: + * Function: H5FD_s3comms_free_purl() * - * `SUCCEED` (never fails) + * Purpose: Release resources from a parsed_url_t pointer * + * Return: SUCCEED (Can't fail - passing NULL is okay) *---------------------------------------------------------------------------- */ herr_t H5FD_s3comms_free_purl(parsed_url_t *purl) { + herr_t ret_value = SUCCEED; + FUNC_ENTER_NOAPI_NOINIT_NOERR - if (purl != NULL) { - if (purl->scheme != NULL) - H5MM_xfree(purl->scheme); - if (purl->host != NULL) - H5MM_xfree(purl->host); - if (purl->port != NULL) - H5MM_xfree(purl->port); - if (purl->path != NULL) - H5MM_xfree(purl->path); - if (purl->query != NULL) - H5MM_xfree(purl->query); - H5MM_xfree(purl); - } + if (NULL == purl) + HGOTO_DONE(SUCCEED); - FUNC_LEAVE_NOAPI(SUCCEED) + curl_free(purl->scheme); + curl_free(purl->host); + curl_free(purl->port); + curl_free(purl->path); + curl_free(purl->query); + +done: + FUNC_LEAVE_NOAPI(ret_value) } /* end H5FD_s3comms_free_purl() */ /*---------------------------------------------------------------------------- @@ -1916,206 +1935,6 @@ H5FD_s3comms_load_aws_profile(const char *profile_name, char *key_id_out, char * FUNC_LEAVE_NOAPI(ret_value) } /* end H5FD_s3comms_load_aws_profile() */ -/*---------------------------------------------------------------------------- - * - * Function: H5FD_s3comms_parse_url() - * - * Purpose: - * - * Parse URL-like string and stuff URL components into - * `parsed_url` structure, if possible. - * - * Expects NUL-terminated string of format: - * SCHEME "://" HOST [":" PORT ] ["/" [ PATH ] ] ["?" QUERY] - * where SCHEME :: "[a-zA-Z/.-]+" - * PORT :: "[0-9]" - * - * Stores resulting structure in argument pointer `purl`, if successful, - * creating and populating new `parsed_url_t` structure pointer. - * Empty or absent elements are NULL in new purl structure. - * - * Return: - * - * - SUCCESS: `SUCCEED` - * - `purl` pointer is populated - * - FAILURE: `FAIL` - * - unable to parse - * - `purl` is unaltered (probably NULL) - * - *---------------------------------------------------------------------------- - */ -herr_t -H5FD_s3comms_parse_url(const char *str, parsed_url_t **_purl) -{ - parsed_url_t *purl = NULL; /* pointer to new structure */ - const char *tmpstr = NULL; /* working pointer in string */ - const char *curstr = str; /* "start" pointer in string */ - long int len = 0; /* substring length */ - long int urllen = 0; /* length of passed-in url string */ - unsigned int i = 0; - herr_t ret_value = FAIL; - - FUNC_ENTER_NOAPI_NOINIT - - if (str == NULL || *str == '\0') - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid url string"); - - urllen = (long int)strlen(str); - - purl = (parsed_url_t *)H5MM_malloc(sizeof(parsed_url_t)); - if (purl == NULL) - HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, "can't allocate space for parsed_url_t"); - purl->scheme = NULL; - purl->host = NULL; - purl->port = NULL; - purl->path = NULL; - purl->query = NULL; - - /*************** - * READ SCHEME * - ***************/ - - tmpstr = strchr(curstr, ':'); - if (tmpstr == NULL) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "invalid SCHEME construction: probably not URL"); - len = tmpstr - curstr; - assert((0 <= len) && (len < urllen)); - - /* check for restrictions */ - for (i = 0; i < len; i++) { - /* scheme = [a-zA-Z+-.]+ (terminated by ":") */ - if (!isalpha(curstr[i]) && '+' != curstr[i] && '-' != curstr[i] && '.' != curstr[i]) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "invalid SCHEME construction"); - } - - /* copy lowercased scheme to structure */ - purl->scheme = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); - if (purl->scheme == NULL) - HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, "can't allocate space for SCHEME"); - strncpy(purl->scheme, curstr, (size_t)len); - purl->scheme[len] = '\0'; - for (i = 0; i < len; i++) - purl->scheme[i] = (char)tolower(purl->scheme[i]); - - /* Skip "://" */ - tmpstr += 3; - curstr = tmpstr; - - /************* - * READ HOST * - *************/ - - if (*curstr == '[') { - /* IPv6 */ - while (']' != *tmpstr) { - /* end of string reached! */ - if (tmpstr == 0) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "reached end of URL: incomplete IPv6 HOST"); - tmpstr++; - } - tmpstr++; - } /* end if (IPv6) */ - else { - while (0 != *tmpstr) { - if (':' == *tmpstr || '/' == *tmpstr || '?' == *tmpstr) - break; - tmpstr++; - } - } /* end else (IPv4) */ - len = tmpstr - curstr; - if (len == 0) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "HOST substring cannot be empty"); - else if (len > urllen) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "problem with length of HOST substring"); - - /* copy host */ - purl->host = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); - if (purl->host == NULL) - HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, "can't allocate space for HOST"); - strncpy(purl->host, curstr, (size_t)len); - purl->host[len] = 0; - - /************* - * READ PORT * - *************/ - - if (':' == *tmpstr) { - tmpstr += 1; /* advance past ':' */ - curstr = tmpstr; - while ((0 != *tmpstr) && ('/' != *tmpstr) && ('?' != *tmpstr)) - tmpstr++; - len = tmpstr - curstr; - if (len == 0) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "PORT element cannot be empty"); - else if (len > urllen) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "problem with length of PORT substring"); - for (i = 0; i < len; i++) - if (!isdigit(curstr[i])) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "PORT is not a decimal string"); - - /* copy port */ - purl->port = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); - if (purl->port == NULL) - HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, "can't allocate space for PORT"); - strncpy(purl->port, curstr, (size_t)len); - purl->port[len] = 0; - } /* end if PORT element */ - - /************* - * READ PATH * - *************/ - - if ('/' == *tmpstr) { - /* advance past '/' */ - tmpstr += 1; - curstr = tmpstr; - - /* seek end of PATH */ - while ((0 != *tmpstr) && ('?' != *tmpstr)) - tmpstr++; - len = tmpstr - curstr; - if (len > urllen) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "problem with length of PATH substring"); - if (len > 0) { - purl->path = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); - if (purl->path == NULL) - HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, "can't allocate space for PATH"); - strncpy(purl->path, curstr, (size_t)len); - purl->path[len] = 0; - } - } /* end if PATH element */ - - /************** - * READ QUERY * - **************/ - - if ('?' == *tmpstr) { - tmpstr += 1; - curstr = tmpstr; - while (0 != *tmpstr) - tmpstr++; - len = tmpstr - curstr; - if (len == 0) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "QUERY cannot be empty"); - else if (len > urllen) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "problem with length of QUERY substring"); - purl->query = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); - if (purl->query == NULL) - HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, "can't allocate space for QUERY"); - strncpy(purl->query, curstr, (size_t)len); - purl->query[len] = 0; - } /* end if QUERY exists */ - - *_purl = purl; - ret_value = SUCCEED; - -done: - if (ret_value == FAIL) - H5FD_s3comms_free_purl(purl); - - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5FD_s3comms_parse_url() */ - /*---------------------------------------------------------------------------- * * Function: H5FD_s3comms_signing_key() diff --git a/src/H5FDs3comms.h b/src/H5FDs3comms.h index 7e541646d13..60ac21f6a1b 100644 --- a/src/H5FDs3comms.h +++ b/src/H5FDs3comms.h @@ -498,8 +498,6 @@ H5_DLL herr_t H5FD_s3comms_HMAC_SHA256(const unsigned char *key, size_t key_len, H5_DLL herr_t H5FD_s3comms_load_aws_profile(const char *name, char *key_id_out, char *secret_access_key_out, char *aws_region_out); -H5_DLL herr_t H5FD_s3comms_parse_url(const char *str, parsed_url_t **purl); - H5_DLL herr_t H5FD_s3comms_signing_key(unsigned char *md, const char *secret, const char *region, const char *iso8601now); diff --git a/test/s3comms.c b/test/s3comms.c index 61d73f37f35..49dc31e29e5 100644 --- a/test/s3comms.c +++ b/test/s3comms.c @@ -896,298 +896,6 @@ test_HMAC_SHA256(void) } /* end test_HMAC_SHA256() */ -/*--------------------------------------------------------------------------- - * Function: test_parse_url - * - * - * Return: PASS : 0 - * FAIL : 1 - *--------------------------------------------------------------------------- - */ -static int -test_parse_url(void) -{ - typedef struct { - const char *scheme; - const char *host; - const char *port; - const char *path; - const char *query; - } const_purl_t; - - struct testcase { - const char *url; - herr_t exp_ret; /* expected return */ - const_purl_t expected; /* unused if exp_ret is FAIL */ - const char *msg; - }; - - parsed_url_t *purl = NULL; - const int NCASES = 15; - struct testcase cases[] = { - { - NULL, - FAIL, - {NULL, NULL, NULL, NULL, NULL}, - "null url", - }, - { - "", - FAIL, - {NULL, NULL, NULL, NULL, NULL}, - "empty url", - }, - { - "ftp://[1000:4000:0002:2010]", - SUCCEED, - { - "ftp", - "[1000:4000:0002:2010]", - NULL, - NULL, - NULL, - }, - "IPv6 ftp and empty path (root)", - }, - { - "ftp://[1000:4000:0002:2010]:2040", - SUCCEED, - { - "ftp", - "[1000:4000:0002:2010]", - "2040", - NULL, - NULL, - }, - "root IPv6 ftp with port", - }, - { - "http://some.domain.org:9000/path/to/resource.txt", - SUCCEED, - { - "http", - "some.domain.org", - "9000", - "path/to/resource.txt", - NULL, - }, - "without query", - }, - { - "https://domain.me:00/file.txt?some_params unchecked", - SUCCEED, - { - "https", - "domain.me", - "00", - "file.txt", - "some_params unchecked", - }, - "with query", - }, - { - "ftp://domain.com/", - SUCCEED, - { - "ftp", - "domain.com", - NULL, - NULL, - NULL, - }, - "explicit root w/out port", - }, - { - "ftp://domain.com:1234/", - SUCCEED, - { - "ftp", - "domain.com", - "1234", - NULL, - NULL, - }, - "explicit root with port", - }, - { - "ftp://domain.com:1234/file?", - FAIL, - { - NULL, - NULL, - NULL, - NULL, - NULL, - }, - "empty query is invalid", - }, - { - "ftp://:1234/file", - FAIL, - { - NULL, - NULL, - NULL, - NULL, - NULL, - }, - "no host", - }, - { - "h&r block", - FAIL, - { - NULL, - NULL, - NULL, - NULL, - NULL, - }, - "no scheme (bad URL)", - }, - { - "http://domain.com?a=b&d=b", - SUCCEED, - { - "http", - "domain.com", - NULL, - NULL, - "a=b&d=b", - }, - "QUERY with implicit PATH", - }, - { - "http://[5]/path?a=b&d=b", - SUCCEED, - { - "http", - "[5]", - NULL, - "path", - "a=b&d=b", - }, - "IPv6 extraction is really dumb", - }, - { - "http://[1234:5678:0910:1112]:port/path", - FAIL, - { - NULL, - NULL, - NULL, - NULL, - NULL, - }, - "non-decimal PORT (port)", - }, - { - "http://mydomain.com:01a3/path", - FAIL, - { - NULL, - NULL, - NULL, - NULL, - NULL, - }, - "non-decimal PORT (01a3)", - }, - }; - - TESTING("url-parsing functionality"); - - /********* - * TESTS * - *********/ - - for (int i = 0; i < NCASES; i++) { - - if (cases[i].exp_ret != H5FD_s3comms_parse_url(cases[i].url, &purl)) - TEST_ERROR; - - if (cases[i].exp_ret == FAIL) { - /* On FAIL, `purl` should be untouched--remains NULL */ - if (purl != NULL) - TEST_ERROR; - } - else { - /* On SUCCEED, `purl` should be set */ - if (purl == NULL) - TEST_ERROR; - - if (cases[i].expected.scheme != NULL) { - if (NULL == purl->scheme) - TEST_ERROR; - if (strcmp(cases[i].expected.scheme, purl->scheme)) - TEST_ERROR; - } - else { - if (NULL != purl->scheme) - TEST_ERROR; - } - - if (cases[i].expected.host != NULL) { - if (NULL == purl->host) - TEST_ERROR; - if (strcmp(cases[i].expected.host, purl->host)) - TEST_ERROR; - } - else { - if (NULL != purl->host) - TEST_ERROR; - } - - if (cases[i].expected.port != NULL) { - if (NULL == purl->port) - TEST_ERROR; - if (strcmp(cases[i].expected.port, purl->port)) - TEST_ERROR; - } - else { - if (NULL != purl->port) - TEST_ERROR; - } - - if (cases[i].expected.path != NULL) { - if (NULL == purl->path) - TEST_ERROR; - if (strcmp(cases[i].expected.path, purl->path)) - TEST_ERROR; - } - else { - if (NULL != purl->path) - TEST_ERROR; - } - - if (cases[i].expected.query != NULL) { - if (NULL == purl->query) - TEST_ERROR; - if (strcmp(cases[i].expected.query, purl->query)) - TEST_ERROR; - } - else { - if (NULL != purl->query) - TEST_ERROR; - } - } - - if (H5FD_s3comms_free_purl(purl) < 0) - TEST_ERROR; - - purl = NULL; - } - - PASSED(); - return 0; - -error: - H5FD_s3comms_free_purl(purl); - - return 1; - -} /* end test_parse_url() */ - /*--------------------------------------------------------------------------- * Function: test_signing_key * @@ -1423,7 +1131,6 @@ test_s3r_open(void) struct tm *now = NULL; char iso8601now[ISO8601_SIZE]; s3r_t *handle = NULL; - parsed_url_t *purl = NULL; TESTING("s3r_open"); @@ -1456,27 +1163,6 @@ test_s3r_open(void) snprintf(url_raven, S3_TEST_MAX_URL_SIZE, "%s/%s", s3_test_bucket_url, S3_TEST_RESOURCE_TEXT_PUBLIC)) TEST_ERROR; - /* Set given bucket url with invalid/inactive port number for badport. - * Note, this sort of micro-management of parsed_url_t is not advised - */ - if (H5FD_s3comms_parse_url(s3_test_bucket_url, &purl) < 0) - TEST_ERROR; - - if (purl->port == NULL) { - if (NULL == (purl->port = (char *)H5MM_malloc(sizeof(char) * 5))) - TEST_ERROR; - if (5 < snprintf(purl->port, 5, "9000")) - TEST_ERROR; - } - else if (strcmp(purl->port, "9000") != 0) { - if (5 < snprintf(purl->port, 5, "9000")) - TEST_ERROR; - } - else { - if (5 < snprintf(purl->port, 5, "1234")) - TEST_ERROR; - } - if (NULL == (now = gmnow())) TEST_ERROR; if (ISO8601NOW(iso8601now, now) != (ISO8601_SIZE - 1)) @@ -1586,16 +1272,11 @@ test_s3r_open(void) TEST_ERROR; handle = NULL; - if (H5FD_s3comms_free_purl(purl) < 0) - TEST_ERROR; - PASSED(); return 0; error: if (handle != NULL) H5FD_s3comms_s3r_close(handle); - if (purl != NULL) - H5FD_s3comms_free_purl(purl); return 1; } /* end test_s3r_open() */ @@ -1800,7 +1481,6 @@ main(void) nerrors += test_hrb_init_request(); nerrors += test_hrb_node_set(); nerrors += test_HMAC_SHA256(); - nerrors += test_parse_url(); nerrors += test_signing_key(); nerrors += test_tostringtosign();