diff --git a/hsds/attr_dn.py b/hsds/attr_dn.py index f9cf94ce..32994840 100755 --- a/hsds/attr_dn.py +++ b/hsds/attr_dn.py @@ -239,23 +239,21 @@ async def GET_Attribute(request): return resp -async def PUT_Attribute(request): - """ Handler for PUT /(obj)//attributes/ +async def PUT_Attributes(request): + """ Handler for PUT /(obj)//attributes """ log.request(request) app = request.app params = request.rel_url.query + log.debug(f"got PUT_Attributes params: {params}") obj_id = get_obj_id(request) - attr_name = request.match_info.get('name') - log.info("PUT attribute {} in {}".format(attr_name, obj_id)) - validateAttributeName(attr_name) - if not request.has_body: log.error("PUT_Attribute with no body") raise HTTPBadRequest(message="body expected") body = await request.json() + log.debug(f"PUT Attribute body: {body}") # remove this to avoid verbosity if "bucket" in params: bucket = params["bucket"] elif "bucket" in body: @@ -267,69 +265,90 @@ async def PUT_Attribute(request): if "replace" in params and params["replace"]: replace = True log.info("replace attribute") - datatype = None - shape = None - value = None - - if "type" not in body: - log.error("PUT attribute with no type in body") - raise HTTPInternalServerError() - - datatype = body["type"] - if "shape" not in body: - log.error("PUT attribute with no shape in body") - raise HTTPInternalServerError() - shape = body["shape"] + if "attributes" in body: + items = body["attributes"] + else: + # make it look like a dictionary anyway to make + # the processing more consistent + items = {} + if "name" not in body: + log.error("PUT attribute with no name in body") + raise HTTPInternalServerError() + attr_name = body["name"] + attribute = {} + if "type" in body: + attribute["type"] = body["type"] + if "shape" in body: + attribute["shape"] = body["shape"] + if "value" in body: + attribute["value"] = body["value"] + items[attr_name] = attribute + + # validate input + for attr_name in items: + validateAttributeName(attr_name) + attr_json = items[attr_name] + if "type" not in attr_json: + log.error("PUT attribute with no type in body") + raise HTTPInternalServerError() + if "shape" not in attr_json: + log.error("PUT attribute with no shape in body") + raise HTTPInternalServerError() - if "value" in body: - value = body["value"] + log.info(f"PUT {len(items)} attributes to obj_id: {obj_id} bucket: {bucket}") obj_json = await get_metadata_obj(app, obj_id, bucket=bucket) - log.debug(f"PUT attribute obj_id: {obj_id} bucket: {bucket} got json") - if "attributes" not in obj_json: log.error(f"unexpected obj data for id: {obj_id}") raise HTTPInternalServerError() attributes = obj_json["attributes"] - if attr_name in attributes and not replace: - # Attribute already exists, return a 409 - msg = f"Attempt to overwrite attribute: {attr_name} " - msg += f"in obj_id: {obj_id}" - log.warn(msg) - raise HTTPConflict() - - if replace and attr_name not in attributes: - # Replace requires attribute exists - msg = f"Attempt to update missing attribute: {attr_name} " - msg += f"in obj_id: {obj_id}" - log.warn() - raise HTTPNotFound() - - if replace: - orig_attr = attributes[attr_name] - create_time = orig_attr["created"] - else: - create_time = time.time() - # ok - all set, create attribute obj - attr_json = {"type": datatype, - "shape": shape, - "value": value, - "created": create_time} - attributes[attr_name] = attr_json + # check for conflicts, also set timestamp + create_time = time.time() + new_attribute = False # set this if we have any new attributes + for attr_name in items: + attribute = items[attr_name] + if attr_name in attributes: + log.debug(f"attribute {attr_name} exists") + if replace: + # don't change the create timestamp + log.debug(f"attribute {attr_name} exists, but will be updated") + old_item = attributes[attr_name] + attribute["created"] = old_item["created"] + else: + # Attribute already exists, return a 409 + msg = f"Attempt to overwrite attribute: {attr_name} " + msg += f"in obj_id: {obj_id}" + log.warn(msg) + raise HTTPConflict() + else: + # set the timestamp + log.debug(f"new attribute {attr_name}") + attribute["created"] = create_time + new_attribute = True + + # ok - all set, create the attributes + for attr_name in items: + log.debug(f"adding attribute {attr_name}") + attr_json = items[attr_name] + attributes[attr_name] = attr_json # write back to S3, save to metadata cache await save_metadata_obj(app, obj_id, obj_json, bucket=bucket) - resp_json = {} + if new_attribute: + status = 201 + else: + status = 200 - resp = json_response(resp_json, status=201) + resp_json = {"status": status} + + resp = json_response(resp_json, status=status) log.response(request, resp=resp) return resp - async def DELETE_Attribute(request): """HTTP DELETE method for /(obj)//attributes/ """ diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index edc26a6d..36c469bf 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -20,7 +20,7 @@ from .util.httpUtil import http_get, http_put, http_delete, http_post, getHref from .util.httpUtil import getAcceptType, jsonResponse -from .util.idUtil import isValidUuid, getDataNodeUrl, getCollectionForId +from .util.idUtil import isValidUuid, getDataNodeUrl, getCollectionForId, getRootObjId from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot @@ -206,65 +206,14 @@ async def GET_Attribute(request): log.response(request, resp=resp) return resp - -async def PUT_Attribute(request): - """HTTP method to create a new attribute""" - log.request(request) - app = request.app - # returns datasets|groups|datatypes - collection = getRequestCollectionName(request) - - obj_id = request.match_info.get("id") - if not obj_id: - msg = "Missing object id" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if not isValidUuid(obj_id, obj_class=collection): - msg = f"Invalid object id: {obj_id}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - attr_name = request.match_info.get("name") - log.debug(f"Attribute name: [{attr_name}]") - validateAttributeName(attr_name) - - log.info(f"PUT Attribute id: {obj_id} name: {attr_name}") - username, pswd = getUserPasswordFromRequest(request) - # write actions need auth - await validateUserPassword(app, username, pswd) - - if not request.has_body: - msg = "PUT Attribute with no body" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - try: - body = await request.json() - except JSONDecodeError: - msg = "Unable to load JSON body" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - domain = getDomainFromRequest(request) - if not isValidDomain(domain): - msg = f"Invalid domain: {domain}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - bucket = getBucketForDomain(domain) - - # get domain JSON - domain_json = await getDomainJson(app, domain) - verifyRoot(domain_json) - - root_id = domain_json["root"] - - # TBD - verify that the obj_id belongs to the given domain - await validateAction(app, domain, obj_id, username, "create") - +async def _getTypeFromRequest(app, body, obj_id=None, bucket=None): + """ return a type json from the request body """ if "type" not in body: msg = "PUT attribute with no type in body" log.warn(msg) raise HTTPBadRequest(reason=msg) datatype = body["type"] + log.debug(f"got datatype: {datatype} from request") if isinstance(datatype, str) and datatype.startswith("t-"): # Committed type - fetch type json from DN @@ -272,12 +221,13 @@ async def PUT_Attribute(request): log.debug(f"got ctypeid: {ctype_id}") ctype_json = await getObjectJson(app, ctype_id, bucket=bucket) log.debug(f"ctype {ctype_id}: {ctype_json}") + root_id = getRootObjId(obj_id) if ctype_json["root"] != root_id: msg = "Referenced committed datatype must belong in same domain" log.warn(msg) raise HTTPBadRequest(reason=msg) datatype = ctype_json["type"] - # add the ctype_id to type type + # add the ctype_id to the type datatype["id"] = ctype_id elif isinstance(datatype, str): try: @@ -304,8 +254,13 @@ async def PUT_Attribute(request): msg = f"ValueError creating type: {ve}" log.warn(msg) raise HTTPBadRequest(reason=msg) + + return datatype + + - dims = None +def _getShapeFromRequest(body): + """ get shape json from request body """ shape_json = {} if "shape" in body: shape_body = body["shape"] @@ -345,15 +300,17 @@ async def PUT_Attribute(request): # use H5S_SIMPLE as class if isinstance(shape_body, list) and len(shape_body) == 0: shape_json["class"] = "H5S_SCALAR" - dims = [1, ] else: shape_json["class"] = "H5S_SIMPLE" dims = getShapeDims(shape_body) shape_json["dims"] = dims else: shape_json["class"] = "H5S_SCALAR" - dims = [1, ] + + return shape_json +def _getValueFromRequest(body, data_type, data_shape): + dims = getShapeDims(data_shape) if "value" in body: if dims is None: msg = "Bad Request: data can not be included with H5S_NULL space" @@ -361,11 +318,9 @@ async def PUT_Attribute(request): raise HTTPBadRequest(reason=msg) value = body["value"] # validate that the value agrees with type/shape - arr_dtype = createDataType(datatype) # np datatype + arr_dtype = createDataType(data_type) # np datatype if len(dims) == 0: - np_dims = [ - 1, - ] + np_dims = [1, ] else: np_dims = dims log.debug(f"attribute dims: {np_dims}") @@ -382,28 +337,318 @@ async def PUT_Attribute(request): log.debug(f"Got: {arr.size} array elements") else: value = None + + return value + +async def _getAttributeFromRequest(app, req_json, obj_id=None, bucket=None): + attr_item = {} + attr_type = await _getTypeFromRequest(app, req_json, obj_id=obj_id, bucket=bucket) + attr_shape = _getShapeFromRequest(req_json) + attr_item = {"type": attr_type, "shape": attr_shape} + attr_value = _getValueFromRequest(req_json, attr_type, attr_shape) + if attr_value is not None: + attr_item["value"] = attr_value + else: + attr_item["value"] = None + return attr_item + + +async def _getAttributesFromRequest(request, req_json, obj_id=None, bucket=None): + """ read the given JSON dictinary and return dict of attribute json """ + + app = request.app + attr_items = {} + kwargs = {"obj_id": obj_id, "bucket": bucket} + if "attributes" in req_json: + attributes = req_json["attributes"] + if not isinstance(attributes, dict): + msg = f"expected list for attributes but got: {type(attributes)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + # read each attr_item and canonicalize the shape, type, verify value + for attr_name in attributes: + attr_json = attributes[attr_name] + attr_item = await _getAttributeFromRequest(app, attr_json, **kwargs) + attr_items[attr_name] = attr_item + + elif "type" in req_json: + # single attribute create - fake an item list + attr_item = await _getAttributeFromRequest(app, req_json, **kwargs) + if "name" in req_json: + attr_name = req_json["name"] + else: + attr_name = request.match_info.get("name") + validateAttributeName(attr_name) + if not attr_name: + msg = "Missing attribute name" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + attr_items[attr_name] = attr_item + else: + log.warn(f"no attribute defined in req_json: {req_json}") + + return attr_items + +async def PUT_Attribute(request): + """HTTP method to create a new attribute""" + log.request(request) + app = request.app + req_params = request.rel_url.query + # returns datasets|groups|datatypes + collection = getRequestCollectionName(request) + + obj_id = request.match_info.get("id") + if not obj_id: + msg = "Missing object id" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + if not isValidUuid(obj_id, obj_class=collection): + msg = f"Invalid object id: {obj_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + attr_name = request.match_info.get("name") + log.debug(f"Attribute name: [{attr_name}]") + validateAttributeName(attr_name) + + log.info(f"PUT Attribute id: {obj_id} name: {attr_name}") + username, pswd = getUserPasswordFromRequest(request) + # write actions need auth + await validateUserPassword(app, username, pswd) + + if not request.has_body: + msg = "PUT Attribute with no body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + try: + body = await request.json() + except JSONDecodeError: + msg = "Unable to load JSON body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + domain = getDomainFromRequest(request) + if not isValidDomain(domain): + msg = f"Invalid domain: {domain}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + bucket = getBucketForDomain(domain) + # get domain JSON + domain_json = await getDomainJson(app, domain) + verifyRoot(domain_json) + + # TBD - verify that the obj_id belongs to the given domain + await validateAction(app, domain, obj_id, username, "create") + + kwargs = {"obj_id": obj_id, "bucket": bucket} + attr_json = await _getAttributeFromRequest(app, body, **kwargs) + attr_json["name"] = attr_name + log.debug(f"got attr_json: {attr_json}") + # ready to add attribute now req = getDataNodeUrl(app, obj_id) - req += f"/{collection}/{obj_id}/attributes/{attr_name}" - log.info("PUT Attribute: " + req) + req += f"/{collection}/{obj_id}/attributes" + log.info(f"PUT Attribute: {req}") + + params = {} + if "replace" in req_params and req_params["replace"]: + # allow attribute to be overwritten + log.debug("setting replace for PUT Atttribute") + params["replace"] = 1 + else: + log.debug("replace is not set for PUT Attribute") - attr_json = {} - attr_json["type"] = datatype - attr_json["shape"] = shape_json - if value is not None: - attr_json["value"] = value - params = {} if bucket: params["bucket"] = bucket - put_rsp = await http_put(app, req, params=params, data=attr_json) + put_rsp = await http_put(app, req, data=attr_json, params=params) log.info(f"PUT Attribute resp: {put_rsp}") + if "status" in put_rsp: + status = put_rsp["status"] + else: + status = 201 + hrefs = [] # TBD req_rsp = {"hrefs": hrefs} # attribute creation successful - resp = await jsonResponse(request, req_rsp, status=201) + resp = await jsonResponse(request, req_rsp, status=status) + log.response(request, resp=resp) + return resp + + +async def PUT_Attributes(request): + """HTTP method to create a new attribute""" + log.request(request) + req_params = request.rel_url.query + app = request.app + status = None + + log.debug("PUT_Attributes") + + username, pswd = getUserPasswordFromRequest(request) + # write actions need auth + await validateUserPassword(app, username, pswd) + + if not request.has_body: + msg = "PUT Attribute with no body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + try: + body = await request.json() + except JSONDecodeError: + msg = "Unable to load JSON body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + log.debug(f"got body: {body}") + + domain = getDomainFromRequest(request) + if not isValidDomain(domain): + msg = f"Invalid domain: {domain}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + bucket = getBucketForDomain(domain) + + params = {} + if "replace" in req_params and req_params["replace"]: + # allow attribute to be overwritten + log.debug("setting replace for PUT Atttribute") + params["replace"] = 1 + else: + log.debug("replace is not set for PUT Attribute") + + if bucket: + params["bucket"] = bucket + + # get domain JSON + domain_json = await getDomainJson(app, domain) + verifyRoot(domain_json) + + req_obj_id = request.match_info.get("id") + if not req_obj_id: + req_obj_id = domain_json["root"] + kwargs = {"obj_id": req_obj_id, "bucket": bucket} + attr_items = await _getAttributesFromRequest(request, body, **kwargs) + + if attr_items: + log.debug(f"PUT Attribute {len(attr_items)} attibutes to add") + else: + log.debug("no attributes defined yet") + + # next, sort out where these attributes are going to + + obj_ids = {} + if "obj_ids" in body: + body_ids = body["obj_ids"] + if isinstance(body_ids, list): + # multi cast the attributes + if not attr_items: + msg = "no attributes provided" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + for obj_id in body_ids: + if not isValidUuid(obj_id): + msg = f"Invalid object id: {obj_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + obj_ids[obj_id] = attr_items + + msg = f"{len(attr_items)} attributes will be multicast to " + msg += f"{len(obj_ids)} objects" + log.info(msg) + elif isinstance(body_ids, dict): + if attr_items: + msg = "attributes defined outside the obj_ids dict" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + for obj_id in body_ids: + if not isValidUuid(obj_id): + msg = f"Invalid object id: {obj_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + id_json = body_ids[obj_id] + kwargs = {"obj_id": obj_id, "bucket": bucket} + obj_items = _getAttributesFromRequest(request, id_json, **kwargs) + if obj_items: + obj_ids[obj_id] = obj_items + + # write different attributes to different objects + msg = f"put attributes over {len(obj_ids)} objects" + else: + msg = f"unexpected type for obj_ids: {type(obj_ids)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + # get the object id from the request + obj_id = request.match_info.get("id") + if not obj_id: + msg = "Missing object id" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + obj_ids[obj_id] = attr_items # make it look like a list for consistency + + log.debug(f"obj_ids: {obj_ids}") + + # TBD - verify that the obj_id belongs to the given domain + await validateAction(app, domain, req_obj_id, username, "create") + + count = len(obj_ids) + if count == 0: + msg = "no obj_ids defined" + log.warn(f"PUT_Attributes: {msg}") + raise HTTPBadRequest(reason=msg) + elif count == 1: + # just send one PUT Attributes request to the dn + obj_id = list(obj_ids.keys())[0] + attr_json = obj_ids[obj_id] + log.debug(f"got attr_json: {attr_json}") + req = getDataNodeUrl(app, obj_id) + collection = getCollectionForId(obj_id) + req += f"/{collection}/{obj_id}/attributes" + log.info(f"PUT Attribute: {req}") + data = {"attributes": attr_json} + put_rsp = await http_put(app, req, data=data, params=params) + log.info(f"PUT Attribute resp: {put_rsp}") + + if "status" in put_rsp: + status = put_rsp["status"] + else: + status = 201 + else: + # put multi obj + + # mixin some additonal kwargs + crawler_params = {"follow_links": False} + if bucket: + crawler_params["bucket"] = bucket + + crawler = DomainCrawler(app, obj_ids, action="put_attr", params=crawler_params) + await crawler.crawl() + + status = 200 + for obj_id in crawler._obj_dict: + item = crawler._obj_dict[obj_id] + log.debug(f"got item from crawler for {obj_id}: {item}") + if "status" in item: + item_status = item["status"] + if item_status > status: + # return the more sever error + log.debug(f"setting status to {item_status}") + status = item_status + + log.info("DomainCrawler done for put_attrs action") + + hrefs = [] # TBD + req_rsp = {"hrefs": hrefs} + # attribute creation successful + log.debug(f"PUT_Attributes returning status: {status}") + resp = await jsonResponse(request, req_rsp, status=status) log.response(request, resp=resp) return resp @@ -639,7 +884,7 @@ async def PUT_AttributeValue(request): req = getDataNodeUrl(app, obj_id) req += "/" + collection + "/" + obj_id + "/attributes/" + attr_name log.debug("get Attribute: " + req) - params = {} + params = {"replace": 1} # allow overwrites if bucket: params["bucket"] = bucket dn_json = await http_get(app, req, params=params) @@ -740,12 +985,13 @@ async def PUT_AttributeValue(request): # ready to add attribute now attr_json = {} + attr_json["name"] = attr_name attr_json["type"] = type_json attr_json["shape"] = attr_shape attr_json["value"] = value req = getDataNodeUrl(app, obj_id) - req += "/" + collection + "/" + obj_id + "/attributes/" + attr_name + req += "/" + collection + "/" + obj_id + "/attributes" log.info(f"PUT Attribute Value: {req}") dn_json["value"] = value @@ -765,7 +1011,7 @@ async def PUT_AttributeValue(request): async def _get_attributes(app, obj_id, attr_names, - include_data=False, + IncludeData=False, ignore_nan=False, bucket=None ): @@ -775,7 +1021,7 @@ async def _get_attributes(app, obj_id, attr_names, req = f"{node_url}/{collection}/{obj_id}/attributes" log.debug(f"POST Attributes: {req}") params = {} - if include_data: + if IncludeData: params["IncludeData"] = 1 if ignore_nan: params["ignore_nan"] = 1 @@ -806,8 +1052,6 @@ async def POST_Attributes(request): log.request(request) app = request.app log.info("POST_Attributes") - # returns datasets|groups|datatypes - collection = getRequestCollectionName(request) if not request.has_body: msg = "POST Attributes with no body" @@ -832,52 +1076,53 @@ async def POST_Attributes(request): if "obj_ids" in body: obj_ids = body["obj_ids"] - if not isinstance(obj_ids, list): - msg = f"expected list for attr_names but got: {type(obj_ids)}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) else: obj_ids = None if attr_names is None and obj_ids is None: - # should have an items list with id and attr_names for each item - if "items" not in body: - msg = "items list must be provided if attr_names and obj_ids are not" + msg = "expected body to contain one of attr_names, obj_ids keys" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + # construct an item list from attr_names and obj_ids + items = {} + if obj_ids is None: + obj_id = request.match_info.get("id") + if not obj_id: + msg = "no object id in request" log.warn(msg) raise HTTPBadRequest(reason=msg) - items = body["items"] - else: - # construct an item list from attr_names and obj_ids - items = [] - if obj_ids is None: - obj_id = request.match_info.get("id") - if not obj_id: - msg = "no object id in request" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - obj_ids = [obj_id, ] + items[obj_id] = attr_names + elif isinstance(obj_ids, list): if attr_names is None: - msg = "attr_names not set in request body" + msg = "attr_names must be provided if obj_ids is a list" log.warn(msg) raise HTTPBadRequest(reason=msg) for obj_id in obj_ids: - item = {"id": obj_id, "attr_names": attr_names} - items.append(item) + items[obj_id] = attr_names + elif isinstance(obj_ids, dict): + if attr_names is not None: + msg = "attr_names must not be proved if obj_ids is a dict" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + for obj_id in obj_ids: + names_for_id = obj_ids[obj_id] + if not isinstance(names_for_id, list): + msg = "expected list of attribute names" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + items[obj_id] = names_for_id log.debug(f"POST Attributes items: {items}") # do a check that everything is as it should with the item list - for item in items: - if "id" not in item or "attr_names" not in item: - msg = f"invalid item for POST Attributes: {item}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - obj_id = item["id"] + for obj_id in items: if not isValidUuid(obj_id): msg = f"Invalid object id: {obj_id}" log.warn(msg) - raise HTTPBadRequest(reason=msg) - attr_names = item["attr_names"] + + attr_names = items[obj_id] + if not isinstance(attr_names, list): msg = f"expected list for attr_names but got: {type(attr_names)}" log.warn(msg) @@ -919,7 +1164,7 @@ async def POST_Attributes(request): include_data = True if "ignore_nan" in params and params["ignore_nan"]: ignore_nan = True - kwargs = {"bucket": bucket, "include_data": include_data, "ignore_nan": ignore_nan} + kwargs = {"bucket": bucket, "IncludeData": include_data, "ignore_nan": ignore_nan} resp_json = {} @@ -929,9 +1174,9 @@ async def POST_Attributes(request): raise HTTPBadRequest(reason=msg) elif len(items) == 1: # just make a request the datanode - item = items[0] - obj_id = item["id"] - attr_names = item["attr_names"] + obj_id = list(items.keys())[0] + collection = getCollectionForId(obj_id) + attr_names = items[obj_id] attributes = await _get_attributes(app, obj_id, attr_names, **kwargs) # mixin hrefs @@ -943,10 +1188,13 @@ async def POST_Attributes(request): resp_json["attributes"] = attributes else: # get multi obj - # mixin some additonal kwargs - kwargs["follow_links"] = False + # don't follow links! + crawler_params = {"follow_links": False} + # mixin kwargs + for k in kwargs: + crawler_params[k] = kwargs[k] - crawler = DomainCrawler(app, items, **kwargs) + crawler = DomainCrawler(app, items, action="get_attr", params=crawler_params) await crawler.crawl() msg = f"DomainCrawler returning: {len(crawler._obj_dict)} objects" @@ -955,11 +1203,12 @@ async def POST_Attributes(request): # mixin hrefs for obj_id in attributes.keys(): obj_attributes = attributes[obj_id] + collection = getCollectionForId(obj_id) for attribute in obj_attributes: attr_name = attribute["name"] attr_href = f"/{collection}/{obj_id}/attributes/{attr_name}" attribute["href"] = getHref(request, attr_href) - log.debug(f"got attributes: {attributes}") + log.debug(f"got {len(attributes)} attributes") resp_json["attributes"] = attributes hrefs = [] diff --git a/hsds/datanode.py b/hsds/datanode.py index 75bfe058..bb01cf16 100644 --- a/hsds/datanode.py +++ b/hsds/datanode.py @@ -32,7 +32,7 @@ from .group_dn import POST_Root from .link_dn import GET_Links, GET_Link, PUT_Link, DELETE_Link from .attr_dn import GET_Attributes, POST_Attributes, GET_Attribute -from .attr_dn import PUT_Attribute, DELETE_Attribute +from .attr_dn import PUT_Attributes, DELETE_Attribute from .ctype_dn import GET_Datatype, POST_Datatype, DELETE_Datatype from .dset_dn import GET_Dataset, POST_Dataset, DELETE_Dataset from .dset_dn import PUT_DatasetShape @@ -66,7 +66,7 @@ async def init(): app.router.add_route("POST", "/groups/{id}/attributes", POST_Attributes) app.router.add_route("GET", "/groups/{id}/attributes/{name}", GET_Attribute) app.router.add_route("DELETE", "/groups/{id}/attributes/{name}", DELETE_Attribute) - app.router.add_route("PUT", "/groups/{id}/attributes/{name}", PUT_Attribute) + app.router.add_route("PUT", "/groups/{id}/attributes", PUT_Attributes) app.router.add_route("GET", "/datatypes/{id}", GET_Datatype) app.router.add_route("DELETE", "/datatypes/{id}", DELETE_Datatype) app.router.add_route("POST", "/datatypes", POST_Datatype) @@ -74,8 +74,9 @@ async def init(): app.router.add_route("POST", "/datatypes/{id}/attributes", POST_Attributes) app.router.add_route("GET", "/datatypes/{id}/attributes/{name}", GET_Attribute) app.router.add_route("DELETE", "/datatypes/{id}/attributes/{name}", DELETE_Attribute) - app.router.add_route("PUT", "/datatypes/{id}/attributes/{name}", PUT_Attribute) + app.router.add_route("PUT", "/datatypes/{id}/attributes", PUT_Attributes) app.router.add_route("GET", "/datasets/{id}", GET_Dataset) + # app.router.add_route("GET", "/datasets/", GET_Dataset) app.router.add_route("DELETE", "/datasets/{id}", DELETE_Dataset) app.router.add_route("POST", "/datasets", POST_Dataset) app.router.add_route("PUT", "/datasets/{id}/shape", PUT_DatasetShape) @@ -83,7 +84,7 @@ async def init(): app.router.add_route("POST", "/datasets/{id}/attributes", POST_Attributes) app.router.add_route("GET", "/datasets/{id}/attributes/{name}", GET_Attribute) app.router.add_route("DELETE", "/datasets/{id}/attributes/{name}", DELETE_Attribute) - app.router.add_route("PUT", "/datasets/{id}/attributes/{name}", PUT_Attribute) + app.router.add_route("PUT", "/datasets/{id}/attributes", PUT_Attributes) app.router.add_route("PUT", "/chunks/{id}", PUT_Chunk) app.router.add_route("GET", "/chunks/{id}", GET_Chunk) app.router.add_route("POST", "/chunks/{id}", POST_Chunk) diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index 108eff07..c782b0e4 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -328,9 +328,7 @@ async def get_metadata_obj(app, obj_id, bucket=None): if isValidDomain(obj_id): domain_bucket = getBucketForDomain(obj_id) if bucket and domain_bucket and bucket != domain_bucket: - msg = ( - f"get_metadata_obj for domain: {obj_id} but bucket param was: {bucket}" - ) + msg = f"get_metadata_obj for domain: {obj_id} but bucket param was: {bucket}" log.error(msg) raise HTTPInternalServerError() if not bucket: diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index e1199f50..b3a368f4 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -10,13 +10,17 @@ # request a copy from help@hdfgroup.org. # ############################################################################## # -# service node of hsds cluster +# domain crawler # import asyncio +from aiohttp.web_exceptions import HTTPServiceUnavailable, HTTPConflict, HTTPBadRequest +from aiohttp.web_exceptions import HTTPInternalServerError, HTTPNotFound, HTTPGone + + from .util.idUtil import getCollectionForId, getDataNodeUrl -from .util.httpUtil import http_post +from .util.httpUtil import http_post, http_put from .servicenode_lib import getObjectJson from . import hsds_logger as log @@ -27,23 +31,16 @@ def __init__( self, app, objs, - bucket=None, - include_attrs=True, - attr_names=[], - include_data=False, - follow_links=True, - ignore_nan=False, + action="get_obj", + params=None, max_tasks=40, max_objects_limit=0, ): log.info(f"DomainCrawler.__init__ root_id: {len(objs)} objs") self._app = app + self._action = action self._max_objects_limit = max_objects_limit - self._include_attrs = include_attrs - self._attr_names = attr_names - self._include_data = include_data - self._follow_links = follow_links - self._ignore_nan = ignore_nan + self._params = params self._max_tasks = max_tasks self._q = asyncio.Queue() self._obj_dict = {} @@ -51,10 +48,15 @@ def __init__( if not objs: log.error("no objs for crawler to crawl!") raise ValueError() - for obj in objs: - log.debug(f"adding {obj} to the queue") - self._q.put_nowait(obj) - self._bucket = bucket + + for obj_id in objs: + log.debug(f"adding {obj_id} to the queue") + self._q.put_nowait(obj_id) + if isinstance(objs, dict): + self._objs = objs + else: + self._objs = None + async def get_attributes(self, obj_id, attr_names): # get the given attributes for the obj_id @@ -64,60 +66,143 @@ async def get_attributes(self, obj_id, attr_names): req += f"/{collection}/{obj_id}/attributes" log.debug(f"POST Attributes: {req}") params = {} - if self._include_data: - params["IncludeData"] = 1 - if self._ignore_nan: - params["ignore_nan"] = 1 - if self._bucket: - params["bucket"] = self._bucket + for key in ("IncludeData", "ignore_nan", "bucket"): + if key in self._params: + value = self._params[key] + if isinstance(value, bool): + if value: + params[key] = 1 # http doesn't like booleans + else: + params[key] = self._params[key] data = {"attributes": attr_names} log.debug(f"using params: {params}") - dn_json = await http_post(self._app, req, data=data, params=params) - log.debug(f"got attributes json from dn for obj_id: {dn_json}") - if "attributes" not in dn_json: - log.error(f"DomainCrawler - expected attributes in json, but got: {dn_json}") - return - attributes = dn_json["attributes"] - if len(attributes) < len(attr_names): - msg = f"POST attributes requested {len(attr_names)}, " - msg += f"but only {len(attributes)} were returned" - log.warn(msg) - self._obj_dict[obj_id] = attributes + dn_json = None + status = 200 + try: + dn_json = await http_post(self._app, req, data=data, params=params) + except HTTPBadRequest: + status = 400 + except HTTPNotFound: + status = 404 + except HTTPGone: + status = 410 + except HTTPServiceUnavailable: + status = 503 + except HTTPInternalServerError: + status = 500 + except Exception as e: + log.error(f"unexpected exception from post request: {e}") + + if status == 200: + log.debug(f"got attributes json from dn for obj_id: {dn_json}") + if "attributes" not in dn_json: + log.error(f"DomainCrawler - expected attributes in json, but got: {dn_json}") + return + attributes = dn_json["attributes"] + + if len(attributes) < len(attr_names): + msg = f"POST attributes requested {len(attr_names)}, " + msg += f"but only {len(attributes)} were returned" + log.warn(msg) + self._obj_dict[obj_id] = attributes + else: + log.warn(f"Domain crawler - got {status} status for obj_id {obj_id}") + self._obj_dict[obj_id] = {"status": status} + + + async def put_attributes(self, obj_id, attr_items): + # get the given attributes for the obj_id + log.debug(f"put_attributes for {obj_id}, {len(attr_items)} attributes") + req = getDataNodeUrl(self._app, obj_id) + collection = getCollectionForId(obj_id) + req += f"/{collection}/{obj_id}/attributes" + params = {} + if "bucket" in self._params: + params["bucket"] = self._params["bucket"] + data = {"attributes": attr_items} + status = None + put_rsp = None + try: + put_rsp = await http_put(self._app, req, data=data, params=params) + except HTTPConflict: + log.warn("DomainCrawler - got HTTPConflict from http_put") + status = 409 + except HTTPServiceUnavailable: + status = 503 + except HTTPInternalServerError: + status = 500 + except Exception as e: + log.error(f"unexpected exception {e}") + if put_rsp is not None: + log.info(f"PUT Attributes resp: {put_rsp}") + if "status" in put_rsp: + status = put_rsp["status"] + else: + status = 201 + log.debug(f"DomainCrawler fetch for {obj_id} - returning status: {status}") + self._obj_dict[obj_id] = {"status": status} + async def get_obj_json(self, obj_id): - # get the given obj_json for the obj_id - kwargs = { - "include_links": self._follow_links, - "include_attrs": self._include_attrs, - "bucket": self._bucket, - } - obj_json = await getObjectJson(self._app, obj_id, **kwargs) + """ get the given obj_json for the obj_id. + for each group found, search the links if include_links is set """ + kwargs = {} + for k in ("include_links", "include_attrs", "bucket"): + if k in self._params: + kwargs[k] = self._params[k] + + obj_json = None + status = 200 + try: + obj_json = await getObjectJson(self._app, obj_id, **kwargs) + except HTTPNotFound: + status = 404 + except HTTPServiceUnavailable: + status = 503 + except HTTPInternalServerError: + status = 500 + except Exception as e: + log.error(f"unexpected exception {e}") + status = 500 + + if obj_json is None: + msg = f"DomainCrawler - getObjectJson for {obj_id} " + if status >= 500: + msg += f"failed, status: {status}" + log.error(msg) + else: + msg += f"returned status: {status}" + log.warn(msg) + return + log.debug(f"DomainCrawler - got json for {obj_id}") # if including links, we need link count - if self._follow_links and "link_count" in obj_json: + if self._params.get("follow_links") and "link_count" in obj_json: del obj_json["link_count"] # similarly, don't need attributeCount if we have the attributes - if self._include_attrs: + if self._params.get("include_attrs"): del obj_json["attributeCount"] self._obj_dict[obj_id] = obj_json # store the obj_json - # if this is a group, iterate through all the hard links and + # for groups iterate through all the hard links and # add to the lookup ids set - if getCollectionForId(obj_id) == "groups" and self._follow_links: + collection = getCollectionForId(obj_id) + if collection == "groups" and self._params.get("follow_links"): links = obj_json["links"] log.debug(f"DomainCrawler links: {links}") for title in links: log.debug(f"DomainCrawler - got link: {title}") link_obj = links[title] num_objects = len(self._obj_dict) - if self._max_objects_limit > 0: - if num_objects >= self._max_objects_limit: + if self._params.get("max_objects_limit") is not None: + max_objects_limit = self._params["max_objects_limit"] + if num_objects >= max_objects_limit: msg = "DomainCrawler reached limit of " - msg += f"{self._max_objects_limit}" + msg += f"{max_objects_limit}" log.info(msg) break if link_obj["class"] != "H5L_TYPE_HARD": @@ -128,7 +213,7 @@ async def get_obj_json(self, obj_id): # haven't seen this object yet, get obj json log.debug(f"DomainCrawler - adding link_id: {link_id}") self._obj_dict[link_id] = {} # placeholder for obj id - self._q.put_nowait({"id": link_id}) + self._q.put_nowait(link_id) async def crawl(self): workers = [asyncio.Task(self.work()) for _ in range(self._max_tasks)] @@ -151,22 +236,38 @@ async def work(self): await self.fetch(obj_id) self._q.task_done() - async def fetch(self, obj): - if "id" not in obj: - log.error(f"DomainCrawler - expected to find id key, but got: {obj}") - return - obj_id = obj["id"] + async def fetch(self, obj_id): log.debug(f"DomainCrawler fetch for id: {obj_id}") - if not self._attr_names and "attr_names" not in obj: + log.debug(f"action: {self._action}") + if self._action == "get_obj": + log.debug("DomainCrawler - get obj") # just get the obj json await self.get_obj_json(obj_id) - else: + elif self._action == "get_attr": + log.debug("DomainCrawler - get attributes") # fetch the given attributes - if "attr_names" in obj: - attr_names = obj["attr_names"] - else: - attr_names = self._attr_names + if self._objs is None: + log.error("DomainCrawler - self._objs not set") + return + if obj_id not in self._objs: + log.error(f"couldn't find {obj_id} in self._objs") + return + attr_names = self._objs[obj_id] + log.debug(f"DomainCrawler - got attribute names: {attr_names}") await self.get_attributes(obj_id, attr_names) + elif self._action == "put_attr": + log.debug("DomainCrawler - put attributes") + # write attributes + if self._objs and obj_id not in self._objs: + log.error(f"couldn't find {obj_id} in self._objs") + return + attr_items = self._objs[obj_id] + log.debug(f"got attr_items: {attr_items}") + + await self.put_attributes(obj_id, attr_items) + else: + msg = f"DomainCrawler: unexpected action: {self._action}" + log.error(msg) msg = f"DomainCrawler - fetch complete obj_id: {obj_id}, " msg += f"{len(self._obj_dict)} objects found" diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py index 3f790929..fbb833a7 100755 --- a/hsds/domain_sn.py +++ b/hsds/domain_sn.py @@ -112,12 +112,14 @@ async def getDomainObjects(app, root_id, include_attrs=False, bucket=None): log.info(f"getDomainObjects for root: {root_id}") max_objects_limit = int(config.get("domain_req_max_objects_limit", default=500)) - kwargs = { + crawler_params = { "include_attrs": include_attrs, "bucket": bucket, + "follow_links": True, "max_objects_limit": max_objects_limit, } - crawler = DomainCrawler(app, [{"id": root_id}, ], **kwargs) + + crawler = DomainCrawler(app, [root_id, ], action="get_obj", params=crawler_params) await crawler.crawl() if len(crawler._obj_dict) >= max_objects_limit: msg = "getDomainObjects - too many objects: " diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 26e4bf5a..7df44873 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -716,6 +716,7 @@ async def POST_Dataset(request): log.warn(msg) raise HTTPBadRequest(reason=msg) + log.debug(f"got body: {body}") # get domain, check authorization domain = getDomainFromRequest(request) if not isValidDomain(domain): @@ -725,6 +726,7 @@ async def POST_Dataset(request): bucket = getBucketForDomain(domain) domain_json = await getDomainJson(app, domain, reload=True) + log.debug(f"got domain_json: {domain_json}") root_id = domain_json["root"] # throws exception if not allowed @@ -741,6 +743,7 @@ async def POST_Dataset(request): raise HTTPBadRequest(reason=msg) datatype = body["type"] + log.debug(f"got datatype: {datatype}") if isinstance(datatype, str) and datatype.startswith("t-"): # Committed type - fetch type json from DN ctype_id = datatype @@ -793,11 +796,10 @@ async def POST_Dataset(request): shape_json["class"] = "H5S_SCALAR" else: shape = body["shape"] + log.debug(f"got shape: {shape}") if isinstance(shape, int): shape_json["class"] = "H5S_SIMPLE" - dims = [ - shape, - ] + dims = [shape, ] shape_json["dims"] = dims rank = 1 elif isinstance(shape, str): @@ -1040,6 +1042,7 @@ async def POST_Dataset(request): link_title = None if "link" in body: link_body = body["link"] + log.debug(f"got link_body: {link_body}") if "id" in link_body: link_id = link_body["id"] if "name" in link_body: @@ -1051,7 +1054,7 @@ async def POST_Dataset(request): await validateAction(app, domain, link_id, username, "create") dset_id = createObjId("datasets", rootid=root_id) - log.info(f"new dataset id: {dset_id}") + log.info(f"new dataset id: {dset_id}") dataset_json = { "id": dset_id, diff --git a/hsds/servicenode.py b/hsds/servicenode.py index 7161916f..b932a6e0 100755 --- a/hsds/servicenode.py +++ b/hsds/servicenode.py @@ -30,7 +30,7 @@ from .domain_sn import GET_ACL, GET_ACLs, PUT_ACL from .group_sn import GET_Group, POST_Group, DELETE_Group from .link_sn import GET_Links, GET_Link, PUT_Link, DELETE_Link -from .attr_sn import GET_Attributes, GET_Attribute, PUT_Attribute +from .attr_sn import GET_Attributes, GET_Attribute, PUT_Attribute, PUT_Attributes from .attr_sn import DELETE_Attribute, GET_AttributeValue, PUT_AttributeValue, POST_Attributes from .ctype_sn import GET_Datatype, POST_Datatype, DELETE_Datatype from .dset_sn import GET_Dataset, POST_Dataset, DELETE_Dataset @@ -44,84 +44,127 @@ async def init(): # call app.router.add_get() here to add node-specific routes # + + # + # domain paths + # path = "/" app.router.add_route("GET", path, GET_Domain) app.router.add_route("DELETE", path, DELETE_Domain) app.router.add_route("PUT", path, PUT_Domain) + path = "/domains" app.router.add_route("GET", path, GET_Domains) + + # + # acls paths + # path = "/acls/{username}" app.router.add_route("GET", path, GET_ACL) app.router.add_route("PUT", path, PUT_ACL) + path = "/acls" app.router.add_route("GET", path, GET_ACLs) + + # + # groups paths + # path = "/groups/" app.router.add_route("GET", path, GET_Group) + path = "/groups" app.router.add_route("GET", path, GET_Groups) app.router.add_route("POST", path, POST_Group) + path = "/groups/{id}" app.router.add_route("GET", path, GET_Group) app.router.add_route("DELETE", path, DELETE_Group) + path = "/groups/{id}/links" app.router.add_route("GET", path, GET_Links) + path = "/groups/{id}/links/{title}" app.router.add_route("GET", path, GET_Link) app.router.add_route("DELETE", path, DELETE_Link) app.router.add_route("PUT", path, PUT_Link) - path = "/attributes" - app.router.add_route("POST", path, POST_Attributes) - path = "/groups/{id}/attributes" + + path = "/groups/{id}/attributes" app.router.add_route("GET", path, GET_Attributes) app.router.add_route("POST", path, POST_Attributes) + app.router.add_route("PUT", path, PUT_Attributes) + path = "/groups/{id}/attributes/{name}" app.router.add_route("GET", path, GET_Attribute) app.router.add_route("DELETE", path, DELETE_Attribute) app.router.add_route("PUT", path, PUT_Attribute) + path = "/groups/{id}/attributes/{name}/value" app.router.add_route("GET", path, GET_AttributeValue) app.router.add_route("PUT", path, PUT_AttributeValue) + + # + # datatypes paths + # path = "/datatypes" app.router.add_route("GET", path, GET_Datatypes) app.router.add_route("POST", path, POST_Datatype) + path = "/datatypes/" app.router.add_route("GET", path, GET_Datatype) + path = "/datatypes/{id}" app.router.add_route("GET", path, GET_Datatype) app.router.add_route("DELETE", path, DELETE_Datatype) + path = "/datatypes/{id}/attributes" app.router.add_route("GET", path, GET_Attributes) app.router.add_route("POST", path, POST_Attributes) + app.router.add_route("PUT", path, PUT_Attributes) + path = "/datatypes/{id}/attributes/{name}" app.router.add_route("GET", path, GET_Attribute) app.router.add_route("DELETE", path, DELETE_Attribute) app.router.add_route("PUT", path, PUT_Attribute) + path = "/datatypes/{id}/attributes/{name}/value" app.router.add_route("GET", path, GET_AttributeValue) app.router.add_route("PUT", path, PUT_AttributeValue) + + # + # datasets paths + # path = "/datasets/{id}" app.router.add_route("GET", path, GET_Dataset) app.router.add_route("DELETE", path, DELETE_Dataset) + path = "/datasets/" app.router.add_route("GET", path, GET_Dataset) + path = "/datasets" app.router.add_route("GET", path, GET_Datasets) app.router.add_route("POST", path, POST_Dataset) + path = "/datasets/{id}/shape" app.router.add_route("GET", path, GET_DatasetShape) app.router.add_route("PUT", path, PUT_DatasetShape) + path = "/datasets/{id}/type" app.router.add_route("GET", path, GET_DatasetType) + path = "/datasets/{id}/attributes" app.router.add_route("GET", path, GET_Attributes) app.router.add_route("POST", path, POST_Attributes) + app.router.add_route("PUT", path, PUT_Attributes) + path = "/datasets/{id}/attributes/{name}" app.router.add_route("GET", path, GET_Attribute) app.router.add_route("DELETE", path, DELETE_Attribute) app.router.add_route("PUT", path, PUT_Attribute) + path = "/datasets/{id}/attributes/{name}/value" app.router.add_route("GET", path, GET_AttributeValue) app.router.add_route("PUT", path, PUT_AttributeValue) + path = "/datasets/{id}/value" app.router.add_route("PUT", path, PUT_Value) app.router.add_route("GET", path, GET_Value) diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 330f8aed..355b21f6 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -366,9 +366,7 @@ def getShapeDims(shape): """ dims = None if isinstance(shape, int): - dims = [ - shape, - ] + dims = [shape, ] elif isinstance(shape, list) or isinstance(shape, tuple): dims = shape # can use as is elif isinstance(shape, str): diff --git a/tests/integ/attr_test.py b/tests/integ/attr_test.py index 00186a70..3aa1277d 100644 --- a/tests/integ/attr_test.py +++ b/tests/integ/attr_test.py @@ -295,6 +295,11 @@ def testObjAttr(self): rsp = self.session.put(req, data=json.dumps(attr_payload), headers=headers) self.assertEqual(rsp.status_code, 409) # conflict + # set the replace param and we should get a 200 + params = {"replace": 1} + rsp = self.session.put(req, params=params, data=json.dumps(attr_payload), headers=headers) + self.assertEqual(rsp.status_code, 200) # OK + # delete the attribute rsp = self.session.delete(req, headers=headers) self.assertEqual(rsp.status_code, 200) # OK @@ -1418,9 +1423,7 @@ def testNaNAttributeValue(self): helper.validateId(root_uuid) # create attr - value = [ - np.NaN, - ] * 6 + value = [np.NaN, ] * 6 data = {"type": "H5T_IEEE_F32LE", "shape": 6, "value": value} attr_name = "nan_arr_attr" req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name @@ -1654,10 +1657,10 @@ def testPostAttributeMultiple(self): self.assertEqual(attrJson["value"], expected_values[i]) # test with unique attr names per obj id - items = [] - items.append({"id": root_id, "attr_names": [attr_names[0], attr_names[1]]}) - items.append({"id": dset_id, "attr_names": [attr_names[1], ]}) - data = {"items": items} + items = {} + items[root_id] = [attr_names[0], attr_names[1]] + items[dset_id] = [attr_names[1], ] + data = {"obj_ids": items} req = helper.getEndpoint() + "/groups/" + root_id + "/attributes" rsp = self.session.post(req, data=json.dumps(data), headers=headers) self.assertEqual(rsp.status_code, 200) @@ -1679,10 +1682,10 @@ def testPostAttributeMultiple(self): self.assertEqual(dset_attr["name"], "attr2") # try asking for a non-existent attribute - items = [] - items.append({"id": root_id, "attr_names": [attr_names[0], attr_names[1]]}) - items.append({"id": dset_id, "attr_names": [attr_names[1], "foobar"]}) - data = {"items": items} + items = {} + items[root_id] = [attr_names[0], attr_names[1]] + items[dset_id] = [attr_names[1], "foobar"] + data = {"obj_ids": items} req = helper.getEndpoint() + "/groups/" + root_id + "/attributes" rsp = self.session.post(req, data=json.dumps(data), headers=headers) self.assertEqual(rsp.status_code, 200) @@ -1700,6 +1703,142 @@ def testPostAttributeMultiple(self): self.assertEqual(len(root_attrs), 2) dset_attrs = attributes[dset_id] self.assertEqual(len(dset_attrs), 1) # one of the ones we asked for didn't exist + + def testPutAttributeMultiple(self): + print("testPutAttributeMultiple", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + req = self.endpoint + "/" + + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_id = rspJson["root"] + + # create a dataset + req = self.endpoint + "/datasets" + data = {"type": "H5T_IEEE_F32LE"} + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["attributeCount"], 0) + dset_id = rspJson["id"] + self.assertTrue(helper.validateId(dset_id)) + + # link new obj as '/dset' + req = self.endpoint + "/groups/" + root_id + "/links/dset1" + payload = {"id": dset_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # created + + # get obj and verify it has no attributes + req = self.endpoint + "/datasets/" + dset_id + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["attributeCount"], 0) # no attributes + + # create some groups + grp_count = 3 + + grp_names = [f"group{i+1}" for i in range(grp_count)] + grp_ids = [] + + for grp_name in grp_names: + # create sub_groups + req = self.endpoint + "/groups" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["attributeCount"], 0) + grp_id = rspJson["id"] + self.assertTrue(helper.validateId(grp_id)) + grp_ids.append(grp_id) + + # link new obj as '/grp_name' + req = self.endpoint + "/groups/" + root_id + "/links/" + grp_name + payload = {"id": grp_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # created + + # get obj and verify it has no attributes + req = self.endpoint + "/groups/" + grp_id + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["attributeCount"], 0) # no attributes + + # setup some attributes to write + attr_count = 4 + attributes = {} + extent = 10 + for i in range(attr_count): + value = [i*10 + j for j in range(extent)] + data = {"type": "H5T_STD_I32LE", "shape": extent, "value": value} + attr_name = f"attr{i+1:04d}" + attributes[attr_name] = data + """ + # write attributes to the dataset + data = {"attributes": attributes} + req = self.endpoint + "/datasets/" + dset_id + "/attributes" + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # do a get on the attributes + params = {"IncludeData": 1} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + ret_attrs = rspJson["attributes"] + self.assertEqual(len(ret_attrs), attr_count) + for i in range(attr_count): + attr = ret_attrs[i] + self.assertTrue("name" in attr) + self.assertEqual(attr["name"], f"attr{i+1:04d}") + self.assertTrue("value" in attr) + attr_value = attr["value"] + self.assertEqual(len(attr_value), extent) + self.assertEqual(attr_value, [i*10+j for j in range(extent)]) + + # try writing again, should get 409 + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 409) + """ + + # write attributes to the three group objects + print(grp_ids) + data = {"obj_ids": grp_ids, "attributes": attributes} + req = self.endpoint + "/groups/" + root_id + "/attributes" + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # do a get attributes on the three group objects to verify + for grp_id in grp_ids: + # do a get on the attributes + params = {"IncludeData": 1} + req = self.endpoint + "/groups/" + grp_id + "/attributes" + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + ret_attrs = rspJson["attributes"] + self.assertEqual(len(ret_attrs), attr_count) + for i in range(attr_count): + attr = ret_attrs[i] + self.assertTrue("name" in attr) + self.assertEqual(attr["name"], f"attr{i+1:04d}") + self.assertTrue("value" in attr) + attr_value = attr["value"] + self.assertEqual(len(attr_value), extent) + self.assertEqual(attr_value, [i*10+j for j in range(extent)]) + + # try writing again, should get 409 + req = self.endpoint + "/groups/" + root_id + "/attributes" + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 409) + + + if __name__ == "__main__":