From 8095c4b72a9761580aa898ae5af54e1941e55a5e Mon Sep 17 00:00:00 2001 From: Michael Baudis <675030+mbaudis@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:54:24 +0100 Subject: [PATCH] 1.6.3 --- bycon/__init__.py | 3 +- bycon/beaconServer/beacon.py | 40 ++-- bycon/beaconServer/info.py | 7 +- bycon/beaconServer/service_info.py | 2 +- bycon/config.py | 11 +- bycon/definitions/argument_definitions.yaml | 15 +- bycon/definitions/beacon_defaults.yaml | 219 -------------------- bycon/definitions/entity_defaults.yaml | 151 ++++++++++++++ bycon/definitions/filter_definitions.yaml | 10 +- bycon/lib/beacon_response_generation.py | 18 +- bycon/lib/bycon_helpers.py | 2 +- bycon/lib/cgi_parsing.py | 6 +- bycon/lib/query_generation.py | 2 +- bycon/lib/read_specs.py | 45 ++-- bycon/lib/response_remapping.py | 3 +- bycon/lib/schema_parsing.py | 2 +- bycon/lib/service_utils.py | 40 ++-- docs/changes.md | 10 + docs/generated/argument_definitions.md | 26 +-- local/beacon_defaults.yaml | 112 ---------- local/instance_overrides.yaml | 134 +++++++++--- setup.py | 2 +- 22 files changed, 377 insertions(+), 483 deletions(-) delete mode 100644 bycon/definitions/beacon_defaults.yaml create mode 100644 bycon/definitions/entity_defaults.yaml delete mode 100644 local/beacon_defaults.yaml diff --git a/bycon/__init__.py b/bycon/__init__.py index fafa17879..1125ffdb7 100644 --- a/bycon/__init__.py +++ b/bycon/__init__.py @@ -35,8 +35,9 @@ } read_service_definition_files(byc) - # updates `beacon_defaults`, `dataset_definitions` and `local_paths` + # updates `entity_defaults`, `dataset_definitions` and `local_paths` update_rootpars_from_local(LOC_PATH, byc) + set_entity_mappings() set_beacon_defaults(byc) parse_arguments(byc) diff --git a/bycon/beaconServer/beacon.py b/bycon/beaconServer/beacon.py index cafc31753..ca192984f 100755 --- a/bycon/beaconServer/beacon.py +++ b/bycon/beaconServer/beacon.py @@ -24,22 +24,12 @@ def main(): 
################################################################################ def beacon(): - # updates `beacon_defaults`, `dataset_definitions` and `local_paths` - # update_rootpars_from_local(LOC_PATH, byc) - # set_beacon_defaults(byc) - - defs = BYC["beacon_defaults"] - s_a_s = defs.get("service_path_aliases", {}) - r_w = defs.get("rewrites", {}) - d_p_s = defs.get("data_pipeline_path_ids", []) - """ - The type of execution depends on the requested entity defined in `beacon_defaults` - which can either be one of the Beacon entities (also recognizing aliases) - in `beacon_defaults.service_path_aliases` or targets of a rewrite from - `beacon_defaults.rewrites`. + The type of execution depends on the requested entity defined in the + `path_entry_type_mappings` generated from `request_entity_path_id` (or aliases) + in `entity_defaults`. The entity is determined from different potential inputs and overwritten - by the next one in the oreder, if existing: + by the next one in the order, if existing: 1. from the path (element after "beacon", e.g. `biosamples` from `/beacon/biosamples/...`) @@ -48,31 +38,25 @@ def beacon(): Fallback is `/info`. 
""" + + p_e_m = BYC.get("path_entry_type_mappings", {}) + e_p_m = BYC.get("entry_type_path_mappings", {}) + d_p_e = BYC.get("data_pipeline_entities", []) byc.update({"request_path_root": "beacon"}) rest_path_elements(byc) - # args_update_form(byc) - prdbug(f'beacon.py - request_entity_path_id: {byc.get("request_entity_path_id")}') e_p_id = BYC_PARS.get("request_entity_path_id", "___none___") - prdbug(f'beacon.py - form e_p_id: {e_p_id}') - if e_p_id in s_a_s or e_p_id in r_w: + if e_p_id in p_e_m: byc.update({"request_entity_path_id": e_p_id}) r_p_id = byc.get("request_entity_path_id", "info") - prdbug(f'beacon.py - request_entity_path_id: {r_p_id}') - # check for rewrites - if r_p_id in r_w: - uri = environ.get('REQUEST_URI') - pat = re.compile( rf"^.+\/{r_p_id}\/?(.*?)$" ) - if pat.match(uri): - stuff = pat.match(uri).group(1) - print_uri_rewrite_response(r_w[r_p_id], stuff) + e = p_e_m.get(r_p_id) + f = e_p_m.get(e) - f = s_a_s.get(r_p_id) if not f: pass - elif f in d_p_s: + elif e in d_p_e: initialize_bycon_service(byc, f) r = BeaconDataResponse(byc).resultsetResponse() print_json_response(r) diff --git a/bycon/beaconServer/info.py b/bycon/beaconServer/info.py index c308cc7d2..8223e98d5 100755 --- a/bycon/beaconServer/info.py +++ b/bycon/beaconServer/info.py @@ -25,8 +25,7 @@ def info(): initialize_bycon_service(byc, "info") r = BeaconInfoResponse(byc) - defs = BYC["beacon_defaults"] - b_e_d = defs.get("entity_defaults", {}) + b_e_d = BYC.get("entity_defaults", {}) info = b_e_d.get("info", {}) pgx_info = info.get("content", {}) beacon_info = object_instance_from_schema_name("beaconInfoResults", "") @@ -38,9 +37,11 @@ def info(): # TODO: All the schemas really only here? 
beacon_schemas = [] + entry_type_responses = ["beaconCollectionsResponse", "beaconResultsetsResponse"] for e_t, e_d in b_e_d.items(): b_s = e_d.get("beacon_schema", {}) - if e_d.get("is_entry_type", True) is True: + r_s = e_d.get("response_schema", "___none___") + if r_s in entry_type_responses: beacon_schemas.append(b_s) response.update( { "returned_schemas": beacon_schemas } ) diff --git a/bycon/beaconServer/service_info.py b/bycon/beaconServer/service_info.py index 15c997966..c901b0cc7 100755 --- a/bycon/beaconServer/service_info.py +++ b/bycon/beaconServer/service_info.py @@ -22,7 +22,7 @@ def main(): def service_info(): initialize_bycon_service(byc, "service_info") - b_e_d = BYC["beacon_defaults"].get("entity_defaults", {}) + b_e_d = BYC.get("entity_defaults", {}) pgx_info = b_e_d.get("info", {}) c = pgx_info.get("content", {}) info = object_instance_from_schema_name("ga4gh-service-info-1-0-0-schema", "") diff --git a/bycon/config.py b/bycon/config.py index d85ab1331..4a7d91cc8 100644 --- a/bycon/config.py +++ b/bycon/config.py @@ -44,7 +44,16 @@ "ERRORS": [], "WARNINGS": [], "USER": "anonymous", - "beacon_defaults": {}, + "beacon_defaults": { + "defaults": { + "default_dataset_id": "examplez", + "test_domains": ["localhost"] + }, + }, + "entity_defaults": {"info":{}}, + "path_entry_type_mappings": {}, + "entry_type_path_mappings": {}, + "data_pipeline_entities": [], "datatable_mappings": {} } diff --git a/bycon/definitions/argument_definitions.yaml b/bycon/definitions/argument_definitions.yaml index 7dc2e6a10..d3ce154ff 100644 --- a/bycon/definitions/argument_definitions.yaml +++ b/bycon/definitions/argument_definitions.yaml @@ -57,7 +57,8 @@ request_entity_path_id: type: string cmdFlags: - --requestEntityPathId - description: required data entry point, equal to the first REST path element in Beacon + description: >- + data entry point, equal to the first REST path element in Beacon requested_schema: type: string @@ -455,12 +456,12 @@ source: - --source 
description: some source label, e.g. `analyses` -query: - type: string - cmdFlags: - - -q - - --query - description: complete query string, e.g. `{"biosamples":{"external_references.id":"geo:GSE7428"}}` +# query: +# type: string +# cmdFlags: +# - -q +# - --query +# description: complete query string, e.g. `{"biosamples":{"external_references.id":"geo:GSE7428"}}` delivery_keys: type: array diff --git a/bycon/definitions/beacon_defaults.yaml b/bycon/definitions/beacon_defaults.yaml deleted file mode 100644 index a2755a0ae..000000000 --- a/bycon/definitions/beacon_defaults.yaml +++ /dev/null @@ -1,219 +0,0 @@ -defaults: - default_dataset_id: examplez - request_path_root: beacon - request_entity_path_id: info - response_schema: beaconInfoResponse - include_resultset_responses: HIT - response_entity: - entity_type: info - response_schema: beaconInfoResponse - beacon_schema: - entity_type: info - schema: https://progenetix.org/services/schemas/beaconInfoResults/ - -################################################################################ -# mappings and aliases ######################################################### -################################################################################ - -# aliases for the entry type path ids ########################################## -service_path_aliases: - "/": info - info: info - "service-info": info - configuration: configuration - map: map - entry_types: entry_types - filtering_terms: filtering_terms - datasets: datasets - cohorts: cohorts - individuals: individuals - biosamples: biosamples - analyses: analyses - runs: runs - g_variants: genomicVariations - -# entry type id for the given path ############################################# -path_entry_type_mappings: - analyses: analysis - cohorts: cohort - configuration: configuration - datasets: dataset - biosamples: biosample - individuals: individual - runs: run - genomicVariations: genomicVariant - map: beaconMap - info: info - filtering_terms: filteringTerm - 
filteringTerms: filteringTerm - entry_types: entryType - entryTypes: entryType - -# entry types for the standard query stack / entity aggregation ################ -data_pipeline_path_ids: - - analyses - - biosamples - - individuals - - genomicVariations - - runs - -################################################################################ -# beacon entity defaults ####################################################### -################################################################################ - -entity_defaults: - info: - is_entry_type: False - request_entity_path_id: info - response_entity_id: info - collection: Null - response_schema: beaconInfoResponse - beacon_schema: - entity_type: configuration - schema: https://progenetix.org/services/schemas/beaconInfoResults/ - content: {} - -################################################################################ - - beaconMap: - is_entry_type: False - request_entity_path_id: map - response_entity_id: beaconMap - collection: Null - response_schema: beaconMapResponse - beacon_schema: - entity_type: map - schema: https://progenetix.org/services/schemas/beaconMapSchema/ - -################################################################################ - - configuration: - is_entry_type: False - request_entity_path_id: configuration - response_entity_id: configuration - collection: Null - response_schema: beaconConfigurationResponse - beacon_schema: - entity_type: configuration - schema: https://progenetix.org/services/schemas/beaconConfigurationSchema/ - -################################################################################ - - entryType: - is_entry_type: False - request_entity_path_id: entryTypes - response_entity_id: entryType - collection: Null - response_schema: beaconEntryTypesResponse - beacon_schema: - entity_type: entryType - schema: https://progenetix.org/services/schemas/entryTypesSchema/ - -################################################################################ - - 
filteringTerm: - is_entry_type: False - request_entity_path_id: filtering_terms - response_entity_id: filteringTerm - collection: collations - response_schema: beaconFilteringTermsResponse - beacon_schema: - entity_type: filteringTerm - schema: https://progenetix.org/services/schemas/filteringTermsSchema/ - -################################################################################ - - analysis: - is_entry_type: True - request_entity_path_id: analyses - response_entity_id: analysis - collection: analyses - response_schema: beaconResultsetsResponse - beacon_schema: - entity_type: analysis - schema: https://progenetix.org/services/schemas/analysis/ - h->o_access_key: analyses._id - -################################################################################ - - biosample: - is_entry_type: True - request_entity_path_id: biosamples - response_entity_id: biosample - collection: biosamples - response_schema: beaconResultsetsResponse - beacon_schema: - entity_type: biosample - schema: https://progenetix.org/services/schemas/biosample/ - h->o_access_key: biosamples._id - - ############################################################################## - - genomicVariant: - is_entry_type: True - request_entity_path_id: genomicVariations - response_entity_id: genomicVariant - collection: variants - response_schema: beaconResultsetsResponse - beacon_schema: - entity_type: genomicVariant - schema: https://progenetix.org/services/schemas/genomicVariant/ - h->o_access_key: variants._id - - ############################################################################## - - individual: - is_entry_type: True - request_entity_path_id: individuals - response_entity_id: individual - collection: individuals - response_schema: beaconResultsetsResponse - beacon_schema: - entity_type: individual - schema: https://progenetix.org/services/schemas/individual/ - h->o_access_key: individuals._id - - - ############################################################################## - - 
run: - is_entry_type: True - request_entity_path_id: runs - response_entity_id: run - collection: analyses - response_schema: beaconResultsetsResponse - beacon_schema: - entity_type: run - schema: https://progenetix.org/services/schemas/run/ - h->o_access_key: analyses._id - -################################################################################ - - cohort: - is_entry_type: True - request_entity_path_id: cohorts - response_entity_id: cohort - collection: collations - response_schema: beaconCollectionsResponse - beacon_schema: - entity_type: cohort - schema: https://progenetix.org/services/schemas/cohort/ - pagination: - skip: 0 - limit: 10 - - ############################################################################## - - dataset: - is_entry_type: True - request_entity_path_id: datasets - response_entity_id: dataset - collection: Null - response_schema: beaconCollectionsResponse - beacon_schema: - entity_type: dataset - schema: https://progenetix.org/services/schemas/dataset/ - pagination: - skip: 0 - limit: 10 - diff --git a/bycon/definitions/entity_defaults.yaml b/bycon/definitions/entity_defaults.yaml new file mode 100644 index 000000000..53ac23e8f --- /dev/null +++ b/bycon/definitions/entity_defaults.yaml @@ -0,0 +1,151 @@ +info: + request_entity_path_id: info + request_entity_path_aliases: + - "/" + - "service-info" + response_entity_id: info + collection: Null + response_schema: beaconInfoResponse + beacon_schema: + entity_type: configuration + schema: https://progenetix.org/services/schemas/beaconInfoResults/ + content: {} + +################################################################################ + +beaconMap: + request_entity_path_id: map + response_entity_id: beaconMap + collection: Null + response_schema: beaconMapResponse + beacon_schema: + entity_type: map + schema: https://progenetix.org/services/schemas/beaconMapSchema/ + +################################################################################ + +configuration: + 
request_entity_path_id: configuration + response_entity_id: configuration + collection: Null + response_schema: beaconConfigurationResponse + beacon_schema: + entity_type: configuration + schema: https://progenetix.org/services/schemas/beaconConfigurationSchema/ + +################################################################################ + +entryType: + request_entity_path_id: entryTypes + response_entity_id: entryType + collection: Null + response_schema: beaconEntryTypesResponse + beacon_schema: + entity_type: entryType + schema: https://progenetix.org/services/schemas/entryTypesSchema/ + +################################################################################ + +filteringTerm: + request_entity_path_id: filtering_terms + response_entity_id: filteringTerm + collection: collations + response_schema: beaconFilteringTermsResponse + beacon_schema: + entity_type: filteringTerm + schema: https://progenetix.org/services/schemas/filteringTermsSchema/ + +################################################################################ + +analysis: + request_entity_path_id: analyses + response_entity_id: analysis + collection: analyses + response_schema: beaconResultsetsResponse + beacon_schema: + entity_type: analysis + schema: https://progenetix.org/services/schemas/analysis/ + h->o_access_key: analyses._id + +################################################################################ + +biosample: + request_entity_path_id: biosamples + request_entity_path_aliases: + - samples + response_entity_id: biosample + collection: biosamples + response_schema: beaconResultsetsResponse + beacon_schema: + entity_type: biosample + schema: https://progenetix.org/services/schemas/biosample/ + h->o_access_key: biosamples._id + +############################################################################## + +genomicVariant: + request_entity_path_id: genomicVariations + request_entity_path_aliases: + - g_variants + - variants + response_entity_id: genomicVariant + 
collection: variants + response_schema: beaconResultsetsResponse + beacon_schema: + entity_type: genomicVariant + schema: https://progenetix.org/services/schemas/genomicVariant/ + h->o_access_key: variants._id + +############################################################################## + +individual: + request_entity_path_id: individuals + response_entity_id: individual + collection: individuals + response_schema: beaconResultsetsResponse + beacon_schema: + entity_type: individual + schema: https://progenetix.org/services/schemas/individual/ + h->o_access_key: individuals._id + + +############################################################################## + +run: + request_entity_path_id: runs + response_entity_id: run + collection: analyses + response_schema: beaconResultsetsResponse + beacon_schema: + entity_type: run + schema: https://progenetix.org/services/schemas/run/ + h->o_access_key: analyses._id + +################################################################################ + +cohort: + request_entity_path_id: cohorts + response_entity_id: cohort + collection: collations + response_schema: beaconCollectionsResponse + beacon_schema: + entity_type: cohort + schema: https://progenetix.org/services/schemas/cohort/ + pagination: + skip: 0 + limit: 10 + +############################################################################## + +dataset: + request_entity_path_id: datasets + response_entity_id: dataset + collection: Null + response_schema: beaconCollectionsResponse + beacon_schema: + entity_type: dataset + schema: https://progenetix.org/services/schemas/dataset/ + pagination: + skip: 0 + limit: 10 + diff --git a/bycon/definitions/filter_definitions.yaml b/bycon/definitions/filter_definitions.yaml index b14d05485..d86c7c197 100644 --- a/bycon/definitions/filter_definitions.yaml +++ b/bycon/definitions/filter_definitions.yaml @@ -398,13 +398,13 @@ TCGAproject: - "" scope: biosamples entity: biosample - pattern: '^(?:pgx:)?TCGA\.\w{2,4}?$' - 
db_key: 'external_references.id' + pattern: '^(?:pgx:)?TCGA\-\w{2,4}?$' + db_key: 'references.tcgaproject.id' collationed: True examples: - - pgx:TCGA.DLBC - - pgx:TCGA.ESCA - - pgx:TCGA.GBM + - pgx:TCGA-DLBC + - pgx:TCGA-ESCA + - pgx:TCGA-GBM UBERON: id: uberon diff --git a/bycon/lib/beacon_response_generation.py b/bycon/lib/beacon_response_generation.py index 3893f70c7..2121b1807 100644 --- a/bycon/lib/beacon_response_generation.py +++ b/bycon/lib/beacon_response_generation.py @@ -24,11 +24,10 @@ class BeaconErrorResponse: The responses are then provided by the dedicated methods """ def __init__(self, byc: dict): - self.entity_defaults = BYC["beacon_defaults"].get("entity_defaults", {"info":{}}) self.response_schema = byc.get("response_schema", "beaconInfoResponse") self.beacon_schema = byc["response_entity"].get("beacon_schema", "___none___") self.error_response = object_instance_from_schema_name("beaconErrorResponse", "") - info = self.entity_defaults["info"].get("content", {"api_version": "___none___"}) + info = BYC["entity_defaults"]["info"].get("content", {"api_version": "___none___"}) r_m = self.error_response["meta"] for p in ["api_version", "beacon_id"]: if p in info.keys(): @@ -58,11 +57,10 @@ class BeaconInfoResponse: The responses are then provided by the dedicated methods """ def __init__(self, byc: dict): - self.entity_defaults = BYC["beacon_defaults"].get("entity_defaults", {"info":{}}) self.response_schema = byc.get("response_schema", "beaconInfoResponse") self.beacon_schema = byc["response_entity"].get("beacon_schema", "___none___") self.data_response = object_instance_from_schema_name(self.response_schema, "") - info = self.entity_defaults["info"].get("content", {"api_version": "___none___"}) + info = BYC["entity_defaults"]["info"].get("content", {"api_version": "___none___"}) r_m = self.data_response["meta"] for p in ["api_version", "beacon_id"]: if p in info.keys(): @@ -105,7 +103,6 @@ def __init__(self, byc: dict): self.dataset_ids = 
byc.get("dataset_ids", []) self.authorized_granularities = byc.get("authorized_granularities", {}) self.user_name = byc.get("user_name", "anonymous") - self.entity_defaults = BYC["beacon_defaults"].get("entity_defaults", {"info":{}}) self.response_schema = byc["response_schema"] self.returned_granularity = byc.get("returned_granularity", "boolean") self.include_handovers = BYC_PARS.get("include_handovers", False) @@ -277,7 +274,7 @@ def __meta_add_parameters(self): if "returned_schemas" in r_m: r_m.update({"returned_schemas":[self.beacon_schema]}) - info = self.entity_defaults["info"].get("content", {"api_version": "___none___"}) + info = BYC["entity_defaults"]["info"].get("content", {"api_version": "___none___"}) for p in ["api_version", "beacon_id"]: if p in info.keys(): r_m.update({p: info.get(p, "___none___")}) @@ -323,7 +320,7 @@ def __meta_add_received_request_summary_parameters(self): if q in BYC_PARS: r_r_s.update({"request_parameters": always_merger.merge( r_r_s.get("request_parameters", {}), { "cohort_ids": BYC_PARS.get(q) })}) - info = self.entity_defaults["info"].get("content", {"api_version": "___none___"}) + info = BYC["entity_defaults"]["info"].get("content", {"api_version": "___none___"}) for p in ["api_version", "beacon_id"]: r_r_s.update({p: info.get(p, "___none___")}) @@ -394,7 +391,6 @@ class ByconFilteringTerms: def __init__(self, byc: dict): self.byc = byc self.dataset_ids = byc.get("dataset_ids", []) - self.entity_defaults = BYC["beacon_defaults"].get("entity_defaults", {"info":{}}) self.filter_definitions = byc.get("filter_definitions", {}) self.filters = byc.get("filters", []) self.response_entity_id = byc.get("response_entity_id", "filteringTerm") @@ -520,7 +516,6 @@ class ByconCollections: def __init__(self, byc: dict): self.byc = byc self.dataset_ids = byc.get("dataset_ids", []) - self.entity_defaults = BYC["beacon_defaults"].get("entity_defaults", {"info":{}}) self.filter_definitions = byc.get("filter_definitions", {}) 
self.response_entity_id = byc.get("response_entity_id", "dataset") self.data_collection = byc["response_entity"].get("collection", "collations") @@ -600,7 +595,6 @@ def __collections_return_cohorts(self): class ByconResultSets: def __init__(self, byc: dict): self.byc = byc - self.entity_defaults = BYC["beacon_defaults"].get("entity_defaults", {"info":{}}) self.datasets_results = dict() # the object with matched ids per dataset, per h_o self.datasets_data = dict() # the object with data of requested entity per dataset self.result_sets = list() # data rewrapped into the resultSets list @@ -647,7 +641,7 @@ def datasetsResults(self): # -------------------------------------------------------------------------# def __get_handover_access_key(self): - e_d_s = self.entity_defaults.get(self.response_entity_id, {}) + e_d_s = BYC["entity_defaults"].get(self.response_entity_id, {}) self.handover_key = e_d_s.get("h->o_access_key", "___none___") return @@ -712,7 +706,7 @@ def __retrieve_datasets_data(self): if "variants" in self.data_collection: return - e_d_s = self.entity_defaults.get(self.response_entity_id, {}) + e_d_s = BYC["entity_defaults"].get(self.response_entity_id, {}) ds_d_start = datetime.datetime.now() for ds_id, ds_results in self.datasets_results.items(): diff --git a/bycon/lib/bycon_helpers.py b/bycon/lib/bycon_helpers.py index b3bd81ea5..5a677007e 100644 --- a/bycon/lib/bycon_helpers.py +++ b/bycon/lib/bycon_helpers.py @@ -42,7 +42,7 @@ def refactor_value_from_defined_type(parameter, values, definition): values = list(x for x in values if x is not None) values = list(x for x in values if x.lower() not in ["none", "null"]) if len(values) == 0: - return None + return False if "array" in p_d_t: p_i_t = definition.get("items", "string") if "int" in p_i_t: diff --git a/bycon/lib/cgi_parsing.py b/bycon/lib/cgi_parsing.py index e4a2b8645..6cedd7427 100644 --- a/bycon/lib/cgi_parsing.py +++ b/bycon/lib/cgi_parsing.py @@ -29,7 +29,6 @@ def parse_POST(byc): content_len = 
environ.get('CONTENT_LENGTH', '0') content_typ = environ.get('CONTENT_TYPE', '') - b_defs = BYC["beacon_defaults"] a_defs = byc.get("argument_definitions", {}) # TODO: catch error & return for non-json posts @@ -86,10 +85,11 @@ def parse_GET(byc): # CAVE: Only predefined parameters are accepted! if p_d in a_defs: values = form_return_listvalue(form_data, p) - if (v := refactor_value_from_defined_type(p, values, a_defs[p_d])): + v = refactor_value_from_defined_type(p, values, a_defs[p_d]) + if v is not False: BYC_PARS.update({p_d: v}) else: - w_m = '!!! Unmatched parameter {p_d}: {form_data.getvalue(p)}' + w_m = f'!!! Unmatched parameter {p_d}: {form_data.getvalue(p)}' BYC["WARNINGS"].append(w_m) prdbug(f'!!! Unmatched parameter {p_d}: {form_data.getvalue(p)}') BYC.update({"DEBUG_MODE": set_debug_state(BYC_PARS.get("debug_mode", False)) }) diff --git a/bycon/lib/query_generation.py b/bycon/lib/query_generation.py index 7c619620f..85785d51d 100644 --- a/bycon/lib/query_generation.py +++ b/bycon/lib/query_generation.py @@ -41,7 +41,7 @@ class ByconQuery(): """ def __init__(self, byc: dict, dataset_id=False): - self.response_types = BYC["beacon_defaults"].get("entity_defaults", {}) + self.response_types = BYC.get("entity_defaults", {}) f_t_d = self.response_types.get("filteringTerm", {}) self.filtering_terms_coll = f_t_d.get("collection", "___none___") if dataset_id is False: diff --git a/bycon/lib/read_specs.py b/bycon/lib/read_specs.py index d5e328c4e..65bbfdde7 100644 --- a/bycon/lib/read_specs.py +++ b/bycon/lib/read_specs.py @@ -35,6 +35,22 @@ def read_service_definition_files(byc): else: byc.update({d: o}) +################################################################################ + +def set_entity_mappings(): + b_e_d = BYC.get("entity_defaults", {}) + p_e_m = BYC.get("path_entry_type_mappings", {}) + e_p_m = BYC.get("entry_type_path_mappings", {}) + d_p_e = BYC.get("data_pipeline_entities", []) + for e, e_d in b_e_d.items(): + if (p := 
e_d.get("request_entity_path_id")): + p_e_m.update({ p: e }) + e_p_m.update({ e: p }) + for a in e_d.get("request_entity_path_aliases", []): + p_e_m.update({ a: e }) + if "beaconResultsetsResponse" in e_d.get("response_schema", ""): + d_p_e.append(e) + ################################################################################ @@ -45,25 +61,24 @@ def update_rootpars_from_local(loc_dir, byc): return p_c_p.append(loc_dir) - - b_p = 'beacon_defaults' - s_p = 'services_defaults' - - b_f = path.join(loc_dir, f'{b_p}.yaml') - b = load_yaml_empty_fallback(b_f) - s_f = path.join(loc_dir, f'{s_p}.yaml') + s_f = path.join(loc_dir, 'services_entity_defaults.yaml') s = load_yaml_empty_fallback(s_f) - b = always_merger.merge(s, b) - BYC.update({b_p: always_merger.merge(BYC.get(b_p, {}), b)}) + BYC.update({"entity_defaults": always_merger.merge(BYC.get("entity_defaults", {}), s)}) # overwriting installation-wide defaults with instance-specific ones # _i.e._ matching the current domain (to allow presentation of different # Beacon instances from the same server) - if ENV != "local": + i_ovr_f = path.join(loc_dir, "instance_overrides.yaml") + i_ovr = load_yaml_empty_fallback(i_ovr_f) + + if "local" in i_ovr: + i_o_bdfs = i_ovr["local"].get("beacon_defaults", {}) + i_o_edfs = i_ovr["local"].get("entity_defaults", {}) + BYC.update({"beacon_defaults": always_merger.merge(BYC.get("beacon_defaults", {}), i_o_bdfs)}) + BYC.update({"entity_defaults": always_merger.merge(BYC.get("entity_defaults", {}), i_o_edfs)}) + if not "local" in ENV: instance = "___none___" - host = environ.get("HTTP_HOST") - i_ovr_f = path.join(loc_dir, "instance_overrides.yaml") - i_ovr = load_yaml_empty_fallback(i_ovr_f) + host = environ.get("HTTP_HOST", "local") for i_k, i_v in i_ovr.items(): doms = i_v.get("domains", []) if host in doms: @@ -71,7 +86,9 @@ def update_rootpars_from_local(loc_dir, byc): break if instance in i_ovr: i_o_bdfs = i_ovr[instance].get("beacon_defaults", {}) - BYC.update({b_p: 
always_merger.merge(BYC.get(b_p, {}), i_o_bdfs)}) + i_o_edfs = i_ovr[instance].get("entity_defaults", {}) + BYC.update({"beacon_defaults": always_merger.merge(BYC.get("beacon_defaults", {}), i_o_bdfs)}) + BYC.update({"entity_defaults": always_merger.merge(BYC.get("entity_defaults", {}), i_o_edfs)}) # TODO: better way to define which files are parsed from local for p in ("authorizations", "dataset_definitions", "local_paths", "local_parameters", "datatable_mappings", "plot_defaults"): diff --git a/bycon/lib/response_remapping.py b/bycon/lib/response_remapping.py index c3aa77dee..a573be70c 100644 --- a/bycon/lib/response_remapping.py +++ b/bycon/lib/response_remapping.py @@ -157,7 +157,8 @@ def remap_biosamples(r_s_res, byc): # TODO: REMOVE VERIFIER HACKS e_r = [] for r_k, r_v in bs_r.get("references", {}).items(): - e_r.append(__reference_object_from_ontology_term(r_k, r_v, byc)) + if (r_i := __reference_object_from_ontology_term(r_k, r_v, byc)): + e_r.append(r_i) r_s_res[bs_i].update({ "sample_origin_type": {"id": "OBI:0001479", "label": "specimen from organism"}, diff --git a/bycon/lib/schema_parsing.py b/bycon/lib/schema_parsing.py index dc056882a..5a112d84e 100644 --- a/bycon/lib/schema_parsing.py +++ b/bycon/lib/schema_parsing.py @@ -12,7 +12,7 @@ def read_schema_file(schema_name, item, ext="json"): # some lookup for the `request_entity_path_id` value in the case of "true" # entry types where schemas are defined in a directory with the path id - b_e_d = BYC["beacon_defaults"].get("entity_defaults", {}) + b_e_d = BYC.get("entity_defaults", {}) if schema_name in b_e_d: r_p_id = b_e_d[schema_name].get("request_entity_path_id") if isinstance(r_p_id, str): diff --git a/bycon/lib/service_utils.py b/bycon/lib/service_utils.py index 88b285666..186d3b336 100644 --- a/bycon/lib/service_utils.py +++ b/bycon/lib/service_utils.py @@ -24,12 +24,13 @@ def set_beacon_defaults(byc): ################################################################################ def 
initialize_bycon_service(byc, service="info"): - # TODO - streamline, also for services etc. - defs = BYC["beacon_defaults"] - b_e_d = defs.get("entity_defaults", {}) - s_a_s = defs.get("service_path_aliases", {}) - if service in s_a_s: - service = s_a_s[service] + b_e_d = BYC.get("entity_defaults", {}) + p_e_m = BYC.get("path_entry_type_mappings", {}) + e_p_m = BYC.get("entry_type_path_mappings", {}) + if service in p_e_m.keys(): + e = p_e_m.get(service) + service = e_p_m.get(e) + entry_type = p_e_m.get(service, "___none___") """ Here we allow the addition of additional configuration files, necessary @@ -49,9 +50,6 @@ def initialize_bycon_service(byc, service="info"): "request_entity_path_id": service }) - p_e_m = defs.get("path_entry_type_mappings", {}) - entry_type = p_e_m.get(service, "___none___") - if entry_type in b_e_d: for d_k, d_v in b_e_d[entry_type].items(): byc.update({d_k: d_v}) @@ -82,21 +80,13 @@ def set_special_modes(byc): ################################################################################ def update_entity_ids_from_path(byc): - req_p_id = byc.get("request_entity_path_id") - s_a_s = BYC["beacon_defaults"].get("service_path_aliases", {}) - p_e_m = BYC["beacon_defaults"].get("path_entry_type_mappings", {}) - - if not req_p_id: + if not (req_p_id := byc.get("request_entity_path_id")): return - res_p_id = byc.get("response_entity_path_id") - if not res_p_id: + if not (res_p_id := byc.get("response_entity_path_id")): res_p_id = req_p_id - # TODO: in contrast to req_p_id, res_p_id hasn't been anti-aliased - if res_p_id in s_a_s: - res_p_id = s_a_s[res_p_id] - # TODO: this gets the correct entity_id w/ entity_path_id fallback + p_e_m = BYC.get("path_entry_type_mappings", {}) byc.update({ "request_entity_id": p_e_m.get(req_p_id, req_p_id), "response_entity_id": p_e_m.get(res_p_id, req_p_id) @@ -120,13 +110,11 @@ def update_requested_schema_from_request(byc): def set_response_entity(byc): prdbug(f'response_entity_id: 
{byc.get("response_entity_id")}') - b_rt_s = BYC["beacon_defaults"].get("entity_defaults", {}) + byc.update({"response_entity": {}}) + b_rt_s = BYC.get("entity_defaults", {}) r_e_id = byc.get("response_entity_id", "___none___") - r_e = b_rt_s.get(r_e_id) - if not r_e: - return - - byc.update({"response_entity": r_e}) + if (r_e := b_rt_s.get(r_e_id)): + byc.update({"response_entity": r_e}) ################################################################################ diff --git a/docs/changes.md b/docs/changes.md index 6c74dcdab..1dc5a4c88 100644 --- a/docs/changes.md +++ b/docs/changes.md @@ -13,6 +13,16 @@ through the Perl based [**PGX** project](http://github.com/progenetix/PGX/). ## Changes Tracker +### 2024-03-07 (v1.6.3) + +* configuration changes: + - `beacon_defaults` file changed to `entity_defaults` since only entities + are defined in it + - paths are now defined within the entity definitions, no separate aliases etc. + - local overrides for the Beacon entity defaults are now in `local/instance_overrides.yaml` + - for byconaut a separate `services_entity_defaults` file provides the additional + services (e.g. `sampleplots` ... pseudo-entities) + ### 2024-03-07 (v1.6.2) * adding a `__collections_response_remap_cohorts(self, colls=[])` function diff --git a/docs/generated/argument_definitions.md b/docs/generated/argument_definitions.md index fc8582d81..bffa0dc8c 100644 --- a/docs/generated/argument_definitions.md +++ b/docs/generated/argument_definitions.md @@ -9,7 +9,7 @@ The following is a list of arguments and parameters used in the `bycon` package faking a user name ### `test_mode` -**type:** string +**type:** boolean **cmdFlags:** `-t,--testMode` **description:** test setting, i.e. 
returning some random documents @@ -41,7 +41,7 @@ The requested granularity of the beacon **type:** string **cmdFlags:** `--requestEntityPathId` **description:** -required data entry point, equal to the first REST path element in Beacon +data entry point, equal to the first REST path element in Beacon ### `requested_schema` **type:** string @@ -53,7 +53,9 @@ requested schema, e.g. biosample **type:** string **cmdFlags:** `--includeResultsetResponses` **description:** -requested schema, e.g. biosample + +* include resultset responses, e.g. HIT, MISS +* kind of a holdover from Beacon pre-v1 ### `dataset_ids` **type:** array @@ -97,7 +99,6 @@ global treatment of descendant terms **cmdFlags:** `--assemblyId` **description:** assembly id -**default:** `GRCh38` ### `reference_name` **type:** string @@ -246,6 +247,7 @@ One or more ids; this parameter only makes sense for specific REST entry types ### `biosample_ids` **type:** array **items:** string +**byc_entity:** biosample **cmdFlags:** `--biosampleIds` **description:** biosample ids @@ -253,20 +255,15 @@ biosample ids ### `analysis_ids` **type:** array **items:** string +**byc_entity:** analysis **cmdFlags:** `--analysisIds` **description:** callset / analysis ids -### `callset_ids` -**type:** array -**items:** string -**cmdFlags:** `--callsetIds` -**description:** -callset / analysis ids - ### `individual_ids` **type:** array **items:** string +**byc_entity:** individual **cmdFlags:** `--individualIds` **description:** subject ids @@ -274,6 +271,7 @@ subject ids ### `variant_ids` **type:** array **items:** string +**byc_entity:** genomicVariant **cmdFlags:** `--variantIds` **description:** variant ids @@ -398,12 +396,6 @@ minimal number, e.g. for collations, where supported **description:** some source label, e.g. `analyses` -### `query` -**type:** string -**cmdFlags:** `-q,--query` -**description:** -complete query string, e.g. 
`{"biosamples":{"external_references.id":"geo:GSE7428"}}` - ### `delivery_keys` **type:** array **items:** string diff --git a/local/beacon_defaults.yaml b/local/beacon_defaults.yaml deleted file mode 100644 index e884613ca..000000000 --- a/local/beacon_defaults.yaml +++ /dev/null @@ -1,112 +0,0 @@ -################################################################################ -# Beacon defaults. Values reflect the developers' environment:Please adjust! -################################################################################ - -defaults: - default_dataset_id: progenetix - -################################################################################ -# mappings and aliases ######################################################### -################################################################################ - -# map path id flavours to canonical entry_type_path_id ######################### -# the aliases here are for non-standard speling or additional entry types - -service_path_aliases: - filteringTerms: filtering_terms # just for speling variations - entryTypes: entry_types # just for speling variations - variants: genomicVariations # just for speling variations - genomicVariations: genomicVariations # just for speling variations - phenopackets: phenopackets # Beacon+ specific example - - -################################################################################ -# here you can map additional path values to the corresponding (additional) -# entry type -################################################################################ - -path_entry_type_mappings: - phenopackets: phenopacket # Beacon+ specific example - -################################################################################ -# her you can add additional path ids to the data query aggregation pipeline -# that usually mapps/reduces queries against biosamples, genomicVariations, -# individuals ... 
-################################################################################ - -data_pipeline_path_ids: - - phenopackets # Beacon+ specific example - -################################################################################ -# Beacon entry type defaults - please adjust esp. info and schema paths... -# TODO: Some of the entry type configuration duplicates data from the Beacon -# framework and might be disentangled further on ... -################################################################################ - -entity_defaults: - info: - is_entry_type: False - collection: Null - response_schema: beaconInfoResponse - beacon_schema: - entity_type: configuration - schema: https://progenetix.org/services/schemas/beaconInfoResults/ - # TODO: move `content` to `local` file. - content: - beacon_id: org.progenetix - name: Progenetix Cancer Genomics Beacon+ - api_version: v2.1.0-beaconplus - version: v2.1.0-beaconplus - id: org.progenetix.beacon - environment: prod - description: >- - Beacon+ provides a forward looking implementation of the Beacon v2 API, with - focus on structural genome variants and metadata based on the cancer and reference - genome profiling data in the Progenetix oncogenomic data resource (https://progenetix.org). 
- type: - group: org.ga4gh - artifact: beacon - version: v2.1.0-beaconplus - documentation_url: http://docs.progenetix.org - service_url: http://progenetix.org/beacon/ - welcome_url: https://progenetix.org/biosamples/ - alternative_url: https://progenetix.org - contact_url: mailto:contact@progenetix.org - created_at: 2015-11-13T00:00:00 - updated_at: 2023-02-16T12:45:00 - organization: - id: org.baudisgroup.info - name: Theoretical Cytogenetics and Oncogenomics group at UZH and SIB - description: > - The *Theoretical Cytogenetics and Oncogenomics* group at the University of - Zurich (UZH) and the Swiss Institute of Bioinformatics (SIB) performs research - in cancer genomics and provides bioinformatics tools and cancer genome data - resources to the scientific community. - address: >- - Department of Molecular Life Sciences - University of Zurich - Winterthurerstrasse 190 - CH-8057 Zurich - Switzerland - welcome_url: https://progenetix.org/biosamples/ - contact_url: mailto:contact@progenetix.org - logoUrl: https://progenetix.org/img/progenetix-logo.png - info: - update_date_time: 2023-01-16T12:45:00 - create_date_time: 2015-11-13T00:00:00 - update_date_time: 2023-01-16T12:45:00 - - ############################################################################## - - phenopacket: - is_entry_type: True - request_entity_path_id: phenopackets - response_entity_id: phenopacket - collection: individuals - response_schema: beaconResultsetsResponse - beacon_schema: - entity_type: phenopacket - schema: https://progenetix.org/services/schemas/phenopacket/ - h->o_access_key: individuals._id - - diff --git a/local/instance_overrides.yaml b/local/instance_overrides.yaml index 8c8fc7a60..c376741f1 100644 --- a/local/instance_overrides.yaml +++ b/local/instance_overrides.yaml @@ -1,3 +1,79 @@ +################################################################################ +# general override for site installation ####################################### 
+################################################################################ + +local: + beacon_defaults: + defaults: + default_dataset_id: progenetix + entity_defaults: + info: + collection: Null + response_schema: beaconInfoResponse + beacon_schema: + entity_type: configuration + schema: https://progenetix.org/services/schemas/beaconInfoResults/ + # TODO: move `content` to `local` file. + content: + beacon_id: org.progenetix + name: Progenetix Cancer Genomics Beacon+ + api_version: v2.1.0-beaconplus + version: v2.1.0-beaconplus + id: org.progenetix.beacon + environment: prod + description: >- + Beacon+ provides a forward looking implementation of the Beacon v2 API, with + focus on structural genome variants and metadata based on the cancer and reference + genome profiling data in the Progenetix oncogenomic data resource (https://progenetix.org). + type: + group: org.ga4gh + artifact: beacon + version: v2.1.0-beaconplus + documentation_url: http://docs.progenetix.org + service_url: http://progenetix.org/beacon/ + welcome_url: https://progenetix.org/biosamples/ + alternative_url: https://progenetix.org + contact_url: mailto:contact@progenetix.org + created_at: 2015-11-13T00:00:00 + updated_at: 2023-02-16T12:45:00 + organization: + id: org.baudisgroup.info + name: Theoretical Cytogenetics and Oncogenomics group at UZH and SIB + description: > + The *Theoretical Cytogenetics and Oncogenomics* group at the University of + Zurich (UZH) and the Swiss Institute of Bioinformatics (SIB) performs research + in cancer genomics and provides bioinformatics tools and cancer genome data + resources to the scientific community. 
+ address: >- + Department of Molecular Life Sciences + University of Zurich + Winterthurerstrasse 190 + CH-8057 Zurich + Switzerland + welcome_url: https://progenetix.org/biosamples/ + contact_url: mailto:contact@progenetix.org + logoUrl: https://progenetix.org/img/progenetix-logo.png + info: + update_date_time: 2023-01-16T12:45:00 + create_date_time: 2015-11-13T00:00:00 + update_date_time: 2023-01-16T12:45:00 + + ############################################################################ + + phenopacket: + request_entity_path_id: phenopackets + response_entity_id: phenopacket + collection: individuals + response_schema: beaconResultsetsResponse + beacon_schema: + entity_type: phenopacket + schema: https://progenetix.org/services/schemas/phenopacket/ + h->o_access_key: individuals._id + +################################################################################ +# domain specific ############################################################## +################################################################################ + progenetix: domains: - progenetix.org @@ -29,34 +105,34 @@ cancercelllines: default_dataset_id: cellz test_domains: - cancercelllines.test - entity_defaults: - info: - content: - beacon_id: org.cancercelllines - name: Cancer Cell Line Genomics Beacon+ - id: org.cancercelllines.beacon - environment: prod - description: >- - The cancercelllines.org Beacon is a specific instance of the Progenetix - Beacon+ environment providing information about genommic variations in - cancer cell lines. 
- type: - group: org.ga4gh - artifact: beacon - version: v2.1.0-beaconplus - documentation_url: http://docs.cancercelllines.org - service_url: http://cancercelllines.org/beacon/ - welcome_url: https://cancercelllines.org/biosamples/ - alternative_url: https://cancercelllines.org + entity_defaults: + info: + content: + beacon_id: org.cancercelllines + name: Cancer Cell Line Genomics Beacon+ + id: org.cancercelllines.beacon + environment: prod + description: >- + The cancercelllines.org Beacon is a specific instance of the Progenetix + Beacon+ environment providing information about genomic variations in + cancer cell lines. + type: + group: org.ga4gh + artifact: beacon + version: v2.1.0-beaconplus + documentation_url: http://docs.cancercelllines.org + service_url: http://cancercelllines.org/beacon/ + welcome_url: https://cancercelllines.org/biosamples/ + alternative_url: https://cancercelllines.org + contact_url: mailto:contact@progenetix.org + created_at: 2023-07-01T00:00:00 + updated_at: 2024-02-24T13:00:00 + organization: + welcome_url: https://cancercelllines.org/ contact_url: mailto:contact@progenetix.org - created_at: 2023-07-01T00:00:00 - updated_at: 2024-02-24T13:00:00 - organization: - welcome_url: https://cancercelllines.org/ - contact_url: mailto:contact@progenetix.org - logoUrl: https://cancercelllines.org/img/cancercelllines-icon-400x300.png - info: - update_date_time: 2024-02-24T12:45:00 - create_date_time: 2023-07-01T00:00:00 - update_date_time: 2024-02-24T13:00:00 + logoUrl: https://cancercelllines.org/img/cancercelllines-icon-400x300.png + info: + update_date_time: 2024-02-24T12:45:00 + create_date_time: 2023-07-01T00:00:00 + update_date_time: 2024-02-24T13:00:00 diff --git a/setup.py b/setup.py index 6a89ac4db..ccec33925 100755 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ setup( name="bycon", - version="1.6.2", + version="1.6.3", description="A Python-based environment for the Beacon v2 genomics API", 
long_description=long_description, 
long_description_content_type="text/markdown",