Skip to content

Commit

Permalink
all text filtering queries
Browse files Browse the repository at this point in the history
  • Loading branch information
costero-e committed Feb 7, 2023
1 parent ffc6ded commit e197461
Show file tree
Hide file tree
Showing 9 changed files with 164 additions and 52 deletions.
11 changes: 7 additions & 4 deletions beacon/db/analyses.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,9 @@ def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParam
return query

def get_analyses(entry_id: Optional[str], qparams: RequestParams):
collection = 'analyses'
query = apply_request_parameters({}, qparams)
query = apply_filters(query, qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection)
schema = DefaultSchemas.ANALYSES
count = get_count(client.beacon.analyses, query)
docs = get_documents(
Expand All @@ -93,8 +94,9 @@ def get_analyses(entry_id: Optional[str], qparams: RequestParams):


def get_analysis_with_id(entry_id: Optional[str], qparams: RequestParams):
collection = 'analyses'
query = apply_request_parameters({}, qparams)
query = apply_filters(query, qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection)
query = query_id(query, entry_id)
schema = DefaultSchemas.ANALYSES
count = get_count(client.beacon.analyses, query)
Expand All @@ -108,14 +110,15 @@ def get_analysis_with_id(entry_id: Optional[str], qparams: RequestParams):


def get_variants_of_analysis(entry_id: Optional[str], qparams: RequestParams):
collection = 'analyses'
query = {"$and": [{"id": entry_id}]}
query = apply_request_parameters(query, qparams)
query = apply_filters(query, qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection)
count = get_count(client.beacon.analyses, query)
analysis_ids = client.beacon.analyses \
.find_one(query, {"biosampleId": 1, "_id": 0})
analysis_ids=get_cross_query(analysis_ids,'biosampleId','caseLevelData.biosampleId')
query = apply_filters(analysis_ids, qparams.query.filters)
query = apply_filters(analysis_ids, qparams.query.filters, collection)

schema = DefaultSchemas.GENOMICVARIATIONS
count = get_count(client.beacon.genomicVariations, query)
Expand Down
17 changes: 11 additions & 6 deletions beacon/db/biosamples.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,9 @@ def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParam


def get_biosamples(entry_id: Optional[str], qparams: RequestParams):
collection = 'biosamples'
query = apply_request_parameters({}, qparams)
query = apply_filters(query, qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection)
schema = DefaultSchemas.BIOSAMPLES
count = get_count(client.beacon.biosamples, query)
docs = get_documents(
Expand All @@ -93,8 +94,9 @@ def get_biosamples(entry_id: Optional[str], qparams: RequestParams):


def get_biosample_with_id(entry_id: Optional[str], qparams: RequestParams):
collection = 'biosamples'
query = apply_request_parameters({}, qparams)
query = apply_filters(query, qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection)
query = query_id(query, entry_id)
schema = DefaultSchemas.BIOSAMPLES
count = get_count(client.beacon.biosamples, query)
Expand All @@ -108,16 +110,17 @@ def get_biosample_with_id(entry_id: Optional[str], qparams: RequestParams):


def get_variants_of_biosample(entry_id: Optional[str], qparams: RequestParams):
collection = 'biosamples'
query = {"$and": [{"id": entry_id}]}
query = apply_request_parameters(query, qparams)
query = apply_filters(query, qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection)
count = get_count(client.beacon.biosamples, query)
biosamples_ids = client.beacon.biosamples \
.find_one(query, {"id": 1, "_id": 0})
LOG.debug(biosamples_ids)
biosamples_ids=get_cross_query(biosamples_ids,'id','caseLevelData.biosampleId')
LOG.debug(biosamples_ids)
query = apply_filters(biosamples_ids, qparams.query.filters)
query = apply_filters(biosamples_ids, qparams.query.filters, collection)

schema = DefaultSchemas.GENOMICVARIATIONS
count = get_count(client.beacon.genomicVariations, query)
Expand All @@ -131,9 +134,10 @@ def get_variants_of_biosample(entry_id: Optional[str], qparams: RequestParams):


def get_analyses_of_biosample(entry_id: Optional[str], qparams: RequestParams):
collection = 'biosamples'
query = {"biosampleId": entry_id}
query = apply_request_parameters(query, qparams)
query = apply_filters(query, qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection)
schema = DefaultSchemas.ANALYSES
count = get_count(client.beacon.analyses, query)
docs = get_documents(
Expand All @@ -146,9 +150,10 @@ def get_analyses_of_biosample(entry_id: Optional[str], qparams: RequestParams):


def get_runs_of_biosample(entry_id: Optional[str], qparams: RequestParams):
collection = 'biosamples'
query = {"biosampleId": entry_id}
query = apply_request_parameters(query, qparams)
query = apply_filters(query, qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection)
schema = DefaultSchemas.RUNS
count = get_count(client.beacon.runs, query)
docs = get_documents(
Expand Down
9 changes: 6 additions & 3 deletions beacon/db/cohorts.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@


def get_cohorts(entry_id: Optional[str], qparams: RequestParams):
query = apply_filters({}, qparams.query.filters)
collection = 'cohorts'
query = apply_filters({}, qparams.query.filters, collection)
schema = DefaultSchemas.COHORTS
count = get_count(client.beacon.cohorts, query)
docs = get_documents(
Expand All @@ -23,7 +24,8 @@ def get_cohorts(entry_id: Optional[str], qparams: RequestParams):


def get_cohort_with_id(entry_id: Optional[str], qparams: RequestParams):
query = apply_filters({}, qparams.query.filters)
collection = 'cohorts'
query = apply_filters({}, qparams.query.filters, collection)
query = query_id(query, entry_id)
schema = DefaultSchemas.COHORTS
count = get_count(client.beacon.cohorts, query)
Expand All @@ -37,7 +39,8 @@ def get_cohort_with_id(entry_id: Optional[str], qparams: RequestParams):


def get_individuals_of_cohort(entry_id: Optional[str], qparams: RequestParams):
query = apply_filters({}, qparams.query.filters)
collection = 'cohorts'
query = apply_filters({}, qparams.query.filters, collection)
query = query_id(query, entry_id)
count = get_count(client.beacon.cohorts, query)
cohort_ids = client.beacon.cohorts \
Expand Down
29 changes: 18 additions & 11 deletions beacon/db/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@


def get_datasets(entry_id: Optional[str], qparams: RequestParams):
query = apply_filters({}, qparams.query.filters)
collection = 'datasets'
query = apply_filters({}, qparams.query.filters, collection)
schema = DefaultSchemas.DATASETS
count = get_count(client.beacon.datasets, query)
docs = get_documents(
Expand All @@ -24,7 +25,8 @@ def get_datasets(entry_id: Optional[str], qparams: RequestParams):


def get_dataset_with_id(entry_id: Optional[str], qparams: RequestParams):
query = apply_filters({}, qparams.query.filters)
collection = 'datasets'
query = apply_filters({}, qparams.query.filters, collection)
query = query_id(query, entry_id)
schema = DefaultSchemas.DATASETS
count = get_count(client.beacon.datasets, query)
Expand All @@ -38,8 +40,9 @@ def get_dataset_with_id(entry_id: Optional[str], qparams: RequestParams):


def get_variants_of_dataset(entry_id: Optional[str], qparams: RequestParams):
collection = 'datasets'
query = {"_info.datasetId": entry_id}
query = apply_filters(query, qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection)
schema = DefaultSchemas.GENOMICVARIATIONS
count = get_count(client.beacon.genomicVariations, query)
docs = get_documents(
Expand All @@ -52,13 +55,14 @@ def get_variants_of_dataset(entry_id: Optional[str], qparams: RequestParams):


def get_biosamples_of_dataset(entry_id: Optional[str], qparams: RequestParams):
query = apply_filters({}, qparams.query.filters)
collection = 'datasets'
query = apply_filters({}, qparams.query.filters, collection)
query = query_id(query, entry_id)
count = get_count(client.beacon.datasets, query)
biosample_ids = client.beacon.datasets \
.find_one(query, {"ids.biosampleIds": 1, "_id": 0})
biosample_ids=get_cross_query(biosample_ids['ids'],'biosampleIds','id')
query = apply_filters(biosample_ids, qparams.query.filters)
query = apply_filters(biosample_ids, qparams.query.filters, collection)

schema = DefaultSchemas.BIOSAMPLES
count = get_count(client.beacon.biosamples, query)
Expand All @@ -72,13 +76,14 @@ def get_biosamples_of_dataset(entry_id: Optional[str], qparams: RequestParams):


def get_individuals_of_dataset(entry_id: Optional[str], qparams: RequestParams):
query = apply_filters({}, qparams.query.filters)
collection = 'datasets'
query = apply_filters({}, qparams.query.filters, collection)
query = query_id(query, entry_id)
count = get_count(client.beacon.datasets, query)
individual_ids = client.beacon.datasets \
.find_one(query, {"ids.individualIds": 1, "_id": 0})
individual_ids=get_cross_query(individual_ids['ids'],'individualIds','id')
query = apply_filters(individual_ids, qparams.query.filters)
query = apply_filters(individual_ids, qparams.query.filters, collection)

schema = DefaultSchemas.INDIVIDUALS
count = get_count(client.beacon.individuals, query)
Expand All @@ -103,13 +108,14 @@ def get_filtering_terms_of_dataset(entry_id: Optional[str], qparams: RequestPara


def get_runs_of_dataset(entry_id: Optional[str], qparams: RequestParams):
query = apply_filters({}, qparams.query.filters)
collection = 'datasets'
query = apply_filters({}, qparams.query.filters, collection)
query = query_id(query, entry_id)
count = get_count(client.beacon.datasets, query)
biosample_ids = client.beacon.datasets \
.find_one(query, {"ids.biosampleIds": 1, "_id": 0})
biosample_ids=get_cross_query(biosample_ids['ids'],'biosampleIds','biosampleId')
query = apply_filters(biosample_ids, qparams.query.filters)
query = apply_filters(biosample_ids, qparams.query.filters, collection)

schema = DefaultSchemas.RUNS
count = get_count(client.beacon.runs, query)
Expand All @@ -123,13 +129,14 @@ def get_runs_of_dataset(entry_id: Optional[str], qparams: RequestParams):


def get_analyses_of_dataset(entry_id: Optional[str], qparams: RequestParams):
query = apply_filters({}, qparams.query.filters)
collection = 'datasets'
query = apply_filters({}, qparams.query.filters, collection)
query = query_id(query, entry_id)
count = get_count(client.beacon.datasets, query)
biosample_ids = client.beacon.datasets \
.find_one(query, {"ids.biosampleIds": 1, "_id": 0})
biosample_ids=get_cross_query(biosample_ids['ids'],'biosampleIds','biosampleId')
query = apply_filters(biosample_ids, qparams.query.filters)
query = apply_filters(biosample_ids, qparams.query.filters, collection)

schema = DefaultSchemas.ANALYSES
count = get_count(client.beacon.analyses, query)
Expand Down
83 changes: 80 additions & 3 deletions beacon/db/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,35 @@

CURIE_REGEX = r'^([a-zA-Z0-9]*):\/?[a-zA-Z0-9]*$'


def apply_filters(query: dict, filters: List[dict]) -> dict:
def _regex_placeholders(*fields):
    """Return one ``{field: {"$regex": ""}}`` clause per field name.

    Every clause gets its own inner dict, so the clauses never share state.
    """
    return [{field: {"$regex": ""}} for field in fields]


# Per-collection lists of the document fields matched by a text filter.
# The empty "$regex" is only a placeholder; apply_text_filter supplies the
# real pattern and ORs the clauses of the selected collection together.

BIOSAMPLES_FILTERS_MAP = _regex_placeholders(
    "biosampleStatus.label",
    "characteristics.organism.text",
    "sampleOriginType.label",
)

DATASETS_FILTERS_MAP = _regex_placeholders(
    "collections.dataUseConditions.duoDataUse.label",
    "description",
)

GENOMIC_VARIATIONS_FILTERS_MAP = _regex_placeholders(
    "molecularAttributes.molecularEffects.label",
)

INDIVIDUALS_FILTERS_MAP = _regex_placeholders(
    "ethnicity.label",
    "measurementValue.quantity.unit.label",
    "geographicOrigin.label",
    "measures.assayCode.label",
    "diseases.diseaseCode.label",
)

RUNS_FILTERS_MAP = _regex_placeholders(
    "platformModel.label",
    "librarySource.label",
)

def apply_filters(query: dict, filters: List[dict], collection: str) -> dict:
LOG.debug("Filters len = {}".format(len(filters)))
if len(filters) > 0:
query["$and"] = []
Expand All @@ -32,6 +59,11 @@ def apply_filters(query: dict, filters: List[dict]) -> dict:
#partial_query = { "$text": { "$search": "" } }
LOG.debug(partial_query)
partial_query = apply_ontology_filter(partial_query, filter)
elif "text" in filter:
LOG.debug(filter)
filter = CustomFilter(**filter)
LOG.debug("Text filter: %s ", filter.id)
partial_query = apply_text_filter(filter, collection)
else:
filter = CustomFilter(**filter)
LOG.debug("Custom filter: %s", filter.id)
Expand Down Expand Up @@ -104,11 +136,56 @@ def apply_alphanumeric_filter(query: dict, filter: AlphanumericFilter) -> dict:
formatted_operator = format_operator(filter.operator)
if isinstance(formatted_value,list):
query[filter.id] = { formatted_operator: formatted_value }
else:
elif formatted_value.count('.') == 1:
query[filter.id] = { formatted_operator: float(formatted_value) }
else:
query[filter.id] = { formatted_operator: formatted_value }
LOG.debug("QUERY: %s", query)
return query

def apply_text_filter(filter: CustomFilter, collection: str) -> dict:
    """Build a MongoDB query matching ``filter.id`` as text in a collection.

    The text is searched as a literal substring in every field listed in
    the filters map of ``collection``; the per-field clauses are combined
    with ``$or``.

    Args:
        filter: The text filter; its ``id`` holds the text to search for.
        collection: Name of the collection being queried. Collections with
            a filters map are 'individuals', 'biosamples', 'datasets',
            'g_variants' and 'runs'.

    Returns:
        ``{"$or": [{field: {"$regex": pattern}}, ...]}`` for a collection
        with a filters map, or an empty dict (no constraint) when the
        collection has none, so the caller's query is left unrestricted by
        this filter instead of failing.
    """
    # Function-scope import: the file's import block is not part of this
    # block, so the dependency is brought in locally.
    import re

    fields_by_collection = {
        'individuals': INDIVIDUALS_FILTERS_MAP,
        'biosamples': BIOSAMPLES_FILTERS_MAP,
        'datasets': DATASETS_FILTERS_MAP,
        'g_variants': GENOMIC_VARIATIONS_FILTERS_MAP,
        'runs': RUNS_FILTERS_MAP,
    }
    field_map = fields_by_collection.get(collection)
    if field_map is None:
        # Callers also pass collections such as 'analyses' or 'cohorts',
        # for which no text-searchable fields are declared.
        LOG.warning(
            "Text filter %s ignored: no text fields defined for collection %s",
            filter.id, collection)
        return {}

    # filter.id comes from the request, so escape it to keep regex
    # metacharacters from being interpreted as a pattern.
    pattern = f".*{re.escape(str(filter.id))}.*"

    # Build fresh clauses on every call. Writing the pattern into the
    # module-level maps would make every previously returned query (and any
    # concurrent request) share the most recent pattern.
    query = {
        "$or": [
            {field: {"$regex": pattern} for field in entry}
            for entry in field_map
        ]
    }
    LOG.debug("QUERY: %s", query)
    return query


def apply_custom_filter(query: dict, filter: CustomFilter) -> dict:
LOG.debug(query)
Expand Down
Loading

0 comments on commit e197461

Please sign in to comment.