Skip to content

Commit

Permalink
297 visits stop-gap bugfix (#298)
Browse files Browse the repository at this point in the history
* intermediate work for the bug fix

* get features endpoint working

* taking into account > and < operators

* fixed the counting issue for association to all features

* minor condition check simplification
  • Loading branch information
hyi authored Nov 15, 2023
1 parent ecf8c41 commit ef94115
Showing 1 changed file with 107 additions and 56 deletions.
163 changes: 107 additions & 56 deletions icees_api/features/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def normalize_feature(year, feature):
}


def simplify_value(val_str):
def simplify_value(val_str, opr):
"""
simplify value string to integer string if appropriate, e.g., from 1.0 to 1
"""
Expand All @@ -456,19 +456,22 @@ def simplify_value(val_str):
return val_str
value_int = int(value_float)
if value_int == value_float:
return str(value_int)
# return int type if less or greater operator is involved, otherwise, return str type
if opr in ['>', '<', '>=', '<=']:
return value_int
else:
return str(value_int)
return val_str


def get_count(results, **constraints):
"""Get sum of result counts that meet constraints."""
count = 0

for result in results:
if all(
OP_MAP[constraint["operator"]](
simplify_value(result.get(feature, None)),
simplify_value(constraint.get("value", constraint.get("values"))),
simplify_value(result.get(feature, None), constraint["operator"]),
simplify_value(constraint.get("value", constraint.get("values")), constraint["operator"]),
)
for feature, constraint in constraints.items()
):
Expand Down Expand Up @@ -680,7 +683,6 @@ def select_feature_matrix(
} for j, cell in enumerate(row)
] for i, row in enumerate(feature_matrix)
]

feature_a_norm_with_biolink_class = {
**feature_a_norm
}
Expand Down Expand Up @@ -780,24 +782,65 @@ def select_feature_count_all_values(
values[value] = count
total = sum(values.values())
levels = list(levels)
for value in values.keys():
if value not in levels:
levels.append(value)

value_to_be_added = [value for value in values.keys() if value not in levels]
# Handle level definitions that include an operator by splitting the operator and the value into a dict item,
# e.g., one enum level defined for TotalEDInpatientVisits is >9. However, do not split the operator from the
# value when the two together are themselves a stored value, e.g., for the AgeStudyStart2 feature variable,
# <5 is a value generated by FHIR PIT and stored in the database. To tell the two cases apart, check whether
# the level (operator included) is one of the keys of the values dictionary, and split the operator from
# the value only when it is not.
level_op_val = {}
for lev in levels:
if lev not in values.keys() and isinstance(lev, str) and lev[0] in ['<', '>']:
level_op_val[lev[0]] = int(lev[1:])
levels.remove(lev)
for value in value_to_be_added:
# only append value to levels when value is not in levels and level_op_val is empty or
# value does not satisfy the operations encoded in the level_op_val
if not level_op_val or \
('<' in level_op_val and int(value) >= level_op_val['<']) or \
('>' in level_op_val and int(value) <= level_op_val['>']):
levels.append(value)
feat_qualifiers = [{
"operator": "=",
"value": level
} for level in levels]

feat_matrix = [
{"frequency": a, "percentage": div(a, total)}
for level in levels if (a := values[level]) is not None
]
if level_op_val:
feat_qualifiers.extend([{
"operator": k,
"value": v
} for k, v in level_op_val.items()])
greater_value_sum = less_value_sum = 0
for value in value_to_be_added:
int_val = int(value)
if '<' in level_op_val and int_val < level_op_val['<']:
less_value_sum += values[value]
if '>' in level_op_val and int_val > level_op_val['>']:
greater_value_sum += values[value]
if greater_value_sum > 0:
feat_matrix.append({
"frequency": greater_value_sum,
"percentage": div(greater_value_sum, total)
})
if less_value_sum > 0:
feat_matrix.append({
"frequency": less_value_sum,
"percentage": div(less_value_sum, total)
})
feature_a_norm_with_biolink_class = {
"feature_name": feature_name,
"feature_qualifiers": [{
"operator": "=",
"value": level
} for level in levels],
"feature_qualifiers": feat_qualifiers,
"year": year
}
count = {
"feature": feature_a_norm_with_biolink_class,
"feature_matrix": [
{"frequency": a, "percentage": div(a, total)}
for level in levels if (a := values[level]) is not None
]
"feature_matrix": feat_matrix
}
return count

Expand Down Expand Up @@ -873,10 +916,8 @@ def select_associations_to_all_features(
feature_as = [
{
"feature_name": feature_name,
"feature_qualifiers": list(map(
lambda level: {"operator": "=", "value": level},
get_feature_levels(feature_name),
))
"feature_qualifiers": get_operator_and_value(get_feature_levels(feature_name),
feature_name)
}
for feature_name in filter(feature_filter_a, get_features(conn, table))
]
Expand All @@ -886,10 +927,8 @@ def select_associations_to_all_features(
feature_bs = [
{
"feature_name": feature_name,
"feature_qualifiers": list(map(
lambda level: {"operator": "=", "value": level},
get_feature_levels(feature_name),
))
"feature_qualifiers": get_operator_and_value(get_feature_levels(feature_name),
feature_name)
}
for feature_name in filter(feature_filter_b, get_features(conn, table))
]
Expand Down Expand Up @@ -992,6 +1031,40 @@ def validate_feature_value_in_table_column_for_equal_operator(conn, table_name,
return


def get_operator_and_value(input_levels, feat_name, append_feature_variable=False):
    """
    get operator and value from each input level and return them as a list of feature qualifiers

    Each input level is either a plain value, for which the equal operator is implied, or a string made of
    leading comparison operator characters ('<', '>', '=', so '<=' and '>=' are supported as well) followed
    by the value, e.g., '>9'. Levels that are not strings are always treated as plain values.

    :param input_levels: iterable of feature levels to convert
    :param feat_name: name of the feature variable the levels belong to
    :param append_feature_variable: when True, each qualifier is wrapped in a dict keyed by feat_name
    :return: list of {"operator": ..., "value": ...} dicts, or a list of {feat_name: {"operator": ...,
        "value": ...}} dicts when append_feature_variable is True. The value is kept as found in the level
        (a split value stays a string); no numeric conversion is done here.
    """
    # '=' has to be accepted along with '<' and '>' so that '>=5' splits into '>=' and '5'
    # rather than into '>' and '=5'
    operator_chars = ('<', '>', '=')
    # checking if feature variable name contains Age is a stop-gap solution, which will be removed after
    # Age-related variable binning is removed in FHIR PIT and dataset is updated; for such variables a level
    # like '<5' is kept whole as a value with the equal operator
    keep_level_as_value = 'Age' in feat_name

    fqs = []
    for input_level in input_levels:
        op = '='
        op_val = input_level
        if not keep_level_as_value and isinstance(input_level, str):
            # count how many leading characters belong to the operator
            non_op_idx = 0
            while non_op_idx < len(input_level) and input_level[non_op_idx] in operator_chars:
                non_op_idx += 1
            if non_op_idx:
                op = input_level[:non_op_idx]
                op_val = input_level[non_op_idx:]
        qualifier = {"operator": op, "value": op_val}
        fqs.append({feat_name: qualifier} if append_feature_variable else qualifier)
    return fqs


def compute_multivariate_table(conn, table_name, year, cohort_id, feature_variables):
cohort_meta = get_features_by_id(conn, table_name, cohort_id)
if cohort_meta is None:
Expand All @@ -1004,44 +1077,24 @@ def compute_multivariate_table(conn, table_name, year, cohort_id, feature_variab
"for computing multivariate associations")

# get feature_constraint list from the first feature variable
feat_constraint_list = []
levels0 = get_feature_levels(feature_variables[0], year=year)
for level in levels0:
non_op_idx = 0
if isinstance(level, str):
for lev in level:
if lev in ['<', '>', '=']:
non_op_idx += 1
else:
break
if non_op_idx == 0:
op = '='
op_val = level
else:
op = level[:non_op_idx]
op_val = level[non_op_idx:]
feat_constraint_list.append({
feature_variables[0]: {"operator": op, "value": op_val}
})
feat_constraint_list = get_operator_and_value(get_feature_levels(feature_variables[0], year=year),
feature_variables[0], append_feature_variable=True)

index = 1
while index + 2 <= feat_len:
feature_as = [
{
"feature_name": feature_variables[index],
"feature_qualifiers": list(map(
lambda level: {"operator": "=", "value": level},
get_feature_levels(feature_variables[index], year=year),
))
"feature_qualifiers": get_operator_and_value(get_feature_levels(feature_variables[index], year=year),
feature_variables[index])
}
]
feature_bs = [
{
"feature_name": feature_variables[index + 1],
"feature_qualifiers": list(map(
lambda level: {"operator": "=", "value": level},
get_feature_levels(feature_variables[index + 1], year=year),
))
"feature_qualifiers": get_operator_and_value(get_feature_levels(feature_variables[index + 1],
year=year),
feature_variables[index + 1])
}
]
# add more constraints to feat_constraint_list as needed depending on feature_a and feature_b levels
Expand All @@ -1066,10 +1119,8 @@ def compute_multivariate_table(conn, table_name, year, cohort_id, feature_variab
index += 2

if index < feat_len:
feature_qualifiers = list(map(
lambda level: {"operator": "=", "value": level},
get_feature_levels(feature_variables[index], year=year)
))
feature_qualifiers = get_operator_and_value(get_feature_levels(feature_variables[index], year=year),
feature_variables[index])
more_constraint_list = []
for feature_constraint in feat_constraint_list:
for fq in feature_qualifiers:
Expand Down

0 comments on commit ef94115

Please sign in to comment.