Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
dbirman committed Oct 16, 2024
2 parents b595b6b + 4d4036a commit 7b2214e
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 39 deletions.
1 change: 1 addition & 0 deletions data.json

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions src/aind_metadata_viz/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def build_csv_jscode(event):
download_button.on_click(build_csv_jscode)


def build_mid(selected_file, derived_filter, **args):
def field_present_chart(selected_file, derived_filter, **args):
""" """
db.set_file(selected_file)
db.derived_filter = derived_filter
Expand All @@ -249,16 +249,16 @@ def build_mid(selected_file, derived_filter, **args):
alt.Chart(sum_longform_df)
.mark_bar()
.encode(
x=alt.X("column:N", title=None, axis=alt.Axis(grid=False)),
x=alt.X("field:N", title=None, axis=alt.Axis(grid=False)),
y=alt.Y(
"count:Q",
"sum:Q",
title="Metadata assets (n)",
axis=alt.Axis(grid=False),
),
color=alt.Color(
"category:N",
"state:N",
scale=alt.Scale(
domain=["valid", "present", "missing", "excluded"],
domain=list(colors.keys()),
range=color_list,
),
legend=None,
Expand Down Expand Up @@ -342,7 +342,7 @@ def build_row(selected_modality, derived_filter):
)

mid_plot = pn.bind(
build_mid,
field_present_chart,
selected_file=top_selector,
selected_modality=modality_selector,
derived_filter=derived_selector,
Expand Down
65 changes: 38 additions & 27 deletions src/aind_metadata_viz/docdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ def __init__(
):
"""Initialize"""
# get data
self._data = _get_file_presence(test_mode=test_mode)
self._file_data = _get_file_presence(test_mode=test_mode)
self._field_data = _get_field_presence(test_mode=test_mode)

# setup
(expected_files, _) = self.get_expected_files()
Expand All @@ -84,7 +85,7 @@ def data_filtered(self):
"""
mod_filter = not (self.modality_filter == "all")

filtered_df = self._data.copy()
filtered_df = self._file_data.copy()

# Filter by modality
if mod_filter:
Expand All @@ -107,7 +108,7 @@ def data_modality_filtered(self, modality: str):
modality : str
Modality.ONE_OF
"""
filtered_df = self._data.copy()
filtered_df = self._file_data.copy()

# Apply derived filter
if not (self.derived_filter == "All assets"):
Expand Down Expand Up @@ -192,17 +193,23 @@ def set_file(self, file: str):
def get_file_field_presence(self):
"""Get the presence of fields in a specific file
"""
# expected_fields = (
# self.field_list[0].keys() if len(self.field_list) > 0 else []
# )
# processed = process_record_list(self.field_list, expected_fields)
field_df = self._field_data[self.file]

# print(processed)
# df = pd.DataFrame()
# df = pd.DataFrame(processed, columns=expected_fields)
# we need to filter by the derived/modality filters here but they are in the other dataframe
if not (self.derived_filter == "All assets"):
field_df = field_df[self._file_data["derived"] == (self.derived_filter == "Derived")]

if not self.modality_filter == "all":
field_df = field_df[self._file_data['modalities'].apply(lambda x: self.modality_filter in x.split(','))]

# return compute_count_true(df)
return pd.DataFrame()
df_melted = field_df.melt(
id_vars=[],
var_name="field",
value_name="state"
)
df_summary = df_melted.groupby(["field", "state"]).size().reset_index(name="sum")

return df_summary

def get_csv(self, vp_state: str = "Not Valid/Present"):
"""Build a CSV file of export data based on the selected file and field
Expand Down Expand Up @@ -246,9 +253,7 @@ def _get_file_presence(test_mode=False) -> pd.DataFrame:
_description_, by default False
"""
record_list = _get_all(test_mode=test_mode)
files = list(first_layer_field_mapping.keys())

processed = process_record_list(record_list, files)
processed = process_record_list(record_list, ALL_FILES)

# Now add some information about the records, i.e. modality, derived state, etc.
for i, record in enumerate(record_list):
Expand Down Expand Up @@ -279,26 +284,32 @@ def _get_file_presence(test_mode=False) -> pd.DataFrame:

return pd.DataFrame(
processed,
columns=files
columns=ALL_FILES
+ ["modalities", "derived", "name", "_id", "location", "created"],
)


# @pn.cache(ttl=CACHE_RESET_DAY)
# def _get_field_presence(file: str):
# """Get all and convert to data frame format
@pn.cache(ttl=CACHE_RESET_DAY)
def _get_field_presence(test_mode=False) -> dict:
"""Get all and convert to data frame format
returns a dictionary {file: field_df}
"""
record_list = _get_all(test_mode=test_mode)

# Parameters
# ----------
# test_mode : bool, optional
# _description_, by default False
# """
# record_list = _get_all()
file_dfs = {}
# filter by file
for file in ALL_FILES:
expected_fields = second_layer_field_mappings[file]
# get field presence
field_record_list = [record[file] if file in record else None for record in record_list]
processed = process_record_list(field_record_list, expected_fields, parent=file)

# # filter by file
file_df = pd.DataFrame(processed, columns=expected_fields)

# # get field presence
file_dfs[file] = file_df

return file_dfs

@pn.cache(ttl=CACHE_RESET_DAY)
def _get_all(test_mode=False):
Expand Down
37 changes: 31 additions & 6 deletions src/aind_metadata_viz/metadata_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from aind_metadata_viz.utils import MetaState, expected_files_from_modalities
from aind_data_schema_models.modalities import FileRequirement
from pydantic import ValidationError
from typing import Literal
from typing import Literal, Optional, Union


def _metadata_present_helper(json: str, check_present: bool = True):
Expand Down Expand Up @@ -45,10 +45,32 @@ def _metadata_valid_helper(
json["schema_version"] = first_layer_versions[field]

if field in mapping:
expected_type = mapping[field]
try:
return mapping[field](**json) is not None
origin_type = getattr(expected_type, "__origin__", None)

if origin_type is list:
item_type = expected_type.__args__[0]
return all([item_type(**item_json) for item_json in json])
elif origin_type is Optional:
# skip optional fields!
return True
elif origin_type is Union:
# Get all possible types in the Union
union_types = get_args(expected_type)

for union_type in union_types:
try:
return union_type(**json)
except ValidationError:
continue
else:
return False
else:
# validate as a pydantic model
return expected_type(**json) is not None
except Exception as e:
# print(e)
print(e)
return False


Expand All @@ -70,6 +92,9 @@ def check_metadata_state(field: str, object: dict, parent: str = None) -> str:
# if excluded, just return that
# get the excluded fields from the class map

if not object:
return MetaState.MISSING.value

if (
"data_description" in object
and object["data_description"]
Expand Down Expand Up @@ -100,7 +125,7 @@ def check_metadata_state(field: str, object: dict, parent: str = None) -> str:
# File is required or optional, get the mappings from field -> class
# if you're looking at a parent file's data then you need a different mapping
if parent:
print("not implemented")
class_map = second_layer_field_mappings[parent]
# we're at the top level, just check the first layer mappings
else:
class_map = first_layer_field_mapping
Expand Down Expand Up @@ -130,7 +155,7 @@ def check_metadata_state(field: str, object: dict, parent: str = None) -> str:
return MetaState.MISSING.value


def process_record_list(record_list: list, expected_fields: list):
def process_record_list(record_list: list, expected_fields: list, parent=None):
"""Process a list of Metadata JSON records from DocDB
For each record, check each of the expected fields and see if they are valid/present/missing/excluded
Expand All @@ -147,6 +172,6 @@ def process_record_list(record_list: list, expected_fields: list):
list[{field: MetaState}]
"""
return [
{field: check_metadata_state(field, data) for field in expected_fields}
{field: check_metadata_state(field, data, parent) for field in expected_fields}
for data in record_list
]
46 changes: 46 additions & 0 deletions src/aind_metadata_viz/temp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Scratch/debug script: compute per-file field-presence states offline.

Loads cached DocDB records from ``data.json`` and, for each core metadata
file, classifies every expected field of every record via
``process_record_list`` (valid/present/missing/excluded states), printing
the result.  Mirrors ``docdb._get_field_presence`` for offline debugging
without a live DocDB connection.
"""
# NOTE(review): the wildcard import is redundant with the explicit
# ``process_record_list`` import below and should eventually be removed.
from aind_metadata_viz.metadata_helpers import *
from aind_metadata_viz.docdb import _get_all
import json
from aind_data_schema_models.modalities import (
    Modality,
    ExpectedFiles,
    FileRequirement,
)
from aind_metadata_viz.metadata_helpers import (
    process_record_list,
)
from aind_metadata_viz.metadata_class_map import (
    first_layer_field_mapping,
    second_layer_field_mappings,
)

# Core metadata file names expected on records (sorted for stable output).
ALL_FILES = sorted(
    [
        "data_description",
        "acquisition",
        "procedures",
        "subject",
        "instrument",
        "processing",
        "rig",
        "session",
        "quality_control",
    ]
)


# records = _get_all()  # live DocDB fetch; using the cached data.json instead

# json.load reads straight from the handle (no intermediate string buffer).
with open("data.json", "r", encoding="utf-8") as f:
    record_list = json.load(f)


file_dfs = {}
# For each core file, classify every expected field of every record.
for file in ALL_FILES:
    expected_fields = second_layer_field_mappings[file]
    # A record may lack this file entirely; .get() yields None so the
    # downstream check marks all of its fields as missing (single lookup).
    field_record_list = [record.get(file) for record in record_list]

    processed = process_record_list(field_record_list, expected_fields, parent=file)

    print(processed)

0 comments on commit 7b2214e

Please sign in to comment.