Skip to content

Commit

Permalink
Merge pull request #3 from AllenNeuralDynamics/2-consider-filefield-non-present-if-it-is-empty-but-exists
Browse files Browse the repository at this point in the history

2 consider filefield non present if it is empty but exists
  • Loading branch information
dbirman authored Aug 19, 2024
2 parents d2e540c + 19862ac commit 3d94284
Show file tree
Hide file tree
Showing 10 changed files with 326 additions and 187 deletions.
4 changes: 4 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[run]
omit =
src/aind_metadata_viz/app.py
src/aind_metadata_viz/flask_proxy.py
1 change: 1 addition & 0 deletions src/aind_metadata_viz/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
"""Init package"""

__version__ = "0.3.1"
233 changes: 122 additions & 111 deletions src/aind_metadata_viz/app.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,51 @@
import panel as pn

# import param
import pandas as pd
import altair as alt

from io import StringIO

from aind_metadata_viz.docdb import get_all
from aind_metadata_viz.metadata_helpers import (
process_present_list,
check_present,
)

pn.extension('vega')
pn.extension(design='material')
pn.extension("vega")
pn.extension(design="material")

color_options = {
"default": ["grey", "red"],
"lemonade": ["yellow", "pink"]
}
color_options = {"default": ["grey", "red"], "lemonade": ["yellow", "pink"]}

colors = color_options[pn.state.location.query_params['color']] if 'color' in pn.state.location.query_params else color_options['default']
colors = (
color_options[pn.state.location.query_params["color"]]
if "color" in pn.state.location.query_params
else color_options["default"]
)

data_list = get_all()

# headers = ["_id", "name", "created", "location"]
expected_files = ["data_description", "acquisition", "procedures",
"subject", "instrument", "processing",
"rig", "session", "metadata"]
expected_files = [
"data_description",
"acquisition",
"procedures",
"subject",
"instrument",
"processing",
"rig",
"session",
"metadata",
]


# class Settings(param.Parameterized):
# selected_file = param.String(default=None)
# selected_field = param.String(default=None)


# Deal with setting up settings -- check first if we need to pull from query string
# Deal with setting up settings -- check first if we need to pull from
# query string
# QUERYSTR_FILE = 'file'
# QUERYSTR_FIELD = 'field'
# settings = Settings()
Expand All @@ -39,35 +54,6 @@
# 'selected_field': QUERYSTR_FIELD})


def process_present(data_list, expected_fields):
"""Process a data JSON
Parameters
----------
data_list : _type_
_description_
expected_files : _type_
_description_
Returns
-------
_type_
_description_
"""

output = []

for data in data_list:
present = {}
# For each data asset, check if the expected files are present or null
for field in expected_fields:
present[field] = not (data[field] == None) if field in data.keys() else False

output.append(present)

return output


def compute_count_true(df):
"""For each column, compute the count of true values
Expand All @@ -76,55 +62,67 @@ def compute_count_true(df):
df : _type_
_description_
"""
sum_df = df.sum().to_frame(name='present')
sum_df['absent'] = df.shape[0] - sum_df['present']
sum_df = df.sum().to_frame(name="present")
sum_df["absent"] = df.shape[0] - sum_df["present"]

return sum_df


def build_top():
processed = process_present(data_list, expected_files)
processed = process_present_list(data_list, expected_files)
df = pd.DataFrame(processed, columns=expected_files)

sum_df = compute_count_true(df)
# convert to long form
sum_longform_df = sum_df.reset_index().melt(id_vars='index', var_name='status', value_name='sum')

chart = alt.Chart(sum_longform_df).mark_bar().encode(
x=alt.X('index:N', title=None, axis=alt.Axis(grid=False)),
y=alt.Y('sum:Q', title='Data assets', axis=alt.Axis(grid=False)),
color=alt.Color('status:N',
scale=alt.Scale(domain=['present', 'absent'],
range=colors),
legend=None)
).properties(
width=400
sum_longform_df = sum_df.reset_index().melt(
id_vars="index", var_name="status", value_name="sum"
)

chart = (
alt.Chart(sum_longform_df)
.mark_bar()
.encode(
x=alt.X("index:N", title=None, axis=alt.Axis(grid=False)),
y=alt.Y("sum:Q", title="Data assets", axis=alt.Axis(grid=False)),
color=alt.Color(
"status:N",
scale=alt.Scale(domain=["present", "absent"], range=colors),
legend=None,
),
)
.properties(width=400)
)

legend = alt.Chart(pd.DataFrame({
'status': ['File present', 'File absent'],
'color': colors,
'x': [0, 0],
'y': [15, 0]
})).mark_text(
align='left',
dx=10
).encode(
text=alt.Text('status:N'),
color=alt.Color('color:N', scale=None),
x=alt.value(410), # Adjust position
y=alt.Y('y:Q', scale=None)
legend = (
alt.Chart(
pd.DataFrame(
{
"status": ["File present", "File absent"],
"color": colors,
"x": [0, 0],
"y": [15, 0],
}
)
)
.mark_text(align="left", dx=10)
.encode(
text=alt.Text("status:N"),
color=alt.Color("color:N", scale=None),
x=alt.value(410), # Adjust position
y=alt.Y("y:Q", scale=None),
)
)
return pn.panel(chart + legend)


def build_csv(file, field):
id_fields = ['name', '_id', 'location', 'creation']
# For everybody who is missing the currently active file/field
id_fields = ["name", "_id", "location", "creation"]

df_data = []
for data in data_list:
if not data[file] is None:
if mid_selector.value == ' ' or not field in data[file] or data[file][field] is None:
if mid_selector.value == " " or check_present(field, data[file]):
id_data = {}
for id_field in id_fields:
if id_field in data:
Expand All @@ -145,12 +143,14 @@ def build_csv(file, field):

def build_csv_jscode(event):
csv = build_csv(top_selector.value, mid_selector.value)
csv_escaped = csv.replace('\n', '\\n').replace('"', '\\"') # Escape newlines and double quotes
csv_escaped = csv.replace("\n", "\\n").replace(
'"', '\\"'
) # Escape newlines and double quotes

if not mid_selector.value == ' ':
filename = f'{top_selector.value}-{mid_selector.value}-missing.csv'
if not mid_selector.value == " ":
filename = f"{top_selector.value}-{mid_selector.value}-missing.csv"
else:
filename = f'{top_selector.value}-missing.csv'
filename = f"{top_selector.value}-missing.csv"

js_code = f"""
console.log('here');
Expand All @@ -171,69 +171,80 @@ def build_csv_jscode(event):
window.URL.revokeObjectURL(url);
"""
# it's not clear why this extra clear is needed, but it's necessary for the download to work
js_pane.object = ''
js_pane.object = f'<script>{js_code}</script>'
# it's not clear why this extra clear is needed, but it's
# necessary for the download to work
js_pane.object = ""
js_pane.object = f"<script>{js_code}</script>"


top_selector = pn.widgets.Select(name='Select file:',
options=expected_files)
pn.state.location.sync(top_selector, {'value': 'file'})
top_selector = pn.widgets.Select(name="Select file:", options=expected_files)
pn.state.location.sync(top_selector, {"value": "file"})

mid_selector = pn.widgets.Select(name='Sub-select for:',
options=[])
pn.state.location.sync(mid_selector, {'value': 'field'})
mid_selector = pn.widgets.Select(name="Sub-select for:", options=[])
pn.state.location.sync(mid_selector, {"value": "field"})


download_button = pn.widgets.Button(name='Download')
download_button = pn.widgets.Button(name="Download")
download_button.on_click(build_csv_jscode)


def build_mid(selected):
mid_list = []
for data in data_list:
if not data[selected]==None:
if data[selected] is not None:
mid_list.append(data[selected])

processed = process_present(mid_list, mid_list[0].keys())
processed = process_present_list(mid_list, mid_list[0].keys())
df = pd.DataFrame(processed, columns=mid_list[0].keys())

sum_df = compute_count_true(df)
# convert to long form
sum_longform_df = sum_df.reset_index().melt(id_vars='index', var_name='status', value_name='sum')

chart = alt.Chart(sum_longform_df).mark_bar().encode(
x=alt.X('index:N', title=None, axis=alt.Axis(grid=False)),
y=alt.Y('sum:Q', title='Data assets', axis=alt.Axis(grid=False)),
color=alt.Color('status:N',
scale=alt.Scale(domain=['present', 'absent'],
range=colors),
legend=None)
).properties(
width=400
sum_longform_df = sum_df.reset_index().melt(
id_vars="index", var_name="status", value_name="sum"
)

legend = alt.Chart(pd.DataFrame({
'status': ['File present', 'File absent'],
'color': colors,
'x': [0, 0],
'y': [15, 0]
})).mark_text(
align='left',
dx=10
).encode(
text=alt.Text('status:N'),
color=alt.Color('color:N', scale=None),
x=alt.value(410), # Adjust position
y=alt.Y('y:Q', scale=None)
chart = (
alt.Chart(sum_longform_df)
.mark_bar()
.encode(
x=alt.X("index:N", title=None, axis=alt.Axis(grid=False)),
y=alt.Y("sum:Q", title="Data assets", axis=alt.Axis(grid=False)),
color=alt.Color(
"status:N",
scale=alt.Scale(domain=["present", "absent"], range=colors),
legend=None,
),
)
.properties(width=400)
)

legend = (
alt.Chart(
pd.DataFrame(
{
"status": ["File present", "File absent"],
"color": colors,
"x": [0, 0],
"y": [15, 0],
}
)
)
.mark_text(align="left", dx=10)
.encode(
text=alt.Text("status:N"),
color=alt.Color("color:N", scale=None),
x=alt.value(410), # Adjust position
y=alt.Y("y:Q", scale=None),
)
)

# Also update the selected list
option_list = [' '] + list(mid_list[0].keys())
option_list = [" "] + list(mid_list[0].keys())
mid_selector.options = option_list

return pn.panel(chart + legend)


top_plot = build_top()
mid_plot = pn.bind(build_mid, selected=top_selector)
# Setup the rows
Expand Down
Loading

0 comments on commit 3d94284

Please sign in to comment.