diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..036d2af
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,4 @@
+[run]
+omit =
+ src/aind_metadata_viz/app.py
+ src/aind_metadata_viz/flask_proxy.py
\ No newline at end of file
diff --git a/src/aind_metadata_viz/__init__.py b/src/aind_metadata_viz/__init__.py
index b71b135..164bf60 100644
--- a/src/aind_metadata_viz/__init__.py
+++ b/src/aind_metadata_viz/__init__.py
@@ -1,2 +1,3 @@
"""Init package"""
+
__version__ = "0.3.1"
diff --git a/src/aind_metadata_viz/app.py b/src/aind_metadata_viz/app.py
index 1c17062..dc4409e 100644
--- a/src/aind_metadata_viz/app.py
+++ b/src/aind_metadata_viz/app.py
@@ -1,4 +1,5 @@
import panel as pn
+
# import param
import pandas as pd
import altair as alt
@@ -6,23 +7,36 @@
from io import StringIO
from aind_metadata_viz.docdb import get_all
+from aind_metadata_viz.metadata_helpers import (
+ process_present_list,
+ check_present,
+)
-pn.extension('vega')
-pn.extension(design='material')
+pn.extension("vega")
+pn.extension(design="material")
-color_options = {
- "default": ["grey", "red"],
- "lemonade": ["yellow", "pink"]
-}
+color_options = {"default": ["grey", "red"], "lemonade": ["yellow", "pink"]}
-colors = color_options[pn.state.location.query_params['color']] if 'color' in pn.state.location.query_params else color_options['default']
+colors = (
+ color_options[pn.state.location.query_params["color"]]
+ if "color" in pn.state.location.query_params
+ else color_options["default"]
+)
data_list = get_all()
# headers = ["_id", "name", "created", "location"]
-expected_files = ["data_description", "acquisition", "procedures",
- "subject", "instrument", "processing",
- "rig", "session", "metadata"]
+expected_files = [
+ "data_description",
+ "acquisition",
+ "procedures",
+ "subject",
+ "instrument",
+ "processing",
+ "rig",
+ "session",
+ "metadata",
+]
# class Settings(param.Parameterized):
@@ -30,7 +44,8 @@
# selected_field = param.String(default=None)
-# Deal with setting up settings -- check first if we need to pull from query string
+# Deal with setting up settings -- check first if we need to pull from
+# query string
# QUERYSTR_FILE = 'file'
# QUERYSTR_FIELD = 'field'
# settings = Settings()
@@ -39,35 +54,6 @@
# 'selected_field': QUERYSTR_FIELD})
-def process_present(data_list, expected_fields):
- """Process a data JSON
-
- Parameters
- ----------
- data_list : _type_
- _description_
- expected_files : _type_
- _description_
-
- Returns
- -------
- _type_
- _description_
- """
-
- output = []
-
- for data in data_list:
- present = {}
- # For each data asset, check if the expected files are present or null
- for field in expected_fields:
- present[field] = not (data[field] == None) if field in data.keys() else False
-
- output.append(present)
-
- return output
-
-
def compute_count_true(df):
"""For each column, compute the count of true values
@@ -76,55 +62,67 @@ def compute_count_true(df):
df : _type_
_description_
"""
- sum_df = df.sum().to_frame(name='present')
- sum_df['absent'] = df.shape[0] - sum_df['present']
+ sum_df = df.sum().to_frame(name="present")
+ sum_df["absent"] = df.shape[0] - sum_df["present"]
return sum_df
def build_top():
- processed = process_present(data_list, expected_files)
+ processed = process_present_list(data_list, expected_files)
df = pd.DataFrame(processed, columns=expected_files)
sum_df = compute_count_true(df)
# convert to long form
- sum_longform_df = sum_df.reset_index().melt(id_vars='index', var_name='status', value_name='sum')
-
- chart = alt.Chart(sum_longform_df).mark_bar().encode(
- x=alt.X('index:N', title=None, axis=alt.Axis(grid=False)),
- y=alt.Y('sum:Q', title='Data assets', axis=alt.Axis(grid=False)),
- color=alt.Color('status:N',
- scale=alt.Scale(domain=['present', 'absent'],
- range=colors),
- legend=None)
- ).properties(
- width=400
+ sum_longform_df = sum_df.reset_index().melt(
+ id_vars="index", var_name="status", value_name="sum"
+ )
+
+ chart = (
+ alt.Chart(sum_longform_df)
+ .mark_bar()
+ .encode(
+ x=alt.X("index:N", title=None, axis=alt.Axis(grid=False)),
+ y=alt.Y("sum:Q", title="Data assets", axis=alt.Axis(grid=False)),
+ color=alt.Color(
+ "status:N",
+ scale=alt.Scale(domain=["present", "absent"], range=colors),
+ legend=None,
+ ),
+ )
+ .properties(width=400)
)
- legend = alt.Chart(pd.DataFrame({
- 'status': ['File present', 'File absent'],
- 'color': colors,
- 'x': [0, 0],
- 'y': [15, 0]
- })).mark_text(
- align='left',
- dx=10
- ).encode(
- text=alt.Text('status:N'),
- color=alt.Color('color:N', scale=None),
- x=alt.value(410), # Adjust position
- y=alt.Y('y:Q', scale=None)
+ legend = (
+ alt.Chart(
+ pd.DataFrame(
+ {
+ "status": ["File present", "File absent"],
+ "color": colors,
+ "x": [0, 0],
+ "y": [15, 0],
+ }
+ )
+ )
+ .mark_text(align="left", dx=10)
+ .encode(
+ text=alt.Text("status:N"),
+ color=alt.Color("color:N", scale=None),
+ x=alt.value(410), # Adjust position
+ y=alt.Y("y:Q", scale=None),
+ )
)
return pn.panel(chart + legend)
def build_csv(file, field):
- id_fields = ['name', '_id', 'location', 'creation']
+ # Keep assets missing the selected field; keep all when no field is chosen
+ id_fields = ["name", "_id", "location", "creation"]
df_data = []
for data in data_list:
if not data[file] is None:
- if mid_selector.value == ' ' or not field in data[file] or data[file][field] is None:
+ if mid_selector.value == " " or not check_present(field, data[file]):
id_data = {}
for id_field in id_fields:
if id_field in data:
def build_csv_jscode(event):
csv = build_csv(top_selector.value, mid_selector.value)
- csv_escaped = csv.replace('\n', '\\n').replace('"', '\\"') # Escape newlines and double quotes
+ csv_escaped = csv.replace("\n", "\\n").replace(
+ '"', '\\"'
+ ) # Escape newlines and double quotes
- if not mid_selector.value == ' ':
- filename = f'{top_selector.value}-{mid_selector.value}-missing.csv'
+ if not mid_selector.value == " ":
+ filename = f"{top_selector.value}-{mid_selector.value}-missing.csv"
else:
- filename = f'{top_selector.value}-missing.csv'
+ filename = f"{top_selector.value}-missing.csv"
js_code = f"""
console.log('here');
@@ -171,69 +171,80 @@ def build_csv_jscode(event):
window.URL.revokeObjectURL(url);
"""
- # it's not clear why this extra clear is needed, but it's necessary for the download to work
- js_pane.object = ''
- js_pane.object = f''
+ # it's not clear why this extra clear is needed, but it's
+ # necessary for the download to work
+ js_pane.object = ""
+ js_pane.object = f""
+
-top_selector = pn.widgets.Select(name='Select file:',
- options=expected_files)
-pn.state.location.sync(top_selector, {'value': 'file'})
+top_selector = pn.widgets.Select(name="Select file:", options=expected_files)
+pn.state.location.sync(top_selector, {"value": "file"})
-mid_selector = pn.widgets.Select(name='Sub-select for:',
- options=[])
-pn.state.location.sync(mid_selector, {'value': 'field'})
+mid_selector = pn.widgets.Select(name="Sub-select for:", options=[])
+pn.state.location.sync(mid_selector, {"value": "field"})
-download_button = pn.widgets.Button(name='Download')
+download_button = pn.widgets.Button(name="Download")
download_button.on_click(build_csv_jscode)
def build_mid(selected):
mid_list = []
for data in data_list:
- if not data[selected]==None:
+ if data[selected] is not None:
mid_list.append(data[selected])
- processed = process_present(mid_list, mid_list[0].keys())
+ processed = process_present_list(mid_list, mid_list[0].keys())
df = pd.DataFrame(processed, columns=mid_list[0].keys())
sum_df = compute_count_true(df)
# convert to long form
- sum_longform_df = sum_df.reset_index().melt(id_vars='index', var_name='status', value_name='sum')
-
- chart = alt.Chart(sum_longform_df).mark_bar().encode(
- x=alt.X('index:N', title=None, axis=alt.Axis(grid=False)),
- y=alt.Y('sum:Q', title='Data assets', axis=alt.Axis(grid=False)),
- color=alt.Color('status:N',
- scale=alt.Scale(domain=['present', 'absent'],
- range=colors),
- legend=None)
- ).properties(
- width=400
+ sum_longform_df = sum_df.reset_index().melt(
+ id_vars="index", var_name="status", value_name="sum"
)
- legend = alt.Chart(pd.DataFrame({
- 'status': ['File present', 'File absent'],
- 'color': colors,
- 'x': [0, 0],
- 'y': [15, 0]
- })).mark_text(
- align='left',
- dx=10
- ).encode(
- text=alt.Text('status:N'),
- color=alt.Color('color:N', scale=None),
- x=alt.value(410), # Adjust position
- y=alt.Y('y:Q', scale=None)
+ chart = (
+ alt.Chart(sum_longform_df)
+ .mark_bar()
+ .encode(
+ x=alt.X("index:N", title=None, axis=alt.Axis(grid=False)),
+ y=alt.Y("sum:Q", title="Data assets", axis=alt.Axis(grid=False)),
+ color=alt.Color(
+ "status:N",
+ scale=alt.Scale(domain=["present", "absent"], range=colors),
+ legend=None,
+ ),
+ )
+ .properties(width=400)
+ )
+
+ legend = (
+ alt.Chart(
+ pd.DataFrame(
+ {
+ "status": ["File present", "File absent"],
+ "color": colors,
+ "x": [0, 0],
+ "y": [15, 0],
+ }
+ )
+ )
+ .mark_text(align="left", dx=10)
+ .encode(
+ text=alt.Text("status:N"),
+ color=alt.Color("color:N", scale=None),
+ x=alt.value(410), # Adjust position
+ y=alt.Y("y:Q", scale=None),
+ )
)
# Also update the selected list
- option_list = [' '] + list(mid_list[0].keys())
+ option_list = [" "] + list(mid_list[0].keys())
mid_selector.options = option_list
return pn.panel(chart + legend)
-
+
top_plot = build_top()
mid_plot = pn.bind(build_mid, selected=top_selector)
# Setup the rows
diff --git a/src/aind_metadata_viz/docdb.py b/src/aind_metadata_viz/docdb.py
index 929ef98..682e591 100644
--- a/src/aind_metadata_viz/docdb.py
+++ b/src/aind_metadata_viz/docdb.py
@@ -7,44 +7,45 @@
COLLECTION = "data_assets"
docdb_api_client = MetadataDbClient(
- host=API_GATEWAY_HOST,
- database=DATABASE,
- collection=COLLECTION,
+ host=API_GATEWAY_HOST,
+ database=DATABASE,
+ collection=COLLECTION,
)
@pn.cache
-def get_all():
+def get_all(test_mode=False):
filter = {}
- limit = 0
+ limit = 0 if not test_mode else 10
paginate_batch_size = 1000
response = docdb_api_client.retrieve_docdb_records(
filter_query=filter,
limit=limit,
- paginate_batch_size=paginate_batch_size
+ paginate_batch_size=paginate_batch_size,
)
return response
+
@pn.cache
def get_subjects():
filter = {
- 'subject.subject_id': {'$exists': True},
- 'session': {'$ne': None}
- }
+ "subject.subject_id": {"$exists": True},
+ "session": {"$ne": None},
+ }
limit = 1000
paginate_batch_size = 100
response = docdb_api_client.retrieve_docdb_records(
filter_query=filter,
- projection={'_id': 0, 'subject.subject_id': 1},
+ projection={"_id": 0, "subject.subject_id": 1},
limit=limit,
- paginate_batch_size=paginate_batch_size
+ paginate_batch_size=paginate_batch_size,
)
# turn this into a list instead of a nested list
subjects = []
for data in response:
- subjects.append(np.int32(data['subject']['subject_id']))
+ subjects.append(np.int32(data["subject"]["subject_id"]))
return np.unique(subjects).tolist()
@@ -63,15 +64,16 @@ def get_sessions(subject_id):
_type_
_description_
"""
- filter = {"subject.subject_id": str(subject_id),
- "session": {"$ne": "null"}}
+ filter = {
+ "subject.subject_id": str(subject_id),
+ "session": {"$ne": "null"},
+ }
response = docdb_api_client.retrieve_docdb_records(
- filter_query=filter,
- projection={'_id': 0, 'session': 1}
+ filter_query=filter, projection={"_id": 0, "session": 1}
)
sessions = []
for data in response:
- sessions.append(data['session'])
+ sessions.append(data["session"])
return sessions
diff --git a/src/aind_metadata_viz/dynamic_js.py b/src/aind_metadata_viz/dynamic_js.py
deleted file mode 100644
index 923f84f..0000000
--- a/src/aind_metadata_viz/dynamic_js.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import panel as pn
-
-# Initialize Panel extension
-pn.extension()
-
-# Example parameter that you might want to change
-dynamic_parameter = pn.widgets.TextInput(name='Parameter', value='Hello')
-
-# Placeholder for JavaScript code
-js_code = """
-console.log("Static message");
-"""
-
-# Create a Panel HTML pane with the initial JavaScript code
-js_pane = pn.pane.HTML(f"", height=0, width=0)
-
-
-# Function to update the JavaScript dynamically
-def update_js(event):
- new_js_code = f"""
- console.log("{dynamic_parameter.value}");
- """
- js_pane.object = f""
-
-
-# Button to trigger JavaScript update
-button = pn.widgets.Button(name='Run JS', button_type='primary')
-
-# Link button click event to update function
-button.on_click(update_js)
-
-# Layout to display everything
-app = pn.Column(dynamic_parameter, button, js_pane)
-
-app.servable()
diff --git a/src/aind_metadata_viz/flask_proxy.py b/src/aind_metadata_viz/flask_proxy.py
index 65e3474..ca160d3 100644
--- a/src/aind_metadata_viz/flask_proxy.py
+++ b/src/aind_metadata_viz/flask_proxy.py
@@ -4,18 +4,28 @@
app = Flask(__name__)
# Start the Panel server
-subprocess.Popen(["panel", "serve", "src/aind_metadata_viz/app.py", "--address", "0.0.0.0", "--port", "5006"])
-
-
-@app.route('/')
+subprocess.Popen(
+ [
+ "panel",
+ "serve",
+ "src/aind_metadata_viz/app.py",
+ "--address",
+ "0.0.0.0",
+ "--port",
+ "5006",
+ ]
+)
+
+
+@app.route("/")
def index():
return redirect("/app")
-@app.route('/<path:path>')
+@app.route("/<path:path>")
def proxy(path):
- return send_from_directory('static', path)
+ return send_from_directory("static", path)
-if __name__ == '__main__':
- app.run(host="0.0.0.0", port=8000)
\ No newline at end of file
+if __name__ == "__main__":
+ app.run(host="0.0.0.0", port=8000)
diff --git a/src/aind_metadata_viz/metadata_helpers.py b/src/aind_metadata_viz/metadata_helpers.py
new file mode 100644
index 0000000..0b20fab
--- /dev/null
+++ b/src/aind_metadata_viz/metadata_helpers.py
@@ -0,0 +1,41 @@
+def check_present(key: str, object: dict):
+    """Return True if key is in object and its value is not empty
+
+    None, "", [] and {} count as empty; 0 and False count as present.
+
+    Parameters
+    ----------
+    key : str
+        Key to look up
+    object : dict
+        Dictionary, e.g. a parsed JSON object
+
+    Returns
+    -------
+    bool
+        True when the key exists and holds a non-empty value
+    """
+    return key in object and object[key] not in (None, "", [], {})
+
+
+def process_present_dict(data: dict, expected_fields: list):
+    """Map each expected field to whether it is present in data"""
+    return {field: check_present(field, data) for field in expected_fields}
+
+
+def process_present_list(data_list: list, expected_fields: list):
+    """Run process_present_dict over every dict in data_list
+
+    Parameters
+    ----------
+    data_list : list
+        Dictionaries to check
+    expected_fields : list
+        Keys to look for in each dictionary
+
+    Returns
+    -------
+    list
+        One {field: bool} dictionary per input, in input order
+    """
+    return [process_present_dict(data, expected_fields) for data in data_list]
diff --git a/tests/test_docdb.py b/tests/test_docdb.py
new file mode 100644
index 0000000..7ea9fb5
--- /dev/null
+++ b/tests/test_docdb.py
@@ -0,0 +1,42 @@
+"""Tests for the DocDB query helpers."""
+
+import unittest
+
+from aind_metadata_viz.docdb import get_subjects, get_sessions, get_all
+
+
+class DocDBTest(unittest.TestCase):
+ """Test the DocDB query helpers"""
+
+ # NOTE(review): no mocks here; presumably queries the live DocDB
+
+ def test_get_subjects(self):
+ """Check that subject 596930 is in the get_subjects() result"""
+ self.assertIn(596930, get_subjects())
+
+ def test_get_sessions(self):
+ """Check that get_sessions(596930) returns exactly one session"""
+ self.assertEqual(1, len(get_sessions(596930)))
+
+ def test_get_all(self):
+ """Check the subject IDs returned by get_all(test_mode=True)"""
+ data = get_all(test_mode=True)
+ first_ten_subjects = [
+ "271246",
+ "666612",
+ "673594",
+ "719093",
+ "651474",
+ "666612",
+ "666612",
+ "651474",
+ "719093",
+ "673594",
+ ]
+ subj_id = [dat["subject"]["subject_id"] for dat in data]
+
+ self.assertEqual(subj_id, first_ten_subjects)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_example.py b/tests/test_example.py
deleted file mode 100644
index 06e9e0d..0000000
--- a/tests/test_example.py
+++ /dev/null
@@ -1,16 +0,0 @@
-"""Example test template."""
-
-import unittest
-
-
-class ExampleTest(unittest.TestCase):
- """Example Test Class"""
-
- def test_assert_example(self):
- """Example of how to test the truth of a statement."""
-
- self.assertTrue(1 == 1)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/test_metadata_helpers.py b/tests/test_metadata_helpers.py
new file mode 100644
index 0000000..217551f
--- /dev/null
+++ b/tests/test_metadata_helpers.py
@@ -0,0 +1,79 @@
+"""Test the metadata helper functions"""
+
+import unittest
+from aind_metadata_viz.metadata_helpers import (
+ check_present,
+ process_present_dict,
+ process_present_list,
+)
+
+
+class TestApp(unittest.TestCase):
+ """Test the presence-check helpers in metadata_helpers"""
+
+ def setUp(self) -> None:
+ self.dict = {
+ "test1": None,
+ "test2": "",
+ "test3": {},
+ "test4": [],
+ "test5": "actual data",
+ "test6": 1,
+ "test7": {"actual key": "actual value"},
+ "test8": object,  # the builtin class itself, i.e. a non-JSON value
+ }
+ self.expected_fields = [
+ "test1",
+ "test2",
+ "test3",
+ "test4",
+ "test5",
+ "test6",
+ "test7",
+ "test8",
+ "meow",  # deliberately not a key of self.dict
+ ]
+ self.expected_out = {
+ "test1": False,
+ "test2": False,
+ "test3": False,
+ "test4": False,
+ "test5": True,
+ "test6": True,
+ "test7": True,
+ "test8": True,
+ "meow": False,
+ }
+
+ return super().setUp()
+
+ def test_check_present(self):
+ """Test check_present on empty and non-empty values"""
+ self.assertFalse(check_present("test1", self.dict))
+ self.assertFalse(check_present("test2", self.dict))
+ self.assertFalse(check_present("test3", self.dict))
+ self.assertFalse(check_present("test4", self.dict))
+
+ self.assertTrue(check_present("test5", self.dict))
+ self.assertTrue(check_present("test6", self.dict))
+ self.assertTrue(check_present("test7", self.dict))
+ self.assertTrue(check_present("test8", self.dict))
+
+ def test_process_present_dict(self):
+ """Test process_present_dict, including a key absent from the dict"""
+ out_test = process_present_dict(self.dict, self.expected_fields)
+
+ self.assertEqual(self.expected_out, out_test)
+
+ def test_process_present(self):
+ """Test process_present_list on a list of two identical dicts"""
+ data_list = [self.dict, self.dict]
+
+ processed_list = process_present_list(data_list, self.expected_fields)
+ out_list = [self.expected_out, self.expected_out]
+
+ self.assertEqual(processed_list, out_list)
+
+
+if __name__ == "__main__":
+ unittest.main()