diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..036d2af --- /dev/null +++ b/.coveragerc @@ -0,0 +1,4 @@ +[run] +omit = + src/aind_metadata_viz/app.py + src/aind_metadata_viz/flask_proxy.py \ No newline at end of file diff --git a/src/aind_metadata_viz/__init__.py b/src/aind_metadata_viz/__init__.py index b71b135..164bf60 100644 --- a/src/aind_metadata_viz/__init__.py +++ b/src/aind_metadata_viz/__init__.py @@ -1,2 +1,3 @@ """Init package""" + __version__ = "0.3.1" diff --git a/src/aind_metadata_viz/app.py b/src/aind_metadata_viz/app.py index 1c17062..dc4409e 100644 --- a/src/aind_metadata_viz/app.py +++ b/src/aind_metadata_viz/app.py @@ -1,4 +1,5 @@ import panel as pn + # import param import pandas as pd import altair as alt @@ -6,23 +7,36 @@ from io import StringIO from aind_metadata_viz.docdb import get_all +from aind_metadata_viz.metadata_helpers import ( + process_present_list, + check_present, +) -pn.extension('vega') -pn.extension(design='material') +pn.extension("vega") +pn.extension(design="material") -color_options = { - "default": ["grey", "red"], - "lemonade": ["yellow", "pink"] -} +color_options = {"default": ["grey", "red"], "lemonade": ["yellow", "pink"]} -colors = color_options[pn.state.location.query_params['color']] if 'color' in pn.state.location.query_params else color_options['default'] +colors = ( + color_options[pn.state.location.query_params["color"]] + if "color" in pn.state.location.query_params + else color_options["default"] +) data_list = get_all() # headers = ["_id", "name", "created", "location"] -expected_files = ["data_description", "acquisition", "procedures", - "subject", "instrument", "processing", - "rig", "session", "metadata"] +expected_files = [ + "data_description", + "acquisition", + "procedures", + "subject", + "instrument", + "processing", + "rig", + "session", + "metadata", +] # class Settings(param.Parameterized): @@ -30,7 +44,8 @@ # selected_field = param.String(default=None) -# Deal with setting up settings -- check first if we need to pull from query string +# Deal with setting up settings -- check first if we need to pull from +# query string # QUERYSTR_FILE = 'file' # QUERYSTR_FIELD = 'field' # settings = Settings() @@ -39,35 +54,6 @@ # 'selected_field': QUERYSTR_FIELD}) -def process_present(data_list, expected_fields): - """Process a data JSON - - Parameters - ---------- - data_list : _type_ - _description_ - expected_files : _type_ - _description_ - - Returns - ------- - _type_ - _description_ - """ - - output = [] - - for data in data_list: - present = {} - # For each data asset, check if the expected files are present or null - for field in expected_fields: - present[field] = not (data[field] == None) if field in data.keys() else False - - output.append(present) - - return output - - def compute_count_true(df): """For each column, compute the count of true values @@ -76,55 +62,67 @@ def compute_count_true(df): df : _type_ _description_ """ - sum_df = df.sum().to_frame(name='present') - sum_df['absent'] = df.shape[0] - sum_df['present'] + sum_df = df.sum().to_frame(name="present") + sum_df["absent"] = df.shape[0] - sum_df["present"] return sum_df def build_top(): - processed = process_present(data_list, expected_files) + processed = process_present_list(data_list, expected_files) df = pd.DataFrame(processed, columns=expected_files) sum_df = compute_count_true(df) # convert to long form - sum_longform_df = sum_df.reset_index().melt(id_vars='index', var_name='status', value_name='sum') - - chart = alt.Chart(sum_longform_df).mark_bar().encode( - x=alt.X('index:N', title=None, axis=alt.Axis(grid=False)), - y=alt.Y('sum:Q', title='Data assets', axis=alt.Axis(grid=False)), - color=alt.Color('status:N', - scale=alt.Scale(domain=['present', 'absent'], - range=colors), - legend=None) - ).properties( - width=400 + sum_longform_df = sum_df.reset_index().melt( + id_vars="index", var_name="status", value_name="sum" + ) + + chart = ( + alt.Chart(sum_longform_df) + .mark_bar() + .encode( + x=alt.X("index:N", title=None, axis=alt.Axis(grid=False)), + y=alt.Y("sum:Q", title="Data assets", axis=alt.Axis(grid=False)), + color=alt.Color( + "status:N", + scale=alt.Scale(domain=["present", "absent"], range=colors), + legend=None, + ), + ) + .properties(width=400) ) - legend = alt.Chart(pd.DataFrame({ - 'status': ['File present', 'File absent'], - 'color': colors, - 'x': [0, 0], - 'y': [15, 0] - })).mark_text( - align='left', - dx=10 - ).encode( - text=alt.Text('status:N'), - color=alt.Color('color:N', scale=None), - x=alt.value(410), # Adjust position - y=alt.Y('y:Q', scale=None) + legend = ( + alt.Chart( + pd.DataFrame( + { + "status": ["File present", "File absent"], + "color": colors, + "x": [0, 0], + "y": [15, 0], + } + ) + ) + .mark_text(align="left", dx=10) + .encode( + text=alt.Text("status:N"), + color=alt.Color("color:N", scale=None), + x=alt.value(410), # Adjust position + y=alt.Y("y:Q", scale=None), + ) ) return pn.panel(chart + legend) def build_csv(file, field): - id_fields = ['name', '_id', 'location', 'creation'] + # For everybody who is missing the currently active file/field + id_fields = ["name", "_id", "location", "creation"] df_data = [] for data in data_list: if not data[file] is None: - if mid_selector.value == ' ' or not field in data[file] or data[file][field] is None: + if mid_selector.value == " " or check_present(field, data[file]): id_data = {} for id_field in id_fields: if id_field in data: @@ -145,12 +143,14 @@ def build_csv(file, field): def build_csv_jscode(event): csv = build_csv(top_selector.value, mid_selector.value) - csv_escaped = csv.replace('\n', '\\n').replace('"', '\\"') # Escape newlines and double quotes + csv_escaped = csv.replace("\n", "\\n").replace( + '"', '\\"' + ) # Escape newlines and double quotes - if not mid_selector.value == ' ': - filename = f'{top_selector.value}-{mid_selector.value}-missing.csv' + if not mid_selector.value == " ": + filename = f"{top_selector.value}-{mid_selector.value}-missing.csv" else: - filename = f'{top_selector.value}-missing.csv' + filename = f"{top_selector.value}-missing.csv" js_code = f""" console.log('here'); @@ -171,69 +171,80 @@ def build_csv_jscode(event): window.URL.revokeObjectURL(url); """ - # it's not clear why this extra clear is needed, but it's necessary for the download to work - js_pane.object = '' - js_pane.object = f'' + # it's not clear why this extra clear is needed, but it's + # necessary for the download to work + js_pane.object = "" + js_pane.object = f"" + -top_selector = pn.widgets.Select(name='Select file:', - options=expected_files) -pn.state.location.sync(top_selector, {'value': 'file'}) +top_selector = pn.widgets.Select(name="Select file:", options=expected_files) +pn.state.location.sync(top_selector, {"value": "file"}) -mid_selector = pn.widgets.Select(name='Sub-select for:', - options=[]) -pn.state.location.sync(mid_selector, {'value': 'field'}) +mid_selector = pn.widgets.Select(name="Sub-select for:", options=[]) +pn.state.location.sync(mid_selector, {"value": "field"}) -download_button = pn.widgets.Button(name='Download') +download_button = pn.widgets.Button(name="Download") download_button.on_click(build_csv_jscode) def build_mid(selected): mid_list = [] for data in data_list: - if not data[selected]==None: + if data[selected] is not None: mid_list.append(data[selected]) - processed = process_present(mid_list, mid_list[0].keys()) + processed = process_present_list(mid_list, mid_list[0].keys()) df = pd.DataFrame(processed, columns=mid_list[0].keys()) sum_df = compute_count_true(df) # convert to long form - sum_longform_df = sum_df.reset_index().melt(id_vars='index', var_name='status', value_name='sum') - - chart = alt.Chart(sum_longform_df).mark_bar().encode( - x=alt.X('index:N', title=None, axis=alt.Axis(grid=False)), - y=alt.Y('sum:Q', title='Data assets', axis=alt.Axis(grid=False)), - color=alt.Color('status:N', - scale=alt.Scale(domain=['present', 'absent'], - range=colors), - legend=None) - ).properties( - width=400 + sum_longform_df = sum_df.reset_index().melt( + id_vars="index", var_name="status", value_name="sum" ) - legend = alt.Chart(pd.DataFrame({ - 'status': ['File present', 'File absent'], - 'color': colors, - 'x': [0, 0], - 'y': [15, 0] - })).mark_text( - align='left', - dx=10 - ).encode( - text=alt.Text('status:N'), - color=alt.Color('color:N', scale=None), - x=alt.value(410), # Adjust position - y=alt.Y('y:Q', scale=None) + chart = ( + alt.Chart(sum_longform_df) + .mark_bar() + .encode( + x=alt.X("index:N", title=None, axis=alt.Axis(grid=False)), + y=alt.Y("sum:Q", title="Data assets", axis=alt.Axis(grid=False)), + color=alt.Color( + "status:N", + scale=alt.Scale(domain=["present", "absent"], range=colors), + legend=None, + ), + ) + .properties(width=400) + ) + + legend = ( + alt.Chart( + pd.DataFrame( + { + "status": ["File present", "File absent"], + "color": colors, + "x": [0, 0], + "y": [15, 0], + } + ) + ) + .mark_text(align="left", dx=10) + .encode( + text=alt.Text("status:N"), + color=alt.Color("color:N", scale=None), + x=alt.value(410), # Adjust position + y=alt.Y("y:Q", scale=None), + ) ) # Also update the selected list - option_list = [' '] + list(mid_list[0].keys()) + option_list = [" "] + list(mid_list[0].keys()) mid_selector.options = option_list return pn.panel(chart + legend) - + top_plot = build_top() mid_plot = pn.bind(build_mid, selected=top_selector) # Setup the rows diff --git a/src/aind_metadata_viz/docdb.py b/src/aind_metadata_viz/docdb.py index 929ef98..682e591 100644 --- a/src/aind_metadata_viz/docdb.py +++ b/src/aind_metadata_viz/docdb.py @@ -7,44 +7,45 @@ COLLECTION = "data_assets" docdb_api_client = MetadataDbClient( - host=API_GATEWAY_HOST, - database=DATABASE, - collection=COLLECTION, + host=API_GATEWAY_HOST, + database=DATABASE, + collection=COLLECTION, ) @pn.cache -def get_all(): +def get_all(test_mode=False): filter = {} - limit = 0 + limit = 0 if not test_mode else 10 paginate_batch_size = 1000 response = docdb_api_client.retrieve_docdb_records( filter_query=filter, limit=limit, - paginate_batch_size=paginate_batch_size + paginate_batch_size=paginate_batch_size, ) return response + @pn.cache def get_subjects(): filter = { - 'subject.subject_id': {'$exists': True}, - 'session': {'$ne': None} - } + "subject.subject_id": {"$exists": True}, + "session": {"$ne": None}, + } limit = 1000 paginate_batch_size = 100 response = docdb_api_client.retrieve_docdb_records( filter_query=filter, - projection={'_id': 0, 'subject.subject_id': 1}, + projection={"_id": 0, "subject.subject_id": 1}, limit=limit, - paginate_batch_size=paginate_batch_size + paginate_batch_size=paginate_batch_size, ) # turn this into a list instead of a nested list subjects = [] for data in response: - subjects.append(np.int32(data['subject']['subject_id'])) + subjects.append(np.int32(data["subject"]["subject_id"])) return np.unique(subjects).tolist() @@ -63,15 +64,16 @@ def get_sessions(subject_id): _type_ _description_ """ - filter = {"subject.subject_id": str(subject_id), - "session": {"$ne": "null"}} + filter = { + "subject.subject_id": str(subject_id), + "session": {"$ne": "null"}, + } response = docdb_api_client.retrieve_docdb_records( - filter_query=filter, - projection={'_id': 0, 'session': 1} + filter_query=filter, projection={"_id": 0, "session": 1} ) sessions = [] for data in response: - sessions.append(data['session']) + sessions.append(data["session"]) return sessions diff --git a/src/aind_metadata_viz/dynamic_js.py b/src/aind_metadata_viz/dynamic_js.py deleted file mode 100644 index 923f84f..0000000 --- a/src/aind_metadata_viz/dynamic_js.py +++ /dev/null @@ -1,35 +0,0 @@ -import panel as pn - -# Initialize Panel extension -pn.extension() - -# Example parameter that you might want to change -dynamic_parameter = pn.widgets.TextInput(name='Parameter', value='Hello') - -# Placeholder for JavaScript code -js_code = """ -console.log("Static message"); -""" - -# Create a Panel HTML pane with the initial JavaScript code -js_pane = pn.pane.HTML(f"", height=0, width=0) - - -# Function to update the JavaScript dynamically -def update_js(event): - new_js_code = f""" - console.log("{dynamic_parameter.value}"); - """ - js_pane.object = f"" - - -# Button to trigger JavaScript update -button = pn.widgets.Button(name='Run JS', button_type='primary') - -# Link button click event to update function -button.on_click(update_js) - -# Layout to display everything -app = pn.Column(dynamic_parameter, button, js_pane) - -app.servable() diff --git a/src/aind_metadata_viz/flask_proxy.py b/src/aind_metadata_viz/flask_proxy.py index 65e3474..ca160d3 100644 --- a/src/aind_metadata_viz/flask_proxy.py +++ b/src/aind_metadata_viz/flask_proxy.py @@ -4,18 +4,28 @@ app = Flask(__name__) # Start the Panel server -subprocess.Popen(["panel", "serve", "src/aind_metadata_viz/app.py", "--address", "0.0.0.0", "--port", "5006"]) - - -@app.route('/') +subprocess.Popen( + [ + "panel", + "serve", + "src/aind_metadata_viz/app.py", + "--address", + "0.0.0.0", + "--port", + "5006", + ] +) + + +@app.route("/") def index(): return redirect("/app") -@app.route('/') +@app.route("/") def proxy(path): - return send_from_directory('static', path) + return send_from_directory("static", path) -if __name__ == '__main__': - app.run(host="0.0.0.0", port=8000) \ No newline at end of file +if __name__ == "__main__": + app.run(host="0.0.0.0", port=8000) diff --git a/src/aind_metadata_viz/metadata_helpers.py b/src/aind_metadata_viz/metadata_helpers.py new file mode 100644 index 0000000..0b20fab --- /dev/null +++ b/src/aind_metadata_viz/metadata_helpers.py @@ -0,0 +1,41 @@ +def check_present(key: str, object: dict): + """Return true if the value of a key exists and is not None, or any of + '' [] {} in a JSON object + + Parameters + ---------- + field : string + Key + object : dict + Dictionary + """ + return ( + object[key] is not None + and object[key] != "" + and object[key] != [] + and object[key] != {} + if key in object + else False + ) + + +def process_present_dict(data: dict, expected_fields: list): + return {field: check_present(field, data) for field in expected_fields} + + +def process_present_list(data_list: list, expected_fields: list): + """Process a data JSON + + Parameters + ---------- + data_list : _type_ + _description_ + expected_files : _type_ + _description_ + + Returns + ------- + _type_ + _description_ + """ + return [process_present_dict(data, expected_fields) for data in data_list] diff --git a/tests/test_docdb.py b/tests/test_docdb.py new file mode 100644 index 0000000..7ea9fb5 --- /dev/null +++ b/tests/test_docdb.py @@ -0,0 +1,42 @@ +"""Example test template.""" + +import unittest + +from aind_metadata_viz.docdb import get_subjects, get_sessions, get_all + + +class DocDBTest(unittest.TestCase): + """Test the DocDB calls""" + + # def setUp(self): + + def test_get_subjects(self): + """Get the subjects list, check that some known subjects are in it""" + self.assertIn(596930, get_subjects()) + + def test_get_sessions(self): + """Get data from the test subject's sessions""" + self.assertEqual(1, len(get_sessions(596930))) + + def test_get_all(self): + """Test all sessions""" + data = get_all(test_mode=True) + first_ten_subjects = [ + "271246", + "666612", + "673594", + "719093", + "651474", + "666612", + "666612", + "651474", + "719093", + "673594", + ] + subj_id = [dat["subject"]["subject_id"] for dat in data] + + self.assertEqual(subj_id, first_ten_subjects) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_example.py b/tests/test_example.py deleted file mode 100644 index 06e9e0d..0000000 --- a/tests/test_example.py +++ /dev/null @@ -1,16 +0,0 @@ -"""Example test template.""" - -import unittest - - -class ExampleTest(unittest.TestCase): - """Example Test Class""" - - def test_assert_example(self): - """Example of how to test the truth of a statement.""" - - self.assertTrue(1 == 1) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_metadata_helpers.py b/tests/test_metadata_helpers.py new file mode 100644 index 0000000..217551f --- /dev/null +++ b/tests/test_metadata_helpers.py @@ -0,0 +1,79 @@ +"""Test the main app code""" + +import unittest +from aind_metadata_viz.metadata_helpers import ( + check_present, + process_present_dict, + process_present_list, +) + + +class TestApp(unittest.TestCase): + """Test main app""" + + def setUp(self) -> None: + self.dict = { + "test1": None, + "test2": "", + "test3": {}, + "test4": [], + "test5": "actual data", + "test6": 1, + "test7": {"actual key": "actual value"}, + "test8": object, + } + self.expected_fields = [ + "test1", + "test2", + "test3", + "test4", + "test5", + "test6", + "test7", + "test8", + "meow", + ] + self.expected_out = { + "test1": False, + "test2": False, + "test3": False, + "test4": False, + "test5": True, + "test6": True, + "test7": True, + "test8": True, + "meow": False, + } + + return super().setUp() + + def test_check_present(self): + """Test the check_present function""" + self.assertFalse(check_present("test1", self.dict)) + self.assertFalse(check_present("test2", self.dict)) + self.assertFalse(check_present("test3", self.dict)) + self.assertFalse(check_present("test4", self.dict)) + + self.assertTrue(check_present("test5", self.dict)) + self.assertTrue(check_present("test6", self.dict)) + self.assertTrue(check_present("test7", self.dict)) + self.assertTrue(check_present("test8", self.dict)) + + def test_process_present_dict(self): + """Test the process_present_dict function""" + out_test = process_present_dict(self.dict, self.expected_fields) + + self.assertEqual(self.expected_out, out_test) + + def test_process_present(self): + """Test that process runs properly on a list""" + data_list = [self.dict, self.dict] + + processed_list = process_present_list(data_list, self.expected_fields) + out_list = [self.expected_out, self.expected_out] + + self.assertEqual(processed_list, out_list) + + +if __name__ == "__main__": + unittest.main()