From 0e7e21cd31f702971444bd6a99f9d8e030c378cd Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Thu, 12 Dec 2024 23:32:43 +0000 Subject: [PATCH 01/68] first draft of venn --- code/pages/3_AIND data access playground.py | 93 +++++++++++++++++++-- 1 file changed, 85 insertions(+), 8 deletions(-) diff --git a/code/pages/3_AIND data access playground.py b/code/pages/3_AIND data access playground.py index 03324c7..f7f0c42 100644 --- a/code/pages/3_AIND data access playground.py +++ b/code/pages/3_AIND data access playground.py @@ -2,10 +2,24 @@ ''' import logging +import re +from matplotlib_venn import venn2, venn3 +import matplotlib.pyplot as plt import streamlit as st from streamlit_dynamic_filters import DynamicFilters -from util.fetch_data_docDB import load_data_from_docDB + +from util.fetch_data_docDB import load_data_from_docDB, load_client + +st.markdown( +""" +""", +unsafe_allow_html=True, +) try: st.set_page_config(layout="wide", @@ -19,12 +33,75 @@ except: pass -df = load_data_from_docDB() -st.markdown(f'### Note: the dataframe showing here has been merged in to the master table on the Home page!') +client = load_client() + +queries = { + "'dynamic_foraging' in software name, raw": { + "session.data_streams.software.name": "dynamic-foraging-task", + "name": {"$not": {"$regex": ".*processed.*"}}, + }, + "'dynamic_foraging' in software name, processed": { + "session.data_streams.software.name": "dynamic-foraging-task", + "name": {"$regex": ".*processed.*"}, + }, + "'fib' in 'data_description.modality', raw": { + "data_description.modality.abbreviation": "fib", + "name": {"$not": {"$regex": ".*processed.*"}}, + }, + "'fib' in 'rig.modalities', raw": { + "rig.modalities.abbreviation": "fib", + "name": {"$not": {"$regex": ".*processed.*"}}, + }, + "'fib' in 'session.data_streams', raw": { + "session.data_streams.stream_modalities.abbreviation": "fib", + "name": {"$not": {"$regex": ".*processed.*"}}, + } +} + +@st.cache_data(ttl=3600 * 12) # Cache the df_docDB up to 12 hours +def get_session_from_query(query): + results = client.retrieve_docdb_records( + filter_query=query, + projection={"name": 1, "_id": 1}, + ) + + sessions = [re.sub(r'_processed.*$', '', r["name"]) for r in results] + return sessions + +# Multiselect for selecting queries up to three +query_keys = list(queries.keys()) +selected_queries = st.multiselect( + "Select queries to filter sessions", + query_keys, + default=query_keys[:3], + key="selected_queries", +) + +# Generage venn diagram of the selected queries +query_results = {key: set(get_session_from_query(queries[key])) for key in selected_queries} + +fig, ax = plt.subplots() +if len(selected_queries) == 2: + venn2( + [query_results[key] for key in selected_queries], + set_labels=selected_queries, + ) +else: + venn3( + [query_results[key] for key in selected_queries], + set_labels=selected_queries, + ) + +st.columns([1, 1])[0].pyplot(fig, use_container_width=True) + + +# df = load_data_from_docDB() + +# st.markdown(f'### Note: the dataframe showing here has been merged in to the master table on the Home page!') -dynamic_filters = DynamicFilters( - df=df, - filters=['subject_id', 'subject_genotype']) -dynamic_filters.display_filters() -dynamic_filters.display_df() +# dynamic_filters = DynamicFilters( +# df=df, +# filters=['subject_id', 'subject_genotype']) +# dynamic_filters.display_filters() +# dynamic_filters.display_df() From 92c3fd34457f7964fea2c2c83cea12d2255a8747 Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Fri, 20 Dec 2024 20:57:19 +0000 Subject: [PATCH 02/68] add stimulus_epochs --- code/pages/3_AIND data access playground.py | 26 ++++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/code/pages/3_AIND data access playground.py b/code/pages/3_AIND data access playground.py index f7f0c42..be6aca7 100644 --- a/code/pages/3_AIND data access playground.py +++ b/code/pages/3_AIND data access playground.py @@ -37,23 +37,37 @@ client = load_client() queries = { - "'dynamic_foraging' in software name, raw": { - "session.data_streams.software.name": "dynamic-foraging-task", + "raw, 'dynamic_foraging' in ANY software name": { + "$or":[ + {"session.data_streams.software.name": "dynamic-foraging-task"}, + {"session.stimulus_epochs.software.name": "dynamic-foraging-task"}, + ], "name": {"$not": {"$regex": ".*processed.*"}}, }, - "'dynamic_foraging' in software name, processed": { + "raw, 'dynamic_foraging' in data_streams software name": { "session.data_streams.software.name": "dynamic-foraging-task", + "name": {"$not": {"$regex": ".*processed.*"}}, + }, + "raw, 'dynamic_foraging' in stimulus_epochs software name": { + "session.stimulus_epochs.software.name": "dynamic-foraging-task", + "name": {"$not": {"$regex": ".*processed.*"}}, + }, + "processed, 'dynamic_foraging' in ANY software name": { + "$or":[ + {"session.data_streams.software.name": "dynamic-foraging-task"}, + {"session.stimulus_epochs.software.name": "dynamic-foraging-task"}, + ], "name": {"$regex": ".*processed.*"}, }, - "'fib' in 'data_description.modality', raw": { + "raw, 'fib' in 'data_description.modality'": { "data_description.modality.abbreviation": "fib", "name": {"$not": {"$regex": ".*processed.*"}}, }, - "'fib' in 'rig.modalities', raw": { + "raw, 'fib' in 'rig.modalities'": { "rig.modalities.abbreviation": "fib", "name": {"$not": {"$regex": ".*processed.*"}}, }, - "'fib' in 'session.data_streams', raw": { + "raw, 'fib' in 'session.data_streams'": { "session.data_streams.stream_modalities.abbreviation": "fib", "name": {"$not": {"$regex": ".*processed.*"}}, } From 9d9e36dc63463c366249a9c8b29a96a9318e0951 Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Fri, 20 Dec 2024 21:19:02 +0000 Subject: [PATCH 03/68] refactor --- code/pages/3_AIND data access playground.py | 71 +++++++++++++-------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/code/pages/3_AIND data access playground.py b/code/pages/3_AIND data access playground.py index be6aca7..f9dacd2 100644 --- a/code/pages/3_AIND data access playground.py +++ b/code/pages/3_AIND data access playground.py @@ -83,39 +83,54 @@ def get_session_from_query(query): sessions = [re.sub(r'_processed.*$', '', r["name"]) for r in results] return sessions -# Multiselect for selecting queries up to three -query_keys = list(queries.keys()) -selected_queries = st.multiselect( - "Select queries to filter sessions", - query_keys, - default=query_keys[:3], - key="selected_queries", -) -# Generage venn diagram of the selected queries -query_results = {key: set(get_session_from_query(queries[key])) for key in selected_queries} +def app(): -fig, ax = plt.subplots() -if len(selected_queries) == 2: - venn2( - [query_results[key] for key in selected_queries], - set_labels=selected_queries, - ) -else: - venn3( - [query_results[key] for key in selected_queries], - set_labels=selected_queries, + # Multiselect for selecting queries up to three + query_keys = list(queries.keys()) + selected_queries = st.multiselect( + "Select queries to filter sessions", + query_keys, + default=query_keys[:3], + key="selected_queries", ) + + # Generage venn diagram of the selected queries + query_results = {key: set(get_session_from_query(queries[key])) for key in selected_queries} + + + # -- Show venn -- + fig, ax = plt.subplots() + if len(selected_queries) == 2: + venn2( + [query_results[key] for key in selected_queries], + set_labels=selected_queries, + ) + else: + venn3( + [query_results[key] for key in selected_queries], + set_labels=selected_queries, + ) + + st.columns([1, 1])[0].pyplot(fig, use_container_width=True) + + + # -- Show dataframe that summarize the selected queries -- + st.markdown(f"### Summary of selected queries") + query_results -st.columns([1, 1])[0].pyplot(fig, use_container_width=True) -# df = load_data_from_docDB() + # df = load_data_from_docDB() -# st.markdown(f'### Note: the dataframe showing here has been merged in to the master table on the Home page!') + # st.markdown(f'### Note: the dataframe showing here has been merged in to the master table on the Home page!') -# dynamic_filters = DynamicFilters( -# df=df, -# filters=['subject_id', 'subject_genotype']) -# dynamic_filters.display_filters() -# dynamic_filters.display_df() + # dynamic_filters = DynamicFilters( + # df=df, + # filters=['subject_id', 'subject_genotype']) + # dynamic_filters.display_filters() + # dynamic_filters.display_df() + + +if __name__ == "__main__": + app() From 629bf712d012fdfb7d361d821d4ad9728a4efc86 Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Fri, 20 Dec 2024 21:54:20 +0000 Subject: [PATCH 04/68] change page name --- .../{3_AIND data access playground.py => 3_docDB inventory.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename code/pages/{3_AIND data access playground.py => 3_docDB inventory.py} (100%) diff --git a/code/pages/3_AIND data access playground.py b/code/pages/3_docDB inventory.py similarity index 100% rename from code/pages/3_AIND data access playground.py rename to code/pages/3_docDB inventory.py From bc45811af1b44a58a8ffab12b108e035537b8971 Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Fri, 20 Dec 2024 23:24:23 +0000 Subject: [PATCH 05/68] reorder page; show queries --- ...ocDB inventory.py => 0_docDB inventory.py} | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) rename code/pages/{3_docDB inventory.py => 0_docDB inventory.py} (89%) diff --git a/code/pages/3_docDB inventory.py b/code/pages/0_docDB inventory.py similarity index 89% rename from code/pages/3_docDB inventory.py rename to code/pages/0_docDB inventory.py index f9dacd2..f1b68f1 100644 --- a/code/pages/3_docDB inventory.py +++ b/code/pages/0_docDB inventory.py @@ -11,16 +11,6 @@ from util.fetch_data_docDB import load_data_from_docDB, load_client -st.markdown( -""" -""", -unsafe_allow_html=True, -) - try: st.set_page_config(layout="wide", page_title='Foraging behavior browser', @@ -33,10 +23,19 @@ except: pass +st.markdown( +""" +""", +unsafe_allow_html=True, +) client = load_client() -queries = { +QUERY_PRESET = { "raw, 'dynamic_foraging' in ANY software name": { "$or":[ {"session.data_streams.software.name": "dynamic-foraging-task"}, @@ -85,9 +84,16 @@ def get_session_from_query(query): def app(): + + # + with st.expander("Show docDB queries", expanded=False): + st.write('See how to use these queries [here](https://aind-data-access-api.readthedocs.io/en/latest/UserGuide.html#document-database-docdb)') + for key, query in QUERY_PRESET.items(): + st.markdown(f"**{key}**") + st.code(query) # Multiselect for selecting queries up to three - query_keys = list(queries.keys()) + query_keys = list(QUERY_PRESET.keys()) selected_queries = st.multiselect( "Select queries to filter sessions", query_keys, @@ -96,7 +102,7 @@ def app(): ) # Generage venn diagram of the selected queries - query_results = {key: set(get_session_from_query(queries[key])) for key in selected_queries} + query_results = {key: set(get_session_from_query(QUERY_PRESET[key])) for key in selected_queries} # -- Show venn -- From bb79299c3a084a51e0849e56fbcd2a4d70449f6a Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Sat, 21 Dec 2024 00:22:22 +0000 Subject: [PATCH 06/68] add util.reformat.split_nwb_name --- code/util/reformat.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 code/util/reformat.py diff --git a/code/util/reformat.py b/code/util/reformat.py new file mode 100644 index 0000000..efa8dcd --- /dev/null +++ b/code/util/reformat.py @@ -0,0 +1,43 @@ + +""" Helper functions to reformat the data +""" +import re + + +# Function to split the `nwb_name` column +def split_nwb_name(nwb_name): + """Turn the nwb_name into subject_id, session_date, nwb_suffix in order to be merged to + the main df. + + Parameters + ---------- + nwb_name : str. The name of the nwb file. This function can handle the following formats: + "721403_2024-08-09_08-39-12.nwb" + "685641_2023-10-04.nwb", + "behavior_754280_2024-11-14_11-06-24.nwb", + "behavior_1_2024-08-05_15-48-54", + " + ... + + Returns + ------- + subject_id : str. The subject ID + session_date : str. The session date + nwb_suffix : int. The nwb suffix (converted from session time if available, otherwise 0) + """ + + pattern = R"(?:\w+_)?(?P\d+)_(?P\d{4}-\d{2}-\d{2})(?:_(?P