From 073faa63cf42375e977766e0de9f80e71b29a25b Mon Sep 17 00:00:00 2001 From: Helen Lin <46795546+helen-m-lin@users.noreply.github.com> Date: Wed, 18 Sep 2024 18:13:52 -0700 Subject: [PATCH] docs: readthedocs (#99) * docs: Contributor Guidelines * fix: docstrings format for sphinx build * docs: add User Guide * docs: update readme --- .readthedocs.yaml | 13 + README.md | 26 +- docs/Makefile | 20 ++ docs/make.bat | 35 +++ docs/source/Contributing.rst | 273 ++++++++++++++++++ docs/source/UserGuide.rst | 113 ++++++++ docs/source/_static/dark-logo.svg | 129 +++++++++ docs/source/_static/favicon.ico | Bin 0 -> 259838 bytes docs/source/_static/light-logo.svg | 128 ++++++++ docs/source/aind_data_asset_indexer.rst | 69 +++++ docs/source/conf.py | 53 ++++ docs/source/index.rst | 24 ++ docs/source/modules.rst | 7 + .../aind_bucket_indexer.py | 47 +-- .../codeocean_bucket_indexer.py | 21 +- src/aind_data_asset_indexer/models.py | 3 + .../populate_s3_with_metadata_files.py | 3 + src/aind_data_asset_indexer/utils.py | 28 +- 18 files changed, 950 insertions(+), 42 deletions(-) create mode 100644 .readthedocs.yaml create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/Contributing.rst create mode 100644 docs/source/UserGuide.rst create mode 100644 docs/source/_static/dark-logo.svg create mode 100644 docs/source/_static/favicon.ico create mode 100644 docs/source/_static/light-logo.svg create mode 100644 docs/source/aind_data_asset_indexer.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/modules.rst diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..eafd128 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,13 @@ +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.10" + +python: + install: + - method: pip + path: . 
+ extra_requirements: + - dev diff --git a/README.md b/README.md index 6a7f81b..5d9bdc0 100644 --- a/README.md +++ b/README.md @@ -4,20 +4,18 @@ ![Code Style](https://img.shields.io/badge/code%20style-black-black) [![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release) -Script to create metadata analytics table and write to redshift table. -This script will parse through a list of s3 buckets and document whether data asset records in each of those buckets does or does not contain `metadata.nd.json` +Index jobs for AIND metadata in AWS DocumentDB and S3. +AIND metadata for data assets is stored in various places and must be +kept in sync: -## Usage -- Define the environment variables in the `.env.template` - - REDSHIFT_SECRETS_NAME: defining secrets name for Amazon Redshift - - BUCKETS: list of buckets. comma separated format (ex: "bucket_name1, bucket_name2") - - TABLE_NAME: name of table in redshift - - FOLDERS_FILEPATH: Intended filepath for txt file - - METADATA_DIRECTORY: Intended path for directory containing copies of metadata records - - AWS_DEFAULT_REGION: Default AWS region. -- Records containing metadata.nd.json file will be copies to `METADATA_DIRECTORY` and compared against list of all records in `FOLDERS_FILEPATH` -- An analytics table containing columns `s3_prefix`, `bucket_name`, and `metadata_bool` will be written to `TABLE_NAME` in Redshift +1. **S3 buckets** store raw metadata files, including the ``metadata.nd.json``. +2. A **document database (DocDB)** contains unstructured json + documents describing the ``metadata.nd.json`` for a data asset. +3. **Code Ocean**: data assets are mounted as CodeOcean data assets. + Processed results are also stored in an internal Code Ocean bucket. 
-## Development -- It's a bit tedious, but the dependencies listed in the `pyproject.toml` file needs to be manually updated +We have automated jobs to keep changes in DocDB and S3 in sync. +This repository contains the code for these index jobs. + +More information including a user guide and contributor guidelines can be found at [readthedocs](https://aind-data-asset-indexer.readthedocs.io). \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..dc1312a --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/Contributing.rst b/docs/source/Contributing.rst new file mode 100644 index 0000000..fe53827 --- /dev/null +++ b/docs/source/Contributing.rst @@ -0,0 +1,273 @@ +Contributor Guidelines +====================== + +This document will go through best practices for contributing to this +project. We welcome and appreciate contributions or ideas for +improvement. + +- `Bug Reports and Feature + Requests <#bug-reports-and-feature-requests>`__ +- `Local Installation and + Development <#local-installation-and-development>`__ +- `Branches and Pull Requests <#branches-and-pull-requests>`__ +- `Release Cycles <#release-cycles>`__ + +Bug Reports and Feature Requests +-------------------------------- + +Before creating a pull request, we ask contributors to please open a bug +report or feature request first: +`issues `__ + +We will do our best to monitor and maintain the backlog of issues. + +Local Installation and Development +---------------------------------- + +For development, + +- For new features or non-urgent bug fixes, create a branch off of + ``dev`` +- For an urgent hotfix to our production environment, create a branch + off of ``main`` + +Consult the `Branches and Pull Requests <#branches-and-pull-requests>`__ +and `Release Cycles <#release-cycles>`__ for more details. + +From the root directory, run: + +.. code:: bash + + pip install -e .[dev] + +to install the relevant code for development. + +.. _running-indexer-jobs-locally: + +Running indexer jobs locally +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +The jobs are intended to be run as scheduled AWS ECS tasks in the same VPC +as the DocDB instance. The job settings are stored in AWS Parameter Store. 
+ +If you wish to run the jobs locally, follow these steps: + +1. In a new terminal, start ssh session. Credentials can be found in AWS + Secrets Manager. + +.. code:: bash + + ssh -L 27017:{docdb_host}:27017 {ssh_username}@{ssh_host} -N -v + +2. For the `IndexAindBucketsJob`, you will need to set the ``INDEXER_PARAM_NAME``. + Then, run the following: + +.. code:: python + + from aind_data_asset_indexer.index_aind_buckets import IndexAindBucketsJob + from aind_data_asset_indexer.models import AindIndexBucketsJobSettings + + if __name__ == "__main__": + main_job_settings = AindIndexBucketsJobSettings.from_param_store(param_store_name=INDEXER_PARAM_NAME) + main_job_settings.doc_db_host = "localhost" + main_job = IndexAindBucketsJob(job_settings=main_job_settings) + main_job.run_job() + +3. For the `CodeOceanIndexBucketJob`, you will need to set the ``CO_INDEXER_PARAM_NAME`` + and ``DEVELOPER_CODEOCEAN_ENDPOINT``. Then, run the following: + +.. code:: python + + from aind_data_asset_indexer.models import CodeOceanIndexBucketJobSettings + from aind_data_asset_indexer.codeocean_bucket_indexer import CodeOceanIndexBucketJob + + if __name__ == "__main__": + main_job_settings = CodeOceanIndexBucketJobSettings.from_param_store(param_store_name=CO_INDEXER_PARAM_NAME) + main_job_settings.doc_db_host = "localhost" + main_job_settings.temp_codeocean_endpoint=DEVELOPER_CODEOCEAN_ENDPOINT + main_job = CodeOceanIndexBucketJob(job_settings=main_job_settings) + main_job.run_job() + +4. Close the ssh session when you are done. 
+ + +Branches and Pull Requests +-------------------------- + +Branch naming conventions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Name your branch using the following format: +``<type>-<issue_number>-<short_summary>`` + +where: + +- ``<type>`` is one of: + + - **build**: Changes that affect the build system + or external dependencies (e.g., pyproject.toml, setup.py) + - **ci**: Changes to our CI configuration files and scripts + (examples: .github/workflows/ci.yml) + - **docs**: Changes to our documentation + - **feat**: A new feature + - **fix**: A bug fix + - **perf**: A code change that improves performance + - **refactor**: A code change that neither fixes a bug nor adds + a feature, but will make the codebase easier to maintain + - **test**: Adding missing tests or correcting existing tests + - **hotfix**: An urgent bug fix to our production code +- ``<issue_number>`` references the GitHub issue this branch will close +- ``<short_summary>`` is a brief description that shouldn’t be more than 3 + words. + +Some examples: + +- ``feat-12-adds-email-field`` +- ``fix-27-corrects-endpoint`` +- ``test-43-updates-server-test`` + +We ask that a separate issue and branch are created if code is added +outside the scope of the reference issue. + +Commit messages +~~~~~~~~~~~~~~~ + +Please format your commit messages as ``<type>: <short summary>`` where +``<type>`` is from the list above and the short summary is one or two +sentences. + +Testing and docstrings +~~~~~~~~~~~~~~~~~~~~~~ + +We strive for complete code coverage and docstrings, and we also run +code format checks. + +- To run the code format check: + +.. code:: bash + + flake8 . + +- There are some helpful libraries that will automatically format the + code and import statements: + +.. code:: bash + + black . + +and + +.. code:: bash + + isort . + +Strings that exceed the maximum line length may still need to be +formatted manually. + +- To run the docstring coverage check and report: + +.. code:: bash + + interrogate -v . 
+ +This project uses NumPy’s docstring format: `Numpy docstring +standards `__ + +Many IDEs can be configured to automatically format docstrings in the +NumPy convention. + +- To run the unit test coverage check and report: + +.. code:: bash + + coverage run -m unittest discover && coverage report + +- To view a more detailed html version of the report, run: + +.. code:: bash + + coverage run -m unittest discover && coverage report + coverage html + +and then open ``htmlcov/index.html`` in a browser. + +Pull requests +~~~~~~~~~~~~~ + +Pull requests and reviews are required before merging code into this +project. You may open a ``Draft`` pull request and ask for a preliminary +review on code that is currently a work-in-progress. + +Before requesting a review on a finalized pull request, please verify +that the automated checks have passed first. + +Release Cycles +-------------------------- + +For this project, we have adopted the `Git +Flow `__ system. We will +strive to release new features and bug fixes on a two-week cycle. The +rough workflow is: + +Hotfixes +~~~~~~~~ + +- A ``hotfix`` branch is created off of ``main`` +- A Pull Request into ``main`` is opened, reviewed, and merged into + ``main`` +- A new ``tag`` with a patch bump is created, and a new ``release`` is + deployed +- The ``main`` branch is merged into all other branches + +Feature branches and bug fixes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- A new branch is created off of ``dev`` +- A Pull Request into ``dev`` is opened, reviewed, and merged + +Release branch +~~~~~~~~~~~~~~ + +- A new branch ``release-v{new_tag}`` is created +- Documentation updates and bug fixes are created off of the + ``release-v{new_tag}`` branch. 
+- Commits added to the ``release-v{new_tag}`` are also merged into + ``dev`` +- Once ready for release, a Pull Request from ``release-v{new_tag}`` + into ``main`` is opened for final review +- A new tag will automatically be generated +- Once merged, a new GitHub Release is created manually + +Pre-release checklist +~~~~~~~~~~~~~~~~~~~~~ + +- ☐ Increment ``__version__`` in + ``aind_data_asset_indexer/__init__.py`` file +- ☐ Run linters, unit tests, and integration tests +- ☐ Verify code is deployed and tested in test environment +- ☐ Update examples +- ☐ Update documentation + + - Run: + + .. code:: bash + + sphinx-apidoc -o docs/source/ src + sphinx-build -b html docs/source/ docs/build/html + +- ☐ Update and build UML diagrams + + - To build UML diagrams locally using a docker container: + + .. code:: bash + + docker pull plantuml/plantuml-server + docker run -d -p 8080:8080 plantuml/plantuml-server:jetty + +Post-release checklist +~~~~~~~~~~~~~~~~~~~~~~ + +- ☐ Merge ``main`` into ``dev`` and feature branches +- ☐ Edit release notes if needed +- ☐ Post announcement diff --git a/docs/source/UserGuide.rst b/docs/source/UserGuide.rst new file mode 100644 index 0000000..a873674 --- /dev/null +++ b/docs/source/UserGuide.rst @@ -0,0 +1,113 @@ +User Guide +========== + +Thank you for using ``aind-data-asset-indexer``! This guide is +intended for engineers in AIND who wish to index metadata in AIND +databases. + +Overview +----------------------------------------- + +AIND metadata for data assets is stored in various places and must be +kept in sync: + +1. **S3 buckets** store raw metadata files. Each data asset folder + (prefix) contains: + + - ``{core_schema}.json``: core schema files, e.g., + ``acquisition.json``, ``subject.json``. + - ``metadata.nd.json``: top-level metadata file, containing + all core schema fields. + - ``original_metadata/{core_schema}.json``: a copy of each + core schema file as it was originally uploaded to S3. +2. 
A **document database (DocDB)** contains unstructured json + documents describing the ``metadata.nd.json`` for a data asset. +3. **Code Ocean**: data assets are mounted as CodeOcean data assets. + Processed results are also stored in an internal Code Ocean bucket. + +Once the data is initially uploaded, the DocDB is assumed to be the +source of truth for metadata. All updates to existing metadata should +be made in the DocDB. + +We have automated jobs to keep changes in DocDB and S3 in sync. +This repository contains the code for these index jobs: + +1. `AindIndexBucketJob <#aindindexbucketjob>`__: Syncs changes in S3 and DocDB. +2. `CodeOceanIndexBucketJob <#codeoceanindexbucketjob>`__: Syncs changes in CodeOcean and DocDB. + + +AindIndexBucketJob +------------------ + +The `AindIndexBucketJob` handles syncing changes from DocDB to S3 for a +particular S3 bucket. There is an `IndexAindBucketsJob` wrapper job that +runs the `AindIndexBucketJob` for a list of buckets. + + +The workflow is generally as follows: + +1. Paginate DocDB to get all records for a particular bucket. +2. For each DocDB record, process by syncing any changes in DocDB to S3. + + - If the record does not have a valid location, log a warning. + - If the S3 location does not exist, remove the record from DocDB. + - If the core schema files or original metadata folder is out of + sync, update them. + - If the metadata.nd.json file is outdated, update it. +3. Paginate S3 to get all prefixes for a particular bucket. +4. For each prefix, process by checking if it is a new data asset + and adding it to DocDB if necessary. + + - If the prefix is not valid (does not adhere to data asset + naming convention), log a warning. + - If the metadata record exists in S3 but not in DocDB, copy it + to DocDB. + - If the metadata record does not exist in S3, create it and save + it to S3. Assume a lambda function will move it over to DocDB. 
+ - In both cases above, ensure the original metadata folder and core + files are in sync with the metadata.nd.json file. + +Please refer to the job's docstrings for more details on the implementation. + + +CodeOceanIndexBucketJob +----------------------- + +The `CodeOceanIndexBucketJob` updates the external links for DocDB records +with their CO data asset ids and indexes Code Ocean (CO) processed results. + +The workflow is generally as follows: + +1. For records in AIND buckets, update the external links with CO data + asset ids if needed. + + - Retrieve a list of CO data asset ids and locations + - Paginate through docdb records where the location does not match + internal CO bucket + - Add or remove the external links from the DocDB record as needed. +2. Index CO processed results from the CO internal bucket. + + - Get all processed CO results as CO records. + - Paginate DocDB to get all records for the CO bucket. + - Find all records in CO that are not in DocDB and add them to DocDB. + - Find all records in DocDB that are not in CO and remove them from + DocDB. + +Please refer to the job's docstrings for more details on the implementation. + + +Running Indexer Jobs Locally +---------------------------- + +The jobs are intended to be run as scheduled AWS ECS tasks in the same VPC +as the DocDB instance. The job settings are stored in AWS Parameter Store. + +If you wish to run the jobs locally, please refer to this section in the +Contributor Guidelines: :ref:`running-indexer-jobs-locally`. 
+ + +Reporting bugs or making feature requests +----------------------------------------- + +Please report any bugs or feature requests here: +`issues `__ diff --git a/docs/source/_static/dark-logo.svg b/docs/source/_static/dark-logo.svg new file mode 100644 index 0000000..dcc68fb --- /dev/null +++ b/docs/source/_static/dark-logo.svg @@ -0,0 +1,129 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/_static/favicon.ico b/docs/source/_static/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..4cec150488e1d2c857ce6e4c1f73b0f048a6bc42 GIT binary patch literal 259838 zcmeI52b^6+`TlPpbVEY#B+^Mj3F#0?C>9_T6|jL6D13GVxKd+g&ZTpOYJrVu~1_A?tfxtjuATSUZ2n+-U z0t118z(8OiFc26B3k;6(5qZ~)i`>;N_e z>vc9yTLZUGdb&8*zFhT@Fe&n_yf2eoDV(*4hFk{jnMx!adYa04)@#37Tmx< zU?7))u0_)_-3w;s=uKyf(WCXzC+YejK=1GCzzXmLcm)hqw}(<66uRf%q~mX(8{7cC z2Hp+EgAKdY9&mMl+6NuTs^8%V3^XzW(gFW&^!kBI+j{CQ;ApT2=v^yae+?AUzgj%6 zcE{7;E^rw*0qlwXzeRNbeuq#OG_npfn(TU~ceG^t@lF@iXP*tu0>1%I0kykpQLl%Z zCbd(Z0k?q#;83u2=c4KA&&2A3&@N~+ok+X99nI4+_g%1SRK~lojZx+|18URD4yShW z%V4OwRgd?n&u}03CO8&s*U@5Z8-}M2XZuKd*$DEbmjS)oyKEoc>;5RpxoqhRf$VhB zfqK;4Qm0vMod>{G;BcTZ2G1Gpv`kO)r1Tb)AX#b{=xUxEE4S!>>lBTX?Fvo@OF^T( z=Zmc?R3~Or{P z+6#UNckcxo1H*4eD=zJ-a0CXb%|NHh;-$7+y7DPo-Uq?2L3r1frry)o$8vBQ*sjaj z2D<1M2;m3}RGI}ZA#Gl^lJ$fIn!_^k}DcIBPtzz=N0gHVQ83rf^2Cl!aJ*+f$FJ4}PKb?F$L_U8A zj09#aLD$0RK_?o70j-EIt_Kc$8PI;|TF0gFA5&IRstOG!md{W2k*hN~ zU~#Yu)>|FWwK!)VKCRWg0%-gI}P=w1C5&A&erVr zX2?I)?Rzpp*5U$}b0v?`gi*K>1uuxY~#6C%ypm z&8tVjy+CV(mIIZUrQi=hdj#A8RsikyEjx2B_&azB{09{C>~L-6`RXK(CIRLQhy4a? 
zS^L|XbN1-jo;N&`0Nw?rf!%=S3~vFn@5CrDy!Oz6)_b0QLb}fpvRYa_kB5XC?eOa|;t;QM;ti`EKnaY{#B7 zA8mJ_I!5ynzX|RFYHOtJn_$J{PqnXAy*wF2c~^T{(6}jz<;&Lie+dRtUY_N;rQkfE zckwu|4v6wN=`RPveD^BfcR+0#7MXfS?FZEfM}q}m8BiPHe_$|jhHFa0QxAOa1;{Vz z`CtzyhdLA;r}D!=@6I2B{>r;_Ui|`nk7p9t0Qmcj+r~b!`o=^xkJU|?OnXaCeyb}JB{meY>V0Z<8ckq-?5N)3h3|9e3Xtv8u-588pyH@I10#y zpt>OR6Y`gXd}U)O=VaR*3rxGMzdQ?JT&r()=)U;TMgRKuOFnAjis=&fs7}#)btX6t zoCCfCRQKNlo&*{n_!m&y`wft+f4siwyxz&W?iHZ>{sR64t_O$EpB$+^q2gD$Bj#0w$1O;me;xuy_1gs=YpHSgWyF_YCD_if==*7FcGZt$h_?{9Rqy3hU?5; z6~=XXuJz3ELwyMCTOPj#zuFle1hRW*Z``2wrQtwzeiP+zOkY=YfR^J;OF2mpm%h+^h5QO>Xl=`Ccw*K zzA2_Qef+TDx%E`XMN{=10@*q?iwTr(1ZUtKRS?==s z63_S{Q2$>#XwvD%{3C|$W>4C7V~fm|rOxKb((z4!`XQ%+Z-N!zZlLz(`@r_tC}o>4 z@p>Dfk68PgCHe&yYNji`N7Zu!$5|6k$oK)~Md(=y9jXL{;`TVe*pG1v%v8OY|Q ze!AVm4c8y~4kWg5qYozBzHi@)zmj&bS9#vgz&n9Wr_!oWvp^g6OdWSF*pIk#IMH(s20sBxYrgoyxxw+$ z1AjXK!?__m{3|&Fj$g{STc7Fp_HAeXjR$j`)^6Hyn$vOHK{?y@aXnGLa2a?vSPz&o zA=mFaZ*;U}Ms%KZs}H#|_!#&xcoHa{c)SDoC7myZ}iW&c22L_if4c}@ERzW{%G#gZQvt7?Ix3sHu@m)>qzz&I6oe!-yz#p zxzd%^c>e)shk?wh0p@d+RWTYMa&a8PDEudbm7vmPOFDcQ{dl$E>Dyb|gV8OuMVxJz z_2YUk>;)ErM?fXhslG%f_#}wx0ev4=^PRG3jW|``;b@?KZ6(v0hG&Vhw}PmSZ2)zw zn27QB6?0~!Q~QIPK&9un=$$WJ-Wo)8k6}zrk;O=;EECcha|x872RL;C4{yav;6A9;p8wrAgTn z|BYYoR@nh%53F<=i+SG5r0-i`FK@TNPanqab!FRH(VQvS1vEAh#s~(Nr>ZY^5uZXh zhQ@&QU24nj>!tNFUj)^7=RePLzXUb_nOi<{n90{}_8W9#4vgiS{>VXu-wM>{E=Cut zdF|t*_5T4oZlSgS(4qEBThtaX<6B?jx))oI@~$MgFs zP>p=Qf#+_g^knDGPGg?8we53$6wrKwXF)a6SFUGj9@0%)`vHfBCzq0Z3J5~=E8=&fk zYV=1cr4x@J-|_SVMoM-g|NO~Wdxt$ITc^f0D?R>NDGo~U9F6-<7JsTw!;zl>!;dXf zvazWztvT(b;J=tS^h}M}tq)9@kAF+djqe>mSQjWJ-g>=OWB&_*sSA79J3)ID#K&X^ zTNj)SUIq0kXSvh;ZMZSEz_!It?X`UAt+jLF(jGM-wfB}gdM4Y_N>J%CptgqE z9FeV01)1L-qw!x2s?jE?G#AA_Pj*4g#VpWfGV2{bO}OU36dQM%uh%%qaUj#>_OCFN zXM_F|4`3_sXHe;U{~piO_?6N3gzl@Kb|nb9UpQaXo^4`IfU}qOnJ1+7fov+F-;~SY zGB`2iu48dyj|akobZ;DZ0#s7}m-4*rKtA1HkMm1FX!8%8CwiZq%U%L`$4powsq>2h zmpNry_2IFg;pYPw4(#_W|AxO(`hP#q+RHsVqx}QDA6Ii-nt<#Al}b;&KSy=I*Fk?a 
zly3z-bIQ}r@S%AwnMR%yDEWbO@i|ZlJ&^6{2*4P&*0%D!Pn(9!3cJqjQ-p0|8lJH8^L-kNbi%p~I&C}YOXUP5^mFw6Vf{C2J7Ze+BTCcy3ITWV8Y;gVGk20b9 zu-y6leV$?3T)2kvSlQg0a}QP^4ds@x+VQI%|B!84X}yH@LsT0p`mTxQz&#syHf3FX zUegcNw`UviKz_X^-UiA&PV^_9v8#KY)qVGl;@lP>v`tG_|7w2p+r@Pk-#PI8EB$U? zV)Fa*_+>Mwc3o8q&!4AnS+*~-`>-}z9iTD+W`lC}xiwaOjQaX3=kH;^f#LTQYBA&2 zvh~+OSx8eF)Xv`nWcvZm&pC~+TnEx5?@(n~2@jitO#C~wjZzOtZg2qjHyA311KoEy zahrK6?8BWg>-oI?+qoF!ZhH)rzO;s`O|>P+R|kyad^bp|?1Pm{V}b8A9JxK$8b$Xd zr;*$4;ax5r{3FkJt9y>2*86eCg9pH1^D10dJXcq+my^~9M`PU`%zHDx?RvO>1pcR9 zFFT-LPt}0>UoQKf>edrf*18w%k;yxlw+;PkBQgX`zdoeI$YV~yy^vwSvPvwtAzZ2 zh3EYNdEIA@@MfG}*5LV|Ip&`<{J3)J^&>5=#@H^$X-*2c$*ibL5a*V11zVv{9HlzZV$9&DcI zI`#3)+-uDVO5}^yV)cMx;!E4LrVr50oQX6YR{xF7=j-IJ8SC)!@mqSS@lVaqDOG-6 z(A{F+_j-=z`06>l`(=l|p@h#VReBpKo=*7RM)@J=xb5N@zHZjK_(rzL{E+sM8K*YH zq~xE=Yqk6D1Rv9Hf4HzwxOah4=z-q-ySsSJW4s@CF3?<{p*% zY6BW)d=#i{QO>yxLrHtB-Ls&xWwPm;X>Op#z2HpP0Q?lxT6!y$9+e+6R_*np-rcq( z=KDCx+sWXc;7kzhe`fr4ZqCo~mxAY(yopm74 zd;z_?HU^`Z+pahT)oq&swPE)JZwH?MnpgNcpz)p8K`G=*LmXOTH5-_`Q9gOUJKw0b zK+TCZ?c=n}1?XmvGncog;HvCOC)oQaI}YU9=U2M$;goOL{8i6Hb4`rjmNUKk2byD~ za(6TMG}s%cUEe6*^ ze*_A}-Ah<2m;u%US-p0)gDAp5dW>z`tuyxV-Lqtt;@t`y4ZaUlUlmGI+Rv*FJPBl0 zcW-Q1mES>i?_WULImjp9UC6zm|NinIi(AcJ=JIn3+1*{kGKS zKGMaNEKgcrsJ=&%Z_P_t0Cr#T?Sp;U%dBXbl<6t)dMw#M6YT}nQ&YfV@DxaQKDiVu8Zo1!TYCVZoTg9nV$KR zVlh#^-8=*zPsakYU;ZBaz63l6;^}Mz|Celg*Y*cvw!n#S@k9gip?9FhK{Q^KQ+Dd@ zhrSahIl!sFm*p4m7n%bg`=`IISa%-xn&*ipap!*Th#QahF@ArH_EkOemuW}71-AlUYJGR&PdO+oAd+TJqf3Jz}TQsS90s_dMZF0)GSb zDARD#EIn8s`c0P@H4Q03%cV9G?j{!{wFA>cI-m+t|7n|HC7jo-RVf6pDB8Ok4`kjFZT}X#7o_>k zIl|NqR+}Ns??JVlJ|UTV7$eaU8w)sr^NnN^Re8V6z60u1|Cx8YqSd^IuRxZ3`uj!Y z^&MB+wN9O8KR(m$T~Ofqd~xWU`t@rlE`P`mq%-QLoA-zh^KKq}Uzc-l1#bgKgA>6? 
z;21C)>E`;GXYtJKfb4&T(wsd1GS{k)qWespFIR{CZTdE{qn-l% zIBGJEH2hwMe6klA+11~YAE;e$FGz!ogO%Yo$g6jl;iBIDDcvspzs1cb^Dj6b*#Khg z-FI>R)dKhAi$^v%o|Ua9?QipZ(^rpkcld-2d4KcHw|YDhcQ5cI@N3WuUIwp&)edjq zz5<>AzXNB3sbFp3<1vruV9roXj*UsjHAToF*;au4qjm8v?>Q$fW!F$W_c`!ykdKRc zKc_y9^55_%Qe_9^`=XM6QA%YllxM2%a@w`=a$M_vn}?=vTd3ZCf@?HZ9-FUY@_BR4 ztpbI3iJw<_F@66oY<~S6N9;Z(($ym;tUm(Xoer)6PXn*(gLR+dp6`MEsY}+t&CCTy zPTbJ$Eo{oKkwtxv?+sEGFV|CCe_ZE6yFL!T5lgOv`U&R)wU_-pX>zgmPobW3`?Y1| z-XD3>-2X}D;WNmqd13t(>zWNyy1{rL`+HJ0{Z{$?Ffe^y31a=`zQNY@k8#g9-Phl7 z1bX@Qu^V~)-voVa0z=XJWc~0w@ic?20PPC32dr(t$YMEA+jSYp$3dZUD(7m~j0A?4 zh#&iV6z9JS3f*7N=X;UMycg8|kI5~WZ%e7n`SMF`ytjk?igwM0DcwNt=Kk99dN-Of z9?NIlH`uyf<-yqY3lX9B^5GMscKUeq^k$IM9x>n<9vGOgQE@U@|bADY%^Yjt^-Xu@5(;q!{W3Q9a&u%bWT`zS1 z{lq<6`ahTF9zJ0s%C9fCrXJAyyjUBy>VQwNZ|Vqfq&BhER9HKM;rE@Ke;&j+NrV3; zWQuHGvH2m>IRB3n%3CPU&PLW~Eqk(j*Xr-i?3({G29Z~%O2(d6+kcPQd{y@iq%ZSy z@5%ywf9axw-uva$eP1X1ndith$a6O|*`xG1jKTdKD3sTQ&a1p1ZR40?#>TZ?-u$-p z!g)i8v(U3@<-GcgnhQeN&v`F2S|8wYz1uV&w4RRk?3(`OL6lADL^O9dD%*Ci=#>N9 z{}GiDzuqs-k@U~${-(I)X!CoWPr7&TylvdGO_F58Ryx5^puhf5A^m%V`=+=!Oq|}2 zyCrA`h3>7_^J*_;x|i7XldJFouGrXXq$#&qIX&JU$t>F;^f4%7IsnZKdDjg6}w#Ci3Ls-3<{Jbx*2YW`!g zc-QoAr5CIPehCJ{)Bm|SZbq)yysz!J|DFPIXdcYIpbr-@d!NSmN@3?Ow!V0Q=N>Hm z7j(qR?@6S2pngpu&dzC{KV^jG2WY%AHpfzBhDbk(YvjyIhsrJ8P4akAwJ9bJ#^R5T|<$Hf&Kcy`}-8v=HvzYyju$wAg zupapNFyig+nbiur!hb8+@W;AMC+jIGld}-ND&sp!=6X6@9%=64eV~wR$@72XTH6M2 z^167G=kEo{K3^~Aw%0ZB07)pavC_d<%~ER+ug+qj018vUpI0_wXz zT4Www%|2KoHJ9J)%jfpq<((-z(6m{JAfxaF)~Zu1A!^e64v{1#{>0;e0&e%@6$JO`acf$F;K1dzJKoDNy9{? 
z$seRyGK~Yhz{^sstG3XgfHsBOH#0kC(lm^Ci%mzluh(3_6OEk6s>Au!hq@7z`}x(5 zU+;+hBy&9-N-vP@_OGB=4u8cxqX2RA*%MZ_^VeLQkKw9~kwcAi4XEF(b$nhAtL6TO zG#mz^G%CG7bAh@*vE0P3mko@zQdTeI2Qz`{(fGCX?7vv^sIzlmej*u#vr;5uq5RR9 z?i|TmPlwVA^uB%?6q2pb`PaFJ`QMr|%Y0Yn(Z|LE)epGJ#i_Q1+88<1NY}u7L3|Bw zwe(-l{fN@!52Y7qZtL$sAr1y}Uh=Ds&2(K7-#ctbd#DWNdDXfGo~ErYId!aD64?OW z&AI=AYNe|Z&y}s`ATPTwL$&TIyZTtz4>6mZl$ZGDSTXVEvm~s1O)PgG;(pbsq z5Bb5KK=!d(NMpIuptkNifET?kT%{LG1Vhz-@u@Mg^+2``$lA}I1laGQ?M!-i_)5zYRqBEI*J9@E%Yu-K&Lo>skMY+Wz;vYaHto zVDw*eXw5n>jS1L#;5~$CU2Rk@|D|aEi>HTx+Cvcs@&ok? z{sKIg_3TDAz|O@v`y#G`d^e{=#zOg{_W!{k%HMkMD7`>sa47b_I2Y>U%AS>6AFTM- z04f7HlsNt0ANQ3S(0|$g4p*A|q4a`M*#E?n>_a)_!^J-uw++O(tY`mulB@fo{S1w! z{pKgtc<2S zd@)5b=Hv4f0=6+fRK>K~E{avj#z)L)P4v6$$b69}d=HDqHFCD}sdqTvA{NQNt zIw%$Y)r?1WB-xXaGNyeDP&VA1;&Mj>)UZeC1!}LW?_SDsqx$gZ?0g_Q?wzE<=-h_??~PBr6s5jk?k zdigCG$AVr^&9s&C*_z|NE$~@e7p~F^)FvJ79G86DK1IBTX>OpNlbsiurN-&M+5nmd ztiAdxT?c6H@u48L_SdGDG@k&}pUEfNP|iJ0Tw_5*fc!vvqiF1WC~?->JsJx>1SDnL zjB{z81F#DDHi^p}5m3V(y`Q^Vri~!o-v;F0>9+!;g^$PZ*UlKrTZ&#G2jvhm5*my}WKM3$#S z#(dnW{j4$*$z2bg*nZ8eWy~AZ0f%M#0=XQSegF0ToiKV!@CQ(-Hb4i@8VAhxK$Ks0 zErZd?!%6eMO3aD+#=C&1XvhzyfHbWGGIPPakXFWN>*C%D^65?*&ixeG){?CCbj0+( z+~c!)_Z{82AhN6H(4=qiNDsjza4#r#9q=g6Fnj(S$#;TvDDNCL#JvTSx?HM1lw%`u z&zpvumiNCnJ415$!^tS!_&F#wr?nE_RsI_?UhaBvUH%-8tNEMdw^1lf;8ai!dwehP%BJYW_VvwvoO=b7llw}> zJufLEZ9k0%e?LVsCi!fV%!R&h6J>Y}dX#P;8+0YcXI0L>2G$3r{A>R}9p+m++3$bt zgMKaprEUkP%|9#K=jik8kIAH~7bNqy6u<8y-h5-@@8G=FkCfsWwGhVwWQj_;zNv8g zV$<%sHbpWfdH!fz=1BND@F?9tYXF}CNx91LTkqCy0L?oxb%0zQ+7tAMiKBcwK;Pr{6e8+Vkkb>vZA0hRsxK_z+r9nYHta_WG&d+2*WwlBbYdQJ58 z0B|E1NuG}uZ}w}-OtPYsWLyel6k6UT_PD* ze0`>o^T~6zGwou|U6wLAJ+JE5?j@P)@ld+KdO-T5YK!+bK=!M)VGK--EN;e z^wqZmR3EIv@0s9Sa0mDscnu``2U^Fcz5G^ygc@9lQ&i2u^c28TU}2eLP3U<`+lz zD^jgZGqBH6zFh~(t@|n~#x9=tPG&jhUik7^Y4=L)Z`8KX?g3ypw_K}VQ0cT(;(2;+ z>iucTY`y){yga2HsC-s}uR_oG1J9q;wa|_cDqV4VDmtC7E{Js7hG*-ExyF`g?R(0T z*jT4&XYi~k;P(}h`9C~Ia|)v3AwPHvDD~XyLi|?h{N>XB+s}KOk+pk%G^a}af=Z>K 
zlFzxagFSDQpY?hu?Lg&I<+PIAC7<;-p8sXADKMP&#MjIYf#6R16P?!nWa1-U?YVIp z(7dGN^UL*HbANUNkp%LCaiABJ>$%m4_hQL*$N962toNQDt)*7Kpc?5Z_cK2$dG*bh zdOehWFdjS&%FWk6@vAL<2Y5e_t}1N?J?Qd^))@oR+}(=JIlG3}->-M=kw7;7*TF#P zC}p_h83jz*yO%`o#7d33mQt31;%f17O4v@?0_yW{u3G)efzmsi@P8xMJLB>WSU79& z0;L~}0UcmCe3bHDjjP-SWFObOc{9Hf*Xwh_jruY1^CP+bt>7czx8PM!$}|o(E~68& zZ|dlvZ%jIb&sQlugH88PuDd|8`9uA#_7?Mv`ER9A)+do^r?|Xc?s{^SM;$F@y?ZU@ zk=g@~gC7Cui{6>457bA{nu~QE(Y(c#?+Uo<+7!;=^g$T@JDb5PiY6@&5yp- zGMl`6C52_On&sqb#xh2DosjD=7gv3pYNo9m&pui5`NPSo@v&tolU41~lO&(kw$}S0 zIlxw+((?ezQ3h)zPT7YJbKIIDqP0dHEoP6QG|q>SUG+VZ*B?&yjc~g_t>jB7((xSf z>?8ToaHtLgY3-AiBEN?c$8ySt%DyQgv3-TN0Ti28)w9wbE6t=l%nk za^8vjmb^QHr$D)$Ta9=x?O9~Und@2aecJzsEH1C6xLyX^! zW2SX2ylk)1bziJ}nVQ16G?gj!bKWO86vG0ykB!+bvSP2 zN*A6D*{G~ZwwGBTm z9@`gZbzi^1?%;8dR{5`Id}9sCs-v?tDo z{R-YT1ZK^l(F6S5T6f$$b$-e@Jx_A?EZNJbL`TbX_H!_Hr7Kb-xB6~-OV0Qj=d{!T zlFMN%?wz1gdXt9dJPlv79Dk-rct7qWkTx55GS5CDdA0sZhm&3HWbJ*FytZC{{~B3x z`Zz||l>A%TG{@(gT$R~-Q1?L2iMTjuDdVH|4q9{_&WBuHCw*y&Ou(Sp(z!AX^rP6a5P$FL(t*< zxJLl(bCg!uCD&iUt{^J;^1E{iOM~o^V}j(8j->gBtqD>dz6)r~BMo_24jD9;Z#R%n z_chmVc>{3QiHsUMj?_Yaa4=BYpd7qcI!@LwPNOZC-gkeLd3e~D)44T(*}@kz2xs4?zq6+LJUYz2OIH6snG*RmuJb`a9yf;mD<3r< z!0r)f_Ha26`R`9jewLyR(44m8%oF$?^)Zw z-FMCKaB)iH{}Xa-Eg3r)*KDN2ejh8^Uq|!TR-`0Phoe*K&wr|O@g%dh*S85(J{&aG zuQD^7XVlBRTFY>xzgvLc6a~2so8bN&B=1Y>Z}owU{ln{o<3Z~m9!UWY&mx1~d*pB9wEcZv zdRJ}R*qD}VJ?8`ILR!mpKDo5+`{VGSzP-_R@#FoRUl;!+Am6=dJ$D!KM*Ew#Utsre z=bY98zmftjZa{{$fmelwb?;mdeYZ+$Th@e=V}aJXrL}Cw<$4S`-`m+bKGJuq`?hby z{OOT|e-)&0ZEu{1r2kT6j_Sg$#Zh1Jbj~NQt+(GAyExp)hkYj;mI0sN&Q;s*J0MMK zyprWq@3@(6eN3GkBtq-x0+4hmjL^+;W$vA(H z3>!+mwk6qb8Z}xy(C)s=6VGSuu^-!KU$!2V1C51OGJnebEUkmO3Ty|A4MhDQU(U@r zTh>2~ztQ?a;{NYo5Aom+`N3vj6)1Q9lrw&f`F$8WwY$Rk%)x&v>bIF#QX>V=ZcYmzBPlk(Az+1$L|E`t~*SC9hepTz< za&_(1K(^C*$m@6-HD-SW*ai6VEHUv%wk@6C0GtQZMu^|vNd618ZoHdstm){m?+J19 z**fRs4buNHz})_vuxcmV5zAAlRd?!ceV+<49?1MAFmEbR+*7ZVt z_n!gH=aVhZe!t4S^CeeIwyg=j7O0%mx?JT-pX$9sff*O_`Oj~0cLONbb1M;V8|hac 
z#*}GOxEuJNnG0;WkZ!20`v6dXU*la-J>V&dpX+KdV`b_iHUaesYt25ccIr=o?7XVu z`jN-H|EF=yQc#IerP<_mb8+Jftk28!cRAg=q%i*kkOz5Nil1nduT>ajNV zL)M}6$7Iko9|dXN!>1U&zuDP3ZG`%Po-4RfJ|6}C1B!{Omabh&+Q$IX-ZR35e@3sx z378IK_w{)ueK-?r39O#Gxq_ZHo!4HNQ8;Tw@V^zj52%084K$zrP2l4#wX6E-L2x}d z4veKeKLR&Om!I3+zJPixJKs^^>hGw|dJU91UG*5B>TTtn;m+%So6X}TbHj zYTUVgUT@>Bwe`~nZZSI3oXBJ8Z6`PxYz$1fXlE^q#t8lK(8W8&#|+Iit1dEgigeFZ zcmGAWzXg8=nr|naE2du1wbFIfQFntMgR{ZAz_vhjk9oFn-FTkTsBxa0F#8_R*q-W* zdeqZ$rCII!vEs@f@&ommdqBCKTkd#&NZQu}W(}j4Ex48e!;8)5o$yOPllh_d!X4lw zusJaL^hZ!Bf;;$8z3oXqyPRU^B~rM7eire@7T99;`?xK#U-X^BVXn(<=WA?pF8CR!*K%BL-BaIQeSFhLQYjJtu8v>C z`EozMT=6UIra#!obN%vbefDguUo0ERErZDK$GGMSp!fJjppRZ>WAESh$oYF@o;zzV z_LZGZKScX&Ok;d@YUWS8%kN&8eV_Lii^^&{_ebflb=lVZR=eRhAn1MnI;=j`79gTo zexSDhN>DESEN8rbB;DhHDHoAo!IPJP*m&RpaHc-`VDd(K)eh87KM-uj{?Ix0H_m15 zYTh@qYLVSXBJabCJgK>hZOmVew-MYpzUr?zU?KPuDAbl2jBJf`&4niao!unXhtv3} zY&G@9h0?6LKeMuVMv00RFBIhQ_Zvu_1W6H%-H-?zQQp$24ZahwVm6Bk@s(Mui6l@7id1gSAoWsRL*t) zn*im{$ez|2zP$Bu+b-RabJJwom+esZ>fLI0j<*e;E8}{JvEArMCEs;0d61!XJRf@va4zfMy__ zz7WW6Bm2L`18xMr1L}wB-j~7uK&jHz$nnUgdmO<2>*oAqzZ-oD=ijU|p8rdFKiR1{ z3&^9f>2MUsfS=PtyJ-!$lrA>1exzM?jq4k`U*a1-8*u*CI?Q{uHQjvg>>QQiG$33J zhrTVY@qWFh)2;`ND&Lot*X@zrFUt1=Isa0f<@Gp$@2D4)pHd2ShrvYQaoDm-2MZ*UOln=9kPhb5^u| zg)zTyl!pP$$&bwqdN;g34?-PKpB!A&?bgJow2+S;TjH;mb-uDiop$5J2N?TB(Eal0 zeLPmL|89QJXW>8C1M5%!pCI4%0Fm9VBfD%)Hxf_~LD2U>e^ zD&U>zzjxx+d)oDr_trrkt{{zOzLMn7w`#&sJqB!9GHtN!@T*=9bwKTMp!HK5`8-pp z(Yn|UyHCp3>LBm#CXHskLOc67ck*4Da8#E8?X~C26R!3STn0iNPq$Jh(lfs;eE1@pLmTm@P|co83|4WvQ0SdAZVd8F^y% zOYLGmwVs9Udwz?i>0GVv@9}5SI16|?MPn#d9Z9LRBOTgX-_`?Ke--M0+IgR@qS#j!L)9s7Sid{@EGa&0BsYq4+;LdFY7^o{0t~>W6=2E55fO{FAKpP z9v5oUe=6I~pTeHLGu&9evBwZL^}jX5_wA%*I*7_e{*n{UriB6R8?dU`%{^j_Ek!)YF#rR1F4!-s@5y(y z``!aIM=jI=L+9W(@V5ns>3;NH)%m40!msKezRhF5HKz1-yS0b|6q^v7G6t0QG3<(8 z^8{+CeOfQ|k><%w@LZ~xct7r1;380MS+uXfZ@~=B<-?8YsgxD#AWjhsXz!V>#nD^= z*{9S;?FH2?ul4f$zrfYOfOWj=>64w?m+69A&VL;I8&p%C=Xib-*aVpINV(w%42XfQ 
zt8&&5tISRUKLVjG(7)bl-Oh2E&ui9Sp;PTI-Isx$i=(jt|E}1DGITkp%P*}2HpqUJU(mJ8El@1>jDEw1}IDMvb`a{PX^U&*H&7ulF$1F@{5Oj zy07`wUj>z}BOm0MX8?^eM){?2-f#p4hRZ+??MU~%5nr~pa^>xQ+@yY_`S-QRJI3pn zc=i6}i_LeMW$S+hREmQqc-94AC%10x?rYBhsGK;t{;xTK)D&PdqG(} z&|bEbL)8Okfu$<4(-dwWv_3iIV@9bP3#@44kRz_Ks2Cdap z{kn74qJ0e4U8;LXLpTBh)nF2undO+{WqreZq-$6Nbv|Mp(9PNBi$j&cW=YWSm zInto<)|8C!W`Iadtfd6lfXRC1*B8u(3Mih`6B83 z98mg=JQ7y&`QNy+f$Z+3e75ElXdRaN_HP4G-P5i)h9De)fm&yvZHa8*SzU2^-D^(F zTyPbTy-;Pnl)6)@xYUn7zPrWF>%(v8mTO_`9S7kr)q7v#_K$<_f)9Xg+OyvRH+u(m zvY&G}0t5BP0Oi@(rR6%vZnYhdz5JU%V~Dk4E6_T=gZvsEZ%^l(PrF>%`W^$rsc%&G z$(E=7k>*qEOkZB#0W#0gT)i+~uO3T#NOOM-`0^cdRTs!+coH}$R;4xraT<7_1Y$##QPJbmRgr$W6mG?Z5 zoqaEG61Wny1KAH{10Rac4dx!TJ*1PH1Ln`DeQ)Q_=(oY5DVo=(xjoN=!NjRCLtQJo z{kOrV!E~^3=K^cL$M5Fl^UPq!PfM{5atwh1qYE*6fqt*LXqI$ObM5p_)_eC{@Kc~U z2C5VO0|uju{avHII!^`Bo?RN#zqg5f@15?`2UrjPS>RtFUtIs?ymb9Oa1&Srjt7$g zHjgQ5;70Lz{}5;jw*&?nf`N`iU0`&^jF>cUy z@pK=3a03H@fg%{_XjQvBtEX|@QeDCK&eT311!POu9qbEKKO6`2e*ZE!A1nsyS2Tfb zL8PPc}M_bg|)h+#FigH+*cIfTq6TIRx|#&LKeJk6xWCL_7x(Uq5@}(ZmVh&RK-bj)%wVnb$HS zniQTx!@R5jFJ#u$t+R8oVRkmWH#0}!`)8KPaYrnC*$8uP@5~q0P zM?a$Q9NMGcYzgdJ`prdSef)WoAIpH=Y@BgllmW})XS4n&yeWGw?u#-Yelq6w;RVEt z>ZqZH`wS{{|3KkhLkitLV7SY`g7*&;KHFws!TX0BK6ZfkW8rbdjk3@cfP(3B;TJ?V zBs{V3If)?i(-n}l^M&bLEW9b-H7-1+)45mK@HYv!^;2%Bf5#|1_h@oIZ4ILEK9`s) zvf=#9tz2W_Z*cQSy{}xXS;rPog;N`5GACw3tj~p$_;ppcn>spvO+37B?EW43hJ+uH zFWeUJ*9af8&g!Ok%+7Dhk6ue-Mz79S$++;Q%$RNC6saONM-;yI-u_gMn+=~6KOeCj zJ%4snJUsHd@HvS$Ulcx17bxLhi%!WCm4+z%t2yD(4;MZzb7piZ#}nP32yb#%x$rTW z({pZ%-g43XhFpJF_Sz`C*Pipl%yF0GhWoRbjQ1tOz2JqyU9^#tyx%8$oVy}7+@H;4 zyszl+IX2>g_q*^h_A=X%5qG^hF;4%?Q80X5f8leYD3bTD)2sXD3^{zw#Qj?;Iu6gz zh+gwEliAz4$?%M16!|XP*989lsC*R)_YoEf_YoEfA8FH7@cF)Ajf&06%t_yeX@9cT+D=xgr z-xj&K_ZJLz?UQBEecm(9dR%y~KOMQ_tj~qN5S{lP7e3aM-<$$$ugDFL7m%Fr+4lZC z8E7wa;eB82b8^F*?9c4~Io3PR#=m-5HpKeoSjUC;O`p5o-QG96$zI^YeeK|G^SwM< zd{?h7biWTTB!4#C<+qRD6=PTYXn5J`Yg{7S6+bo1niORI$>WYFyvfvi*=DMf$7TO& znojF*%ijJzTh+umxXUt`H!`D}qHyOK>-Z7>>%>cmt8Jt3-sGM3_iTTE-vVeZm{%y= 
zRWDKaYx(2%xA(~)M|Z(+8=cRd)vNPoV3a+1TE>bYqJQAc?pWqrxM!dh|!amps}&^apqa`;^BD+hzI8(ATEAB0&>L9LqLxBc?if6KVe!^`e)~)uW#>XUT%0F0ds@< zs6eiGTx>YY{!1jkj{u3}_Yq)r;`vDd<|V^(E8#xLOC+#Q@)8N`le|O%`y?;7nddDp z@kGWekwoU(%bK$6<$1jA2f9n<#Uu9qECYF@*G16-@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/aind_data_asset_indexer.rst b/docs/source/aind_data_asset_indexer.rst new file mode 100644 index 0000000..4f29087 --- /dev/null +++ b/docs/source/aind_data_asset_indexer.rst @@ -0,0 +1,69 @@ +aind\_data\_asset\_indexer package +================================== + +Submodules +---------- + +aind\_data\_asset\_indexer.aind\_bucket\_indexer module +------------------------------------------------------- + +.. automodule:: aind_data_asset_indexer.aind_bucket_indexer + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_asset\_indexer.codeocean\_bucket\_indexer module +------------------------------------------------------------ + +.. automodule:: aind_data_asset_indexer.codeocean_bucket_indexer + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_asset\_indexer.index\_aind\_buckets module +------------------------------------------------------ + +.. automodule:: aind_data_asset_indexer.index_aind_buckets + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_asset\_indexer.models module +---------------------------------------- + +.. automodule:: aind_data_asset_indexer.models + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_asset\_indexer.populate\_aind\_buckets module +--------------------------------------------------------- + +.. automodule:: aind_data_asset_indexer.populate_aind_buckets + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_asset\_indexer.populate\_s3\_with\_metadata\_files module +--------------------------------------------------------------------- + +.. 
automodule:: aind_data_asset_indexer.populate_s3_with_metadata_files + :members: + :undoc-members: + :show-inheritance: + +aind\_data\_asset\_indexer.utils module +--------------------------------------- + +.. automodule:: aind_data_asset_indexer.utils + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: aind_data_asset_indexer + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..528ac66 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,53 @@ +"""Configuration file for the Sphinx documentation builder.""" +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +from datetime import date + +# -- Path Setup -------------------------------------------------------------- +from os.path import abspath, dirname +from pathlib import Path + +from aind_data_asset_indexer import __version__ as package_version + +INSTITUTE_NAME = "Allen Institute for Neural Dynamics" + +current_year = date.today().year + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = Path(dirname(dirname(dirname(abspath(__file__))))).name +copyright = f"{current_year}, {INSTITUTE_NAME}" +author = INSTITUTE_NAME +release = package_version + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.duration", + "sphinx.ext.doctest", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", +] +templates_path = ["_templates"] +exclude_patterns = [] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + 
+html_theme = "furo" +html_static_path = ["_static"] +html_favicon = "_static/favicon.ico" +html_theme_options = { + "light_logo": "light-logo.svg", + "dark_logo": "dark-logo.svg", +} + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +html_show_sphinx = False + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +html_show_copyright = False diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..fea31d1 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,24 @@ +.. Doc Template documentation master file, created by + sphinx-quickstart on Wed Aug 17 15:36:32 2022. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + + +Welcome to this repository's documentation! +=========================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + UserGuide + Contributing + modules + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` \ No newline at end of file diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000..cbf7f9f --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +src +=== + +.. toctree:: + :maxdepth: 4 + + aind_data_asset_indexer diff --git a/src/aind_data_asset_indexer/aind_bucket_indexer.py b/src/aind_data_asset_indexer/aind_bucket_indexer.py index 797dae2..e01cf95 100644 --- a/src/aind_data_asset_indexer/aind_bucket_indexer.py +++ b/src/aind_data_asset_indexer/aind_bucket_indexer.py @@ -54,16 +54,19 @@ class AindIndexBucketJob: does not exist, then remove the record from DocDB. 2.1) If the S3 location exists, check if there is a metadata.nd.json file. 2.1.0) If there is no file, log a warning and remove the record from DocDb. - 2.1.1) If there is a file, compare the md5 hashes. If they are different, - overwrite the record in S3 with the record from DocDb. 
- 2.1.2) If they are the same, then do nothing. + 2.1.1) If there is a file, resolve the core schema json files in the root + folder and the original_metadata folder to ensure they are in sync. + 2.1.2) Then compare the md5 hashes. If they are different, overwrite the + record in S3 with the record from DocDb. Otherwise, do nothing. 3) Scan through each prefix in S3. 4) For each prefix, check if a metadata record exists in S3. 4.0) If a metadata record exists, check if it is in DocDB. 4.1) If already in DocDb, then don't do anything. Otherwise, copy record to DocDB. - 4.2) If a metadata record does not exist, then build one and save it S3. + 4.2) If a metadata record does not exist, then build one and save it to S3. Assume a lambda function will move it over to DocDb. + 4.3) In both cases above, ensure the original metadata folder and core + files are in sync with the metadata.nd.json file. """ def __init__(self, job_settings: AindIndexBucketJobSettings): @@ -82,6 +85,7 @@ def _write_root_file_with_record_info( Write a core schema file in the s3 prefix root folder using the docdb record info. To avoid unnecessary s3 calls, the md5 hashes will be compared first. + Parameters ---------- s3_client : S3Client @@ -137,6 +141,7 @@ def _copy_file_from_root_to_subdir( Write a core schema file in the s3 prefix root folder using the docdb record info. To avoid unnecessary s3 calls, the md5 hashes will be compared first. + Parameters ---------- s3_client : S3Client @@ -195,6 +200,7 @@ def _resolve_schema_information( prefix, and there is no file in the original_metadata folder, then the field in the DocDB record will require updating. This method will return a dictionary of updates needed to the DocDB record. 
+ Parameters ---------- prefix : str @@ -266,14 +272,12 @@ def _resolve_schema_information( object_key = create_object_key( prefix=prefix, filename=core_schema_file_name ) - common_kwargs[ - "core_schema_info_in_root" - ] = get_dict_of_file_info( - s3_client=s3_client, - bucket=self.job_settings.s3_bucket, - keys=[object_key], - ).get( - object_key + common_kwargs["core_schema_info_in_root"] = ( + get_dict_of_file_info( + s3_client=s3_client, + bucket=self.job_settings.s3_bucket, + keys=[object_key], + ).get(object_key) ) self._copy_file_from_root_to_subdir(**common_kwargs) # If field is null, a file exists in the root folder, and @@ -341,9 +345,12 @@ def _process_docdb_record( ): """ For a given record, - 1. Check if it needs to be deleted (no s3 object found) - 2. If there is an s3 object, then overwrite the s3 object if the docdb - is different. + 1. Check if its location field is valid. If not, log a warning. + 2. Check if it needs to be deleted (no s3 object found) + 3. If there is an s3 object, then overwrite the s3 object if the docdb + is different. Also resolves the core schema json files in the root + folder and the original_metadata folder to ensure they are in sync. + Parameters ---------- docdb_record : dict @@ -415,9 +422,9 @@ def _process_docdb_record( ) db = docdb_client[self.job_settings.doc_db_db_name] collection = db[self.job_settings.doc_db_collection_name] - fields_to_update[ - "last_modified" - ] = datetime.utcnow().isoformat() + fields_to_update["last_modified"] = ( + datetime.utcnow().isoformat() + ) response = collection.update_one( {"_id": docdb_record["_id"]}, {"$set": fields_to_update}, @@ -445,6 +452,7 @@ def _dask_task_to_process_record_list( The task to perform within a partition. If n_partitions is set to 20 and the outer record list had length 1000, then this should process 50 records. 
+ Parameters ---------- record_list : List[dict] @@ -483,6 +491,7 @@ def _process_records(self, records: List[dict]): """ For a list of records (up to a 1000 in the list), divvy up the list across n_partitions. Process the set of records in each partition. + Parameters ---------- records : List[dict] @@ -652,6 +661,7 @@ def _dask_task_to_process_prefix_list(self, prefix_list: List[str]): The task to perform within a partition. If n_partitions is set to 20 and the outer prefix list had length 1000, then this should process 50 prefixes. + Parameters ---------- prefix_list : List[str] @@ -698,6 +708,7 @@ def _process_prefixes(self, prefixes: List[str]): """ For a list of prefixes (up to a 1000 in the list), divvy up the list across n_partitions. Process the set of prefixes in each partition. + Parameters ---------- prefixes : List[str] diff --git a/src/aind_data_asset_indexer/codeocean_bucket_indexer.py b/src/aind_data_asset_indexer/codeocean_bucket_indexer.py index 6b67787..34457f3 100644 --- a/src/aind_data_asset_indexer/codeocean_bucket_indexer.py +++ b/src/aind_data_asset_indexer/codeocean_bucket_indexer.py @@ -1,4 +1,5 @@ -"""Module to index Code Ocean processed results in DocDB.""" +"""Module to index Code Ocean processed results and update external links in +DocDB.""" import argparse import json @@ -34,14 +35,16 @@ class CodeOceanIndexBucketJob: """This job will: - 1) Download all processed results records from the Code Ocean index - 2) Download all the records in DocDB for the Code Ocean bucket. The + 1) For records in AIND buckets, update the external links with Code + Ocean data asset ids if needed. + 2) Download all processed results records from the Code Ocean index + 3) Download all the records in DocDB for the Code Ocean bucket. The response is projected to just the {_id, location} fields. - 3) Creates a list of locations found in Code Ocean and a list of + 4) Creates a list of locations found in Code Ocean and a list of locations found in DocDB. 
- 4) For locations found in Code Ocean not in DocDB, a new record will be + 5) For locations found in Code Ocean not in DocDB, a new record will be created from the aind-data-schema json files in S3. - 5) For locations in DocDB not found in Code Ocean, the records will be + 6) For locations in DocDB not found in Code Ocean, the records will be removed from DocDB. """ @@ -82,6 +85,7 @@ def _map_external_list_to_dict(external_recs: List[dict]) -> dict: [{"id": "abc", "location": "s3://bucket/prefix}, {"id": "def", "location": "s3://bucket/prefix"}] will be mapped to {"s3://bucket/prefix": ["abc", "def"]} + Parameters ---------- external_recs : List[dict] @@ -110,6 +114,7 @@ def _get_co_links_from_record( """ Small utility to parse the external_links field of the docdb record. Supports the legacy type. + Parameters ---------- docdb_record : dict | list @@ -143,6 +148,7 @@ def _update_external_links_in_docdb( 2) Paginate through the docdb records where the location doesn't match the internal co bucket. 3) Add or remove the external_links from the docdb record if needed. + Parameters ---------- docdb_client : MongoClient @@ -283,6 +289,7 @@ def _dask_task_to_process_record_list(self, record_list: List[dict]): The task to perform within a partition. If n_partitions is set to 20 and the outer prefix list had length 1000, then this should process 50 code ocean records. + Parameters ---------- record_list : List[dict] @@ -318,6 +325,7 @@ def _process_codeocean_records(self, records: List[dict]): """ For a list of codeocean records, divvy up the list across n_partitions. Process the set of records in each partition. + Parameters ---------- records : List[dict] @@ -336,6 +344,7 @@ def _dask_task_to_delete_record_list(self, record_list: List[str]): The task to perform within a partition. If n_partitions is set to 20 and the outer prefix list had length 1000, then this should process 50 ids. 
+ Parameters ---------- record_list : List[str] diff --git a/src/aind_data_asset_indexer/models.py b/src/aind_data_asset_indexer/models.py index a01af0f..e7c91d6 100644 --- a/src/aind_data_asset_indexer/models.py +++ b/src/aind_data_asset_indexer/models.py @@ -33,6 +33,7 @@ class IndexJobSettings(BaseSettings): def from_param_store(cls, param_store_name: str): """ Construct class from aws param store + Parameters ---------- param_store_name : str @@ -60,6 +61,7 @@ class AindIndexBucketJobSettings(IndexJobSettings): def from_param_store(cls, param_store_name: str): """ Construct class from aws param store and secrets manager + Parameters ---------- param_store_name : str @@ -133,6 +135,7 @@ class CodeOceanIndexBucketJobSettings(IndexJobSettings): def from_param_store(cls, param_store_name: str): """ Construct class from aws param store and secrets manager + Parameters ---------- param_store_name : str diff --git a/src/aind_data_asset_indexer/populate_s3_with_metadata_files.py b/src/aind_data_asset_indexer/populate_s3_with_metadata_files.py index ef94722..569a93c 100644 --- a/src/aind_data_asset_indexer/populate_s3_with_metadata_files.py +++ b/src/aind_data_asset_indexer/populate_s3_with_metadata_files.py @@ -50,6 +50,7 @@ def _process_prefix(self, prefix: str, s3_client: S3Client): Original core json files will be first copied to a subfolder, and then overwritten with the new fields from metadata.nd.json, or deleted if the new field is None. + Parameters ---------- prefix : str @@ -104,6 +105,7 @@ def _dask_task_to_process_prefix_list( The task to perform within a partition. If n_partitions is set to 20 and the outer prefix list had length 1000, then this should process 50 prefixes. + Parameters ---------- prefix_list : List[str] @@ -122,6 +124,7 @@ def _process_prefixes(self, prefixes: List[str]): """ For a list of prefixes (up to a 1000 in the list), divvy up the list across n_partitions. Process the set of prefixes in each partition. 
+ Parameters ---------- prefixes : List[str] diff --git a/src/aind_data_asset_indexer/utils.py b/src/aind_data_asset_indexer/utils.py index 39ebf83..ea93629 100644 --- a/src/aind_data_asset_indexer/utils.py +++ b/src/aind_data_asset_indexer/utils.py @@ -67,6 +67,7 @@ def create_object_key(prefix: str, filename: str) -> str: """ For a given s3 prefix and filename, create the expected object key for the file. + Parameters ---------- prefix : str @@ -89,6 +90,7 @@ def create_metadata_object_key(prefix: str) -> str: """ For a given s3 prefix, create the expected object key for the metadata.nd.json file. + Parameters ---------- prefix : str @@ -126,6 +128,7 @@ def is_record_location_valid( ) -> bool: """ Check if a given record has a valid location url. + Parameters ---------- record : dict @@ -179,6 +182,7 @@ def get_s3_bucket_and_prefix(s3_location: str) -> Dict[str, str]: For a location url like s3://bucket/prefix, it will return the bucket and prefix. It doesn't check the scheme is s3. It will strip the leading and trailing forward slashes from the prefix. + Parameters ---------- s3_location : str @@ -199,6 +203,7 @@ def get_s3_location(bucket: str, prefix: str) -> str: """ For a given bucket and prefix, return a location url in format s3://{bucket}/{prefix} + Parameters ---------- bucket : str @@ -218,6 +223,7 @@ def compute_md5_hash(json_contents: str) -> str: """ Computes the md5 hash of the object as it would be stored in S3. Useful for comparing against the S3 object e-tag to check if they are the same. + Parameters ---------- json_contents : str @@ -240,6 +246,7 @@ def upload_json_str_to_s3( ) -> PutObjectOutputTypeDef: """ Upload JSON string contents to a location in S3. + Parameters ---------- bucket : str @@ -274,6 +281,7 @@ def upload_metadata_json_str_to_s3( """ Upload JSON string representation of the contents of the metadata.nd.json file to a location in S3. 
+ Parameters ---------- bucket : str @@ -301,6 +309,7 @@ def does_s3_object_exist(s3_client: S3Client, bucket: str, key: str) -> bool: """ Check that a file exists inside a bucket. Uses the head_object operation, which is cheaper compared to the list_objects operation. + Parameters ---------- s3_client : S3Client @@ -407,6 +416,7 @@ def get_dict_of_file_info( """ For a list of object keys, returns a list of metadata info for each object that exists in the bucket. + Parameters ---------- s3_client : S3Client @@ -445,6 +455,7 @@ def get_dict_of_core_schema_file_info( ) -> Dict[str, Optional[dict]]: """ For a bucket and prefix get list of core schema file info. + Parameters ---------- s3_client : S3Client @@ -455,10 +466,10 @@ def get_dict_of_core_schema_file_info( ------- Dict[str, Optional[dict]] {"subject.json": - {"last_modified": datetime, "e_tag": str, "version_id": str}, - "procedures.json": - {"last_modified": datetime, "e_tag": str, "version_id": str}, - ... + {"last_modified": datetime, "e_tag": str, "version_id": str}, + "procedures.json": + {"last_modified": datetime, "e_tag": str, "version_id": str}, + ... } """ key_map = dict( @@ -482,6 +493,7 @@ def iterate_through_top_level( Returns an iterator of s3 responses. If prefix is None, then will return an iterator of top-level prefixes of a bucket. Otherwise, will return an iterator of the top level items under a prefix. + Parameters ---------- s3_client : S3Client @@ -514,6 +526,7 @@ def iterate_through_top_level( def is_dict_corrupt(input_dict: dict) -> bool: """ Checks that all the keys, included nested keys, don't contain '$' or '.' + Parameters ---------- input_dict : dict @@ -542,6 +555,7 @@ def download_json_file_from_s3( """ Downloads json file contents from S3. Will return None if object is not a valid json file. + Parameters ---------- s3_client : S3Client @@ -575,6 +589,7 @@ def build_metadata_record_from_prefix( constructed from any non-corrupt core schema json files found under the prefix. 
If there are issues with Metadata construction, then it will return None. + Parameters ---------- bucket : str @@ -895,6 +910,7 @@ def does_metadata_record_exist_in_docdb( ) -> bool: """ For a given bucket and prefix, check if there is already a record in DocDb + Parameters ---------- docdb_client : MongoClient @@ -930,6 +946,7 @@ def get_record_from_docdb( ) -> Optional[dict]: """ Download a record from docdb using the record _id. + Parameters ---------- docdb_client : MongoClient @@ -963,6 +980,7 @@ def paginate_docdb( ) -> Iterator[List[dict]]: """ Paginate through records in DocDb. + Parameters ---------- db_name : str @@ -1006,6 +1024,7 @@ def build_docdb_location_to_id_map( like {'s3://bucket/prefix': 'abc-1234'} where the value is the id of the record in DocDb. If the record does not exist, then there will be no key in the dictionary. + Parameters ---------- db_name : str @@ -1036,6 +1055,7 @@ def get_all_processed_codeocean_asset_records( Gets all the data asset records we're interested in indexing. The location field in the output is the expected location of the data asset. It may still require double-checking that the s3 location is valid. + Parameters ---------- co_client : CodeOceanClient