From d0189dec8792fea401ed0e92288ec0420bb0383d Mon Sep 17 00:00:00 2001 From: Eric Charles Date: Sun, 2 Feb 2025 17:55:10 -0800 Subject: [PATCH 1/3] doc stuff --- .gitignore | 8 + docs/Makefile | 21 +++ docs/_static/css/notebooks.css | 32 ++++ docs/conf.py | 193 ++++++++++++++++++++++ docs/demos.rst | 46 ++++++ docs/index.rst | 60 +++++++ docs/source/contributing.rst | 147 ++++++++++++++++ docs/source/fix_an_issue.rst | 24 +++ docs/source/installation.rst | 107 ++++++++++++ docs/source/new_data_extractor.rst | 60 +++++++ docs/source/new_dataset_holder.rst | 101 +++++++++++ docs/source/new_plotter.rst | 99 +++++++++++ docs/source/overview.rst | 59 +++++++ examples/rail_plotting_control.ipynb | 14 +- examples/rail_project_example.ipynb | 10 ++ examples/rail_project_library.ipynb | 10 ++ pyproject.toml | 9 + src/rail/plotting/dataset_factory.py | 62 ++++--- src/rail/plotting/plot_group_factory.py | 31 ++-- src/rail/plotting/plotter.py | 29 +++- src/rail/plotting/plotter_factory.py | 38 +++-- src/rail/projects/algorithm_factory.py | 37 +++-- src/rail/projects/catalog_factory.py | 35 ++-- src/rail/projects/configurable.py | 7 +- src/rail/projects/pipeline_factory.py | 23 +-- src/rail/projects/project.py | 66 ++++++-- src/rail/projects/project_file_factory.py | 23 ++- src/rail/projects/selection_factory.py | 14 +- src/rail/projects/subsample_factory.py | 14 +- 29 files changed, 1235 insertions(+), 144 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/_static/css/notebooks.css create mode 100644 docs/conf.py create mode 100644 docs/demos.rst create mode 100644 docs/index.rst create mode 100644 docs/source/contributing.rst create mode 100644 docs/source/fix_an_issue.rst create mode 100644 docs/source/installation.rst create mode 100644 docs/source/new_data_extractor.rst create mode 100644 docs/source/new_dataset_holder.rst create mode 100644 docs/source/new_plotter.rst create mode 100644 docs/source/overview.rst diff --git a/.gitignore b/.gitignore index 
5d704c5..faf6779 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,11 @@ MANIFEST pip-log.txt pip-delete-this-directory.txt +# docs stuff +docs/_build +docs/api +docs/examples + # Unit test / coverage reports htmlcov/ .tox/ @@ -51,6 +56,9 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +tests/ci_test.tgz +tests/temp_data + # backups *~ diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..5c377ab --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,21 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SPHINXPROJ = RAIL_PROJECT +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/css/notebooks.css b/docs/_static/css/notebooks.css new file mode 100644 index 0000000..329fec1 --- /dev/null +++ b/docs/_static/css/notebooks.css @@ -0,0 +1,32 @@ +/* Hide the title of each notebook -- otherwise we get a redundancy with the + required rst title. 
Choosing to keep the rst title because it allows users + to use Read The Docs' heading link feature */ + +div.jp-RenderedHTMLCommon > h1 { + display: none; +} + +section:not(:first-child) > h2 { + padding-top: 30px; +} + +/* nbconvert overrides link color */ + +p.notebook-link { + margin-bottom: 20px; + font-weight: bold; +} + +p.notebook-link > a { + margin-left: 6px; + color: rgb(25, 118, 210); + font-family: sans-serif; + font-weight: bold; +} + +/* Keeps the bottom of the sidebar from overlapping and becoming unscrollable + all the way down, hiding reference > API Documentation */ + +div.wy-menu-vertical { + padding-bottom: 4em; +} \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..49f4d6f --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import subprocess +import sys +import pkgutil +import rail.projects +import rail.plotting + +sys.path.insert(0, os.path.abspath('..')) + +print(sys.path) + + +# Use unittest mock module to shield some modules away from docs building. +# This way one does not need to install them when dealing with the doc. 
+from unittest.mock import MagicMock + +MOCK_MODULES = [ + 'qp', + 'tables_io', +] +for mod_name in MOCK_MODULES: + if mod_name in sys.modules: + sys.modules[mod_name] = MagicMock() + + + + +# -- Project information ----------------------------------------------------- + +project = 'RAIL_Projects' +copyright = '2025, LSST DESC RAIL Contributors' +author = 'LSST DESC RAIL Contributors' + +# The short X.Y version +from rail.projects import _version +version = "%i.%i" % (_version.version_tuple[0], _version.version_tuple[1]) +# The full version, including alpha/beta/rc tags +release = _version.version + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.ifconfig', + 'sphinx.ext.mathjax', + 'nbsphinx', + 'sphinx.ext.viewcode', + 'sphinx.ext.autosummary', + 'sphinx.ext.githubpages', + 'sphinx.ext.napoleon', + 'sphinx.ext.autosectionlabel', + 'sphinx_tabs.tabs', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. 
+language = 'en' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'setup.rst', 'api/.*.rst'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# Allow NB to fail +nbsphinx_allow_errors = True + +# By default, tabs can be closed by selecting the open tab. This +# functionality can be disabled using the sphinx_tabs_disable_tab_closing +# configuration option: +sphinx_tabs_disable_tab_closing = True + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} +html_theme_options = {'prev_next_buttons_location': None, + 'collapse_navigation': False, + 'titles_only': False} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# These paths are either relative to html_static_path +# or fully qualified paths (eg. https://...) +html_css_files = [ + 'css/notebooks.css', +] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. 
+# +# html_sidebars = {} +html_sidebars = { + '**': [ + 'about.html', + 'navigation.html', + 'relations.html', # needs 'show_related': True theme option to display + 'searchbox.html', + 'donate.html', + ] +} + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'rail_projects_doc' + + +# -- Options for Autodoc-------------------------------------------------- +# Autodoc collects docstrings and builds API pages +# from sphinxcontrib.apidoc import main as apidoc_main + +def run_apidoc(_): + + if not os.path.exists('examples'): + os.system('ln -s ../examples') + + + from sphinx.ext.apidoc import main as apidoc_main + cur_dir = os.path.normpath(os.path.dirname(__file__)) + output_path = os.path.join(cur_dir, 'api') + + top_dir = os.path.abspath(os.path.join(os.path.dirname(rail.projects.__file__), '..')) + + #paramlist = ['--separate', '--implicit-namespaces', '--no-toc', '-M', '-o', output_path, '-f', top_dir] + paramlist = ['--separate', '--implicit-namespaces', '-M', '-o', output_path, '-f', top_dir] + print(f"running {paramlist}") + apidoc_main(paramlist) + + +def setup(app): + app.connect('builder-inited', run_apidoc) + + diff --git a/docs/demos.rst b/docs/demos.rst new file mode 100644 index 0000000..e2e2d76 --- /dev/null +++ b/docs/demos.rst @@ -0,0 +1,46 @@ + +***************** +Example Notebooks +***************** + +`rail_projects` comes with several notebooks that demonstrate how to use it to analyze data in a number of different ways. + +Here we describe the various notebooks and suggest other ways in which you might study the data. 
+ + Demos ===== + +We recommend starting with this notebook, +which demonstrates top-level rail project functionality, such as how to create a project from an example +configuration, and how to do simple things such as reducing the input data, sub-selecting data to +make training and test data sets, and how to run simple analysis pipelines on those. + + +.. toctree:: + :maxdepth: 1 + :caption: Creating and using a RailProject + + examples/rail_project_example.ipynb + + +This notebook shows how to manage the library of components that make up a `RailProject` + + +.. toctree:: + :maxdepth: 1 + :caption: The rail project component library + + examples/rail_project_library.ipynb + + +This notebook shows how to use the `rail.plotting.control` module to +make plots from a `RailProject` + +.. toctree:: + :maxdepth: 1 + :caption: Making plots + + examples/rail_plotting_control.ipynb + + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..d4e146d --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,60 @@ +========================================================================= +rail_projects: a toolkit for managing `RAIL`-based data analysis projects +========================================================================= + +---- +RAIL +---- + +RAIL is a flexible open-source software library providing tools to produce at-scale photometric redshift data products, including uncertainties and summary statistics, and stress-test them under realistically complex systematics. + +RAIL serves as the infrastructure supporting many extragalactic applications of `the Legacy Survey of Space and Time (LSST) `_ on `the Vera C. Rubin Observatory `_, including Rubin-wide commissioning activities. 
+RAIL was initiated by the Photometric Redshifts (PZ) Working Group (WG) of the `LSST Dark Energy Science Collaboration (DESC) `_ as a result of the lessons learned from the `Data Challenge 1 (DC1) experiment `_ to enable the PZ WG Deliverables in the `LSST-DESC Science Roadmap (see Sec. 5.18) `_, aiming to guide the selection and implementation of redshift estimators in DESC analysis pipelines. + +RAIL is developed and maintained by a diverse team comprising DESC Pipeline Scientists (PSs), international in-kind contributors, LSST Interdisciplinary Collaboration for Computing (LINCC) Frameworks software engineers, and other volunteers, but all are welcome to join the team regardless of LSST data rights. +To get involved, chime in on the issues in any of the RAIL repositories described in the Overview section. + +See `guideline for citing RAIL +`_ for +guidance on citing RAIL and the underlying algorithms. + + +--------------- +`rail_projects` +--------------- + +`rail_projects` is a toolkit to manage RAIL-based data analysis +projects. + + + + +.. toctree:: + :maxdepth: 1 + :caption: Getting Started + + source/overview + source/installation + +.. toctree:: + :maxdepth: 1 + :caption: Contributing + + source/contributing + source/fix_an_issue + source/new_plotter + source/new_data_extractor + source/new_dataset_holder + +.. toctree:: + :maxdepth: 1 + :caption: Demonstrations + + demos + +.. toctree:: + :maxdepth: 1 + :caption: API + + api/modules + api/rail.cli diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst new file mode 100644 index 0000000..f5fb7e6 --- /dev/null +++ b/docs/source/contributing.rst @@ -0,0 +1,147 @@ +********************** +Contribution Overview +********************** + +RAIL is a constellation of multiple packages developed publicly on GitHub and +welcomes all interested developers, regardless of DESC membership or LSST data rights. 
+ + +Contributing to RAIL +==================== + +If you are interested in contributing to RAIL itself, e.g., by adding new +algorithms or similar analysis tools, please visit +`contributing to RAIL `_ + + +--------------------------------- +Contributing to ``rail_projects`` +--------------------------------- + +If you're interested in contributing to `rail_projects`, but don't know where to start, take a look +at the +`list of issues `_. +Or, `create a new issue `_ to +suggest a change. + +In addition to GitHub, the RAIL team uses the LSSTC Slack workspace for organization. +Professional astronomers (including students!) based in the US, Chile, or a +French IN2P3 institution are encouraged to +`join the LSST-DESC `_ to gain access to +the `\#desc-pz-rail `_ channel on +the LSSTC Slack workspace. + +Those without data rights who wish to gain access to the Slack channel should +`create an Issue `_ to request that +the team leads initiate the process for adding a DESC External Collaborator. + + +Where to contribute: +==================== + +In all cases, begin by following the developer installation instructions +:ref:`Developer Installation` and follow the contribution workflow instructions below. + + +Contribution workflow +===================== + +The ``rail_projects`` repository use an issue-branch-review workflow, +similar to the standard `GitHub Flow `_. +We typically use ``git`` as our version control tool, there are many resources +available online, but here is a `nice cheat sheet `_ +created by GitHub. + +----- +Issue +----- + +When you identify something that should be done, `make an issue `_ +for it. + +------ +Branch +------ + +See :ref:`Developer Installation` for installation instructions. + +While developing in a branch, don't forget to pull from ``main`` regularly (at +least daily) to make sure your work is compatible with other recent changes. 
+ +When you're ready to merge your branch into the ``main`` branch, create a pull request +("PR") in the rail repository you cloned from. GitHub has instructions +`here `_. + +Several continuous integration checks will be performed for new pull requests. +If any of these automatic processes find issues with the code, you should address +them in the branch before sending for review. These include unit tests (does the +code function correctly), pylint (code style), or coverage (how much code is +exercised in unit tests). + +Once you are satisfied with your PR, request that other team members review and +approve it. You could send the request to someone whom you've worked with on the +topic, or one of the core maintainers of rail. + + +Merge +----- + +Once the changes in your PR have been approved, these are your next steps: + +1. the author merges the change by selecting "Squash and merge" on the approved pull request +2. enter ``closes #[#]`` in the comment field to close the resolved issue +3. delete your branch using the button on the merged pull request. + + + +Reviewing a PR +-------------- + +To review a pull request, it's a good idea to start by pulling the changes and +running the unit tests locally. If the continuous integration tests have run +successfully, there is good hope that the unit tests will run locally as well! + +Check the code for complete and accurate docstrings, sufficient comments, and +ensure any instances of ``#pragma: no cover`` (excluding the code from unit test +coverage accounting) are extremely well-justified. + +Feel free to mark the PR with “Request changes” for necessary changes. e.g. +writing an exception for an edge case that will break the code, updating names +to adhere to the naming conventions, etc. + +It is also considered good practice to make suggestions for optional improvements, +such as adding a one-line comment before a clever block of code or including a +demonstration of new functionality in the example notebooks. 
+ +Naming conventions +================== + +We follow the `pep8 `_ +recommendations for naming new modules. + + +Modules +------- + +Modules should use all lowercase, with underscores where it aids the readability +of the module name. + + +Classes +------- + +Python classes and so should use the CapWords convention. + + + +Contribution Types +================== + +We anticipate a few types of contributions, and provide separate instructions +for those workflows: + +* :ref:`Fix an Issue` in the codebase +* :ref:`Adding a new RailPlotter` +* :ref:`Adding a new DataExtractor` +* :ref:`Adding a new RailDatasetHolder` + diff --git a/docs/source/fix_an_issue.rst b/docs/source/fix_an_issue.rst new file mode 100644 index 0000000..6fbecae --- /dev/null +++ b/docs/source/fix_an_issue.rst @@ -0,0 +1,24 @@ +************ +Fix an Issue +************ + +The typical workflow for fixing a specific issue will look something like the following: + + +#. Identify `an issue + `_ to work on. + +#. Assign the issue to yourself. + +#. Leave a comment on the issue to let others know you're working on it. + +Following the :ref:`Contribution workflow` guide, make a branch with a name like +``issue/[#]/brief-description`` and make changes in your branch. +While developing in a branch, don't forget to pull from ``main`` regularly to +make sure your work is compatible with other recent changes. + +When your code is complete, continue following the :ref:`Contribution workflow`, +and create a pull request. + +If you find that you no longer have time to complete an issue, please unassign yourself +and leave a note in the comments describing what work has been done. diff --git a/docs/source/installation.rst b/docs/source/installation.rst new file mode 100644 index 0000000..e517687 --- /dev/null +++ b/docs/source/installation.rst @@ -0,0 +1,107 @@ +************ +Installation +************ + + +RAIL is actually distributed as several software packages. 
+ +Some of the RAIL algorithms have dependencies that are sensitive to out-of-date code versions, therefore it is strongly recommended that you create a new dedicated virtual environment for RAIL to avoid problems with pip/conda failing to update some packages that you have previously installed during installation of RAIL. Also, having multiple versions of RAIL in your path can cause difficult-to-diagnose problems, so we encourage you to make sure that you don't have an existing version of RAIL installed in your `.local` area or in your base conda environment. + + +There are three ways you might choose to install `rail_projects` + +1. `Production Installation`_: Just install `rail_projects` in an + existing conda environment using pip. +2. `Exploration Installation`_: Download the `rail_projects` source + code and example notebooks, and install from the local version using pip. +3. `Developer Installation`_: Download the `rail_projects` source + code and example notebooks and install from the local version using + pip in "editable" mode. + + +In all cases we recommend you first install RAIL by following either the "Production +Installation" (if you want all of the RAIL "ecosystem") or "Algorithm +Installation" (if you only want access to a sub-set of the RAIL +algorithms) instructions +`on the RAIL installation page `_ + + +Production Installation +----------------------- + +Here we will be installing ``rail_projects`` into an existing conda environment "[env]". + +.. code-block:: bash + + conda activate [env] + pip install pz-rail-projects + + +Exploration Installation +------------------------ + +Here we will be installing the source code from `rail +`_ to access all of the +demonstration notebooks. + + +.. code-block:: bash + + conda activate [env] + git clone https://github.com/LSSTDESC/rail_projects.git + cd rail_projects + pip install .[dev] + + +At that point you should be able to run the demonstration notebooks, e.g.: + +.. 
code-block:: bash + + jupyter-notebook examples + + + +Developer Installation +---------------------- + +Here we will be installing the source code from `rail +`_ to be able to develop +the source code. + + +.. tabs:: + + .. group-tab:: General + + .. code-block:: bash + + conda activate [env] + git clone https://github.com/LSSTDESC/rail_projects.git + cd rail_projects + pip install -e .[dev] + + + .. group-tab:: zsh (e.g., Mac M1+ default) + + .. code-block:: bash + + conda activate [env] + git clone https://github.com/LSSTDESC/rail_projects.git + cd rail_projects + pip install -e '.[dev]' + + + +RAIL packages +============= + +Depending on how you want to use RAIL you will be installing one or +more `RAIL packages `_ + + +Adding your kernel to jupyter +============================= +If you want to use the kernel that you have just created to run RAIL example demos, then you may need to explicitly add an ipython kernel. You may need to first install ipykernel with `conda install ipykernel`. You can do then add your kernel with the following command, making sure that you have the conda environment that you wish to add activated. From your environment, execute the command: +`python -m ipykernel install --user --name [nametocallnewkernel]` +(you may or may not need to prepend `sudo` depending on your permissions). When you next start up Jupyter you should see a kernel with your new name as an option, including using the Jupyter interface at NERSC. 
+ diff --git a/docs/source/new_data_extractor.rst b/docs/source/new_data_extractor.rst new file mode 100644 index 0000000..c736f2d --- /dev/null +++ b/docs/source/new_data_extractor.rst @@ -0,0 +1,60 @@ +************************** +Adding a new DataExtractor +************************** + +Because of the variety of formats of files in RAIL, and the variety of analysis flavors +in a ``RailProject``, it is useful to be able to have re-usable tools that extract particular +datasets from a ``RailProject`` These are implemented as subclasses of the :py:class:`rail.plotting.data_extractor.RailProjectDataExtractor` class. +A ``RailProjectDataExtractor`` is intended to take a particular set of inputs and +extract a particular set of data from the ``RailProject``. The inputs and outputs +are all defined in particular ways to allow ``RailProjectDataExtractor`` +objects to be integrated into larger data analysis pipelines. + +Example +======= + +The following example has all of the required pieces of a ``RailProjectDataExtractor`` and almost nothing else. + +.. code-block:: python + + class PZPointEstimateDataExtractor(RailProjectDataExtractor): + """Class to extract true redshifts and one p(z) point estimate + from a RailProject. + + This will return a dict: + + truth: np.ndarray + True redshifts + + pointEstimate: np.ndarray + Point estimates of the true redshifts + """ + + inputs: dict = { + "project": RailProject, + "selection": str, + "flavor": str, + "tag": str, + "algo": str, + } + + def _get_data(self, **kwargs: Any) -> dict[str, Any] | None: + return get_pz_point_estimate_data(**kwargs) + + @classmethod + def generate_dataset_dict( + cls, + **kwargs: Any, + ) -> list[dict[str, Any]]: + + +The required pieces, in the order that they appear are: + +#. The ``PZPointEstimateDataExtractor(RailProjectDataExtractor):`` defines a class called ``PZPointEstimateDataExtractor`` and specifies that it inherits from ``RailProjectDataExtractor``. + +#. 
The ``inputs: dict = ...`` lines define the inputs, and the expected data types for those, in this case a ``RailProject`` and the keys needed to extract information from it + +#. The ``_get_data()`` method does the actual work (in this case it passes it off to a utility function ``get_pz_point_estimate_data`` which knows how to extract data from the ``RailProject``) + +#. The ``generate_dataset_dict()`` can scan a ``RailProject`` and generate a dictionary of all the available datasets + diff --git a/docs/source/new_dataset_holder.rst b/docs/source/new_dataset_holder.rst new file mode 100644 index 0000000..4639d5e --- /dev/null +++ b/docs/source/new_dataset_holder.rst @@ -0,0 +1,101 @@ +****************************** +Adding a new RailDatasetHolder +****************************** + +Because of the variety of formats of files in RAIL, and the variety of analysis flavors +in a ``RailProject``, it is useful to be able to have re-usable tools that wrap particular types of +datasets from a ``RailProject``. These are implemented as subclasses of the :py:class:`rail.plotting.dataset_holder.RailDatasetHolder` class. +A ``RailDatasetHolder`` is intended to take a particular set of inputs and +extract a particular set of data from the ``RailProject``. The inputs and outputs +are all defined in particular ways to allow ``RailDatasetHolder`` +objects to be integrated into larger data analysis pipelines. + + +Example +======= + +The following example has all of the required pieces of a ``RailDatasetHolder`` and almost nothing else. + +.. 
code-block:: python + + class RailProjectDatasetHolder(RailDatasetHolder): + """Simple class for holding a dataset for plotting data that comes from a RailProject""" + + config_options: dict[str, StageParameter] = dict( + name=StageParameter(str, None, fmt="%s", required=True, msg="Dataset name"), + extractor=StageParameter( + str, None, fmt="%s", required=True, msg="Dataset extractor class name" + ), + project=StageParameter( + str, None, fmt="%s", required=True, msg="RailProject name" + ), + selection=StageParameter( + str, None, fmt="%s", required=True, msg="RailProject data selection" + ), + flavor=StageParameter( + str, None, fmt="%s", required=True, msg="RailProject analysis flavor" + ), + tag=StageParameter( + str, None, fmt="%s", required=True, msg="RailProject file tag" + ), + algo=StageParameter( + str, None, fmt="%s", required=True, msg="RailProject algorithm" + ), + ) + + extractor_inputs: dict = { + "project": RailProject, + "extractor": RailProjectDataExtractor, + "selection": str, + "flavor": str, + "tag": str, + "algo": str, + } + + def __init__(self, **kwargs: Any): + RailDatasetHolder.__init__(self, **kwargs) + self._project: RailProject | None = None + self._extractor: RailProjectDataExtractor | None = None + + def __repr__(self) -> str: + ret_str = ( + f"{self.config.extractor} " + "( " + f"{self.config.project}, " + f"{self.config.selection}_{self.config.flavor}_{self.config.tag}_{self.config.algo}" + ")" + ) + return ret_str + + def get_extractor_inputs(self) -> dict[str, Any]: + if self._project is None: + self._project = RailDatasetFactory.get_project(self.config.project)() + if self._extractor is None: + self._extractor = RailProjectDataExtractor.create_from_dict( + dict(name=self.config.name, class_name=self.config.extractor), + ) + the_extractor_inputs = dict( + project=self._project, + extractor=self._extractor, + selection=self.config.selection, + flavor=self.config.flavor, + tag=self.config.tag, + algo=self.config.algo, + ) + 
self._validate_extractor_inputs(**the_extractor_inputs) + return the_extractor_inputs + + +The required pieces, in the order that they appear are: + +#. The ``RailProjectDatasetHolder(RailDatasetHolder):`` defines a class called ``RailProjectDatasetHolder`` and specifies that it inherits from ``RailDatasetHolder``. + +#. The ``config_options`` lines define the configuration parameters for this class, as well as their default values. Note that we are specifying a helper class to actually extract the data. + +#. The ``extractor_inputs: dict = ...`` lines define the inputs, and the expected data types for those, that will be passed to the extractor + +#. The ``__init__`` method does any class-specific initialization, in this case defining that this class will store a project and an extractor + +#. The ``__repr__`` method is optional, here it gives a useful representation of the class + +#. The ``get_extractor_inputs()`` method does the actual work, note that it doesn't take any arguments, that it uses the factories to find the helper objects and passes along its configuration and validates its outputs diff --git a/docs/source/new_plotter.rst b/docs/source/new_plotter.rst new file mode 100644 index 0000000..22c9465 --- /dev/null +++ b/docs/source/new_plotter.rst @@ -0,0 +1,99 @@ +************************ +Adding a new RailPlotter +************************ + +All of the various plotting classes +are implemented as subclasses of the :py:class:`rail.plotting.plotter.RailPlotter` class. +A ``RailPlotter`` is intended to take a particular set of inputs and configuration parameters, +run a single bit of analysis, and produce one or more plots. The inputs, outputs +and configuration parameters are all defined in particular ways to allow ``RailPlotter`` +objects to be integrated into larger data analysis pipelines. + + +Example +======= + +The following example has all of the required pieces of a ``RailPlotter`` and almost nothing else. + +.. 
code-block:: python + + class PZPlotterPointEstimateVsTrueHist2D(RailPlotter): + """Class to make a 2D histogram of p(z) point estimates + versus true redshift + """ + + config_options: dict[str, StageParameter] = RailPlotter.config_options.copy() + config_options.update( + z_min=StageParameter(float, 0.0, fmt="%0.2f", msg="Minimum Redshift"), + z_max=StageParameter(float, 3.0, fmt="%0.2f", msg="Maximum Redshift"), + n_zbins=StageParameter(int, 150, fmt="%i", msg="Number of z bins"), + ) + + inputs: dict = { + "truth": np.ndarray, + "pointEstimate": np.ndarray, + } + + def _make_2d_hist_plot( + self, + prefix: str, + truth: np.ndarray, + pointEstimate: np.ndarray, + dataset_holder: RailDatasetHolder | None = None, + ) -> RailPlotHolder: + figure, axes = plt.subplots() + bin_edges = np.linspace( + self.config.z_min, self.config.z_max, self.config.n_zbins + 1 + ) + axes.hist2d( + truth, + pointEstimate, + bins=(bin_edges, bin_edges), + ) + plt.xlabel("True Redshift") + plt.ylabel("Estimated Redshift") + plot_name = self._make_full_plot_name(prefix, "") + return RailPlotHolder( + name=plot_name, figure=figure, plotter=self, dataset_holder=dataset_holder + ) + + def _make_plots(self, prefix: str, **kwargs: Any) -> dict[str, RailPlotHolder]: + find_only = kwargs.get("find_only", False) + figtype = kwargs.get("figtype", "png") + dataset_holder = kwargs.get("dataset_holder") + out_dict: dict[str, RailPlotHolder] = {} + truth: np.ndarray = kwargs["truth"] + pointEstimate: np.ndarray = kwargs["pointEstimate"] + if find_only: + plot_name = self._make_full_plot_name(prefix, "") + assert dataset_holder + plot = RailPlotHolder( + name=plot_name, + path=os.path.join(dataset_holder.config.name, f"{plot_name}.{figtype}"), + plotter=self, + dataset_holder=dataset_holder, + ) + else: + plot = self._make_2d_hist_plot( + prefix=prefix, + truth=truth, + pointEstimate=pointEstimate, + dataset_holder=dataset_holder, + ) + out_dict[plot.name] = plot + return out_dict + + +The required 
pieces, in the order that they appear are: + +#. The ``PZPlotterPointEstimateVsTrueHist2D(RailPlotter):`` defines a class called ``PZPlotterPointEstimateVsTrueHist2D`` and specifies that it inherits from ``RailPlotter``. + +#. The ``config_options`` lines define the configuration parameters for this class, as well as their default values. Note that here we are copying the configuration parameters from the ``RailPlotter`` as well as defining some new ones. + +#. The ``inputs: dict = ...`` define the inputs and expected data types for those, in this case two numpy arrays + +#. The ``__init__`` method does any class-specific initialization. In this case there isn't any and the method is superfluous. + +#. The ``_make_2d_hist_plot(...)`` method does the actual work, note it takes some of the same arguements are define in ``inputs`` and that it uses ``self.config`` to access the configuration parameters. + +#. The ``_make_plots(self, prefix: str, **kwargs: Any)`` method provides an interface to format the data for _make_2d_hist_plot(), the arguments to this function are specified in the ``RailPlotter`` class diff --git a/docs/source/overview.rst b/docs/source/overview.rst new file mode 100644 index 0000000..33dfe84 --- /dev/null +++ b/docs/source/overview.rst @@ -0,0 +1,59 @@ +******** +Overview +******** + +------------- +RAIL Overview +------------- + +If you are interested in RAIL itself, please visit +`RAIL overiew `_ + + +---------------------- +rail_projects Overview +---------------------- + + + + +Introduction to components, factories, libraries, and projects +************************************************************** + +**components** + +Doing series of related studies using RAIL requires many pieces, such +as the lists of algorithms available, sets of analysis pipelines we +might run, types of plots we might make, types of data we can extract +from out analyses, references to particular files or sets of files we +want to use for out analysses, and so for. 
In general we call these +analysis components, and we need ways to keep track of them. + + +**factories** + +A factory is a python class that can make specific type or types of +components, assign names to each, and keep track of what it has made. + + +**libraries**: + +A library is + + +**projects**: + + + +Organizational philosophy and included functionality +**************************************************** + + + +`projects` +========== + + + +`plotting` +========== diff --git a/examples/rail_plotting_control.ipynb b/examples/rail_plotting_control.ipynb index 372ba5f..7b54728 100644 --- a/examples/rail_plotting_control.ipynb +++ b/examples/rail_plotting_control.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "70ba26fa-d1d1-4ed4-9926-b3245f2743db", + "metadata": {}, + "source": [ + "## Making plots with the `rail.plotting` package\n", + "\n", + "This notebook will show you the basics of making plots with the `rail.plotting` package" + ] + }, { "cell_type": "markdown", "id": "13048f2e-11cf-4ed3-9764-47dbb5999776", @@ -120,9 +130,7 @@ "cell_type": "code", "execution_count": null, "id": "4d34d6fe-fb77-4fe4-b9ab-99a7b9ea5bf8", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "plot_group = plot_groups['zestimate_v_ztrue_test_plots']" diff --git a/examples/rail_project_example.ipynb b/examples/rail_project_example.ipynb index 1aa6037..8147443 100644 --- a/examples/rail_project_example.ipynb +++ b/examples/rail_project_example.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "3f31a160-8ce9-4fe6-8805-9df7a97b7adc", + "metadata": {}, + "source": [ + "## Using `RailProject` \n", + "\n", + "This notebook will show you the basics using the `RailProject` class to manage an analysis project" + ] + }, { "cell_type": "markdown", "id": "d436cc4a-49c9-4f27-93cf-9a196d251a77", diff --git a/examples/rail_project_library.ipynb b/examples/rail_project_library.ipynb index e0256b5..c19f53d 100644 
--- a/examples/rail_project_library.ipynb +++ b/examples/rail_project_library.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "3da49051-75c6-4b8f-8894-2d65d4f80112", + "metadata": {}, + "source": [ + "## Using the `rail.projects` analysis component library \n", + "\n", + "This notebook will show you the basics of using the `rail.projects.library` component library" + ] + }, { "cell_type": "markdown", "id": "68006889-06ba-46ea-be1a-556d9df7ebaa", diff --git a/pyproject.toml b/pyproject.toml index 83e6b90..02a8a92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,15 @@ dev = [ "pylint", # Used for static linting of files ] +docs = [ + "nbsphinx", + "sphinx", # Used to automatically generate documentation + "sphinx_rtd_theme", # Used to render documentation + "sphinx-autoapi", # Used to automatically generate api documentation + "sphinx-tabs", # Used to create tabbed content within the docs +] + + [project.scripts] rail_project = "rail.cli.rail_project.project_commands:project_cli" rail_plot = "rail.cli.rail_plot.plot_commands:plot_cli" diff --git a/src/rail/plotting/dataset_factory.py b/src/rail/plotting/dataset_factory.py index dda8c51..2eea708 100644 --- a/src/rail/plotting/dataset_factory.py +++ b/src/rail/plotting/dataset_factory.py @@ -18,37 +18,45 @@ class RailDatasetFactory(RailFactoryMixin): Expected usage is that user will define a yaml file with the various datasets that they wish to use with the following example syntax: - Data: - - Project: - name: some_project - yaml_file: /path/to/rail_project_file - - Dataset: - name: gold_baseline_test - class: rail.plotting.project_dataset_holder.RailProjectDatasetHolder - extractor: rail.plotting.pz_data_extractor.PZPointEstimateDataExtractor - project: some_project - selection: gold - flavor: baseline - tag: test - algos: ['all'] - - Dataset: - name: blend_baseline_test - class: rail.plotting.project_dataset_holder.RailProjectDatasetHolder - exctractor: 
rail.plottings.pz_data_extractor.PZPointEstimateDataExtractor - project: some_project - selection: blend - flavor: baseline - tag: test - algos: ['all'] + + .. highlight:: yaml + .. code-block:: yaml + + Data: + - Project: + name: some_project + yaml_file: /path/to/rail_project_file + - Dataset: + name: gold_baseline_test + class: rail.plotting.project_dataset_holder.RailProjectDatasetHolder + extractor: rail.plotting.pz_data_extractor.PZPointEstimateDataExtractor + project: some_project + selection: gold + flavor: baseline + tag: test + algos: ['all'] + - Dataset: + name: blend_baseline_test + class: rail.plotting.project_dataset_holder.RailProjectDatasetHolder + exctractor: rail.plottings.pz_data_extractor.PZPointEstimateDataExtractor + project: some_project + selection: blend + flavor: baseline + tag: test + algos: ['all'] And group them into lists of dataset that can be run over particular types of data, using the following example syntax: - - DatasetList: - name: baseline_test - datasets: - - gold_baseline_test - - blend_baseline_test + .. highlight:: yaml + .. code-block:: yaml + + Data: + - DatasetList: + name: baseline_test + datasets: + - gold_baseline_test + - blend_baseline_test """ yaml_tag: str = "Data" diff --git a/src/rail/plotting/plot_group_factory.py b/src/rail/plotting/plot_group_factory.py index c20d9d0..bbe6889 100644 --- a/src/rail/plotting/plot_group_factory.py +++ b/src/rail/plotting/plot_group_factory.py @@ -15,19 +15,24 @@ class RailPlotGroupFactory(RailFactoryMixin): """Factory class to make plot_groups - The yaml file should look something like this:. . - Includes: - - - - - PlotGroups: - - PlotGroup: - name: some_name - plotter_list_name: nice_plots - dataset_dict_name: nice_data - - PlotGroup: - name: some_other_name - plotter_list_name: janky_plots - dataset_dict_name: janky_data + The yaml file should look something like this: + + .. highlight:: yaml + .. 
code-block:: yaml + + Includes: + - + - + + PlotGroups: + - PlotGroup: + name: some_name + plotter_list_name: nice_plots + dataset_dict_name: nice_data + - PlotGroup: + name: some_other_name + plotter_list_name: janky_plots + dataset_dict_name: janky_data """ yaml_tag: str = "PlotGroups" diff --git a/src/rail/plotting/plotter.py b/src/rail/plotting/plotter.py index eae3350..8bb6303 100644 --- a/src/rail/plotting/plotter.py +++ b/src/rail/plotting/plotter.py @@ -21,7 +21,11 @@ class RailPlotter(Configurable, DynamicClass): """Base class for making matplotlib plot The main function in this class is: - __call__(prefix: str, kwargs**: Any) -> dict[str, RailPlotHolder] + + .. highlight:: python + .. code-block:: python + + __call__(prefix: str, kwargs**: Any) -> dict[str, RailPlotHolder] This function will make a set of plots and return them in a dict. prefix is string that gets prepended to plot names. @@ -31,16 +35,29 @@ class RailPlotter(Configurable, DynamicClass): Sub-classes should implement - config_options: a dict[str, `ceci.StageParameter`] that - will be used to configure things like the axes binning, selection functions, + .. highlight:: python + .. code-block:: python + + config_options: dict[str, ceci.StageParameter] + + that will be used to configure things like the axes binning, selection functions, and other plot-specfic options - _inputs: a dict [str, type] that specifics the inputs + .. highlight:: python + .. code-block:: python + + _inputs: dict[str, type] + + that specifics the inputs that the sub-classes expect, this is used the check the kwargs - that are passed to the __call__ function. + that are passed to the `__call__` function. A function: - _make_plots(self, prefix: str, **kwargs: Any) -> dict[str, RailPlotHolder]: + + .. highlight:: python + .. code-block:: python + + _make_plots(self, prefix: str, **kwargs: Any) -> dict[str, RailPlotHolder]: That actually makes the plots. 
It does not need to do the checking that the correct kwargs have been given. diff --git a/src/rail/plotting/plotter_factory.py b/src/rail/plotting/plotter_factory.py index 3c053ff..7afbb48 100644 --- a/src/rail/plotting/plotter_factory.py +++ b/src/rail/plotting/plotter_factory.py @@ -18,25 +18,33 @@ class RailPlotterFactory(RailFactoryMixin): Expected usage is that user will define a yaml file with the various plotters that they wish to use with the following example syntax: - - Plotter: - name: zestimate_v_ztrue_hist2d - class_name: rail.plotters.pz_plotters.PZPlotterPointEstimateVsTrueHist2D - z_min: 0.0 - z_max: 3.0 - n_zbins: 150 - - Plotter: - name: zestimate_v_ztrue_profile - class_name: rail.plotters.pz_plotters.PZPlotterPointEstimateVsTrueProfile - z_min: 0.0 - z_max: 3.0 - n_zbins: 60 + .. highlight:: yaml + .. code-block:: yaml + + Plots: + - Plotter: + name: zestimate_v_ztrue_hist2d + class_name: rail.plotters.pz_plotters.PZPlotterPointEstimateVsTrueHist2D + z_min: 0.0 + z_max: 3.0 + n_zbins: 150 + - Plotter: + name: zestimate_v_ztrue_profile + class_name: rail.plotters.pz_plotters.PZPlotterPointEstimateVsTrueProfile + z_min: 0.0 + z_max: 3.0 + n_zbins: 60 And group them into lists of plotter that can be run over particular types of data, using the following example syntax: - - PlotterList: - name: z_estimate_v_z_true - plotters: + .. highlight:: yaml + .. 
code-block:: yaml + + Plots: + - PlotterList: + name: z_estimate_v_z_true + plotters: - zestimate_v_ztrue_hist2d - zestimate_v_ztrue_profile """ diff --git a/src/rail/projects/algorithm_factory.py b/src/rail/projects/algorithm_factory.py index 5dba3de..5b4d970 100644 --- a/src/rail/projects/algorithm_factory.py +++ b/src/rail/projects/algorithm_factory.py @@ -33,23 +33,26 @@ class RailAlgorithmFactory(RailFactoryMixin): Expected usage is that user will define a yaml file with the various datasets that they wish to use with the following example syntax: - SpecSelections: - - SpecSelection: - name: zCOSMOS - Select: SpecSelection_zCOSMOS - Module: rail.creation.degraders.spectroscopic_selections - - PZAlgorithms: - - PZAlgorithm: - name: trainz - Estimate: TrainZEstimator - Inform: TrainZInformer - Module: rail.estimation.algos.train_z - - PZAlgorithm: - name: simplenn - Estimate: SklNeurNetEstimator - Inform: SklNeurNetInformer - Module: rail.estimation.algos.sklearn_neurnet + .. highlight:: yaml + .. code-block:: yaml + + SpecSelections: + - SpecSelection: + name: zCOSMOS + Select: SpecSelection_zCOSMOS + Module: rail.creation.degraders.spectroscopic_selections + + PZAlgorithms: + - PZAlgorithm: + name: trainz + Estimate: TrainZEstimator + Inform: TrainZInformer + Module: rail.estimation.algos.train_z + - PZAlgorithm: + name: simplenn + Estimate: SklNeurNetEstimator + Inform: SklNeurNetInformer + Module: rail.estimation.algos.sklearn_neurnet and so on. 
""" diff --git a/src/rail/projects/catalog_factory.py b/src/rail/projects/catalog_factory.py index 27d1739..30018e9 100644 --- a/src/rail/projects/catalog_factory.py +++ b/src/rail/projects/catalog_factory.py @@ -9,24 +9,31 @@ class RailCatalogFactory(RailFactoryMixin): Expected usage is that user will define a yaml file with the various datasets that they wish to use with the following example syntax: - Catalogs: - - CatalogTemplate - name: truth - path_template: "{catalogs_dir}/{project}_{sim_version}/{healpix}/part-0.parquet" - iteration_vars: ['healpix'] - - CatalogTemplate - name: reduced - path_template: "{catalogs_dir}/{project}_{sim_version}_{selection}/{healpix}/part-0.pq" - iteration_vars: ['healpix'] + + .. highlight:: yaml + .. code-block:: yaml + + Catalogs: + - CatalogTemplate + name: truth + path_template: "{catalogs_dir}/{project}_{sim_version}/{healpix}/part-0.parquet" + iteration_vars: ['healpix'] + - CatalogTemplate + name: reduced + path_template: "{catalogs_dir}/{project}_{sim_version}_{selection}/{healpix}/part-0.pq" + iteration_vars: ['healpix'] Or the used can specifiy particular catalog instances where everything except the interation_vars are resolved - Catalogs: - - CatalogTemplate - name: truth_roman_rubin_v1.1.3_gold - path_template: "full_path_to_catalog/{healpix}/part-0.parquet" - iteration_vars: ['healpix'] + .. highlight:: yaml + .. code-block:: yaml + + Catalogs: + - CatalogTemplate + name: truth_roman_rubin_v1.1.3_gold + path_template: "full_path_to_catalog/{healpix}/part-0.parquet" + iteration_vars: ['healpix'] """ yaml_tag: str = "Catalogs" diff --git a/src/rail/projects/configurable.py b/src/rail/projects/configurable.py index 0b265d0..52e52a0 100644 --- a/src/rail/projects/configurable.py +++ b/src/rail/projects/configurable.py @@ -10,16 +10,17 @@ class Configurable: This implements: - 1. being able to define parameters that are attached to a class, + 1. being able to define parameters that are attached to a class 2. 
being able to create an object of that class from a dict with the required paramters 3. checking that all the required parameters are present and of the correct types 4. check that there are no additional parameters given - 3. being able to write a snapshot of the current values of the paramters to yaml + 5. being able to write a snapshot of the current values of the paramters to yaml - Subclasses should + Subclasses should: 1. add parameters to the config_options class member 2. set the yaml_tag class member to a unique value + """ config_options: dict[str, StageParameter] = dict( diff --git a/src/rail/projects/pipeline_factory.py b/src/rail/projects/pipeline_factory.py index eb4cb6c..6bfbe6a 100644 --- a/src/rail/projects/pipeline_factory.py +++ b/src/rail/projects/pipeline_factory.py @@ -10,21 +10,24 @@ class RailPipelineFactory(RailFactoryMixin): Expected usage is that user will define a yaml file with the various datasets that they wish to use with the following example syntax: - Pipelines: - - PipelineTemplate: + .. highlight:: yaml + .. code-block:: yaml + + Pipelines: + - PipelineTemplate: name: pz: pipeline_class: rail.pipelines.estimation.pz_all.PzPipeline - input_catalog_template: degraded + input_catalog_template: degraded output_catalog_template: degraded input_file_templates: - input_train: - flavor: baseline - tag: train - input_test: - flavor: baseline - tag: test + input_train: + flavor: baseline + tag: train + input_test: + flavor: baseline + tag: test kwargs: - algorithms: ['all'] + algorithms: ['all'] """ diff --git a/src/rail/projects/project.py b/src/rail/projects/project.py index 0161a97..5a98a17 100644 --- a/src/rail/projects/project.py +++ b/src/rail/projects/project.py @@ -62,20 +62,34 @@ def __init__(self, **kwargs: Any): class RailProject(Configurable): """Main analysis driver class, this collects all the elements - run a collection of studies using RAIL + run a collection of studies using RAIL. 
- -------------- + The key concepts are: + + 1. analysis 'Flavors', which are versions of + similar analyses with slightly different parameter settings and/or + input files. + + 2. ceci 'Pipelines", which run blocks of analysis code + + A RailProject basically specifies which Pipelines to run under which + flavors, and keeps track of the outputs. + + + ============= Functionality - -------------- + ============= RailProject.load_config() Read a yaml file and create a RailProject reduce_data() Make a reduced catalog from an input catalog by applying a selction - and trimming unwanted colums + and trimming unwanted colums. This is run before the analysis pipelines. subsample_data() - Subsample data from a catalog to make a testing or training file + Subsample data from a catalog to make a testing or training file. + This is run after catalog level pipelines, but before pipeliens run + on indvidudal training/ testing samples build_pipelines() Build ceci pipeline yaml files @@ -86,18 +100,25 @@ class RailProject(Configurable): run_pipeline_catalog() Run a pipeline on a catalog of files - -------------- + ============= Configuration - -------------- + ============= Most of these element come from the shared library of elements, which is accesible from rail.projects.library - -------------- + -------------------------- + Shared configuration files + -------------------------- + Includes: list[str] List of shared configuration files to load - -------------- + + ------------------------ + Project analysis flavors + ------------------------ + Baseline: dict[str, Any] Baseline configuration for this project. This is included in all the other analysis flavors @@ -105,9 +126,12 @@ class RailProject(Configurable): Flavors: list[dict[str, Any]] List of all the analysis flavors that have been defined in this project - -------------- - Bookkeeping elements, used to define the file paths for the - project. 
+ + -------------------- + Bookkeeping elements + -------------------- + + These used to define the file paths for the project. PathTemplates: dict[str, str] Overrides for templates used to construct file paths @@ -118,7 +142,10 @@ class RailProject(Configurable): IterationVars: dict[str, list[str]] Iteration variables to construct the catalogs - -------------- + + --------------- + Shared elements + --------------- Things that are pulled from the library, each of these is just a list of the names of things that are defined in the library that can be used in this project. The default is to use all the @@ -257,8 +284,8 @@ def generate_ceci_command( ) -> list[str]: """Generate a ceci command to run a pipeline - Paramters - --------- + Parameters + ---------- pipeline_path: str Path to the pipline yaml file @@ -274,8 +301,10 @@ def generate_ceci_command( log_dir: str = "." Pipeline log directory - **kwargs: Any - These are appended to the command in key=value pairs + Keywords + -------- + These are appended to the command in key=value pairs + """ if config is None: @@ -346,12 +375,13 @@ def reduce_data( Keywords -------- - Used to provide values for additional interpolants, e.g., + Used to provide values for additional interpolants. Returns ------- sinks: list[str] Paths to output files + """ sources = self.get_catalog_files( catalog_template, selection=input_selection, **kwargs diff --git a/src/rail/projects/project_file_factory.py b/src/rail/projects/project_file_factory.py index 86869c1..27dbb06 100644 --- a/src/rail/projects/project_file_factory.py +++ b/src/rail/projects/project_file_factory.py @@ -9,18 +9,25 @@ class RailProjectFileFactory(RailFactoryMixin): Expected usage is that user will define a yaml file with the various datasets that they wish to use with the following example syntax: - Files: - - FileTemplate: - name: test_file_100k - path_template: "{catalogs_dir}/test/{project}_{selection}_baseline_100k.hdf5" + + .. highlight:: yaml + .. 
code-block:: yaml + + Files: + - FileTemplate: + name: test_file_100k + path_template: "{catalogs_dir}/test/{project}_{selection}_baseline_100k.hdf5" Or the used can specifiy particular file instances where everything except the interation_vars are resolved - Files: - - FileInstance - name: test_file_100k_roman_rubin_v1.1.3_gold - path: + .. highlight:: yaml + .. code-block:: yaml + + Files: + - FileInstance + name: test_file_100k_roman_rubin_v1.1.3_gold + path: """ yaml_tag: str = "Files" diff --git a/src/rail/projects/selection_factory.py b/src/rail/projects/selection_factory.py index 55c50d2..7cf8494 100644 --- a/src/rail/projects/selection_factory.py +++ b/src/rail/projects/selection_factory.py @@ -45,11 +45,15 @@ class RailSelectionFactory(RailFactoryMixin): Expected usage is that user will define a yaml file with the various datasets that they wish to use with the following example syntax: - Selections: - - Selection: - name: maglim_25.5 - cuts: - maglim_i: [null, 25.5] + + .. highlight:: yaml + .. code-block:: yaml + + Selections: + - Selection: + name: maglim_25.5 + cuts: + maglim_i: [null, 25.5] """ yaml_tag = "Selections" diff --git a/src/rail/projects/subsample_factory.py b/src/rail/projects/subsample_factory.py index 87a0783..b0306d2 100644 --- a/src/rail/projects/subsample_factory.py +++ b/src/rail/projects/subsample_factory.py @@ -45,11 +45,15 @@ class RailSubsampleFactory(RailFactoryMixin): Expected usage is that user will define a yaml file with the various datasets that they wish to use with the following example syntax: - Subsamples: - - Subsample: - name: test_100k - seed: 1234 - num_objects: 100000 + + .. highlight:: yaml + .. 
code-block:: yaml + + Subsamples: + - Subsample: + name: test_100k + seed: 1234 + num_objects: 100000 """ yaml_tag = "Subsamples" From 23d29b35706b94b76dd14601e8a9ca410993881a Mon Sep 17 00:00:00 2001 From: Eric Charles Date: Sun, 2 Feb 2025 17:57:30 -0800 Subject: [PATCH 2/3] delinting --- tests/projects/test_project.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/projects/test_project.py b/tests/projects/test_project.py index 5c95cb6..28368a8 100644 --- a/tests/projects/test_project.py +++ b/tests/projects/test_project.py @@ -167,3 +167,4 @@ def test_project_class(setup_project_area: int) -> None: ) flavor_info = project.get_flavor("test_flavor") + assert flavor_info From e276c32acf52517637676f158a3603a9307aeb70 Mon Sep 17 00:00:00 2001 From: Eric Charles Date: Sun, 2 Feb 2025 18:05:07 -0800 Subject: [PATCH 3/3] added readthedocs --- .readthedocs.yaml | 31 +++++++++++++++++++++++++++++++ docs/requirements.txt | 3 +++ 2 files changed, 34 insertions(+) create mode 100644 .readthedocs.yaml create mode 100644 docs/requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..6e6a835 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,31 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.11" + + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# If using Sphinx, optionally build your docs in additional formats such as PDF +# formats: +# - pdf + +# Optionally declare the Python requirements required to build your docs +python: + install: + - requirements: docs/requirements.txt + - method: pip + path: . 
+ extra_requirements: + - dev + - docs diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..4a68739 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +setuptools_scm +wheel +numpy