diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 5480abee7..0cc50170d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,5 +1,5 @@ --- -name: "\U0001F41B Bug report" +name: "Bug report" about: Create a report to help us improve title: "[BUG] " labels: bug @@ -7,7 +7,7 @@ assignees: '' --- -## 🐛 Bug Report +## Bug Report **Describe the bug** A clear and concise description of what the bug is. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 33cfac3d0..992cb3211 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,5 +1,5 @@ --- -name: "\U0001F680 Feature request" +name: "Feature request" about: Suggest an idea for Merlin title: "[FEAT] " labels: enhancement @@ -9,7 +9,7 @@ assignees: '' -## 🚀 Feature Request +## Feature Request **What problem is this feature looking to solve?** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md index b20c3af1f..58824b272 100644 --- a/.github/ISSUE_TEMPLATE/question.md +++ b/.github/ISSUE_TEMPLATE/question.md @@ -1,9 +1,9 @@ --- -name: 🤓 General question +name: General question labels: 'question' title: '[Q/A] ' about: Ask, discuss, debate with the Merlin team --- -## 🤓 Question +## Question diff --git a/.github/workflows/push-pr_workflow.yml b/.github/workflows/push-pr_workflow.yml index 3b2f809eb..4d7a51ccb 100644 --- a/.github/workflows/push-pr_workflow.yml +++ b/.github/workflows/push-pr_workflow.yml @@ -14,7 +14,7 @@ jobs: - name: Check that CHANGELOG has been updated run: | # If this step fails, this means you haven't updated the CHANGELOG.md file with notes on your contribution. - git diff --name-only $(git merge-base origin/main HEAD) | grep '^CHANGELOG.md$' && echo "Thanks for helping keep our CHANGELOG up-to-date!" + git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep '^CHANGELOG.md$' && echo "Thanks for helping keep our CHANGELOG up-to-date!" 
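+          # A rough local equivalent of the check above, for contributors who want
+          # to verify before pushing (a sketch: BASE_SHA here is a placeholder for
+          # the commit your branch started from, not a variable provided by CI):
+          #   git diff --name-only "$BASE_SHA" HEAD | grep '^CHANGELOG.md$'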
Lint: runs-on: ubuntu-latest @@ -67,10 +67,15 @@ jobs: Local-test-suite: runs-on: ubuntu-latest + env: + GO_VERSION: 1.18.1 + SINGULARITY_VERSION: 3.9.9 + OS: linux + ARCH: amd64 strategy: matrix: - python-version: ['3.6', '3.7', '3.8', '3.9', '3.10'] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v2 @@ -90,6 +95,27 @@ jobs: python3 -m pip install --upgrade pip if [ -f requirements.txt ]; then pip install -r requirements.txt; fi pip3 install -r requirements/dev.txt + + - name: Install singularity + run: | + sudo apt-get update && sudo apt-get install -y \ + build-essential \ + libssl-dev \ + uuid-dev \ + libgpgme11-dev \ + squashfs-tools \ + libseccomp-dev \ + pkg-config + wget https://go.dev/dl/go$GO_VERSION.$OS-$ARCH.tar.gz + sudo tar -C /usr/local -xzf go$GO_VERSION.$OS-$ARCH.tar.gz + rm go$GO_VERSION.$OS-$ARCH.tar.gz + export PATH=$PATH:/usr/local/go/bin + wget https://github.com/sylabs/singularity/releases/download/v$SINGULARITY_VERSION/singularity-ce-$SINGULARITY_VERSION.tar.gz + tar -xzf singularity-ce-$SINGULARITY_VERSION.tar.gz + cd singularity-ce-$SINGULARITY_VERSION + ./mconfig && \ + make -C ./builddir && \ + sudo make -C ./builddir install - name: Install merlin to run unit tests run: | @@ -132,7 +158,7 @@ jobs: strategy: matrix: - python-version: ['3.6', '3.7', '3.8', '3.9', '3.10'] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v2 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index 9403886e3..000000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,19 +0,0 @@ -image: python:3.8-slim-buster - -job1: - script: - - python3 -m venv venv - - source venv/bin/activate - - pip3 install --upgrade pip - - pip3 install -r requirements.txt - - pip3 install -r requirements/dev.txt - - pip3 install -r merlin/examples/workflows/feature_demo/requirements.txt - - pip3 install -e . - - pip3 install --upgrade sphinx - - merlin config - - - merlin stop-workers - - - python3 -m pytest tests/ - - python3 tests/integration/run_tests.py --verbose --local - diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..c1c252e30 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,13 @@ +version: 2 + +build: + os: "ubuntu-20.04" + tools: + python: "3.8" + +sphinx: + configuration: docs/source/conf.py + +python: + install: + - requirements: docs/requirements.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index b472cfcf7..0a9cde753 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,13 +4,51 @@ All notable changes to Merlin will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [unreleased] +## [1.9.0] ### Added +- Added support for Python 3.11 - Update docker docs for new rabbitmq and redis server versions - Added lgtm.com Badge for README.md - More fixes for lgtm checks. +- Added merlin server command as a container option for broker and results_backend servers. +- Added new documentation for merlin server in docs and tutorial +- Added the flux_exec batch argument to allow for flux exec arguments, + e.g. 
flux_exec: flux exec -r "0-1" to run celery workers only on
+  ranks 0 and 1 of a multi-rank allocation
+- Additional argument in test definitions to allow for a post "cleanup" command
+- Capability for non-user block in yaml
+- .readthedocs.yaml and requirements.txt files for docs
+- Small modifications to the Tutorial, Getting Started, Command Line, and Contributing pages in the docs
+- Compatibility with the newest version of Maestro (v. 1.1.9dev1)
+- JSON schema validation for Merlin spec files
+- New tests related to JSON schema validation
+- Instructions in the "Contributing" page of the docs on how to add new blocks/fields to the spec file
+- Brief explanation of the $(LAUNCHER) variable in the "Variables" page of the docs
+
 ### Changed
+- Removed support for Python 3.6
 - Rename lgtm.yml to .lgtm.yml
+- New shortcuts in specification file (sample_vector, sample_names, spec_original_template, spec_executed_run, spec_archived_copy)
+- Update requirements to require redis 4.3.4 for acl user channel support
+- Added ssl to the broker and results backend server checks when "merlin info" is called
+- Removed theme_override.css from docs/_static/ since it is no longer needed with the updated version of sphinx
+- Updated docs/Makefile to include a pip install for requirements and a clean command
+- Update to the Tutorial and Contributing pages in the docs
+- Changed what is stored in a Merlin DAG
+  - We no longer store the entire Maestro ExecutionGraph object
+  - We now only store the adjacency table and values obtained from the ExecutionGraph object
+- Modified spec verification
+- Update to require maestrowf 1.9.1dev1 or later
+
+### Fixed
+- Fixed return values from scripts with main() to fix testing errors.
+- CI test for CHANGELOG modifications
+- Typo "cert_req" to "cert_reqs" in the merlin config docs
+- Removed emoji from issue templates that were breaking doc builds
+- Including .temp template files in MANIFEST
+- Styling in the footer for docs
+- Horizontal scroll overlap in the variables page of the docs
+- Reordered small part of Workflow Specification page in the docs in order to put "samples" back in the merlin block

 ## [1.8.5]
 ### Added
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 9f96bc6e7..a920e45b7 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -5,3 +5,4 @@ Benjamin Bay
 Joe Koning
 Jeremy White
 Aidan Keogh
+Ryan Lee
diff --git a/MANIFEST.in b/MANIFEST.in
index f5b32237d..cefbd23a5 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 recursive-include merlin/data *.yaml *.py
-recursive-include merlin/examples *.py *.yaml *.c *.json *.sbatch *.bsub *.txt
+recursive-include merlin/server *.yaml *.py
+recursive-include merlin/examples *.py *.yaml *.c *.json *.sbatch *.bsub *.txt *.temp
 include requirements.txt
 include requirements/*
diff --git a/Makefile b/Makefile
index d1e441a87..b0b1fb0a9 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@
 #
 # LLNL-CODE-797170
 # All rights reserved.
-# This file is part of Merlin, Version: 1.8.4.
+# This file is part of Merlin, Version: 1.9.0.
 #
 # For details, see https://github.com/LLNL/merlin.
 #
@@ -164,12 +164,12 @@ checks: check-style check-camel-case
 # automatically make python files pep 8-compliant
 fix-style:
 	. 
$(VENV)/bin/activate; \ - isort --line-length $(MAX_LINE_LENGTH) $(MRLN); \ - isort --line-length $(MAX_LINE_LENGTH) $(TEST); \ - isort --line-length $(MAX_LINE_LENGTH) *.py; \ - black --target-version py36 -l $(MAX_LINE_LENGTH) $(MRLN); \ - black --target-version py36 -l $(MAX_LINE_LENGTH) $(TEST); \ - black --target-version py36 -l $(MAX_LINE_LENGTH) *.py; \ + $(PYTHON) -m isort --line-length $(MAX_LINE_LENGTH) $(MRLN); \ + $(PYTHON) -m isort --line-length $(MAX_LINE_LENGTH) $(TEST); \ + $(PYTHON) -m isort --line-length $(MAX_LINE_LENGTH) *.py; \ + $(PYTHON) -m black --target-version py36 -l $(MAX_LINE_LENGTH) $(MRLN); \ + $(PYTHON) -m black --target-version py36 -l $(MAX_LINE_LENGTH) $(TEST); \ + $(PYTHON) -m black --target-version py36 -l $(MAX_LINE_LENGTH) *.py; \ # Increment the Merlin version. USE ONLY ON DEVELOP BEFORE MERGING TO MASTER. diff --git a/docs/Makefile b/docs/Makefile index 97642ceda..662696c6f 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -23,6 +23,9 @@ view: code-docs html # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile + pip install -r requirements.txt echo $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) +clean: + rm -rf build/ diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..87333eb50 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,56 @@ +# +# This file is autogenerated by pip-compile with python 3.8 +# To update, run: +# +# pip-compile requirements.in +# +alabaster==0.7.12 + # via sphinx +babel==2.10.3 + # via sphinx +certifi==2022.12.7 + # via requests +charset-normalizer==2.1.1 + # via requests +docutils==0.17.1 + # via sphinx +idna==3.4 + # via requests +imagesize==1.4.1 + # via sphinx +importlib-metadata==5.0.0 + # via sphinx +jinja2==3.0.3 + # via sphinx +markupsafe==2.1.1 + # via jinja2 +packaging==21.3 + # via sphinx +pygments==2.13.0 + # via sphinx +pyparsing==3.0.9 + # via packaging +pytz==2022.5 + # via babel +requests==2.28.1 + # via sphinx +snowballstemmer==2.2.0 + # via sphinx +sphinx==5.3.0 + # via -r requirements.in +sphinxcontrib-applehelp==1.0.2 + # via sphinx +sphinxcontrib-devhelp==1.0.2 + # via sphinx +sphinxcontrib-htmlhelp==2.0.0 + # via sphinx +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-qthelp==1.0.3 + # via sphinx +sphinxcontrib-serializinghtml==1.1.5 + # via sphinx +urllib3==1.26.12 + # via requests +zipp==3.10.0 + # via importlib-metadata diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css index 367d8e1f2..b89e9d889 100644 --- a/docs/source/_static/custom.css +++ b/docs/source/_static/custom.css @@ -18,3 +18,22 @@ div.highlight .copybtn:hover { div.highlight { position: relative; } +div.sphinxsidebar { + max-height: 100%; + overflow-y: auto; +} +td { + max-width: 300px; +} +@media screen and (min-width: 875px) { + .sphinxsidebar { + background-color: #fff; + margin-left: 0; + z-index: 1; + height: 100vh; + top: 0px; + } +} +.underline { + text-decoration: underline; +} diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css deleted file mode 100644 index 0d042ea81..000000000 --- a/docs/source/_static/theme_overrides.css +++ /dev/null @@ -1,14 +0,0 @@ -/* override table width restrictions */ -@media screen and (min-width: 767px) { - - .wy-table-responsive table td { - /* !important prevents the common CSS stylesheets 
from overriding - this as on RTD they are loaded after this stylesheet */ - white-space: normal !important; - } - - .wy-table-responsive { - overflow: visible !important; - } -} - diff --git a/docs/source/conf.py b/docs/source/conf.py index d5cef3d64..4f0004dc2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -47,7 +47,7 @@ # 'sphinx.ext.autodoc', # 'sphinx.ext.intersphinx', # ] -extensions = [] +extensions = ['sphinx.ext.autodoc'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -101,11 +101,7 @@ # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] -html_context = { - 'css_files': [ - '_static/theme_overrides.css', # override wide tables in RTD theme - ], -} +html_css_files = ['custom.css'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -149,7 +145,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'Merlin.tex', u'Merlin Documentation', - u'MLSI', 'manual'), + u'The Merlin Development Team', 'manual'), ] @@ -188,8 +184,6 @@ def setup(app): try: app.add_javascript("custom.js") app.add_javascript("https://cdn.jsdelivr.net/npm/clipboard@1/dist/clipboard.min.js") - app.add_stylesheet('custom.css') except AttributeError: - app.add_css_file('custom.css') app.add_js_file("custom.js") app.add_js_file("https://cdn.jsdelivr.net/npm/clipboard@1/dist/clipboard.min.js") diff --git a/docs/source/faq.rst b/docs/source/faq.rst index e08edd88c..28d46460c 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -1,5 +1,8 @@ .. _faq: +.. role:: underline + :class: underline + FAQ === .. contents:: Frequently Asked Questions @@ -100,7 +103,7 @@ Where are some example workflows? .. code:: bash - $ merlin example --help + $ merlin example list How do I launch a workflow? ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -185,7 +188,7 @@ Each step is ultimately designated as: Normally this happens behinds the scenes, so you don't need to worry about it. To hard-code this into your step logic, use a shell command such as ``exit $(MERLIN_HARD_FAIL)``. -.. note:: ``$(MERLIN_HARD_FAIL)`` +.. note:: The ``$(MERLIN_HARD_FAIL)`` exit code will shutdown all workers connected to the queue associated with the failed step. 
To shutdown *all* workers use the ``$(MERLIN_STOP_WORKERS)`` exit code @@ -403,25 +406,35 @@ Do something like this: nodes: 1 procs: 3 -The arguments the LAUNCHER syntax will use: +:underline:`The arguments the LAUNCHER syntax will use`: + +``procs``: The total number of MPI tasks + +``nodes``: The total number of MPI nodes + +``walltime``: The total walltime of the run (hh:mm:ss or mm:ss or ss) (not available in lsf) + +``cores per task``: The number of hardware threads per MPI task + +``gpus per task``: The number of GPUs per MPI task + +:underline:`SLURM specific run flags`: + +``slurm``: Verbatim flags only for the srun parallel launch (srun -n -n ) + +:underline:`FLUX specific run flags`: + +``flux``: Verbatim flags for the flux parallel launch (flux mini run ) + +:underline:`LSF specific run flags`: -procs: The total number of MPI tasks -nodes: The total number of MPI nodes -walltime: The total walltime of the run (hh:mm:ss or mm:ss or ss) (not available in lsf) -cores per task: The number of hardware threads per MPI task -gpus per task: The number of GPUs per MPI task +``bind``: Flag for MPI binding of tasks on a node (default: -b rs) -SLURM specific run flags: -slurm: Verbatim flags only for the srun parallel launch (srun -n -n ) +``num resource set``: Number of resource sets -FLUX specific run flags: -flux: Verbatim flags for the flux parallel launch (flux mini run ) +``launch_distribution``: The distribution of resources (default: plane:{procs/nodes}) -LSF specific run flags: -bind: Flag for MPI binding of tasks on a node (default: -b rs) -num resource set: Number of resource sets -launch_distribution : The distribution of resources (default: plane:{procs/nodes}) -lsf: Verbatim flags only for the lsf parallel launch (jsrun ... ) +``lsf``: Verbatim flags only for the lsf parallel launch (jsrun ... ) What is level_max_dirs? ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index 3d4429b4f..4b1a4c1a3 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -13,6 +13,13 @@ Check out the :doc:`Tutorial<./tutorial>`! Developer Setup ++++++++++++++++++ +The developer setup can be done via pip or via make. This section will cover how to do both. + +Additionally, there is an alternative method to setup merlin on supercomputers. See the :doc:`Spack <./spack>` section for more details. + +Pip Setup +****************** + To install with the additional developer dependencies, use:: pip3 install "merlin[dev]" @@ -21,8 +28,35 @@ or:: pip3 install -e "git+https://github.com/LLNL/merlin.git@develop#egg=merlin[dev]" -See the :doc:`Spack <./spack>` section for an alternative method to setup merlin on supercomputers. +Make Setup +******************* + +Visit the `Merlin repository `_ on github. `Create a fork of the repo `_ and `clone it `_ onto your system. + +Change directories into the merlin repo: + +.. code-block:: bash + + $ cd merlin/ + +Install Merlin with the developer dependencies: + +.. code-block:: bash + + $ make install-dev + +This will create a virtualenv, start it, and install Merlin and it's dependencies for you. + +More documentation about using Virtualenvs with Merlin can be found at +:doc:`Using Virtualenvs with Merlin <./virtualenv>`. + +We can make sure it's installed by running: + +.. code-block:: bash + $ merlin --version + +If you don't see a version number, you may need to restart your virtualenv and try again. 
Configuring Merlin
*******************
@@ -32,6 +66,32 @@ Documentation for merlin configuration is in the :doc:`Configuring Merlin <./mer
 
 That's it. To start running Merlin see the :doc:`Merlin Workflows. <./merlin_workflows>`
 
+(Optional) Testing Merlin
+*************************
+
+.. warning::
+
+    With python 3.6 you may see some tests fail and a unicode error presented. To fix this, you need to reset the LC_ALL environment variable to en_US.utf8.
+
+If you have ``make`` installed and the `Merlin repository `_ cloned, you can run the test suite provided in the Makefile by running:
+
+.. code-block:: bash
+
+    $ make tests
+
+This will run both the unit test suite and the end-to-end test suite.
+
+If you'd just like to run the unit tests you can run:
+
+.. code-block:: bash
+
+    $ make unit-tests
+
+Similarly, if you'd just like to run the end-to-end tests you can run:
+
+.. code-block:: bash
+
+    $ make e2e-tests
 
 Custom Setup
 +++++++++++++
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 3747adca4..3776466d3 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -75,9 +75,9 @@ Need help? `merlin@llnl.gov `_
    merlin_specification
    merlin_config
    merlin_variables
+   merlin_server
    celery_overview
    virtualenv
    spack
    merlin_developer
    docker
-
diff --git a/docs/source/merlin_commands.rst b/docs/source/merlin_commands.rst
index 0a1a9f2bc..2a767797f 100644
--- a/docs/source/merlin_commands.rst
+++ b/docs/source/merlin_commands.rst
@@ -71,7 +71,7 @@ If you want to run an example workflow, use Merlin's ``merlin example``:
 
 .. code:: bash
 
-    $ merlin example --help
+    $ merlin example list
 
 This will list the available example workflows and a description for each one. To select one:
 
@@ -406,4 +406,36 @@ The only currently available option for ``--task_server`` is celery, which is the
    only one might get the signal. In this case, you can send it again.
 
+Hosting Local Server (``merlin server``)
+----------------------------------------
+
+The ``merlin server`` command creates a local server for merlin to connect to. Merlin server creates and configures a server in the current directory.
+This allows multiple instances of merlin server to exist for different studies or uses.
+
+The ``init`` subcommand initializes a new instance of merlin server.
+
+The ``status`` subcommand checks the status of the merlin server.
+
+The ``start`` subcommand starts the merlin server.
+
+The ``stop`` subcommand stops the merlin server.
+
+The ``restart`` subcommand performs a stop command followed by a start command on the merlin server.
+
+The ``config`` subcommand edits configurations for the merlin server. There are multiple flags to allow for different configurations.
+
+- The ``-ip IPADDRESS, --ipaddress IPADDRESS`` option sets the bound IP address for merlin server.
+- The ``-p PORT, --port PORT`` option sets the bound port for merlin server.
+- The ``-pwd PASSWORD, --password PASSWORD`` option sets the password file for merlin server.
+- The ``--add-user USER PASSWORD`` option adds a new user for merlin server.
+- The ``--remove-user REMOVE_USER`` option removes an existing user from merlin server.
+- The ``-d DIRECTORY, --directory DIRECTORY`` option sets the working directory for merlin server.
+- The ``-ss SNAPSHOT_SECONDS, --snapshot-seconds SNAPSHOT_SECONDS`` option sets the number of seconds before each snapshot.
+- The ``-sc SNAPSHOT_CHANGES, --snapshot-changes SNAPSHOT_CHANGES`` option sets the number of database changes before each snapshot.
+- The ``-sf SNAPSHOT_FILE, --snapshot-file SNAPSHOT_FILE`` option sets the name of snapshots.
+- The ``-am APPEND_MODE, --append-mode APPEND_MODE`` option sets the appendonly mode. Options are always, everysec, or no.
+- The ``-af APPEND_FILE, --append-file APPEND_FILE`` option sets the filename for the server append/change file.
+
+More information can be found in :doc:`Merlin Server <./merlin_server>`.
+
diff --git a/docs/source/merlin_config.rst b/docs/source/merlin_config.rst
index 7bb43d3c6..599a50413 100644
--- a/docs/source/merlin_config.rst
+++ b/docs/source/merlin_config.rst
@@ -153,7 +153,7 @@ show below.
     ca_certs: /var/ssl/myca.pem
     # This is optional and can be required, optional or none
     # (required is the default)
-    cert_req: required
+    cert_reqs: required
 
@@ -197,7 +197,7 @@ url when using a redis server version 6 or greater with ssl_.
     ca_certs: /var/ssl/myca.pem
     # This is optional and can be required, optional or none
     # (required is the default)
-    cert_req: required
+    cert_reqs: required
 
 The resulting ``broker_use_ssl`` configuration for a ``rediss`` server is given below.
diff --git a/docs/source/merlin_developer.rst b/docs/source/merlin_developer.rst
index d61cc9794..08947ca89 100644
--- a/docs/source/merlin_developer.rst
+++ b/docs/source/merlin_developer.rst
@@ -43,11 +43,11 @@ To expedite review, please ensure that pull requests
 
 - Are from a meaningful branch name (e.g. ``feature/my_name/cool_thing``)
 
-- Into the `appropriate branch `_
+- Are being merged into the `appropriate branch `_
 
 - Include testing for any new features
 
-  - unit tests in ``tests/*``
+  - unit tests in ``tests/unit``
   - integration tests in ``tests/integration``
 
 - Include descriptions of the changes
@@ -64,6 +64,8 @@ To expedite review, please ensure that pull requests
   - in ``CHANGELOG.md``
   - in ``merlin.__init__.py``
 
+- Have `squashed `_ commits
+
 Testing
 +++++++
@@ -88,3 +90,87 @@ Merlin has style checkers configured. They can be run from the Makefile:
 .. code-block:: bash
 
     $ make check-style
+
+Adding New Features to YAML Spec File
++++++++++++++++++++++++++++++++++++++
+
+In order to conform to Maestro's verification format introduced in Maestro v1.1.7,
+we now use `json schema `_ validation to verify our spec
+file.
+
+If you are adding a new feature to Merlin that requires a new block within the yaml spec
+file or a new property within a block, then you are going to need to update the
+merlinspec.json file located in the merlin/spec/ directory. You also may want to add
+additional verifications within the specification.py file located in the same directory.
+
+.. note::
+    If you add custom verifications beyond the pattern checking that the json schema
+    checks for, then you should also add tests for this verification in the test_specification.py
+    file located in the merlin/tests/unit/spec/ directory. Follow the steps for adding new
+    tests in the docstring of the TestCustomVerification class.
+
+Adding a New Property
+*********************
+
+To add a new property to a block in the yaml file, you need to create a
+template for that property and place it in the correct block in merlinspec.json. For
+example, say I wanted to add a new property called ``example`` that's an integer within
+the ``description`` block. I would modify the ``description`` block in the merlinspec.json file to look
+like this:
+
+.. 
code-block:: json
+
+    "DESCRIPTION": {
+      "type": "object",
+      "properties": {
+        "name": {"type": "string", "minLength": 1},
+        "description": {"type": "string", "minLength": 1},
+        "example": {"type": "integer", "minimum": 1}
+      },
+      "required": ["name", "description"]
+    }
+
+If you need help with json schema formatting, check out the `step-by-step getting
+started guide `_.
+
+That's all that's required to add a new property. If you want to add your own custom
+verifications, make sure to create unit tests for them (see the note above for more info).
+
+Adding a New Block
+******************
+
+Adding a new block is slightly more complicated than adding a new property. You will not
+only have to update the merlinspec.json schema file but also add calls to verify that
+block within specification.py.
+
+To add a block to the json schema, you will need to define the template for that entire
+block. For example, if I wanted to create a block called ``country`` with two
+properties labeled ``name`` and ``population`` that are both required, it would look like so:
+
+.. code-block:: json
+
+    "COUNTRY": {
+      "type": "object",
+      "properties": {
+        "name": {"type": "string", "minLength": 1},
+        "population": {
+          "anyOf": [
+            {"type": "string", "minLength": 1},
+            {"type": "integer", "minimum": 1}
+          ]
+        }
+      },
+      "required": ["name", "population"]
+    }
+
+Here, ``name`` can only be a string but ``population`` can be either a string or an integer.
+For help with json schema formatting, check out the `step-by-step getting started guide
+`_.
+
+The next step is to enable this block in the schema validation process. To do this we need to:
+
+#. Create a new method called verify_<block_name>() within the MerlinSpec class
+#. Call the YAMLSpecification.validate_schema() method provided to us via Maestro in your new method
+#. Add a call to verify_<block_name>() inside the verify() method
+
+If you add your own custom verifications on top of this, please add unit tests for them.
diff --git a/docs/source/merlin_server.rst b/docs/source/merlin_server.rst
new file mode 100644
index 000000000..24b37c776
--- /dev/null
+++ b/docs/source/merlin_server.rst
@@ -0,0 +1,72 @@
+Merlin Server
+=============
+The merlin server command allows users easy access to containerized broker
+and results servers for merlin workflows. This allows users to run merlin without
+a dedicated external server.
+
+The main configuration will be stored in the subdirectory called "server/" by
+default in the main merlin configuration "~/.merlin". However, different server
+images can be created for different use cases or studies simply by creating
+a new directory to store local configuration files for merlin server instances.
+
+Below is an example of how merlin server can be utilized.
+
+First create and navigate into a directory to store your local merlin
+configuration for a specific use case or study.
+
+.. code-block:: bash
+
+    mkdir study1/
+    cd study1/
+
+Afterwards you can instantiate merlin server in this directory by running:
+
+.. code-block:: bash
+
+    merlin server init
+
+A main server configuration will be created in ~/.merlin/server and a local
+configuration will be created in a subdirectory called "merlin_server/".
+
+We should expect the following files in each directory:
+
+.. 
code-block:: bash
+
+    ~/study1$ ls ~/.merlin/server/
+    docker.yaml merlin_server.yaml podman.yaml singularity.yaml
+
+    ~/study1$ ls
+    merlin_server
+
+    ~/study1$ ls merlin_server/
+    redis.conf redis_latest.sif
+
+The main configuration in "~/.merlin/server" deals with defaults and
+technical commands that might be used for setting up the merlin server
+local configuration and its containers. Each container has its own
+configuration file to allow users to switch between different
+containerized services freely.
+
+The local configuration "merlin_server" folder contains configuration files
+specific to a certain use case or run. In the case above, you can see that we have a
+redis singularity container called "redis_latest.sif" with the redis configuration
+file called "redis.conf". This redis configuration will allow the user to
+configure redis to their specific needs without having to manage or edit
+the redis container. When the server is run, this configuration will be dynamically
+read, so settings can be changed between runs if needed.
+
+Once the merlin server has been initialized in the local directory, the user will be allowed
+to run other merlin server commands such as "start, status, stop" to interact with the
+merlin server. A detailed list of commands can be found in the `Merlin Server Commands <./server/commands.html>`_ page.
+
+Note: Running "merlin server init" again will NOT overwrite any existing configuration
+that the users might have set or edited. By running this command again, any missing files
+will be created for the users with existing defaults. HOWEVER, it is highly advised that
+users back up their configuration in case an error occurs where configuration files are overwritten.
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Merlin Server Settings:
+
+   server/configuration
+   server/commands
diff --git a/docs/source/merlin_specification.rst b/docs/source/merlin_specification.rst
index a84eae1a6..f857fabf3 100644
--- a/docs/source/merlin_specification.rst
+++ b/docs/source/merlin_specification.rst
@@ -48,6 +48,9 @@ see :doc:`./merlin_variables`.
       queue: pbatch
       flux_path: 
       flux_start_opts: 
+      flux_exec: 
       flux_exec_workers: 
       launch_pre: 
 
@@ -226,7 +229,7 @@ see :doc:`./merlin_variables`.
     ####################################
     # Merlin Block (Required)
    ####################################
-    # The merlin specific block will add any required configuration to
+    # The merlin specific block will add any configuration to
     # the DAG created by the study description.
     # including task server config, data management and sample definitions.
     #
@@ -274,7 +277,7 @@ see :doc:`./merlin_variables`.
       batch:
          type: local
          machines: [host3]
-      
+
     ###################################################
     # Sample definitions
    #
@@ -290,3 +293,54 @@ see :doc:`./merlin_variables`.
           cmd: |
             python $(SPECROOT)/make_samples.py -dims 2 -n 10 -outfile=$(INPUT_PATH)/samples.npy "[(1.3, 1.3, 'linear'), (3.3, 3.3, 'linear')]"
         level_max_dirs: 25
+
+    ####################################
+    # User Block (Optional)
+    ####################################
+    # The user block allows other variables in the workflow file to be propagated
+    # through to the workflow (including in variables .partial.yaml and .expanded.yaml).
+    # The user block uses yaml anchors, which define a chunk of configuration and use
+    # their aliases to refer to that specific chunk of configuration elsewhere.
+ ####################################################################### + user: + study: + run: + hello: &hello_run + cmd: | + python3 $(HELLO) -outfile hello_world_output_$(MERLIN_SAMPLE_ID).json $(X0) $(X1) $(X2) + max_retries: 1 + collect: &collect_run + cmd: | + echo $(MERLIN_GLOB_PATH) + echo $(hello.workspace) + ls $(hello.workspace)/X2.$(X2)/$(MERLIN_GLOB_PATH)/hello_world_output_*.json > files_to_collect.txt + spellbook collect -outfile results.json -instring "$(cat files_to_collect.txt)" + translate: &translate_run + cmd: spellbook translate -input $(collect.workspace)/results.json -output results.npz -schema $(FEATURES) + learn: &learn_run + cmd: spellbook learn -infile $(translate.workspace)/results.npz + make_samples: &make_samples_run + cmd: spellbook make-samples -n $(N_NEW) -sample_type grid -outfile grid_$(N_NEW).npy + predict: &predict_run + cmd: spellbook predict -infile $(make_new_samples.workspace)/grid_$(N_NEW).npy -outfile prediction_$(N_NEW).npy -reg $(learn.workspace)/random_forest_reg.pkl + verify: &verify_run + cmd: | + if [[ -f $(learn.workspace)/random_forest_reg.pkl && -f $(predict.workspace)/prediction_$(N_NEW).npy ]] + then + touch FINISHED + exit $(MERLIN_SUCCESS) + else + exit $(MERLIN_SOFT_FAIL) + fi + python3: + run: &python3_run + cmd: | + print("OMG is this in python?") + print("Variable X2 is $(X2)") + shell: /usr/bin/env python3 + python2: + run: &python2_run + cmd: | + print "OMG is this in python2? Change is bad." + print "Variable X2 is $(X2)" + shell: /usr/bin/env python2 diff --git a/docs/source/merlin_variables.rst b/docs/source/merlin_variables.rst index b8da52cb6..7f545a4d2 100644 --- a/docs/source/merlin_variables.rst +++ b/docs/source/merlin_variables.rst @@ -31,59 +31,161 @@ The directory structure of merlin output looks like this: Reserved variables ------------------ .. list-table:: Study variables that Merlin uses. May be referenced within a specification file, but not reassigned or overridden. - - * - Variable - - Description - - Example Expansion - * - ``$(SPECROOT)`` - - Directory path of the specification file. - - ``/globalfs/user/merlin_workflows`` - * - ``$(OUTPUT_PATH)`` - - Directory path the study output will be written to. If not defined - will default to the current working directory. May be reassigned or - overridden. - - ``./studies`` - * - ``$(MERLIN_TIMESTAMP)`` - - The time a study began. May be used as a unique identifier. - - ``"YYYYMMDD-HHMMSS"`` - * - ``$(MERLIN_WORKSPACE)`` - - Output directory generated by a study at ``OUTPUT_PATH``. Ends with - ``MERLIN_TIMESTAMP``. - - ``$(OUTPUT_PATH)/ensemble_name_$(MERLIN_TIMESTAMP)`` - * - ``$(WORKSPACE)`` - - The workspace directory for a single step. - - ``$(OUTPUT_PATH)/ensemble_name_$(MERLIN_TIMESTAMP)/step_name/`` - * - ``$(MERLIN_INFO)`` - - Directory within ``MERLIN_WORKSPACE`` that holds the provenance specs and sample generation results. - Commonly used to hold ``samples.npy``. - - ``$(MERLIN_WORKSPACE)/merlin_info/`` - * - ``$(MERLIN_SAMPLE_ID)`` - - Sample index in an ensemble - - ``0`` ``1`` ``2`` ``3`` - * - ``$(MERLIN_SAMPLE_PATH)`` - - Path in the sample directory tree to a sample's directory, i.e. where the - task is actually run. - - ``/0/0/0/`` ``/0/0/1/`` ``/0/0/2/`` ``/0/0/3/`` - * - ``$(MERLIN_GLOB_PATH)`` - - All of the directories in a simulation tree as a glob (*) string - - ``/\*/\*/\*/\*`` - * - ``$(MERLIN_PATHS_ALL)`` - - A space delimited string of all of the paths; - can be used as is in bash for loop for instance with: - - .. 
code-block:: bash
-
-          for path in $(MERLIN_PATHS_ALL)
-          do
-              ls $path
-          done
-
-          for path in $(MERLIN_PATHS_ALL)
-          do
-              ls $path
-          done
-
-    - ``0/0/0 0/0/1 0/0/2 0/0/3``
-
+   :widths: 25 50 25
+   :header-rows: 1
+
+   * - Variable
+     - Description
+     - Example Expansion
+
+   * - ``$(SPECROOT)``
+     - Directory path of the specification file.
+     -
+       ::
+
+           /globalfs/user/merlin_workflows
+
+   * - ``$(OUTPUT_PATH)``
+     - Directory path the study output will be written to. If not defined
+       will default to the current working directory. May be reassigned or
+       overridden.
+     -
+       ::
+
+           ./studies
+
+   * - ``$(MERLIN_TIMESTAMP)``
+     - The time a study began. May be used as a unique identifier.
+     -
+       ::
+
+           "YYYYMMDD-HHMMSS"
+
+   * - ``$(MERLIN_WORKSPACE)``
+     - Output directory generated by a study at ``OUTPUT_PATH``. Ends with
+       ``MERLIN_TIMESTAMP``.
+     -
+       ::
+
+           $(OUTPUT_PATH)/ensemble_name_$(MERLIN_TIMESTAMP)
+
+   * - ``$(WORKSPACE)``
+     - The workspace directory for a single step.
+     -
+       ::
+
+           $(OUTPUT_PATH)/ensemble_name_$(MERLIN_TIMESTAMP)/step_name/
+
+   * - ``$(MERLIN_INFO)``
+     - Directory within ``MERLIN_WORKSPACE`` that holds the provenance specs and sample generation results.
+       Commonly used to hold ``samples.npy``.
+     -
+       ::
+
+           $(MERLIN_WORKSPACE)/merlin_info/
+
+   * - ``$(MERLIN_SAMPLE_ID)``
+     - Sample index in an ensemble
+     -
+       ::
+
+           0 1 2 3
+
+   * - ``$(MERLIN_SAMPLE_PATH)``
+     - Path in the sample directory tree to a sample's directory, i.e. where the
+       task is actually run.
+     -
+       ::
+
+           /0/0/0/ /0/0/1/ /0/0/2/ /0/0/3/
+
+   * - ``$(MERLIN_GLOB_PATH)``
+     - All of the directories in a simulation tree as a glob (*) string
+     -
+       ::
+
+           /*/*/*/*
+
+   * - ``$(MERLIN_PATHS_ALL)``
+     - A space delimited string of all of the paths;
+       can be used as is in bash for loop for instance with:
+
+       .. code-block:: bash
+
+           for path in $(MERLIN_PATHS_ALL)
+           do
+               ls $path
+           done
+
+     -
+       ::
+
+           0/0/0
+           0/0/1
+           0/0/2
+           0/0/3
+
+   * - ``$(MERLIN_SAMPLE_VECTOR)``
+     - Vector of merlin sample values
+     -
+       ::
+
+           $(SAMPLE_COLUMN_1) $(SAMPLE_COLUMN_2) ...
+
+   * - ``$(MERLIN_SAMPLE_NAMES)``
+     - Names of merlin sample values
+     -
+       ::
+
+           SAMPLE_COLUMN_1 SAMPLE_COLUMN_2 ...
+
+   * - ``$(MERLIN_SPEC_ORIGINAL_TEMPLATE)``
+     - Copy of original yaml file passed to ``merlin run``.
+     -
+       ::
+
+           $(MERLIN_INFO)/*.orig.yaml
+
+   * - ``$(MERLIN_SPEC_EXECUTED_RUN)``
+     - Parsed and processed yaml file with command-line variable substitutions included.
+     -
+       ::
+
+           $(MERLIN_INFO)/*.partial.yaml
+
+   * - ``$(MERLIN_SPEC_ARCHIVED_COPY)``
+     - Archive version of ``MERLIN_SPEC_EXECUTED_RUN`` with all variables and paths fully resolved.
+     -
+       ::
+
+           $(MERLIN_INFO)/*.expanded.yaml
+
+The ``LAUNCHER`` Variable
++++++++++++++++++++++++++
+
+``$(LAUNCHER)`` is a special case of a reserved variable since its value *can* be changed.
+It serves as an abstraction to launch a job with parallel schedulers like :ref:`slurm`,
+:ref:`lsf`, and :ref:`flux`, and it can be used within a step command. For example,
+say we start with this run cmd inside our step:
+
+.. code:: yaml
+
+    run:
+        cmd: srun -N 1 -n 3 python script.py
+
+We can modify this to use the ``$(LAUNCHER)`` variable like so:
+
+.. code:: yaml
+
+    batch:
+       type: slurm
+
+    run:
+        cmd: $(LAUNCHER) python script.py
+        nodes: 1
+        procs: 3
+
+In other words, the ``$(LAUNCHER)`` variable would become ``srun -N 1 -n 3``.
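+
+As a sketch of the same idea on another scheduler (assuming a flux batch type;
+the exact expansion depends on your flux setup), only the batch block changes:
+
+.. code:: yaml
+
+    batch:
+       type: flux
+
+    run:
+        cmd: $(LAUNCHER) python script.py
+        nodes: 1
+        procs: 3
+
+Here ``$(LAUNCHER)`` would expand to a flux launch line along the lines of
+``flux mini run -N 1 -n 3`` rather than an srun one.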
User variables ------------------- diff --git a/docs/source/merlin_workflows.rst b/docs/source/merlin_workflows.rst index c06976438..42cb7a39a 100644 --- a/docs/source/merlin_workflows.rst +++ b/docs/source/merlin_workflows.rst @@ -8,7 +8,7 @@ provides documentation on running these Merlin workflow examples. Overview -------- -List the built-in Merlin workflow examples with ``merlin example --help``. +List the built-in Merlin workflow examples with ``merlin example list``. The Merlin team is working on adding a more diverse array of example workflows like these. diff --git a/docs/source/modules/before.rst b/docs/source/modules/before.rst index e9a886548..dab1c8e2c 100644 --- a/docs/source/modules/before.rst +++ b/docs/source/modules/before.rst @@ -8,23 +8,40 @@ start the tutorial modules: __ https://www.python.org/downloads/release/python-360/ +* Make sure you have `pip`__ version 22.3 or newer. + +__ https://www.pypi.org/project/pip/ + + * You can upgrade pip to the latest version with: + + .. code-block:: bash + + pip install --upgrade pip + + * OR you can upgrade to a specific version with: + + .. code-block:: bash + + pip install --upgrade pip==x.y.z + + * Make sure you have `GNU make tools`__ and `compilers`__. __ https://www.gnu.org/software/make/ __ https://gcc.gnu.org/ -* Install `docker`__. +* (OPTIONAL) Install `docker`__. __ https://docs.docker.com/install/ -* Download OpenFOAM image with: + * Download OpenFOAM image with: -.. code-block:: bash + .. code-block:: bash - docker pull cfdengine/openfoam + docker pull cfdengine/openfoam -* Download redis image with: + * Download redis image with: -.. code-block:: bash + .. code-block:: bash - docker pull redis + docker pull redis diff --git a/docs/source/modules/contribute.rst b/docs/source/modules/contribute.rst index 8e8cde7af..acf35d323 100644 --- a/docs/source/modules/contribute.rst +++ b/docs/source/modules/contribute.rst @@ -17,16 +17,16 @@ Issues Found a bug? Have an idea? Or just want to ask a question? `Create a new issue `_ on GitHub. -Bug Reports 🐛 --------------- +Bug Reports +----------- To report a bug, simply navigate to `Issues `_, click "New Issue", then click "Bug report". Then simply fill out a few fields such as "Describe the bug" and "Expected behavior". Try to fill out every field as it will help us figure out your bug sooner. -Feature Requests 🚀 -------------------- +Feature Requests +---------------- We are still adding new features to merlin. To suggest one, simply navigate to `Issues `_, click "New Issue", then click "Feature request". Then fill out a few fields such as "What problem is this feature looking to solve?" -Questions 🤓 ------------- +Questions +--------- .. note:: Who knows? Your question may already be answered in the :doc:`FAQ<../faq>`. @@ -44,3 +44,5 @@ Contributing to Merlin is easy! Just `send us a pull request `. diff --git a/docs/source/modules/hello_world/hello_world.rst b/docs/source/modules/hello_world/hello_world.rst index 31ea31976..2cec6f05c 100644 --- a/docs/source/modules/hello_world/hello_world.rst +++ b/docs/source/modules/hello_world/hello_world.rst @@ -20,9 +20,15 @@ This hands-on module walks through the steps of building and running a simple me .. contents:: Table of Contents: :local: -Get example files +Get Example Files +++++++++++++++++ -``merlin example`` is a command line tool that makes it easy to get a basic workflow up and running. Run the following commands: +``merlin example`` is a command line tool that makes it easy to get a basic workflow up and running. 
To see a list of all the examples provided with merlin you can run: + +.. code-block:: bash + + $ merlin example list + +For this tutorial we will be using the ``hello`` example. Run the following commands: .. code-block:: bash @@ -44,7 +50,7 @@ This will create and move into directory called ``hello``, which contains these * ``requirements.txt`` -- this is a text file listing this workflow's python dependencies. -Specification file +Specification File ++++++++++++++++++ Central to Merlin is something called a specification file, or a "spec" for short. @@ -97,7 +103,7 @@ So this will give us 1) an English result, and 2) a Spanish one (you could add a Section: ``study`` ~~~~~~~~~~~~~~~~~~ This is where you define workflow steps. -While the convention is to list steps as sequentially as possible, the only factor in determining step order is the dependency DAG created by the ``depends`` field. +While the convention is to list steps as sequentially as possible, the only factor in determining step order is the dependency directed acyclic graph (DAG) created by the ``depends`` field. .. code-block:: yaml @@ -163,7 +169,7 @@ The order of the spec sections doesn't matter. At this point, ``my_hello.yaml`` is still maestro-compatible. The primary difference is that maestro won't understand anything in the ``merlin`` block, which we will still add later. If you want to try it, run: ``$ maestro run my_hello.yaml`` -Try it! +Try It! +++++++ First, we'll run merlin locally. On the command line, run: @@ -200,7 +206,7 @@ A lot of stuff, right? Here's what it means: .. Assuming config is ready -Run distributed! +Run Distributed! ++++++++++++++++ .. important:: @@ -234,6 +240,12 @@ Immediately after that, this will pop up: .. literalinclude :: celery.txt :language: text +You may not see all of the info logs listed after the Celery C is displayed. If you'd like to see them you can change the merlin workers' log levels with the ``--worker-args`` tag: + +.. code-block:: bash + + $ merlin run-workers --worker-args "-l INFO" my_hello.yaml + The terminal you ran workers in is now being taken over by Celery, the powerful task queue library that merlin uses internally. The workers will continue to report their task status here until their tasks are complete. Workers are persistent, even after work is done. Send a stop signal to all your workers with this command: @@ -249,7 +261,7 @@ Workers are persistent, even after work is done. Send a stop signal to all your .. _Using Samples: -Using samples +Using Samples +++++++++++++ It's a little boring to say "hello world" in just two different ways. Let's instead say hello to many people! @@ -283,10 +295,12 @@ This makes ``N_SAMPLES`` into a user-defined variable that you can use elsewhere file: $(MERLIN_INFO)/samples.csv column_labels: [WORLD] -This is the merlin block, an exclusively merlin feature. It provides a way to generate samples for your workflow. In this case, a sample is the name of a person. +This is the merlin block, an exclusively merlin feature. It provides a way to generate samples for your workflow. In this case, a sample is the name of a person. For simplicity we give ``column_labels`` the name ``WORLD``, just like before. +It's also important to note that ``$(SPECROOT)`` and ``$(MERLIN_INFO)`` are reserved variables. The ``$(SPECROOT)`` variable is a shorthand for the directory path of the spec file and the ``$(MERLIN_INFO)`` variable is a shorthand for the directory holding the provenance specs and sample generation results. 
More information on Merlin variables can be found on the :doc:`variables page<../../merlin_variables>`.
+
 It's good practice to shift larger chunks of code to external scripts. At the same location of your spec, make a new file called ``make_samples.py``:
 
 .. literalinclude :: ../../../../merlin/examples/workflows/hello/make_samples.py
diff --git a/docs/source/modules/installation/installation.rst b/docs/source/modules/installation/installation.rst
index 2eb1ac95d..d18261af5 100644
--- a/docs/source/modules/installation/installation.rst
+++ b/docs/source/modules/installation/installation.rst
@@ -6,9 +6,9 @@ Installation
    * python3 >= python3.6
    * pip3
    * wget
-   * build tools (make, C/C++ compiler for local-redis)
-   * docker (required for :doc:`Module 4: Run a Real Simulation<../run_simulation/run_simulation>`)
-   * file editor for docker config file editing
+   * build tools (make, C/C++ compiler)
+   * (OPTIONAL) docker (required for :doc:`Module 4: Run a Real Simulation<../run_simulation/run_simulation>`)
+   * (OPTIONAL) file editor for docker config file editing
 
 .. admonition:: Estimated time
 
@@ -17,9 +17,7 @@ Installation
 .. admonition:: You will learn
 
    * How to install merlin in a virtual environment using pip.
-   * How to install a local redis server.
-   * How to install merlin using docker (optional).
-   * How to start the docker containers, including redis (optional).
+   * How to install a container platform, e.g. singularity, docker, or podman.
   * How to configure merlin.
   * How to test/verify the installation.
 
 .. contents::
    :local:
 
 This section details the steps necessary to install merlin and its dependencies.
-Merlin will then be configured and this configuration checked to ensure a proper installation.
+Merlin will then be configured for the local machine and the configuration
+will be checked to ensure a proper installation.
 
-Installing merlin
+Installing Merlin
 -----------------
 
-A merlin installation is required for the subsequent modules of this tutorial. You can choose between the pip method or the docker method. Choose one or the other but
-do not use both unless you are familiar with redis servers run locally and through docker.
-**The pip method is recommended.**
+A merlin installation is required for the subsequent modules of this tutorial.
 
-Once merlin is installed, it requires servers to operate.
-The pip section will inform you how to setup a
-local redis server to use in merlin. An alternative method for setting up a
-redis server can be found in the docker section. Only setup one redis server either
-local-redis or docker-redis.
-Your computer/organization may already have a redis server available, please check
+Once merlin is installed, it requires servers to operate. While you are able to host your own servers,
+we will use merlin's containerized servers in this tutorial. However, if you prefer to host your own servers,
+you can host a redis server that is accessible to your current machine.
+Your computer/organization may already have a redis server available that you can use; please check
 with your local system administrator.
 
-Pip (recommended)
-+++++++++++++++++
-
 Create a virtualenv using python3 to install merlin.
 
 .. code-block:: bash
@@ -77,176 +69,135 @@ Install merlin through pip.
 
     pip3 install merlin
 
-When you are done with the virtualenv you can deactivate it using ``deactivate``,
-but leave the virtualenv activated for the subsequent steps.
+Check to make sure merlin installed correctly.
 
 .. 
code-block:: bash
 
-    deactivate
+    which merlin
 
-
-redis local server
-^^^^^^^^^^^^^^^^^^
-
-A redis server is required for the celery results backend server, this same server
-can also be used for the celery broker. This method will be called local-redis.
+You should see that it was installed in your virtualenv, like so:
 
 .. code-block:: bash
 
-    # Download redis
-    wget http://download.redis.io/releases/redis-6.0.5.tar.gz
+    ~//merlin_venv/bin/merlin
 
-    # Untar
-    tar xvf redis*.tar.gz
+If this is not the output you see, you may need to restart your virtualenv and try again.
 
-    # cd into redis dir
-    cd redis*/
-
-    # make redis
-    make
-
-    # make test (~3.5 minutes)
-    make test
+When you are done with the virtualenv you can deactivate it using ``deactivate``,
+but leave the virtualenv activated for the subsequent steps.
 
 .. code-block:: bash
 
-The redis server is started by calling the ``redis-server`` command located in
-the src directory.
-This should be run in a separate terminal in the top-level source
-directory so the output can be examined.
-The redis server will use the default ``redis.conf`` file in the top-level
-redis directory.
+    deactivate
 
-.. code:: bash
 
-    # run redis with default config, server is at localhost port 6379
-    ./src/redis-server &
+Redis Server
+++++++++++++
 
-You can shutdown the local-redis server by using the ``redis-cli shutdown`` command
-when you are done with the tutorial.
+A redis server is required for the celery results backend server; this same server
+can also be used for the celery broker. We will be using merlin's containerized server;
+however, we will need to download one of the supported container platforms available. For
+the purpose of this tutorial we will be using singularity.
 
 .. code-block:: bash
 
-    #cd to redis directory
-    cd /redis*/
-    ./src/redis-cli shutdown
-
-
-Docker
-++++++
-
-Merlin and the servers required by merlin are all available as docker containers on dockerhub. Do not use this method if you have already set up a virtualenv through
-the pip installation method.
-
-.. note::
+    # Update and install singularity dependencies
+    apt-get update && apt-get install -y \
+       build-essential \
+       libssl-dev \
+       uuid-dev \
+       libgpgme11-dev \
+       squashfs-tools \
+       libseccomp-dev \
+       pkg-config
+
+    # Download dependency go
+    wget https://go.dev/dl/go1.18.1.linux-amd64.tar.gz
+
+    # Extract go into local
+    tar -C /usr/local -xzf go1.18.1.linux-amd64.tar.gz
+
+    # Remove go tar file
+    rm go1.18.1.linux-amd64.tar.gz
+
+    # Update PATH to include go
+    export PATH=$PATH:/usr/local/go/bin
+
+    # Download singularity
+    wget https://github.com/sylabs/singularity/releases/download/v3.9.9/singularity-ce-3.9.9.tar.gz
+
+    # Extract singularity
+    tar -xzf singularity-ce-3.9.9.tar.gz
+
+    # Configure and install singularity
+    cd singularity-ce-3.9.9
+    ./mconfig && \
+    make -C ./builddir && \
+    sudo make -C ./builddir install
 
+Configuring Merlin
+------------------
+Merlin requires a configuration script for the celery interface.
+Run this configuration method to create the ``app.yaml``
+configuration file.
 
-    When using the docker method the celery workers will run inside the
-    merlin container. This
-    means that any workflow tools that are also from docker containers must
-    be installed in, or
-    otherwise made available to, the merlin container.
+.. code-block:: bash
 
+    merlin config --broker redis
 
-To run a merlin docker container with a docker redis server, cut
-and paste the commands below into a new file called ``docker-compose.yml``.
-This file can be placed anywhere in your filesystem but you may want to put it in
-a directory ``merlin_docker_redis``.
+The ``merlin config`` command above will create a file called ``app.yaml``
+in the ``~/.merlin`` directory.
+If you are running a redis server locally, then you are all set. Look in the ``~/.merlin/app.yaml`` file
+to see the configuration; it should look like the configuration below.
 
-.. literalinclude:: ./docker-compose.yml
+.. literalinclude:: ./app_local_redis.yaml
    :language: yaml
 
-This file can then be run with the ``docker-compose`` command in same directory
-as the ``docker-compose.yml`` file.
-
-.. code-block:: bash
-
-    docker-compose up -d
-
-The ``volume`` option in the ``docker-compose.yml`` file
-will link the local ``$HOME/merlinu`` directory to the ``/home/merlinu``
-directory in the container.
+More detailed information on configuring Merlin can be found in the :doc:`configuration section<../../merlin_config>`.
 
-Some aliases can be defined for convenience.
-
-.. code-block:: bash
+.. _Verifying installation:
 
-    # define some aliases for the merlin and celery commands (assuming Bourne shell)
-    alias merlin="docker exec my-merlin merlin"
-    alias celery="docker exec my-merlin celery"
-    alias python3="docker exec my-merlin python3"
+Checking/Verifying Installation
+-------------------------------
 
-When you are done with the containers you can stop them using ``docker-compose down``.
-We will be using the containers in the subsequent modules so leave them running.
+First launch the merlin server containers by using the ``merlin server`` commands:
 
 .. code-block:: bash
 
-    docker-compose down
+    merlin server init
+    merlin server start
 
-Any required python modules can be installed in the running ``my-merlin`` container
-through ``docker exec``. When using docker-compose, these changes will persist
-if you stop the containers with ``docker-compose down`` and restart them with
-``docker-compose up -d``.
+A subdirectory called ``merlin_server/`` will have been created in the current run directory.
+This contains all of the proper configuration for the server containers merlin creates.
+Configuration can be done through the ``merlin server config`` command; however, users have
+the flexibility to edit the files directly in the directory. Additionally, a preconfigured ``app.yaml``
+file has been created in the ``merlin_server/`` subdirectory to utilize the merlin server
+containers. To use it locally, simply copy it to the run directory with a cp command.
 
 .. code-block:: bash
 
-    docker exec my-merlin pip3 install pandas faker
-
-Configuring merlin
-------------------
-
-Merlin configuration is slightly different between the pip and docker methods.
-The fundamental differences include the app.yaml file location and the server name.
+    cp ./merlin_server/app.yaml .
 
-Merlin requires a configuration script for the celery interface and optional
-passwords for the redis server and encryption. Run this configuration method
-to create the ``app.yaml`` configuration file.
+You can also make this server container your main server configuration by replacing the one located in your home
+directory. Make sure you make backups of your current app.yaml file in case you want to use your previous
+configurations. Note: since merlin servers are created locally in your run directory, you are allowed to create
+multiple instances of merlin server with their unique configurations for different studies. 
Simply create different +directories for each study and run ``merlin server init`` in each directory to create an instance for each. .. code-block:: bash - merlin config --broker redis - -Pip -+++ - -The ``merlin config`` command above will create a file called ``app.yaml`` -in the ``~/.merlin`` directory. -If you are using local-redis then you are all set, look in the ``~/.merlin/app.yaml`` file -to see the configuration, it should look like the configuration below. - -.. literalinclude:: ./app_local_redis.yaml - :language: yaml - -Docker -++++++ - -If you are using the docker merlin with docker-redis server then the -``~/merlinu/.merlin/app.yaml`` will be created by the ``merlin config`` -command above. -This file must be edited to -add the server from the redis docker container my-redis. Change the ``server: localhost``, in both the -broker and backend config definitions, to ``server: my-redis``, the port will remain the same. - -.. note:: - You can use the docker redis server, instead of the local-redis server, - with the virtualenv installed merlin by using the local-redis - ``app.yaml`` file above. - -.. literalinclude:: ./app_docker_redis.yaml - :language: yaml - -.. _Verifying installation: - -Checking/Verifying installation -------------------------------- + mv ~/.merlin/app.yaml ~/.merlin/app.yaml.bak + cp ./merlin_server/app.yaml ~/.merlin/ The ``merlin info`` command will check that the configuration file is installed correctly, display the server configuration strings, and check server -access. This command works for both the pip and docker installed merlin. +access. .. code-block:: bash merlin info -If everything is set up correctly, you should see (assuming local-redis servers): +If everything is set up correctly, you should see: .. code-block:: bash @@ -277,10 +228,10 @@ If everything is set up correctly, you should see (assuming local-redis servers) . -Docker Advanced Installation +(OPTIONAL) Docker Advanced Installation ---------------------------- -RabbitMQ server +RabbitMQ Server +++++++++++++++ This optional section details the setup of a rabbitmq server for merlin. @@ -341,7 +292,7 @@ and add the password ``guest``. The aliases defined previously can be used with this set of docker containers. -Redis TLS server +Redis TLS Server ++++++++++++++++ This optional section details the setup of a redis server with TLS for merlin. diff --git a/docs/source/modules/port_your_application.rst b/docs/source/modules/port_your_application.rst index 26f2cc1d1..c9d89b06d 100644 --- a/docs/source/modules/port_your_application.rst +++ b/docs/source/modules/port_your_application.rst @@ -26,6 +26,7 @@ Tips for porting your app, building workflows The first step of building a new workflow, or porting an existing app to a workflow, is to describe it as a set of discrete, and ideally focused steps. Decoupling the steps and making them generic when possible will facilitate more rapid composition of future workflows. This will also require mapping out the dependencies and parameters that get passed between/shared across these steps. Setting up a template using tools such as `cookiecutter `_ can be useful for more production style workflows that will be frequently reused. Additionally, make use of the built-in examples accessible from the merlin command line with ``merlin example``. + .. (machine learning applications on different data sets?) Use dry runs ``merlin run --dry --local`` to prototype without actually populating task broker's queues. 
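+
+For example (a sketch; ``my_spec.yaml`` is a placeholder for your own spec file):
+
+.. code-block:: bash
+
+    $ merlin run --dry --local my_spec.yaml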
Similarly, once the dry run prototype looks good, try it on a small number of parameters before throwing millions at it.

@@ -39,7 +40,7 @@ Make use of exit keys such as ``MERLIN_RESTART`` or ``MERLIN_RETRY`` in your ste
 Tips for debugging your workflows
 +++++++++++++++++++++++++++++++++

-The scripts defined in the workflow steps are also written to the output directories; this is a useful debugging tool as it can both catch parameter and variable replacement errors, as well as providing a quick way to reproduce, edit, and retry the step offline before fixing the step in the workflow specification. The ``.out`` and ``.err`` files log all of the output to catch any runtime errors. Additionally, you may need to grep for ``'WARNING'`` and ``'ERROR'`` in the worker logs.
+The scripts defined in the workflow steps are also written to the output directories; this is a useful debugging tool as it can both catch parameter and variable replacement errors, as well as provide a quick way to reproduce, edit, and retry the step offline before fixing the step in the workflow specification. The ``.out`` and ``.err`` files log all of the output to catch any runtime errors. Additionally, you may need to grep for ``'WARNING'`` and ``'ERROR'`` in the worker logs.

 .. where are the worker logs, and what might show up there that .out and .err won't see? -> these more developer focused output?
diff --git a/docs/source/modules/run_simulation/run_simulation.rst b/docs/source/modules/run_simulation/run_simulation.rst
index a30568946..f48d7dc97 100644
--- a/docs/source/modules/run_simulation/run_simulation.rst
+++ b/docs/source/modules/run_simulation/run_simulation.rst
@@ -8,7 +8,7 @@ Run a Real Simulation

 .. admonition:: Prerequisites

-      * :doc:`Module 0: Before you come<../before>`
+      * :doc:`Module 0: Before you start<../before>`
       * :doc:`Module 2: Installation<../installation/installation>`
       * :doc:`Module 3: Hello World<../hello_world/hello_world>`

@@ -53,7 +53,9 @@ This module will be going over:
 * Combining the outputs of these simulations into a an array
 * Predictive modeling and visualization

-Before moving on,
+.. _Before Moving On:
+
+Before Moving On
 ~~~~~~~~~~~~~~~~~

 check that the virtual environment with merlin installed is activated

@@ -65,14 +67,25 @@ and that redis server is set up using this command:

 This is covered more in depth here: :ref:`Verifying installation`

-
-Then use the ``merlin example`` to get the necessary files for this module.
-
+There are two ways to do this example: with docker and without docker. To go through the version with docker, get the necessary files for this module by running:
+
 .. code-block:: bash

     $ merlin example openfoam_wf

     $ cd openfoam_wf/
+
+For the version without docker, you should run:
+
+.. code-block:: bash
+
+    $ merlin example openfoam_wf_no_docker
+
+    $ cd openfoam_wf_no_docker/
+
+.. note::
+
+    From here on, this tutorial will focus solely on the docker version of running openfoam. However, the two versions are almost identical, so if you're using the no docker version you can still follow along; just check the openfoam_no_docker_template.yaml file in each step to see what differs.
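To see those per-step differences at a glance, a small ``difflib`` sketch can help
(hedged: ``openfoam_template.yaml`` is a guessed filename for the docker variant; only
``openfoam_no_docker_template.yaml`` is named above):

.. code-block:: python

    # Sketch: print a unified diff of the two example specs.
    import difflib

    with open("openfoam_wf/openfoam_template.yaml") as f:
        docker_spec = f.readlines()
    with open("openfoam_wf_no_docker/openfoam_no_docker_template.yaml") as f:
        no_docker_spec = f.readlines()

    print("".join(difflib.unified_diff(docker_spec, no_docker_spec,
                                       fromfile="docker", tofile="no_docker")))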
In the ``openfoam_wf`` directory you should see the following:

diff --git a/docs/source/server/commands.rst b/docs/source/server/commands.rst
new file mode 100644
index 000000000..dd8ca1b02
--- /dev/null
+++ b/docs/source/server/commands.rst
@@ -0,0 +1,87 @@
+Merlin Server Commands
+======================
+
+Merlin server has a list of commands for interacting with the broker and results server.
+These commands allow the user to manage and monitor the existing server and create
+new instances of servers if needed.
+
+Initializing Merlin Server (``merlin server init``)
+---------------------------------------------------
+The merlin server init command creates configurations for merlin server commands.
+
+A main merlin server configuration subdirectory is created at "~/.merlin/server". It contains the
+local merlin server configuration, as well as configurations for the different containerized
+services that merlin server supports; currently this includes singularity (docker and podman will
+be implemented in the future).
+
+A local merlin server configuration subdirectory called "merlin_server/" will also
+be created when this command is run. This will contain a container for merlin server and associated
+configuration files that might be used to start the server. For example, for a redis server a "redis.conf"
+will contain settings which will be dynamically loaded when the redis server is run. This local configuration
+will also contain information about currently running containers.
+
+Note: If there is an existing subdirectory containing a merlin server configuration then only
+missing files will be recreated. However, it is recommended that users back up their local configurations.
+
+
+Checking Merlin Server Status (``merlin server status``)
+--------------------------------------------------------
+
+Displays the current status of the merlin server.
+
+Starting up a Merlin Server (``merlin server start``)
+-----------------------------------------------------
+
+Starts the container located in the local merlin server configuration.
+
+Stopping an existing Merlin Server (``merlin server stop``)
+------------------------------------------------------------
+
+Stops any existing container being managed and monitored by merlin server.
+
+Restarting a Merlin Server instance (``merlin server restart``)
+---------------------------------------------------------------
+
+Restarts an existing container that is being managed and monitored by merlin server.
+
+Configuring a Merlin Server instance (``merlin server config``)
+---------------------------------------------------------------
+Placeholder for information regarding the ``merlin server config`` command; a usage sketch
+follows, and the possible flags are listed after it.
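As an illustration, adjusting the port and password file might look like the sketch
below; the values are placeholders, and only flags from the list that follows are used:

.. code-block:: python

    # Sketch: drive ``merlin server config`` from Python with placeholder
    # values for the port (-p) and password file (-pwd).
    import subprocess

    subprocess.run(
        ["merlin", "server", "config", "-p", "6379", "-pwd", "./redis.pass"],
        check=True,
    )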
+Possible Flags
+
+.. code-block:: none
+
+  -ip IPADDRESS, --ipaddress IPADDRESS
+                        Set the bound IP address for the merlin server
+                        container. (default: None)
+  -p PORT, --port PORT  Set the bound port for the merlin server container.
+                        (default: None)
+  -pwd PASSWORD, --password PASSWORD
+                        Set the password file to be used for merlin server
+                        container. (default: None)
+  --add-user ADD_USER ADD_USER
+                        Create a new user for merlin server instance. (Provide
+                        both username and password) (default: None)
+  --remove-user REMOVE_USER
+                        Remove an existing user. (default: None)
+  -d DIRECTORY, --directory DIRECTORY
+                        Set the working directory of the merlin server
+                        container. (default: None)
+  -ss SNAPSHOT_SECONDS, --snapshot-seconds SNAPSHOT_SECONDS
+                        Set the number of seconds merlin server waits before
+                        checking if a snapshot is needed. (default: None)
+  -sc SNAPSHOT_CHANGES, --snapshot-changes SNAPSHOT_CHANGES
+                        Set the number of changes that are required to be made
+                        to the merlin server before a snapshot is made.
+                        (default: None)
+  -sf SNAPSHOT_FILE, --snapshot-file SNAPSHOT_FILE
+                        Set the snapshot filename for database dumps.
+                        (default: None)
+  -am APPEND_MODE, --append-mode APPEND_MODE
+                        The appendonly mode to be set. The available options
+                        are always, everysec, no. (default: None)
+  -af APPEND_FILE, --append-file APPEND_FILE
+                        Set append only filename for merlin server container.
+                        (default: None)
+
diff --git a/docs/source/server/configuration.rst b/docs/source/server/configuration.rst
new file mode 100644
index 000000000..84429c079
--- /dev/null
+++ b/docs/source/server/configuration.rst
@@ -0,0 +1,75 @@
+Merlin Server Configuration
+===========================
+
+Below is a sample set of configurations for the merlin server command.
+
+Main Configuration ``~/.merlin/server/``
+----------------------------------------
+
+merlin_server.yaml
+
+.. code-block:: yaml
+
+  container:
+    # Select the format for the recipe e.g. singularity, docker, podman (currently singularity is the only working option.)
+    format: singularity
+    # The image name
+    image: redis_latest.sif
+    # The url to pull the image from
+    url: docker://redis
+    # The config file
+    config: redis.conf
+    # Subdirectory name to store configurations. Default: merlin_server/
+    config_dir: merlin_server/
+    # Process file containing information regarding the redis process
+    pfile: merlin_server.pf
+
+  process:
+    # Command for determining the process of the command
+    status: pgrep -P {pid} #ps -e | grep {pid}
+    # Command for killing process
+    kill: kill {pid}
+
+
+singularity.yaml
+
+.. code-block:: yaml
+
+  singularity:
+    command: singularity
+    # init_command: \{command} .. (optional or default)
+    run_command: \{command} run {image} {config}
+    stop_command: kill # \{command} (optional or kill default)
+    pull_command: \{command} pull {image} {url}
+
+
+Local Configuration ``merlin_server/``
+--------------------------------------
+
+redis.conf
+
+.. code-block:: yaml
+
+  bind 127.0.0.1 -::1
+  protected-mode yes
+  port 6379
+  logfile ""
+  dir ./
+  ...
+
+See the documentation on redis configuration `here `_ for more detail.
+
+merlin_server.pf
+
+.. code-block:: yaml
+
+  bits: '64'
+  commit: '00000000'
+  hostname: ubuntu
+  image_pid: '1111'
+  mode: standalone
+  modified: '0'
+  parent_pid: 1112
+  port: '6379'
+  version: 6.2.6
+
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
index 9f90a0b0d..0b69f9553 100644
--- a/docs/source/tutorial.rst
+++ b/docs/source/tutorial.rst
@@ -20,7 +20,7 @@ Finally we offer some tips and tricks for porting and scaling up your applicatio

 .. toctree::
    :maxdepth: 1
-   :caption: Before you come:
+   :caption: Before you begin:

    modules/before

diff --git a/lgtm.yml b/lgtm.yml
new file mode 100644
index 000000000..e3f53c87d
--- /dev/null
+++ b/lgtm.yml
@@ -0,0 +1,25 @@
+##########################################################################################
+# Customize file classifications.                                                        #
+# Results from files under any classifier will be excluded from LGTM                     #
+# statistics.                                                                            #
+##########################################################################################
+
+##########################################################################################
+# Use the `path_classifiers` block to define changes to the default classification of   #
+# files.
# +########################################################################################## + +path_classifiers: + test: + # Classify all files in the top-level directories tests/ as test code. + - exclude: + - tests + - merlin/examples + +######################################################################################### +# Use the `queries` block to change the default display of query results. # +######################################################################################### + +queries: + # Specifically hide the results of clear-text-logging-sensitive-data + - exclude: py/clear-text-logging-sensitive-data diff --git a/merlin/__init__.py b/merlin/__init__.py index fa68134d6..aa33bc4d2 100644 --- a/merlin/__init__.py +++ b/merlin/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # @@ -38,7 +38,7 @@ import sys -__version__ = "1.8.5" +__version__ = "1.9.0" VERSION = __version__ PATH_TO_PROJ = os.path.join(os.path.dirname(__file__), "") diff --git a/merlin/ascii_art.py b/merlin/ascii_art.py index de04bfc78..0b5971627 100644 --- a/merlin/ascii_art.py +++ b/merlin/ascii_art.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/celery.py b/merlin/celery.py index 3b8769bb5..ebac67eec 100644 --- a/merlin/celery.py +++ b/merlin/celery.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/__init__.py b/merlin/common/__init__.py index 13db3dccc..7155d0c5f 100644 --- a/merlin/common/__init__.py +++ b/merlin/common/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/abstracts/__init__.py b/merlin/common/abstracts/__init__.py index 13db3dccc..7155d0c5f 100644 --- a/merlin/common/abstracts/__init__.py +++ b/merlin/common/abstracts/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/abstracts/enums/__init__.py b/merlin/common/abstracts/enums/__init__.py index fa4a5f7c1..b02f9e909 100644 --- a/merlin/common/abstracts/enums/__init__.py +++ b/merlin/common/abstracts/enums/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/openfilelist.py b/merlin/common/openfilelist.py index 814fb1881..26c64e9e2 100644 --- a/merlin/common/openfilelist.py +++ b/merlin/common/openfilelist.py @@ -8,7 +8,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. 
# diff --git a/merlin/common/opennpylib.py b/merlin/common/opennpylib.py index a8be89486..8d9a89285 100644 --- a/merlin/common/opennpylib.py +++ b/merlin/common/opennpylib.py @@ -8,7 +8,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/sample_index.py b/merlin/common/sample_index.py index cb2a221d8..1859a55df 100644 --- a/merlin/common/sample_index.py +++ b/merlin/common/sample_index.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/sample_index_factory.py b/merlin/common/sample_index_factory.py index c3af9cdb6..6f3e58e9a 100644 --- a/merlin/common/sample_index_factory.py +++ b/merlin/common/sample_index_factory.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/security/__init__.py b/merlin/common/security/__init__.py index 13db3dccc..7155d0c5f 100644 --- a/merlin/common/security/__init__.py +++ b/merlin/common/security/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/security/encrypt.py b/merlin/common/security/encrypt.py index af29d394f..e378573c4 100644 --- a/merlin/common/security/encrypt.py +++ b/merlin/common/security/encrypt.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/security/encrypt_backend_traffic.py b/merlin/common/security/encrypt_backend_traffic.py index 8adc75228..16365e32c 100644 --- a/merlin/common/security/encrypt_backend_traffic.py +++ b/merlin/common/security/encrypt_backend_traffic.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/tasks.py b/merlin/common/tasks.py index f36134fec..9820e1041 100644 --- a/merlin/common/tasks.py +++ b/merlin/common/tasks.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/common/util_sampling.py b/merlin/common/util_sampling.py index bd2795915..027bcd291 100644 --- a/merlin/common/util_sampling.py +++ b/merlin/common/util_sampling.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/__init__.py b/merlin/config/__init__.py index ebbdc662e..7ade90e05 100644 --- a/merlin/config/__init__.py +++ b/merlin/config/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. 
+# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/broker.py b/merlin/config/broker.py index 72cd6ec27..a546591f1 100644 --- a/merlin/config/broker.py +++ b/merlin/config/broker.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/celeryconfig.py b/merlin/config/celeryconfig.py index bf58602d5..a8c0a1ef2 100644 --- a/merlin/config/celeryconfig.py +++ b/merlin/config/celeryconfig.py @@ -2,6 +2,36 @@ Default celery configuration for merlin """ +############################################################################### +# Copyright (c) 2022, Lawrence Livermore National Security, LLC. +# Produced at the Lawrence Livermore National Laboratory +# Written by the Merlin dev team, listed in the CONTRIBUTORS file. +# +# +# LLNL-CODE-797170 +# All rights reserved. +# This file is part of Merlin, Version: 1.9.0. +# +# For details, see https://github.com/LLNL/merlin. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +############################################################################### + from merlin.log_formatter import FORMATS diff --git a/merlin/config/configfile.py b/merlin/config/configfile.py index cc3dedd13..5dd08ddfb 100644 --- a/merlin/config/configfile.py +++ b/merlin/config/configfile.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/results_backend.py b/merlin/config/results_backend.py index 843930cac..335bd05f5 100644 --- a/merlin/config/results_backend.py +++ b/merlin/config/results_backend.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/config/utils.py b/merlin/config/utils.py index 12f7283bb..cd47be750 100644 --- a/merlin/config/utils.py +++ b/merlin/config/utils.py @@ -1,3 +1,33 @@ +############################################################################### +# Copyright (c) 2022, Lawrence Livermore National Security, LLC. 
+# Produced at the Lawrence Livermore National Laboratory +# Written by the Merlin dev team, listed in the CONTRIBUTORS file. +# +# +# LLNL-CODE-797170 +# All rights reserved. +# This file is part of Merlin, Version: 1.9.0. +# +# For details, see https://github.com/LLNL/merlin. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +############################################################################### + import enum from typing import List diff --git a/merlin/data/celery/__init__.py b/merlin/data/celery/__init__.py index 13db3dccc..7155d0c5f 100644 --- a/merlin/data/celery/__init__.py +++ b/merlin/data/celery/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/display.py b/merlin/display.py index 1cd9af01e..3b1469f70 100644 --- a/merlin/display.py +++ b/merlin/display.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # @@ -88,7 +88,12 @@ def check_server_access(sconf): def _examine_connection(s, sconf, excpts): connect_timeout = 60 try: - conn = Connection(sconf[s]) + ssl_conf = None + if "broker" in s: + ssl_conf = broker.get_ssl_config() + if "results" in s: + ssl_conf = results_backend.get_ssl_config() + conn = Connection(sconf[s], ssl=ssl_conf) conn_check = ConnProcess(target=conn.connect) conn_check.start() counter = 0 diff --git a/merlin/examples/__init__.py b/merlin/examples/__init__.py index 13db3dccc..7155d0c5f 100644 --- a/merlin/examples/__init__.py +++ b/merlin/examples/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/examples/examples.py b/merlin/examples/examples.py index 686f04013..39c5cf2e0 100644 --- a/merlin/examples/examples.py +++ b/merlin/examples/examples.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. 
# diff --git a/merlin/examples/generator.py b/merlin/examples/generator.py index d0cf1acb9..d59fc8511 100644 --- a/merlin/examples/generator.py +++ b/merlin/examples/generator.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/examples/workflows/feature_demo/feature_demo.yaml b/merlin/examples/workflows/feature_demo/feature_demo.yaml index b4bc1ca46..b07107e7b 100644 --- a/merlin/examples/workflows/feature_demo/feature_demo.yaml +++ b/merlin/examples/workflows/feature_demo/feature_demo.yaml @@ -17,15 +17,33 @@ env: HELLO: $(SCRIPTS)/hello_world.py FEATURES: $(SCRIPTS)/features.json +user: + study: + run: + hello: &hello_run + cmd: | + python3 $(HELLO) -outfile hello_world_output_$(MERLIN_SAMPLE_ID).json $(X0) $(X1) $(X2) + max_retries: 1 + python3: + run: &python3_run + cmd: | + print("OMG is this in python?") + print("Variable X2 is $(X2)") + shell: /usr/bin/env python3 + python2: + run: &python2_run + cmd: | + print "OMG is this in python2? Change is bad." + print "Variable X2 is $(X2)" + shell: /usr/bin/env python2 + study: - name: hello description: | process a sample with hello world run: - cmd: | - python3 $(HELLO) -outfile hello_world_output_$(MERLIN_SAMPLE_ID).json $(X0) $(X1) $(X2) + <<: *hello_run task_queue: hello_queue - max_retries: 1 - name: collect description: | @@ -89,20 +107,14 @@ study: description: | do something in python run: - cmd: | - print("OMG is this in python?") - print("Variable X2 is $(X2)") - shell: /usr/bin/env python3 + <<: *python3_run task_queue: pyth3_q - name: python2_hello description: | do something in python2, because change is bad run: - cmd: | - print "OMG is this in python2? Change is bad." 
- print "Variable X2 is $(X2)" - shell: /usr/bin/env python2 + <<: *python2_run task_queue: pyth2_hello global.parameters: diff --git a/merlin/examples/workflows/feature_demo/scripts/hello_world.py b/merlin/examples/workflows/feature_demo/scripts/hello_world.py index ab14bedf4..634dfe417 100644 --- a/merlin/examples/workflows/feature_demo/scripts/hello_world.py +++ b/merlin/examples/workflows/feature_demo/scripts/hello_world.py @@ -1,5 +1,6 @@ import argparse import json +import sys def process_args(args): @@ -25,9 +26,14 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - process_args(args) + try: + parser = setup_argparse() + args = parser.parse_args() + process_args(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/flux/flux_par.yaml b/merlin/examples/workflows/flux/flux_par.yaml index d401a0270..1fee4131e 100644 --- a/merlin/examples/workflows/flux/flux_par.yaml +++ b/merlin/examples/workflows/flux/flux_par.yaml @@ -6,6 +6,7 @@ batch: type: flux nodes: 1 queue: pbatch + flux_exec: flux exec -r "0-1" flux_start_opts: -o,-S,log-filename=flux_par.out env: diff --git a/merlin/examples/workflows/flux/scripts/make_samples.py b/merlin/examples/workflows/flux/scripts/make_samples.py index e6c807bc9..8ec1c7e2f 100644 --- a/merlin/examples/workflows/flux/scripts/make_samples.py +++ b/merlin/examples/workflows/flux/scripts/make_samples.py @@ -1,5 +1,6 @@ import argparse import ast +import sys import numpy as np @@ -51,9 +52,14 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - process_args(args) + try: + parser = setup_argparse() + args = parser.parse_args() + process_args(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/hpc_demo/cumulative_sample_processor.py b/merlin/examples/workflows/hpc_demo/cumulative_sample_processor.py index 7d06ab594..43b732ae2 100644 --- a/merlin/examples/workflows/hpc_demo/cumulative_sample_processor.py +++ b/merlin/examples/workflows/hpc_demo/cumulative_sample_processor.py @@ -1,5 +1,6 @@ import argparse import os +import sys from concurrent.futures import ProcessPoolExecutor import matplotlib.pyplot as plt @@ -55,45 +56,50 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() + try: + parser = setup_argparse() + args = parser.parse_args() - # Load all iterations' data into single pandas dataframe for further analysis - all_iter_df = load_samples(args.sample_file_paths, args.np) + # Load all iterations' data into single pandas dataframe for further analysis + all_iter_df = load_samples(args.sample_file_paths, args.np) - # PLOTS: - # counts vs index for each iter range (1, [1,2], [1-3], [1-4], ...) - # num names vs iter - # median, min, max counts vs iter -> same plot - fig, ax = plt.subplots(nrows=2, ncols=1, constrained_layout=True, sharex=True) + # PLOTS: + # counts vs index for each iter range (1, [1,2], [1-3], [1-4], ...) 
+ # num names vs iter + # median, min, max counts vs iter -> same plot + fig, ax = plt.subplots(nrows=2, ncols=1, constrained_layout=True, sharex=True) - iterations = sorted(all_iter_df.Iter.unique()) + iterations = sorted(all_iter_df.Iter.unique()) - max_counts = [] - min_counts = [] - med_counts = [] - unique_names = [] + max_counts = [] + min_counts = [] + med_counts = [] + unique_names = [] - for it in iterations: - max_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].max()) - min_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].min()) - med_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].median()) + for it in iterations: + max_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].max()) + min_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].min()) + med_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].median()) - unique_names.append(len(all_iter_df[all_iter_df["Iter"] <= it].index.value_counts())) + unique_names.append(len(all_iter_df[all_iter_df["Iter"] <= it].index.value_counts())) - ax[0].plot(iterations, min_counts, label="Minimum Occurances") - ax[0].plot(iterations, max_counts, label="Maximum Occurances") + ax[0].plot(iterations, min_counts, label="Minimum Occurances") + ax[0].plot(iterations, max_counts, label="Maximum Occurances") - ax[0].plot(iterations, med_counts, label="Median Occurances") + ax[0].plot(iterations, med_counts, label="Median Occurances") - ax[0].set_ylabel("Counts") - ax[0].legend() + ax[0].set_ylabel("Counts") + ax[0].legend() - ax[1].set_xlabel("Iteration") - ax[1].set_ylabel("Unique Names") - ax[1].plot(iterations, unique_names) + ax[1].set_xlabel("Iteration") + ax[1].set_ylabel("Unique Names") + ax[1].plot(iterations, unique_names) - fig.savefig(args.hardcopy, dpi=150) + fig.savefig(args.hardcopy, dpi=150) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/hpc_demo/faker_sample.py b/merlin/examples/workflows/hpc_demo/faker_sample.py index ee8bf2f5c..be16be5de 100644 --- a/merlin/examples/workflows/hpc_demo/faker_sample.py +++ b/merlin/examples/workflows/hpc_demo/faker_sample.py @@ -1,4 +1,5 @@ import argparse +import sys from faker import Faker @@ -31,9 +32,14 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - process_args(args) + try: + parser = setup_argparse() + args = parser.parse_args() + process_args(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/hpc_demo/sample_collector.py b/merlin/examples/workflows/hpc_demo/sample_collector.py index f62111e8e..ad06dc6c5 100644 --- a/merlin/examples/workflows/hpc_demo/sample_collector.py +++ b/merlin/examples/workflows/hpc_demo/sample_collector.py @@ -1,5 +1,6 @@ import argparse import os +import sys from concurrent.futures import ProcessPoolExecutor @@ -36,12 +37,17 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - - # Collect sample files into single file - sample_paths = [sample_path for sample_path in args.sample_file_paths] - serialize_samples(sample_paths, args.outfile, args.np) + try: + parser = setup_argparse() + args = parser.parse_args() + + # Collect sample files into single file + sample_paths = [sample_path for sample_path in args.sample_file_paths] + serialize_samples(sample_paths, args.outfile, args.np) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) 
if __name__ == "__main__": diff --git a/merlin/examples/workflows/hpc_demo/sample_processor.py b/merlin/examples/workflows/hpc_demo/sample_processor.py index 9ec0951e9..8523dcc80 100644 --- a/merlin/examples/workflows/hpc_demo/sample_processor.py +++ b/merlin/examples/workflows/hpc_demo/sample_processor.py @@ -1,6 +1,7 @@ import argparse import os import pathlib +import sys import pandas as pd @@ -28,25 +29,30 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - - # Collect the samples - samples = load_samples(args.sample_file_paths) - - # Count up the occurences - namesdf = pd.DataFrame({"Name": samples}) - - names = namesdf["Name"].value_counts() - - # Serialize processed samples - # create directory if it doesn't exist already - abspath = os.path.abspath(args.results) - absdir = os.path.dirname(abspath) - if not os.path.isdir(absdir): - pathlib.Path(absdir).mkdir(parents=True, exist_ok=True) - - names.to_json(args.results) + try: + parser = setup_argparse() + args = parser.parse_args() + + # Collect the samples + samples = load_samples(args.sample_file_paths) + + # Count up the occurences + namesdf = pd.DataFrame({"Name": samples}) + + names = namesdf["Name"].value_counts() + + # Serialize processed samples + # create directory if it doesn't exist already + abspath = os.path.abspath(args.results) + absdir = os.path.dirname(abspath) + if not os.path.isdir(absdir): + pathlib.Path(absdir).mkdir(parents=True, exist_ok=True) + + names.to_json(args.results) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/iterative_demo/cumulative_sample_processor.py b/merlin/examples/workflows/iterative_demo/cumulative_sample_processor.py index 7d06ab594..43b732ae2 100644 --- a/merlin/examples/workflows/iterative_demo/cumulative_sample_processor.py +++ b/merlin/examples/workflows/iterative_demo/cumulative_sample_processor.py @@ -1,5 +1,6 @@ import argparse import os +import sys from concurrent.futures import ProcessPoolExecutor import matplotlib.pyplot as plt @@ -55,45 +56,50 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() + try: + parser = setup_argparse() + args = parser.parse_args() - # Load all iterations' data into single pandas dataframe for further analysis - all_iter_df = load_samples(args.sample_file_paths, args.np) + # Load all iterations' data into single pandas dataframe for further analysis + all_iter_df = load_samples(args.sample_file_paths, args.np) - # PLOTS: - # counts vs index for each iter range (1, [1,2], [1-3], [1-4], ...) - # num names vs iter - # median, min, max counts vs iter -> same plot - fig, ax = plt.subplots(nrows=2, ncols=1, constrained_layout=True, sharex=True) + # PLOTS: + # counts vs index for each iter range (1, [1,2], [1-3], [1-4], ...) 
+ # num names vs iter + # median, min, max counts vs iter -> same plot + fig, ax = plt.subplots(nrows=2, ncols=1, constrained_layout=True, sharex=True) - iterations = sorted(all_iter_df.Iter.unique()) + iterations = sorted(all_iter_df.Iter.unique()) - max_counts = [] - min_counts = [] - med_counts = [] - unique_names = [] + max_counts = [] + min_counts = [] + med_counts = [] + unique_names = [] - for it in iterations: - max_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].max()) - min_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].min()) - med_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].median()) + for it in iterations: + max_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].max()) + min_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].min()) + med_counts.append(all_iter_df[all_iter_df["Iter"] <= it]["Count"].median()) - unique_names.append(len(all_iter_df[all_iter_df["Iter"] <= it].index.value_counts())) + unique_names.append(len(all_iter_df[all_iter_df["Iter"] <= it].index.value_counts())) - ax[0].plot(iterations, min_counts, label="Minimum Occurances") - ax[0].plot(iterations, max_counts, label="Maximum Occurances") + ax[0].plot(iterations, min_counts, label="Minimum Occurances") + ax[0].plot(iterations, max_counts, label="Maximum Occurances") - ax[0].plot(iterations, med_counts, label="Median Occurances") + ax[0].plot(iterations, med_counts, label="Median Occurances") - ax[0].set_ylabel("Counts") - ax[0].legend() + ax[0].set_ylabel("Counts") + ax[0].legend() - ax[1].set_xlabel("Iteration") - ax[1].set_ylabel("Unique Names") - ax[1].plot(iterations, unique_names) + ax[1].set_xlabel("Iteration") + ax[1].set_ylabel("Unique Names") + ax[1].plot(iterations, unique_names) - fig.savefig(args.hardcopy, dpi=150) + fig.savefig(args.hardcopy, dpi=150) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/iterative_demo/faker_sample.py b/merlin/examples/workflows/iterative_demo/faker_sample.py index ee8bf2f5c..be16be5de 100644 --- a/merlin/examples/workflows/iterative_demo/faker_sample.py +++ b/merlin/examples/workflows/iterative_demo/faker_sample.py @@ -1,4 +1,5 @@ import argparse +import sys from faker import Faker @@ -31,9 +32,14 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - process_args(args) + try: + parser = setup_argparse() + args = parser.parse_args() + process_args(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/iterative_demo/sample_collector.py b/merlin/examples/workflows/iterative_demo/sample_collector.py index f62111e8e..ad06dc6c5 100644 --- a/merlin/examples/workflows/iterative_demo/sample_collector.py +++ b/merlin/examples/workflows/iterative_demo/sample_collector.py @@ -1,5 +1,6 @@ import argparse import os +import sys from concurrent.futures import ProcessPoolExecutor @@ -36,12 +37,17 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - - # Collect sample files into single file - sample_paths = [sample_path for sample_path in args.sample_file_paths] - serialize_samples(sample_paths, args.outfile, args.np) + try: + parser = setup_argparse() + args = parser.parse_args() + + # Collect sample files into single file + sample_paths = [sample_path for sample_path in args.sample_file_paths] + serialize_samples(sample_paths, args.outfile, args.np) + sys.exit() + 
except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/iterative_demo/sample_processor.py b/merlin/examples/workflows/iterative_demo/sample_processor.py index 9ec0951e9..8523dcc80 100644 --- a/merlin/examples/workflows/iterative_demo/sample_processor.py +++ b/merlin/examples/workflows/iterative_demo/sample_processor.py @@ -1,6 +1,7 @@ import argparse import os import pathlib +import sys import pandas as pd @@ -28,25 +29,30 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - - # Collect the samples - samples = load_samples(args.sample_file_paths) - - # Count up the occurences - namesdf = pd.DataFrame({"Name": samples}) - - names = namesdf["Name"].value_counts() - - # Serialize processed samples - # create directory if it doesn't exist already - abspath = os.path.abspath(args.results) - absdir = os.path.dirname(abspath) - if not os.path.isdir(absdir): - pathlib.Path(absdir).mkdir(parents=True, exist_ok=True) - - names.to_json(args.results) + try: + parser = setup_argparse() + args = parser.parse_args() + + # Collect the samples + samples = load_samples(args.sample_file_paths) + + # Count up the occurences + namesdf = pd.DataFrame({"Name": samples}) + + names = namesdf["Name"].value_counts() + + # Serialize processed samples + # create directory if it doesn't exist already + abspath = os.path.abspath(args.results) + absdir = os.path.dirname(abspath) + if not os.path.isdir(absdir): + pathlib.Path(absdir).mkdir(parents=True, exist_ok=True) + + names.to_json(args.results) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/lsf/scripts/make_samples.py b/merlin/examples/workflows/lsf/scripts/make_samples.py index e6c807bc9..8ec1c7e2f 100644 --- a/merlin/examples/workflows/lsf/scripts/make_samples.py +++ b/merlin/examples/workflows/lsf/scripts/make_samples.py @@ -1,5 +1,6 @@ import argparse import ast +import sys import numpy as np @@ -51,9 +52,14 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - process_args(args) + try: + parser = setup_argparse() + args = parser.parse_args() + process_args(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/null_spec/scripts/read_output.py b/merlin/examples/workflows/null_spec/scripts/read_output.py index 7c0f8017e..278283bd7 100644 --- a/merlin/examples/workflows/null_spec/scripts/read_output.py +++ b/merlin/examples/workflows/null_spec/scripts/read_output.py @@ -118,11 +118,16 @@ def start_sample1_time(): def main(): - single_task_times() - merlin_run_time() - start_verify_time() - start_run_workers_time() - start_sample1_time() + try: + single_task_times() + merlin_run_time() + start_verify_time() + start_run_workers_time() + start_sample1_time() + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/remote_feature_demo/scripts/hello_world.py b/merlin/examples/workflows/remote_feature_demo/scripts/hello_world.py index 232c43e86..3b9f62df1 100644 --- a/merlin/examples/workflows/remote_feature_demo/scripts/hello_world.py +++ b/merlin/examples/workflows/remote_feature_demo/scripts/hello_world.py @@ -1,5 +1,6 @@ import argparse import json +import sys from typing import Dict @@ -35,9 +36,14 @@ def main(): """ Primary coordinating method for collecting args and dumping them to a 
json file for later examination. """ - parser: argparse.ArgumentParser = setup_argparse() - args: argparse.Namespace = parser.parse_args() - process_args(args) + try: + parser: argparse.ArgumentParser = setup_argparse() + args: argparse.Namespace = parser.parse_args() + process_args(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/restart/scripts/make_samples.py b/merlin/examples/workflows/restart/scripts/make_samples.py index e6c807bc9..8ec1c7e2f 100644 --- a/merlin/examples/workflows/restart/scripts/make_samples.py +++ b/merlin/examples/workflows/restart/scripts/make_samples.py @@ -1,5 +1,6 @@ import argparse import ast +import sys import numpy as np @@ -51,9 +52,14 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - process_args(args) + try: + parser = setup_argparse() + args = parser.parse_args() + process_args(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/restart_delay/scripts/make_samples.py b/merlin/examples/workflows/restart_delay/scripts/make_samples.py index e6c807bc9..8ec1c7e2f 100644 --- a/merlin/examples/workflows/restart_delay/scripts/make_samples.py +++ b/merlin/examples/workflows/restart_delay/scripts/make_samples.py @@ -1,5 +1,6 @@ import argparse import ast +import sys import numpy as np @@ -51,9 +52,14 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - process_args(args) + try: + parser = setup_argparse() + args = parser.parse_args() + process_args(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/examples/workflows/slurm/scripts/make_samples.py b/merlin/examples/workflows/slurm/scripts/make_samples.py index e6c807bc9..8ec1c7e2f 100644 --- a/merlin/examples/workflows/slurm/scripts/make_samples.py +++ b/merlin/examples/workflows/slurm/scripts/make_samples.py @@ -1,5 +1,6 @@ import argparse import ast +import sys import numpy as np @@ -51,9 +52,14 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - process_args(args) + try: + parser = setup_argparse() + args = parser.parse_args() + process_args(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/exceptions/__init__.py b/merlin/exceptions/__init__.py index 20970fe7a..269bf6097 100644 --- a/merlin/exceptions/__init__.py +++ b/merlin/exceptions/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/log_formatter.py b/merlin/log_formatter.py index 1f6befa88..33c6776e4 100644 --- a/merlin/log_formatter.py +++ b/merlin/log_formatter.py @@ -8,7 +8,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/main.py b/merlin/main.py index 6e491ba1a..9e207b749 100644 --- a/merlin/main.py +++ b/merlin/main.py @@ -8,7 +8,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. 
#
@@ -52,6 +52,7 @@
 from merlin.ascii_art import banner_small
 from merlin.examples.generator import list_examples, setup_example
 from merlin.log_formatter import setup_logging
+from merlin.server.server_commands import config_server, init_server, restart_server, start_server, status_server, stop_server
 from merlin.spec.expansion import RESERVED, get_spec_with_expansion
 from merlin.spec.specification import MerlinSpec
 from merlin.study.study import MerlinStudy
@@ -342,6 +343,21 @@ def process_monitor(args):
     LOG.info("Monitor: ... stop condition met")


+def process_server(args: Namespace):
+    if args.commands == "init":
+        init_server()
+    elif args.commands == "start":
+        start_server()
+    elif args.commands == "stop":
+        stop_server()
+    elif args.commands == "status":
+        status_server()
+    elif args.commands == "restart":
+        restart_server()
+    elif args.commands == "config":
+        config_server(args)
+
+
 def setup_argparse() -> None:
     """
     Setup argparse and any CLI options we want available via the package.
@@ -551,6 +567,143 @@ def setup_argparse() -> None:

     generate_diagnostic_parsers(subparsers)

+    # merlin server
+    server: ArgumentParser = subparsers.add_parser(
+        "server",
+        help="Manage the broker and results server for merlin workflows.",
+        formatter_class=ArgumentDefaultsHelpFormatter,
+    )
+    server.set_defaults(func=process_server)
+
+    server_commands: ArgumentParser = server.add_subparsers(dest="commands")
+
+    server_init: ArgumentParser = server_commands.add_parser(
+        "init",
+        help="Initialize merlin server resources.",
+        description="Initialize merlin server",
+        formatter_class=ArgumentDefaultsHelpFormatter,
+    )
+    server_init.set_defaults(func=process_server)
+
+    server_status: ArgumentParser = server_commands.add_parser(
+        "status",
+        help="View status of the current server containers.",
+        description="View status",
+        formatter_class=ArgumentDefaultsHelpFormatter,
+    )
+    server_status.set_defaults(func=process_server)
+
+    server_start: ArgumentParser = server_commands.add_parser(
+        "start",
+        help="Start a containerized server to be used as a broker and results server.",
+        description="Start server",
+        formatter_class=ArgumentDefaultsHelpFormatter,
+    )
+    server_start.set_defaults(func=process_server)
+
+    server_stop: ArgumentParser = server_commands.add_parser(
+        "stop",
+        help="Stop any currently running instance of the redis container.",
+        description="Stop server.",
+        formatter_class=ArgumentDefaultsHelpFormatter,
+    )
+    server_stop.set_defaults(func=process_server)
+
+    server_restart: ArgumentParser = server_commands.add_parser(
+        "restart",
+        help="Restart the merlin server instance.",
+        description="Restart server.",
+        formatter_class=ArgumentDefaultsHelpFormatter,
+    )
+    server_restart.set_defaults(func=process_server)
+
+    server_config: ArgumentParser = server_commands.add_parser(
+        "config",
+        help="Make configuration changes to the merlin server instance.",
+        description="Config server.",
+        formatter_class=ArgumentDefaultsHelpFormatter,
+    )
+    server_config.add_argument(
+        "-ip",
+        "--ipaddress",
+        action="store",
+        type=str,
+        # default="127.0.0.1",
+        help="Set the bound IP address for the merlin server container.",
+    )
+    server_config.add_argument(
+        "-p",
+        "--port",
+        action="store",
+        type=int,
+        # default=6379,
+        help="Set the bound port for the merlin server container.",
+    )
+    server_config.add_argument(
+        "-pwd",
+        "--password",
+        action="store",
+        type=str,
+        # default="~/.merlin/redis.pass",
+        help="Set the password file to be used for merlin server container.",
+    )
+    server_config.add_argument(
+ "--add-user", + action="store", + nargs=2, + type=str, + help="Create a new user for merlin server instance. (Provide both username and password)", + ) + server_config.add_argument("--remove-user", action="store", type=str, help="Remove an exisiting user.") + server_config.add_argument( + "-d", + "--directory", + action="store", + type=str, + # default="./", + help="Set the working directory of the merlin server container.", + ) + server_config.add_argument( + "-ss", + "--snapshot-seconds", + action="store", + type=int, + # default=300, + help="Set the number of seconds merlin server waits before checking if a snapshot is needed.", + ) + server_config.add_argument( + "-sc", + "--snapshot-changes", + action="store", + type=int, + # default=100, + help="Set the number of changes that are required to be made to the merlin server before a snapshot is made.", + ) + server_config.add_argument( + "-sf", + "--snapshot-file", + action="store", + type=str, + # default="dump.db", + help="Set the snapshot filename for database dumps.", + ) + server_config.add_argument( + "-am", + "--append-mode", + action="store", + type=str, + # default="everysec", + help="The appendonly mode to be set. The avaiable options are always, everysec, no.", + ) + server_config.add_argument( + "-af", + "--append-file", + action="store", + type=str, + # default="appendonly.aof", + help="Set append only filename for merlin server container.", + ) + return parser @@ -748,11 +901,11 @@ def main(): except Exception as excpt: # pylint: disable=broad-except LOG.debug(traceback.format_exc()) LOG.error(str(excpt)) - return 1 + sys.exit(1) # All paths in a function ought to return an exit code, or none of them should. Given the # distributed nature of Merlin, maybe it doesn't make sense for it to exit 0 until the work is completed, but # if the work is dispatched with no errors, that is a 'successful' Merlin run - any other failures are runtime. - return 0 + sys.exit() if __name__ == "__main__": diff --git a/merlin/merlin_templates.py b/merlin/merlin_templates.py index a1e80fea1..4195ceacc 100644 --- a/merlin/merlin_templates.py +++ b/merlin/merlin_templates.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # @@ -33,6 +33,7 @@ """ import argparse import logging +import sys from merlin.ascii_art import banner_small from merlin.log_formatter import setup_logging @@ -57,10 +58,15 @@ def setup_argparse(): def main(): - parser = setup_argparse() - args = parser.parse_args() - setup_logging(logger=LOG, log_level=DEFAULT_LOG_LEVEL, colors=True) - args.func(args) + try: + parser = setup_argparse() + args = parser.parse_args() + setup_logging(logger=LOG, log_level=DEFAULT_LOG_LEVEL, colors=True) + args.func(args) + sys.exit() + except Exception as ex: + print(ex) + sys.exit(1) if __name__ == "__main__": diff --git a/merlin/router.py b/merlin/router.py index 90aa9db38..8858dcfad 100644 --- a/merlin/router.py +++ b/merlin/router.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/server/docker.yaml b/merlin/server/docker.yaml new file mode 100644 index 000000000..d7d5bc00a --- /dev/null +++ b/merlin/server/docker.yaml @@ -0,0 +1,6 @@ +docker: + command: docker + # init_command: ? 
+ run_command: \{command} run --name {name} -d {image} + stop_command: \{command} stop {name} + pull_command: \{command} pull {url} diff --git a/merlin/server/merlin_server.yaml b/merlin/server/merlin_server.yaml new file mode 100644 index 000000000..01b3c7ddb --- /dev/null +++ b/merlin/server/merlin_server.yaml @@ -0,0 +1,27 @@ +container: + # Select the format for the recipe e.g. singularity, docker, podman (currently singularity is the only working option.) + format: singularity + #Type of container that is used + image_type: redis + # The image name + image: redis_latest.sif + # The url to pull the image from + url: docker://redis + # The config file + config: redis.conf + # Directory name to store configurations Default: ./merlin_server/ + config_dir: ./merlin_server/ + # Process file containing information regarding the redis process + pfile: merlin_server.pf + # Password file to be used for accessing container + pass_file: redis.pass + # Password command for generating password file + # pass_command: date +%s | sha256sum + # Users file to track concurrent users. + user_file: redis.users + +process: + # Command for determining the process of the command + status: pgrep -P {pid} #ps -e | grep {pid} + # Command for killing process + kill: kill {pid} diff --git a/merlin/server/podman.yaml b/merlin/server/podman.yaml new file mode 100644 index 000000000..1632840bb --- /dev/null +++ b/merlin/server/podman.yaml @@ -0,0 +1,6 @@ +podman: + command: podman + # init_command: \{command} .. (optional or default) + run_command: \{command} run --name {name} -d {image} + stop_command: \{command} stop {name} + pull_command: \{command} pull {url} diff --git a/merlin/server/redis.conf b/merlin/server/redis.conf new file mode 100644 index 000000000..893677763 --- /dev/null +++ b/merlin/server/redis.conf @@ -0,0 +1,2051 @@ +# Redis configuration file example. +# +# Note that in order to read the configuration file, Redis must be +# started with the file path as first argument: +# +# ./redis-server /path/to/redis.conf + +# Note on units: when memory size is needed, it is possible to specify +# it in the usual form of 1k 5GB 4M and so forth: +# +# 1k => 1000 bytes +# 1kb => 1024 bytes +# 1m => 1000000 bytes +# 1mb => 1024*1024 bytes +# 1g => 1000000000 bytes +# 1gb => 1024*1024*1024 bytes +# +# units are case insensitive so 1GB 1Gb 1gB are all the same. + +################################## INCLUDES ################################### + +# Include one or more other config files here. This is useful if you +# have a standard template that goes to all Redis servers but also need +# to customize a few per-server settings. Include files can include +# other files, so use this wisely. +# +# Note that option "include" won't be rewritten by command "CONFIG REWRITE" +# from admin or Redis Sentinel. Since Redis always uses the last processed +# line as value of a configuration directive, you'd better put includes +# at the beginning of this file to avoid overwriting config change at runtime. +# +# If instead you are interested in using includes to override configuration +# options, it is better to use include as the last line. +# +# include /path/to/local.conf +# include /path/to/other.conf + +################################## MODULES ##################################### + +# Load modules at startup. If the server is not able to load modules +# it will abort. It is possible to use multiple loadmodule directives. 
+# +# loadmodule /path/to/my_module.so +# loadmodule /path/to/other_module.so + +################################## NETWORK ##################################### + +# By default, if no "bind" configuration directive is specified, Redis listens +# for connections from all available network interfaces on the host machine. +# It is possible to listen to just one or multiple selected interfaces using +# the "bind" configuration directive, followed by one or more IP addresses. +# Each address can be prefixed by "-", which means that redis will not fail to +# start if the address is not available. Being not available only refers to +# addresses that does not correspond to any network interfece. Addresses that +# are already in use will always fail, and unsupported protocols will always BE +# silently skipped. +# +# Examples: +# +# bind 192.168.1.100 10.0.0.1 # listens on two specific IPv4 addresses +# bind 127.0.0.1 ::1 # listens on loopback IPv4 and IPv6 +# bind * -::* # like the default, all available interfaces +# +# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the +# internet, binding to all the interfaces is dangerous and will expose the +# instance to everybody on the internet. So by default we uncomment the +# following bind directive, that will force Redis to listen only on the +# IPv4 and IPv6 (if available) loopback interface addresses (this means Redis +# will only be able to accept client connections from the same host that it is +# running on). +# +# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES +# JUST COMMENT OUT THE FOLLOWING LINE. +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +bind 127.0.0.1 + +# Protected mode is a layer of security protection, in order to avoid that +# Redis instances left open on the internet are accessed and exploited. +# +# When protected mode is on and if: +# +# 1) The server is not binding explicitly to a set of addresses using the +# "bind" directive. +# 2) No password is configured. +# +# The server only accepts connections from clients connecting from the +# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain +# sockets. +# +# By default protected mode is enabled. You should disable it only if +# you are sure you want clients from other hosts to connect to Redis +# even if no authentication is configured, nor a specific set of interfaces +# are explicitly listed using the "bind" directive. +protected-mode yes + +# Accept connections on the specified port, default is 6379 (IANA #815344). +# If port 0 is specified Redis will not listen on a TCP socket. +port 6379 + +# TCP listen() backlog. +# +# In high requests-per-second environments you need a high backlog in order +# to avoid slow clients connection issues. Note that the Linux kernel +# will silently truncate it to the value of /proc/sys/net/core/somaxconn so +# make sure to raise both the value of somaxconn and tcp_max_syn_backlog +# in order to get the desired effect. +tcp-backlog 511 + +# Unix socket. +# +# Specify the path for the Unix socket that will be used to listen for +# incoming connections. There is no default, so Redis will not listen +# on a unix socket when not specified. +# +# unixsocket /run/redis.sock +# unixsocketperm 700 + +# Close the connection after a client is idle for N seconds (0 to disable) +timeout 0 + +# TCP keepalive. +# +# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence +# of communication. This is useful for two reasons: +# +# 1) Detect dead peers. 
+# 2) Force network equipment in the middle to consider the connection to be +# alive. +# +# On Linux, the specified value (in seconds) is the period used to send ACKs. +# Note that to close the connection the double of the time is needed. +# On other kernels the period depends on the kernel configuration. +# +# A reasonable value for this option is 300 seconds, which is the new +# Redis default starting with Redis 3.2.1. +tcp-keepalive 300 + +################################# TLS/SSL ##################################### + +# By default, TLS/SSL is disabled. To enable it, the "tls-port" configuration +# directive can be used to define TLS-listening ports. To enable TLS on the +# default port, use: +# +# port 0 +# tls-port 6379 + +# Configure a X.509 certificate and private key to use for authenticating the +# server to connected clients, masters or cluster peers. These files should be +# PEM formatted. +# +# tls-cert-file redis.crt +# tls-key-file redis.key +# +# If the key file is encrypted using a passphrase, it can be included here +# as well. +# +# tls-key-file-pass secret + +# Normally Redis uses the same certificate for both server functions (accepting +# connections) and client functions (replicating from a master, establishing +# cluster bus connections, etc.). +# +# Sometimes certificates are issued with attributes that designate them as +# client-only or server-only certificates. In that case it may be desired to use +# different certificates for incoming (server) and outgoing (client) +# connections. To do that, use the following directives: +# +# tls-client-cert-file client.crt +# tls-client-key-file client.key +# +# If the key file is encrypted using a passphrase, it can be included here +# as well. +# +# tls-client-key-file-pass secret + +# Configure a DH parameters file to enable Diffie-Hellman (DH) key exchange: +# +# tls-dh-params-file redis.dh + +# Configure a CA certificate(s) bundle or directory to authenticate TLS/SSL +# clients and peers. Redis requires an explicit configuration of at least one +# of these, and will not implicitly use the system wide configuration. +# +# tls-ca-cert-file ca.crt +# tls-ca-cert-dir /etc/ssl/certs + +# By default, clients (including replica servers) on a TLS port are required +# to authenticate using valid client side certificates. +# +# If "no" is specified, client certificates are not required and not accepted. +# If "optional" is specified, client certificates are accepted and must be +# valid if provided, but are not required. +# +# tls-auth-clients no +# tls-auth-clients optional + +# By default, a Redis replica does not attempt to establish a TLS connection +# with its master. +# +# Use the following directive to enable TLS on replication links. +# +# tls-replication yes + +# By default, the Redis Cluster bus uses a plain TCP connection. To enable +# TLS for the bus protocol, use the following directive: +# +# tls-cluster yes + +# By default, only TLSv1.2 and TLSv1.3 are enabled and it is highly recommended +# that older formally deprecated versions are kept disabled to reduce the attack surface. +# You can explicitly specify TLS versions to support. +# Allowed values are case insensitive and include "TLSv1", "TLSv1.1", "TLSv1.2", +# "TLSv1.3" (OpenSSL >= 1.1.1) or any combination. +# To enable only TLSv1.2 and TLSv1.3, use: +# +# tls-protocols "TLSv1.2 TLSv1.3" + +# Configure allowed ciphers. See the ciphers(1ssl) manpage for more information +# about the syntax of this string. 
+# +# Note: this configuration applies only to <= TLSv1.2. +# +# tls-ciphers DEFAULT:!MEDIUM + +# Configure allowed TLSv1.3 ciphersuites. See the ciphers(1ssl) manpage for more +# information about the syntax of this string, and specifically for TLSv1.3 +# ciphersuites. +# +# tls-ciphersuites TLS_CHACHA20_POLY1305_SHA256 + +# When choosing a cipher, use the server's preference instead of the client +# preference. By default, the server follows the client's preference. +# +# tls-prefer-server-ciphers yes + +# By default, TLS session caching is enabled to allow faster and less expensive +# reconnections by clients that support it. Use the following directive to disable +# caching. +# +# tls-session-caching no + +# Change the default number of TLS sessions cached. A zero value sets the cache +# to unlimited size. The default size is 20480. +# +# tls-session-cache-size 5000 + +# Change the default timeout of cached TLS sessions. The default timeout is 300 +# seconds. +# +# tls-session-cache-timeout 60 + +################################# GENERAL ##################################### + +# By default Redis does not run as a daemon. Use 'yes' if you need it. +# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. +# When Redis is supervised by upstart or systemd, this parameter has no impact. +daemonize no + +# If you run Redis from upstart or systemd, Redis can interact with your +# supervision tree. Options: +# supervised no - no supervision interaction +# supervised upstart - signal upstart by putting Redis into SIGSTOP mode +# requires "expect stop" in your upstart job config +# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET +# on startup, and updating Redis status on a regular +# basis. +# supervised auto - detect upstart or systemd method based on +# UPSTART_JOB or NOTIFY_SOCKET environment variables +# Note: these supervision methods only signal "process is ready." +# They do not enable continuous pings back to your supervisor. +# +# The default is "no". To run under upstart/systemd, you can simply uncomment +# the line below: +# +# supervised auto + +# If a pid file is specified, Redis writes it where specified at startup +# and removes it at exit. +# +# When the server runs non daemonized, no pid file is created if none is +# specified in the configuration. When the server is daemonized, the pid file +# is used even if not specified, defaulting to "/var/run/redis.pid". +# +# Creating a pid file is best effort: if Redis is not able to create it +# nothing bad happens, the server will start and run normally. +# +# Note that on modern Linux systems "/run/redis.pid" is more conforming +# and should be used instead. +pidfile /var/run/redis_6379.pid + +# Specify the server verbosity level. +# This can be one of: +# debug (a lot of information, useful for development/testing) +# verbose (many rarely useful info, but not a mess like the debug level) +# notice (moderately verbose, what you want in production probably) +# warning (only very important / critical messages are logged) +loglevel notice + +# Specify the log file name. Also the empty string can be used to force +# Redis to log on the standard output. Note that if you use standard +# output for logging but daemonize, logs will be sent to /dev/null +logfile "" + +# To enable logging to the system logger, just set 'syslog-enabled' to yes, +# and optionally update the other syslog parameters to suit your needs. +# syslog-enabled no + +# Specify the syslog identity. 
+# syslog-ident redis
+
+# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7.
+# syslog-facility local0
+
+# To disable the built in crash log, which will possibly produce cleaner core
+# dumps when they are needed, uncomment the following:
+#
+# crash-log-enabled no
+
+# To disable the fast memory check that's run as part of the crash log, which
+# will possibly let redis terminate sooner, uncomment the following:
+#
+# crash-memcheck-enabled no
+
+# Set the number of databases. The default database is DB 0, you can select
+# a different one on a per-connection basis using SELECT <dbid> where
+# dbid is a number between 0 and 'databases'-1
+databases 16
+
+# By default Redis shows an ASCII art logo only when started to log to the
+# standard output and if the standard output is a TTY and syslog logging is
+# disabled. Basically this means that normally a logo is displayed only in
+# interactive sessions.
+#
+# However it is possible to force the pre-4.0 behavior and always show an
+# ASCII art logo in startup logs by setting the following option to yes.
+always-show-logo no
+
+# By default, Redis modifies the process title (as seen in 'top' and 'ps') to
+# provide some runtime information. It is possible to disable this and leave
+# the process name as executed by setting the following to no.
+set-proc-title yes
+
+# When changing the process title, Redis uses the following template to construct
+# the modified title.
+#
+# Template variables are specified in curly brackets. The following variables are
+# supported:
+#
+# {title} Name of process as executed if parent, or type of child process.
+# {listen-addr} Bind address or '*' followed by TCP or TLS port listening on, or
+# Unix socket if only that's available.
+# {server-mode} Special mode, i.e. "[sentinel]" or "[cluster]".
+# {port} TCP port listening on, or 0.
+# {tls-port} TLS port listening on, or 0.
+# {unixsocket} Unix domain socket listening on, or "".
+# {config-file} Name of configuration file used.
+#
+proc-title-template "{title} {listen-addr} {server-mode}"
+
+################################ SNAPSHOTTING ################################
+
+# Save the DB to disk.
+#
+# save <seconds> <changes>
+#
+# Redis will save the DB if both the given number of seconds and the given
+# number of write operations against the DB occurred.
+#
+# Snapshotting can be completely disabled with a single empty string argument
+# as in the following example:
+#
+# save ""
+#
+# Unless specified otherwise, by default Redis will save the DB:
+# * After 3600 seconds (an hour) if at least 1 key changed
+# * After 300 seconds (5 minutes) if at least 100 keys changed
+# * After 60 seconds if at least 10000 keys changed
+#
+# You can set these explicitly by uncommenting the three following lines.
+#
+# save 3600 1
+save 300 100
+# save 60 10000
+
+# By default Redis will stop accepting writes if RDB snapshots are enabled
+# (at least one save point) and the latest background save failed.
+# This will make the user aware (in a hard way) that data is not persisting
+# on disk properly, otherwise chances are that no one will notice and some
+# disaster will happen.
+#
+# If the background saving process will start working again Redis will
+# automatically allow writes again.
+#
+# However if you have set up proper monitoring of the Redis server
+# and persistence, you may want to disable this feature so that Redis will
+# continue to work as usual even if there are problems with disk,
+# permissions, and so forth.
+stop-writes-on-bgsave-error no
+
+# Compress string objects using LZF when dumping .rdb databases?
+# By default compression is enabled as it's almost always a win.
+# If you want to save some CPU in the saving child set it to 'no' but
+# the dataset will likely be bigger if you have compressible values or keys.
+rdbcompression yes
+
+# Since version 5 of RDB a CRC64 checksum is placed at the end of the file.
+# This makes the format more resistant to corruption but there is a performance
+# hit to pay (around 10%) when saving and loading RDB files, so you can disable it
+# for maximum performance.
+#
+# RDB files created with checksum disabled have a checksum of zero that will
+# tell the loading code to skip the check.
+rdbchecksum yes
+
+# Enables or disables full sanitation checks for ziplist and listpack etc when
+# loading an RDB or RESTORE payload. This reduces the chances of an assertion or
+# crash later on while processing commands.
+# Options:
+# no - Never perform full sanitation
+# yes - Always perform full sanitation
+# clients - Perform full sanitation only for user connections.
+# Excludes: RDB files, RESTORE commands received from the master
+# connection, and client connections which have the
+# skip-sanitize-payload ACL flag.
+# The default should be 'clients' but since it currently affects cluster
+# resharding via MIGRATE, it is temporarily set to 'no' by default.
+#
+# sanitize-dump-payload no
+
+# The filename where to dump the DB
+dbfilename dump.rdb
+
+# Remove RDB files used by replication in instances without persistence
+# enabled. By default this option is disabled, however there are environments
+# where for regulations or other security concerns, RDB files persisted on
+# disk by masters in order to feed replicas, or stored on disk by replicas
+# in order to load them for the initial synchronization, should be deleted
+# ASAP. Note that this option ONLY WORKS in instances that have both AOF
+# and RDB persistence disabled, otherwise it is completely ignored.
+#
+# An alternative (and sometimes better) way to obtain the same effect is
+# to use diskless replication on both master and replicas instances. However
+# in the case of replicas, diskless is not always an option.
+rdb-del-sync-files no
+
+# The working directory.
+#
+# The DB will be written inside this directory, with the filename specified
+# above using the 'dbfilename' configuration directive.
+#
+# The Append Only File will also be created inside this directory.
+#
+# Note that you must specify a directory here, not a file name.
+dir ./
+
+################################# REPLICATION #################################
+
+# Master-Replica replication. Use replicaof to make a Redis instance a copy of
+# another Redis server. A few things to understand ASAP about Redis replication.
+#
+# +------------------+ +---------------+
+# | Master | ---> | Replica |
+# | (receive writes) | | (exact copy) |
+# +------------------+ +---------------+
+#
+# 1) Redis replication is asynchronous, but you can configure a master to
+# stop accepting writes if it appears to be not connected with at least
+# a given number of replicas.
+# 2) Redis replicas are able to perform a partial resynchronization with the
+# master if the replication link is lost for a relatively small amount of
+# time. You may want to configure the replication backlog size (see the next
+# sections of this file) with a sensible value depending on your needs.
+# 3) Replication is automatic and does not need user intervention. After a
+# network partition replicas automatically try to reconnect to masters
+# and resynchronize with them.
+#
+# replicaof <masterip> <masterport>
+
+# If the master is password protected (using the "requirepass" configuration
+# directive below) it is possible to tell the replica to authenticate before
+# starting the replication synchronization process, otherwise the master will
+# refuse the replica request.
+#
+# masterauth <master-password>
+#
+# However this is not enough if you are using Redis ACLs (for Redis version
+# 6 or greater), and the default user is not capable of running the PSYNC
+# command and/or other commands needed for replication. In this case it's
+# better to configure a special user to use with replication, and specify the
+# masteruser configuration as such:
+#
+# masteruser root
+#
+# When masteruser is specified, the replica will authenticate against its
+# master using the new AUTH form: AUTH <masteruser> <masterpassword>.
+
+# When a replica loses its connection with the master, or when the replication
+# is still in progress, the replica can act in two different ways:
+#
+# 1) if replica-serve-stale-data is set to 'yes' (the default) the replica will
+# still reply to client requests, possibly with out of date data, or the
+# data set may just be empty if this is the first synchronization.
+#
+# 2) If replica-serve-stale-data is set to 'no' the replica will reply with
+# an error "SYNC with master in progress" to all commands except:
+# INFO, REPLICAOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG, SUBSCRIBE,
+# UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB, COMMAND, POST,
+# HOST and LATENCY.
+#
+replica-serve-stale-data yes
+
+# You can configure a replica instance to accept writes or not. Writing against
+# a replica instance may be useful to store some ephemeral data (because data
+# written on a replica will be easily deleted after resync with the master) but
+# may also cause problems if clients are writing to it because of a
+# misconfiguration.
+#
+# Since Redis 2.6 by default replicas are read-only.
+#
+# Note: read only replicas are not designed to be exposed to untrusted clients
+# on the internet. It's just a protection layer against misuse of the instance.
+# Still a read only replica exports by default all the administrative commands
+# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve
+# security of read only replicas using 'rename-command' to shadow all the
+# administrative / dangerous commands.
+replica-read-only yes
+
+# Replication SYNC strategy: disk or socket.
+#
+# New replicas and reconnecting replicas that are not able to continue the
+# replication process just receiving differences, need to do what is called a
+# "full synchronization". An RDB file is transmitted from the master to the
+# replicas.
+#
+# The transmission can happen in two different ways:
+#
+# 1) Disk-backed: The Redis master creates a new process that writes the RDB
+# file on disk. Later the file is transferred by the parent
+# process to the replicas incrementally.
+# 2) Diskless: The Redis master creates a new process that directly writes the
+# RDB file to replica sockets, without touching the disk at all.
+#
+# With disk-backed replication, while the RDB file is generated, more replicas
+# can be queued and served with the RDB file as soon as the current child
+# producing the RDB file finishes its work. With diskless replication instead
+# once the transfer starts, new replicas arriving will be queued and a new
+# transfer will start when the current one terminates.
+#
+# When diskless replication is used, the master waits a configurable amount of
+# time (in seconds) before starting the transfer in the hope that multiple
+# replicas will arrive and the transfer can be parallelized.
+#
+# With slow disks and fast (large bandwidth) networks, diskless replication
+# works better.
+repl-diskless-sync no
+
+# When diskless replication is enabled, it is possible to configure the delay
+# the server waits in order to spawn the child that transfers the RDB via socket
+# to the replicas.
+#
+# This is important since once the transfer starts, it is not possible to serve
+# new replicas arriving, that will be queued for the next RDB transfer, so the
+# server waits a delay in order to let more replicas arrive.
+#
+# The delay is specified in seconds, and by default is 5 seconds. To disable
+# it entirely just set it to 0 seconds and the transfer will start ASAP.
+repl-diskless-sync-delay 5
+
+# -----------------------------------------------------------------------------
+# WARNING: RDB diskless load is experimental. Since in this setup the replica
+# does not immediately store an RDB on disk, it may cause data loss during
+# failovers. RDB diskless load + Redis modules not handling I/O reads may also
+# cause Redis to abort in case of I/O errors during the initial synchronization
+# stage with the master. Use only if you know what you are doing.
+# -----------------------------------------------------------------------------
+#
+# Replica can load the RDB it reads from the replication link directly from the
+# socket, or store the RDB to a file and read that file after it was completely
+# received from the master.
+#
+# In many cases the disk is slower than the network, and storing and loading
+# the RDB file may increase replication time (and even increase the master's
+# Copy on Write memory and replica buffers).
+# However, parsing the RDB file directly from the socket may mean that we have
+# to flush the contents of the current database before the full rdb was
+# received. For this reason we have the following options:
+#
+# "disabled" - Don't use diskless load (store the rdb file to the disk first)
+# "on-empty-db" - Use diskless load only when it is completely safe.
+# "swapdb" - Keep a copy of the current db contents in RAM while parsing
+# the data directly from the socket. Note that this requires
+# sufficient memory; if you don't have it, you risk an OOM kill.
+repl-diskless-load disabled
+
+# Replicas send PINGs to server in a predefined interval. It's possible to
+# change this interval with the repl_ping_replica_period option. The default
+# value is 10 seconds.
+#
+# repl-ping-replica-period 10
+
+# The following option sets the replication timeout for:
+#
+# 1) Bulk transfer I/O during SYNC, from the point of view of replica.
+# 2) Master timeout from the point of view of replicas (data, pings).
+# 3) Replica timeout from the point of view of masters (REPLCONF ACK pings).
+#
+# It is important to make sure that this value is greater than the value
+# specified for repl-ping-replica-period otherwise a timeout will be detected
+# every time there is low traffic between the master and the replica. The default
+# value is 60 seconds.
+#
+# repl-timeout 60
+
+# Disable TCP_NODELAY on the replica socket after SYNC?
+#
+# If you select "yes" Redis will use a smaller number of TCP packets and
+# less bandwidth to send data to replicas. But this can add a delay for
+# the data to appear on the replica side, up to 40 milliseconds with
+# Linux kernels using a default configuration.
+#
+# If you select "no" the delay for data to appear on the replica side will
+# be reduced but more bandwidth will be used for replication.
+#
+# By default we optimize for low latency, but in very high traffic conditions
+# or when the master and replicas are many hops away, turning this to "yes" may
+# be a good idea.
+repl-disable-tcp-nodelay no
+
+# Set the replication backlog size. The backlog is a buffer that accumulates
+# replica data when replicas are disconnected for some time, so that when a
+# replica wants to reconnect again, often a full resync is not needed, but a
+# partial resync is enough, just passing the portion of data the replica
+# missed while disconnected.
+#
+# The bigger the replication backlog, the longer the replica can endure the
+# disconnect and later be able to perform a partial resynchronization.
+#
+# The backlog is only allocated if there is at least one replica connected.
+#
+# repl-backlog-size 1mb
+
+# After a master has no connected replicas for some time, the backlog will be
+# freed. The following option configures the amount of seconds that need to
+# elapse, starting from the time the last replica disconnected, for the backlog
+# buffer to be freed.
+#
+# Note that replicas never free the backlog for timeout, since they may be
+# promoted to masters later, and should be able to correctly "partially
+# resynchronize" with other replicas: hence they should always accumulate backlog.
+#
+# A value of 0 means to never release the backlog.
+#
+# repl-backlog-ttl 3600
+
+# The replica priority is an integer number published by Redis in the INFO
+# output. It is used by Redis Sentinel in order to select a replica to promote
+# into a master if the master is no longer working correctly.
+#
+# A replica with a low priority number is considered better for promotion, so
+# for instance if there are three replicas with priority 10, 100, 25 Sentinel
+# will pick the one with priority 10, that is the lowest.
+#
+# However a special priority of 0 marks the replica as not able to perform the
+# role of master, so a replica with priority of 0 will never be selected by
+# Redis Sentinel for promotion.
+#
+# By default the priority is 100.
+replica-priority 100
+
+# -----------------------------------------------------------------------------
+# By default, Redis Sentinel includes all replicas in its reports. A replica
+# can be excluded from Redis Sentinel's announcements. An unannounced replica
+# will be ignored by the 'sentinel replicas <master>' command and won't be
+# exposed to Redis Sentinel's clients.
+#
+# This option does not change the behavior of replica-priority. Even with
+# replica-announced set to 'no', the replica can be promoted to master. To
+# prevent this behavior, set replica-priority to 0.
+#
+# replica-announced yes
+
+# It is possible for a master to stop accepting writes if there are less than
+# N replicas connected, having a lag less or equal than M seconds.
+#
+# The N replicas need to be in "online" state.
+#
+# The lag in seconds, that must be <= the specified value, is calculated from
+# the last ping received from the replica, that is usually sent every second.
+#
+# This option does not GUARANTEE that N replicas will accept the write, but
+# will limit the window of exposure for lost writes in case not enough replicas
+# are available, to the specified number of seconds.
+# +# For example to require at least 3 replicas with a lag <= 10 seconds use: +# +# min-replicas-to-write 3 +# min-replicas-max-lag 10 +# +# Setting one or the other to 0 disables the feature. +# +# By default min-replicas-to-write is set to 0 (feature disabled) and +# min-replicas-max-lag is set to 10. + +# A Redis master is able to list the address and port of the attached +# replicas in different ways. For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover replica instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a master. +# +# The listed IP address and port normally reported by a replica is +# obtained in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the replica to connect with the master. +# +# Port: The port is communicated by the replica during the replication +# handshake, and is normally the port that the replica is using to +# listen for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the replica may actually be reachable via different IP and port +# pairs. The following two options can be used by a replica in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. +# +# There is no need to use both the options if you need to override just +# the port or the IP address. +# +# replica-announce-ip 5.5.5.5 +# replica-announce-port 1234 + +############################### KEYS TRACKING ################################# + +# Redis implements server assisted support for client side caching of values. +# This is implemented using an invalidation table that remembers, using +# a radix key indexed by key name, what clients have which keys. In turn +# this is used in order to send invalidation messages to clients. Please +# check this page to understand more about the feature: +# +# https://redis.io/topics/client-side-caching +# +# When tracking is enabled for a client, all the read only queries are assumed +# to be cached: this will force Redis to store information in the invalidation +# table. When keys are modified, such information is flushed away, and +# invalidation messages are sent to the clients. However if the workload is +# heavily dominated by reads, Redis could use more and more memory in order +# to track the keys fetched by many clients. +# +# For this reason it is possible to configure a maximum fill value for the +# invalidation table. By default it is set to 1M of keys, and once this limit +# is reached, Redis will start to evict keys in the invalidation table +# even if they were not modified, just to reclaim memory: this will in turn +# force the clients to invalidate the cached values. Basically the table +# maximum size is a trade off between the memory you want to spend server +# side to track information about who cached what, and the ability of clients +# to retain cached objects in memory. +# +# If you set the value to 0, it means there are no limits, and Redis will +# retain as many keys as needed in the invalidation table. +# In the "stats" INFO section, you can find information about the number of +# keys in the invalidation table at every given moment. +# +# Note: when key tracking is used in broadcasting mode, no memory is used +# in the server side so this setting is useless. 
+#
+# tracking-table-max-keys 1000000
+
+################################## SECURITY ###################################
+
+# Warning: since Redis is pretty fast, an outside user can try up to
+# 1 million passwords per second against a modern box. This means that you
+# should use very strong passwords, otherwise they will be very easy to break.
+# Note that because the password is really a shared secret between the client
+# and the server, and should not be memorized by any human, the password
+# can be easily a long string from /dev/urandom or whatever, so by using a
+# long and unguessable password no brute force attack will be possible.
+
+# Redis ACL users are defined in the following format:
+#
+# user <username> ... acl rules ...
+#
+# For example:
+#
+# user worker +@list +@connection ~jobs:* on >ffa9203c493aa99
+#
+# The special username "default" is used for new connections. If this user
+# has the "nopass" rule, then new connections will be immediately authenticated
+# as the "default" user without the need of any password provided via the
+# AUTH command. Otherwise if the "default" user is not flagged with "nopass"
+# the connections will start in not authenticated state, and will require
+# AUTH (or the HELLO command AUTH option) in order to be authenticated and
+# start to work.
+#
+# The ACL rules that describe what a user can do are the following:
+#
+# on Enable the user: it is possible to authenticate as this user.
+# off Disable the user: it's no longer possible to authenticate
+# with this user, however the already authenticated connections
+# will still work.
+# skip-sanitize-payload RESTORE dump-payload sanitation is skipped.
+# sanitize-payload RESTORE dump-payload is sanitized (default).
+# +<command> Allow the execution of that command
+# -<command> Disallow the execution of that command
+# +@<category> Allow the execution of all the commands in such category;
+# valid categories are like @admin, @set, @sortedset, ...
+# and so forth, see the full list in the server.c file where
+# the Redis command table is described and defined.
+# The special category @all means all the commands, both the
+# ones currently present in the server, and the ones that will
+# be loaded in the future via modules.
+# +<command>|subcommand Allow a specific subcommand of an otherwise
+# disabled command. Note that this form is not
+# allowed as negative like -DEBUG|SEGFAULT, but
+# only additive starting with "+".
+# allcommands Alias for +@all. Note that it implies the ability to execute
+# all the future commands loaded via the modules system.
+# nocommands Alias for -@all.
+# ~<pattern> Add a pattern of keys that can be mentioned as part of
+# commands. For instance ~* allows all the keys. The pattern
+# is a glob-style pattern like the one of KEYS.
+# It is possible to specify multiple patterns.
+# allkeys Alias for ~*
+# resetkeys Flush the list of allowed keys patterns.
+# &<pattern> Add a glob-style pattern of Pub/Sub channels that can be
+# accessed by the user. It is possible to specify multiple channel
+# patterns.
+# allchannels Alias for &*
+# resetchannels Flush the list of allowed channel patterns.
+# ><password> Add this password to the list of valid passwords for the user.
+# For example >mypass will add "mypass" to the list.
+# This directive clears the "nopass" flag (see later).
+# <<password> Remove this password from the list of valid passwords.
+# nopass All the set passwords of the user are removed, and the user
+# is flagged as requiring no password: it means that every
+# password will work against this user. If this directive is
+# used for the default user, every new connection will be
+# immediately authenticated with the default user without
+# any explicit AUTH command required. Note that the "resetpass"
+# directive will clear this condition.
+# resetpass Flush the list of allowed passwords. Moreover removes the
+# "nopass" status. After "resetpass" the user has no associated
+# passwords and there is no way to authenticate without adding
+# some password (or setting it as "nopass" later).
+# reset Performs the following actions: resetpass, resetkeys, off,
+# -@all. The user returns to the same state it has immediately
+# after its creation.
+#
+# ACL rules can be specified in any order: for instance you can start with
+# passwords, then flags, or key patterns. However note that the additive
+# and subtractive rules will CHANGE MEANING depending on the ordering.
+# For instance see the following example:
+#
+# user alice on +@all -DEBUG ~* >somepassword
+#
+# This will allow "alice" to use all the commands with the exception of the
+# DEBUG command, since +@all added all the commands to the set of the commands
+# alice can use, and later DEBUG was removed. However if we invert the order
+# of two ACL rules the result will be different:
+#
+# user alice on -DEBUG +@all ~* >somepassword
+#
+# Now DEBUG was removed when alice had yet no commands in the set of allowed
+# commands, later all the commands are added, so the user will be able to
+# execute everything.
+#
+# Basically ACL rules are processed left-to-right.
+#
+# For more information about ACL configuration please refer to
+# the Redis web site at https://redis.io/topics/acl
+
+# ACL LOG
+#
+# The ACL Log tracks failed commands and authentication events associated
+# with ACLs. The ACL Log is useful to troubleshoot failed commands blocked
+# by ACLs. The ACL Log is stored in memory. You can reclaim memory with
+# ACL LOG RESET. Define the maximum entry length of the ACL Log below.
+acllog-max-len 128
+
+# Using an external ACL file
+#
+# Instead of configuring users here in this file, it is possible to use
+# a stand-alone file just listing users. The two methods cannot be mixed:
+# if you configure users here and at the same time you activate the external
+# ACL file, the server will refuse to start.
+#
+# The format of the external ACL user file is exactly the same as the
+# format that is used inside redis.conf to describe users.
+#
+# aclfile /etc/redis/users.acl
+
+# IMPORTANT NOTE: starting with Redis 6 "requirepass" is just a compatibility
+# layer on top of the new ACL system. The option effect will be just setting
+# the password for the default user. Clients will still authenticate using
+# AUTH <password> as usually, or more explicitly with AUTH default <password>
+# if they follow the new protocol: both will work.
+#
+# The requirepass is not compatible with the aclfile option and the ACL LOAD
+# command; these will cause requirepass to be ignored.
+#
+requirepass merlin_password
+
+# New users are initialized with restrictive permissions by default, via the
+# equivalent of this ACL rule 'off resetkeys -@all'. Starting with Redis 6.2, it
+# is possible to manage access to Pub/Sub channels with ACL rules as well. The
+# default Pub/Sub channels permission for new users is controlled by the
+# acl-pubsub-default configuration directive, which accepts one of these values:
+#
+# allchannels: grants access to all Pub/Sub channels
+# resetchannels: revokes access to all Pub/Sub channels
+#
+# To ensure backward compatibility while upgrading Redis 6.0, acl-pubsub-default
+# defaults to the 'allchannels' permission.
+#
+# Future compatibility note: it is very likely that in a future version of Redis
+# the directive's default of 'allchannels' will be changed to 'resetchannels' in
+# order to provide better out-of-the-box Pub/Sub security. Therefore, it is
+# recommended that you explicitly define Pub/Sub permissions for all users
+# rather than rely on implicit default values. Once you've set explicit
+# Pub/Sub permissions for all existing users, you should uncomment the
+# following line.
+#
+# acl-pubsub-default resetchannels
+
+# Command renaming (DEPRECATED).
+#
+# ------------------------------------------------------------------------
+# WARNING: avoid using this option if possible. Instead use ACLs to remove
+# commands from the default user, and put them only in some admin user you
+# create for administrative purposes.
+# ------------------------------------------------------------------------
+#
+# It is possible to change the name of dangerous commands in a shared
+# environment. For instance the CONFIG command may be renamed into something
+# hard to guess so that it will still be available for internal-use tools
+# but not available for general clients.
+#
+# Example:
+#
+# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52
+#
+# It is also possible to completely kill a command by renaming it into
+# an empty string:
+#
+# rename-command CONFIG ""
+#
+# Please note that changing the name of commands that are logged into the
+# AOF file or transmitted to replicas may cause problems.
+
+################################### CLIENTS ####################################
+
+# Set the max number of connected clients at the same time. By default
+# this limit is set to 10000 clients, however if the Redis server is not
+# able to configure the process file limit to allow for the specified limit
+# the max number of allowed clients is set to the current file limit
+# minus 32 (as Redis reserves a few file descriptors for internal uses).
+#
+# Once the limit is reached Redis will close all the new connections sending
+# an error 'max number of clients reached'.
+#
+# IMPORTANT: When Redis Cluster is used, the max number of connections is also
+# shared with the cluster bus: every node in the cluster will use two
+# connections, one incoming and another outgoing. It is important to size the
+# limit accordingly in case of very large clusters.
+#
+# maxclients 10000
+
+############################## MEMORY MANAGEMENT ################################
+
+# Set a memory usage limit to the specified amount of bytes.
+# When the memory limit is reached Redis will try to remove keys
+# according to the eviction policy selected (see maxmemory-policy).
+#
+# If Redis can't remove keys according to the policy, or if the policy is
+# set to 'noeviction', Redis will start to reply with errors to commands
+# that would use more memory, like SET, LPUSH, and so on, and will continue
+# to reply to read-only commands like GET.
+#
+# This option is usually useful when using Redis as an LRU or LFU cache, or to
+# set a hard memory limit for an instance (using the 'noeviction' policy).
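As a companion to the maxmemory discussion above, here is a small redis-py sketch (not part of this diff) showing how these settings can be inspected and changed on a live server; the 100mb cap and allkeys-lru policy are illustrative values, and the password mirrors the requirepass set earlier in this file:

```python
# Illustrative only: apply and verify a memory cap at runtime with redis-py.
import redis

r = redis.Redis(host="127.0.0.1", port=6379, password="merlin_password")
r.config_set("maxmemory", "100mb")               # hard memory cap
r.config_set("maxmemory-policy", "allkeys-lru")  # evict any key, approximated LRU
print(r.config_get("maxmemory*"))                # confirm the applied settings
```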
+#
+# WARNING: If you have replicas attached to an instance with maxmemory on,
+# the size of the output buffers needed to feed the replicas is subtracted
+# from the used memory count, so that network problems / resyncs will
+# not trigger a loop where keys are evicted, and in turn the output
+# buffer of replicas is full with DELs of keys evicted triggering the deletion
+# of more keys, and so forth until the database is completely emptied.
+#
+# In short... if you have replicas attached it is suggested that you set a lower
+# limit for maxmemory so that there is some free RAM on the system for replica
+# output buffers (but this is not needed if the policy is 'noeviction').
+#
+# maxmemory <bytes>
+
+# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory
+# is reached. You can select one from the following behaviors:
+#
+# volatile-lru -> Evict using approximated LRU, only keys with an expire set.
+# allkeys-lru -> Evict any key using approximated LRU.
+# volatile-lfu -> Evict using approximated LFU, only keys with an expire set.
+# allkeys-lfu -> Evict any key using approximated LFU.
+# volatile-random -> Remove a random key having an expire set.
+# allkeys-random -> Remove a random key, any key.
+# volatile-ttl -> Remove the key with the nearest expire time (minor TTL)
+# noeviction -> Don't evict anything, just return an error on write operations.
+#
+# LRU means Least Recently Used
+# LFU means Least Frequently Used
+#
+# Both LRU, LFU and volatile-ttl are implemented using approximated
+# randomized algorithms.
+#
+# Note: with any of the above policies, when there are no suitable keys for
+# eviction, Redis will return an error on write operations that require
+# more memory. These are usually commands that create new keys, add data or
+# modify existing keys. A few examples are: SET, INCR, HSET, LPUSH, SUNIONSTORE,
+# SORT (due to the STORE argument), and EXEC (if the transaction includes any
+# command that requires memory).
+#
+# The default is:
+#
+# maxmemory-policy noeviction
+
+# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated
+# algorithms (in order to save memory), so you can tune it for speed or
+# accuracy. By default Redis will check five keys and pick the one that was
+# used least recently, you can change the sample size using the following
+# configuration directive.
+#
+# The default of 5 produces good enough results. 10 approximates very closely
+# true LRU but costs more CPU. 3 is faster but not very accurate.
+#
+# maxmemory-samples 5
+
+# Eviction processing is designed to function well with the default setting.
+# If there is an unusually large amount of write traffic, this value may need to
+# be increased. Decreasing this value may reduce latency at the risk of
+# eviction processing effectiveness
+# 0 = minimum latency, 10 = default, 100 = process without regard to latency
+#
+# maxmemory-eviction-tenacity 10
+
+# Starting from Redis 5, by default a replica will ignore its maxmemory setting
+# (unless it is promoted to master after a failover or manually). It means
+# that the eviction of keys will be just handled by the master, sending the
+# DEL commands to the replica as keys evict in the master side.
+# +# This behavior ensures that masters and replicas stay consistent, and is usually +# what you want, however if your replica is writable, or you want the replica +# to have a different memory setting, and you are sure all the writes performed +# to the replica are idempotent, then you may change this default (but be sure +# to understand what you are doing). +# +# Note that since the replica by default does not evict, it may end using more +# memory than the one set via maxmemory (there are certain buffers that may +# be larger on the replica, or data structures may sometimes take more memory +# and so forth). So make sure you monitor your replicas and make sure they +# have enough memory to never hit a real out-of-memory condition before the +# master hits the configured maxmemory setting. +# +# replica-ignore-maxmemory yes + +# Redis reclaims expired keys in two ways: upon access when those keys are +# found to be expired, and also in background, in what is called the +# "active expire key". The key space is slowly and interactively scanned +# looking for expired keys to reclaim, so that it is possible to free memory +# of keys that are expired and will never be accessed again in a short time. +# +# The default effort of the expire cycle will try to avoid having more than +# ten percent of expired keys still in memory, and will try to avoid consuming +# more than 25% of total memory and to add latency to the system. However +# it is possible to increase the expire "effort" that is normally set to +# "1", to a greater value, up to the value "10". At its maximum value the +# system will use more CPU, longer cycles (and technically may introduce +# more latency), and will tolerate less already expired keys still present +# in the system. It's a tradeoff between memory, CPU and latency. +# +# active-expire-effort 1 + +############################# LAZY FREEING #################################### + +# Redis has two primitives to delete keys. One is called DEL and is a blocking +# deletion of the object. It means that the server stops processing new commands +# in order to reclaim all the memory associated with an object in a synchronous +# way. If the key deleted is associated with a small object, the time needed +# in order to execute the DEL command is very small and comparable to most other +# O(1) or O(log_N) commands in Redis. However if the key is associated with an +# aggregated value containing millions of elements, the server can block for +# a long time (even seconds) in order to complete the operation. +# +# For the above reasons Redis also offers non blocking deletion primitives +# such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and +# FLUSHDB commands, in order to reclaim memory in background. Those commands +# are executed in constant time. Another thread will incrementally free the +# object in the background as fast as possible. +# +# DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled. +# It's up to the design of the application to understand when it is a good +# idea to use one or the other. However the Redis server sometimes has to +# delete keys or flush the whole database as a side effect of other operations. +# Specifically Redis deletes objects independently of a user call in the +# following scenarios: +# +# 1) On eviction, because of the maxmemory and maxmemory policy configurations, +# in order to make room for new data, without going over the specified +# memory limit. 
+# 2) Because of expire: when a key with an associated time to live (see the
+# EXPIRE command) must be deleted from memory.
+# 3) Because of a side effect of a command that stores data on a key that may
+# already exist. For example the RENAME command may delete the old key
+# content when it is replaced with another one. Similarly SUNIONSTORE
+# or SORT with STORE option may delete existing keys. The SET command
+# itself removes any old content of the specified key in order to replace
+# it with the specified string.
+# 4) During replication, when a replica performs a full resynchronization with
+# its master, the content of the whole database is removed in order to
+# load the RDB file just transferred.
+#
+# In all the above cases the default is to delete objects in a blocking way,
+# as if DEL was called. However you can configure each case specifically
+# in order to instead release memory in a non-blocking way, as if UNLINK
+# was called, using the following configuration directives.
+
+lazyfree-lazy-eviction no
+lazyfree-lazy-expire no
+lazyfree-lazy-server-del no
+replica-lazy-flush no
+
+# It is also possible, for cases where replacing the user code's DEL calls
+# with UNLINK calls is not easy, to modify the default behavior of the DEL
+# command to act exactly like UNLINK, using the following configuration
+# directive:
+
+lazyfree-lazy-user-del no
+
+# FLUSHDB, FLUSHALL, and SCRIPT FLUSH support both asynchronous and synchronous
+# deletion, which can be controlled by passing the [SYNC|ASYNC] flags into the
+# commands. When neither flag is passed, this directive will be used to determine
+# if the data should be deleted asynchronously.
+
+lazyfree-lazy-user-flush no
+
+################################ THREADED I/O #################################
+
+# Redis is mostly single threaded, however there are certain threaded
+# operations such as UNLINK, slow I/O accesses and other things that are
+# performed on side threads.
+#
+# Now it is also possible to handle Redis clients socket reads and writes
+# in different I/O threads. Since especially writing is so slow, normally
+# Redis users use pipelining in order to speed up the Redis performances per
+# core, and spawn multiple instances in order to scale more. Using I/O
+# threads it is possible to easily speed up Redis two times without resorting
+# to pipelining nor sharding of the instance.
+#
+# By default threading is disabled, we suggest enabling it only in machines
+# that have at least 4 or more cores, leaving at least one spare core.
+# Using more than 8 threads is unlikely to help much. We also recommend using
+# threaded I/O only if you actually have performance problems, with Redis
+# instances being able to use a quite big percentage of CPU time, otherwise
+# there is no point in using this feature.
+#
+# So for instance if you have a four core box, try to use 2 or 3 I/O
+# threads, if you have an 8 core box, try to use 6 threads. In order to
+# enable I/O threads use the following configuration directive:
+#
+# io-threads 4
+#
+# Setting io-threads to 1 will just use the main thread as usual.
+# When I/O threads are enabled, we only use threads for writes, that is
+# to thread the write(2) syscall and transfer the client buffers to the
+# socket. However it is also possible to enable threading of reads and
+# protocol parsing using the following configuration directive, by setting
+# it to yes:
+#
+# io-threads-do-reads no
+#
+# Usually threading reads doesn't help much.
+#
+# NOTE 1: This configuration directive cannot be changed at runtime via
+# CONFIG SET. Also this feature currently does not work when SSL is
+# enabled.
+#
+# NOTE 2: If you want to test the Redis speedup using redis-benchmark, make
+# sure you also run the benchmark itself in threaded mode, using the
+# --threads option to match the number of Redis threads, otherwise you'll not
+# be able to notice the improvements.
+
+############################ KERNEL OOM CONTROL ##############################
+
+# On Linux, it is possible to hint the kernel OOM killer on what processes
+# should be killed first when out of memory.
+#
+# Enabling this feature makes Redis actively control the oom_score_adj value
+# for all its processes, depending on their role. The default scores will
+# attempt to have background child processes killed before all others, and
+# replicas killed before masters.
+#
+# Redis supports three options:
+#
+# no: Don't make changes to oom-score-adj (default).
+# yes: Alias to "relative" see below.
+# absolute: Values in oom-score-adj-values are written as is to the kernel.
+# relative: Values are used relative to the initial value of oom_score_adj when
+# the server starts and are then clamped to a range of -1000 to 1000.
+# Because typically the initial value is 0, they will often match the
+# absolute values.
+oom-score-adj no
+
+# When oom-score-adj is used, this directive controls the specific values used
+# for master, replica and background child processes. Values range -2000 to
+# 2000 (higher means more likely to be killed).
+#
+# Unprivileged processes (not root, and without CAP_SYS_RESOURCE capabilities)
+# can freely increase their value, but not decrease it below its initial
+# settings. This means that setting oom-score-adj to "relative" and setting the
+# oom-score-adj-values to positive values will always succeed.
+oom-score-adj-values 0 200 800
+
+
+#################### KERNEL transparent hugepage CONTROL ######################
+
+# Usually the kernel Transparent Huge Pages control is set to "madvise" or
+# "never" by default (/sys/kernel/mm/transparent_hugepage/enabled), in which
+# case this config has no effect. On systems in which it is set to "always",
+# redis will attempt to disable it specifically for the redis process in order
+# to avoid latency problems specifically with fork(2) and CoW.
+# If for some reason you prefer to keep it enabled, you can set this config to
+# "no" and the kernel global to "always".
+
+disable-thp yes
+
+############################## APPEND ONLY MODE ###############################
+
+# By default Redis asynchronously dumps the dataset on disk. This mode is
+# good enough in many applications, but an issue with the Redis process or
+# a power outage may result in a few minutes of writes lost (depending on
+# the configured save points).
+#
+# The Append Only File is an alternative persistence mode that provides
+# much better durability. For instance using the default data fsync policy
+# (see later in the config file) Redis can lose just one second of writes in a
+# dramatic event like a server power outage, or a single write if something
+# goes wrong with the Redis process itself, but the operating system is
+# still running correctly.
+#
+# AOF and RDB persistence can be enabled at the same time without problems.
+# If the AOF is enabled on startup Redis will load the AOF, that is the file
+# with the better durability guarantees.
+#
+# Please check https://redis.io/topics/persistence for more information.
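Since this file turns AOF persistence on (`appendonly yes` just below), here is a quick sanity check with redis-py, offered as an illustrative sketch rather than part of the diff; it assumes a server started from this config on localhost, with the connection details mirroring the `bind`, `port`, and `requirepass` values set elsewhere in this file:

```python
# Illustrative check, not part of the Merlin diff: confirm the append-only
# file is active on a server running with this configuration.
import redis

r = redis.Redis(host="127.0.0.1", port=6379, password="merlin_password")
info = r.info("persistence")
print(info["aof_enabled"])            # 1 when append-only mode is on
print(info["aof_last_write_status"])  # "ok" if the last AOF write succeeded
```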
+ +appendonly yes + +# The name of the append only file (default: "appendonly.aof") + +appendfilename "appendonly.aof" + +# The fsync() call tells the Operating System to actually write data on disk +# instead of waiting for more data in the output buffer. Some OS will really flush +# data on disk, some other OS will just try to do it ASAP. +# +# Redis supports three different modes: +# +# no: don't fsync, just let the OS flush the data when it wants. Faster. +# always: fsync after every write to the append only log. Slow, Safest. +# everysec: fsync only one time every second. Compromise. +# +# The default is "everysec", as that's usually the right compromise between +# speed and data safety. It's up to you to understand if you can relax this to +# "no" that will let the operating system flush the output buffer when +# it wants, for better performances (but if you can live with the idea of +# some data loss consider the default persistence mode that's snapshotting), +# or on the contrary, use "always" that's very slow but a bit safer than +# everysec. +# +# More details please check the following article: +# http://antirez.com/post/redis-persistence-demystified.html +# +# If unsure, use "everysec". + +# appendfsync always +appendfsync everysec +# appendfsync no + +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync none". In practical terms, this means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no" that is the safest pick from the point of view of durability. + +no-appendfsync-on-rewrite no + +# Automatic rewrite of the append only file. +# Redis is able to automatically rewrite the log file implicitly calling +# BGREWRITEAOF when the AOF log size grows by the specified percentage. +# +# This is how it works: Redis remembers the size of the AOF file after the +# latest rewrite (if no rewrite has happened since the restart, the size of +# the AOF at startup is used). +# +# This base size is compared to the current size. If the current size is +# bigger than the specified percentage, the rewrite is triggered. Also +# you need to specify a minimal size for the AOF file to be rewritten, this +# is useful to avoid rewriting the AOF file even if the percentage increase +# is reached but it is still pretty small. +# +# Specify a percentage of zero in order to disable the automatic AOF +# rewrite feature. + +auto-aof-rewrite-percentage 100 +auto-aof-rewrite-min-size 64mb + +# An AOF file may be found to be truncated at the end during the Redis +# startup process, when the AOF data gets loaded back into memory. 
+# This may happen when the system where Redis is running
+# crashes, especially when an ext4 filesystem is mounted without the
+# data=ordered option (however this can't happen when Redis itself
+# crashes or aborts but the operating system still works correctly).
+#
+# Redis can either exit with an error when this happens, or load as much
+# data as possible (the default now) and start if the AOF file is found
+# to be truncated at the end. The following option controls this behavior.
+#
+# If aof-load-truncated is set to yes, a truncated AOF file is loaded and
+# the Redis server starts emitting a log to inform the user of the event.
+# Otherwise if the option is set to no, the server aborts with an error
+# and refuses to start. When the option is set to no, the user is required
+# to fix the AOF file using the "redis-check-aof" utility before restarting
+# the server.
+#
+# Note that if the AOF file is found to be corrupted in the middle, the
+# server will still exit with an error. This option only applies when Redis
+# tries to read more data from the AOF file but not enough bytes are found.
+aof-load-truncated yes
+
+# When rewriting the AOF file, Redis is able to use an RDB preamble in the
+# AOF file for faster rewrites and recoveries. When this option is turned
+# on the rewritten AOF file is composed of two different stanzas:
+#
+# [RDB file][AOF tail]
+#
+# When loading, Redis recognizes that the AOF file starts with the "REDIS"
+# string and loads the prefixed RDB file, then continues loading the AOF
+# tail.
+aof-use-rdb-preamble yes
+
+################################ LUA SCRIPTING ###############################
+
+# Max execution time of a Lua script in milliseconds.
+#
+# If the maximum execution time is reached Redis will log that a script is
+# still in execution after the maximum allowed time and will start to
+# reply to queries with an error.
+#
+# When a long running script exceeds the maximum execution time only the
+# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be
+# used to stop a script that did not yet call any write commands. The second
+# is the only way to shut down the server in the case a write command was
+# already issued by the script but the user doesn't want to wait for the natural
+# termination of the script.
+#
+# Set it to 0 or a negative value for unlimited execution without warnings.
+lua-time-limit 5000
+
+################################ REDIS CLUSTER ###############################
+
+# Normal Redis instances can't be part of a Redis Cluster; only nodes that are
+# started as cluster nodes can. In order to start a Redis instance as a
+# cluster node enable the cluster support uncommenting the following:
+#
+# cluster-enabled yes
+
+# Every cluster node has a cluster configuration file. This file is not
+# intended to be edited by hand. It is created and updated by Redis nodes.
+# Every Redis Cluster node requires a different cluster configuration file.
+# Make sure that instances running in the same system do not have
+# overlapping cluster configuration file names.
+#
+# cluster-config-file nodes-6379.conf
+
+# Cluster node timeout is the amount of milliseconds a node must be unreachable
+# for it to be considered in failure state.
+# Most other internal time limits are a multiple of the node timeout.
+#
+# cluster-node-timeout 15000
+
+# A replica of a failing master will avoid starting a failover if its data
+# looks too old.
+# +# There is no simple way for a replica to actually have an exact measure of +# its "data age", so the following two checks are performed: +# +# 1) If there are multiple replicas able to failover, they exchange messages +# in order to try to give an advantage to the replica with the best +# replication offset (more data from the master processed). +# Replicas will try to get their rank by offset, and apply to the start +# of the failover a delay proportional to their rank. +# +# 2) Every single replica computes the time of the last interaction with +# its master. This can be the last ping or command received (if the master +# is still in the "connected" state), or the time that elapsed since the +# disconnection with the master (if the replication link is currently down). +# If the last interaction is too old, the replica will not try to failover +# at all. +# +# The point "2" can be tuned by user. Specifically a replica will not perform +# the failover if, since the last interaction with the master, the time +# elapsed is greater than: +# +# (node-timeout * cluster-replica-validity-factor) + repl-ping-replica-period +# +# So for example if node-timeout is 30 seconds, and the cluster-replica-validity-factor +# is 10, and assuming a default repl-ping-replica-period of 10 seconds, the +# replica will not try to failover if it was not able to talk with the master +# for longer than 310 seconds. +# +# A large cluster-replica-validity-factor may allow replicas with too old data to failover +# a master, while a too small value may prevent the cluster from being able to +# elect a replica at all. +# +# For maximum availability, it is possible to set the cluster-replica-validity-factor +# to a value of 0, which means, that replicas will always try to failover the +# master regardless of the last time they interacted with the master. +# (However they'll always try to apply a delay proportional to their +# offset rank). +# +# Zero is the only value able to guarantee that when all the partitions heal +# the cluster will always be able to continue. +# +# cluster-replica-validity-factor 10 + +# Cluster replicas are able to migrate to orphaned masters, that are masters +# that are left without working replicas. This improves the cluster ability +# to resist to failures as otherwise an orphaned master can't be failed over +# in case of failure if it has no working replicas. +# +# Replicas migrate to orphaned masters only if there are still at least a +# given number of other working replicas for their old master. This number +# is the "migration barrier". A migration barrier of 1 means that a replica +# will migrate only if there is at least 1 other working replica for its master +# and so forth. It usually reflects the number of replicas you want for every +# master in your cluster. +# +# Default is 1 (replicas migrate only if their masters remain with at least +# one replica). To disable migration just set it to a very large value or +# set cluster-allow-replica-migration to 'no'. +# A value of 0 can be set but is useful only for debugging and dangerous +# in production. +# +# cluster-migration-barrier 1 + +# Turning off this option allows to use less automatic cluster configuration. +# It both disables migration to orphaned masters and migration from masters +# that became empty. +# +# Default is 'yes' (allow automatic migrations). 
+# +# cluster-allow-replica-migration yes + +# By default Redis Cluster nodes stop accepting queries if they detect there +# is at least a hash slot uncovered (no available node is serving it). +# This way if the cluster is partially down (for example a range of hash slots +# are no longer covered) all the cluster becomes, eventually, unavailable. +# It automatically returns available as soon as all the slots are covered again. +# +# However sometimes you want the subset of the cluster which is working, +# to continue to accept queries for the part of the key space that is still +# covered. In order to do so, just set the cluster-require-full-coverage +# option to no. +# +# cluster-require-full-coverage yes + +# This option, when set to yes, prevents replicas from trying to failover its +# master during master failures. However the replica can still perform a +# manual failover, if forced to do so. +# +# This is useful in different scenarios, especially in the case of multiple +# data center operations, where we want one side to never be promoted if not +# in the case of a total DC failure. +# +# cluster-replica-no-failover no + +# This option, when set to yes, allows nodes to serve read traffic while the +# the cluster is in a down state, as long as it believes it owns the slots. +# +# This is useful for two cases. The first case is for when an application +# doesn't require consistency of data during node failures or network partitions. +# One example of this is a cache, where as long as the node has the data it +# should be able to serve it. +# +# The second use case is for configurations that don't meet the recommended +# three shards but want to enable cluster mode and scale later. A +# master outage in a 1 or 2 shard configuration causes a read/write outage to the +# entire cluster without this option set, with it set there is only a write outage. +# Without a quorum of masters, slot ownership will not change automatically. +# +# cluster-allow-reads-when-down no + +# In order to setup your cluster make sure to read the documentation +# available at https://redis.io web site. + +########################## CLUSTER DOCKER/NAT support ######################## + +# In certain deployments, Redis Cluster nodes address discovery fails, because +# addresses are NAT-ted or because ports are forwarded (the typical case is +# Docker and other containers). +# +# In order to make Redis Cluster working in such environments, a static +# configuration where each node knows its public address is needed. The +# following four options are used for this scope, and are: +# +# * cluster-announce-ip +# * cluster-announce-port +# * cluster-announce-tls-port +# * cluster-announce-bus-port +# +# Each instructs the node about its address, client ports (for connections +# without and with TLS) and cluster message bus port. The information is then +# published in the header of the bus packets so that other nodes will be able to +# correctly map the address of the node publishing the information. +# +# If cluster-tls is set to yes and cluster-announce-tls-port is omitted or set +# to zero, then cluster-announce-port refers to the TLS port. Note also that +# cluster-announce-tls-port has no effect if cluster-tls is set to no. +# +# If the above options are not used, the normal Redis Cluster auto-detection +# will be used instead. +# +# Note that when remapped, the bus port may not be at the fixed offset of +# clients port + 10000, so you can specify any port and bus-port depending +# on how they get remapped. 
If the bus-port is not set, a fixed offset of +# 10000 will be used as usual. +# +# Example: +# +# cluster-announce-ip 10.1.1.5 +# cluster-announce-tls-port 6379 +# cluster-announce-port 0 +# cluster-announce-bus-port 6380 + +################################## SLOW LOG ################################### + +# The Redis Slow Log is a system to log queries that exceeded a specified +# execution time. The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Redis +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. + +# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that a negative number disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 10000 + +# There is no limit to this length. Just be aware that it will consume memory. +# You can reclaim memory used by the slow log with SLOWLOG RESET. +slowlog-max-len 128 + +################################ LATENCY MONITOR ############################## + +# The Redis latency monitoring subsystem samples different operations +# at runtime in order to collect data related to possible sources of +# latency of a Redis instance. +# +# Via the LATENCY command this information is available to the user that can +# print graphs and obtain reports. +# +# The system only logs operations that were performed in a time equal or +# greater than the amount of milliseconds specified via the +# latency-monitor-threshold configuration directive. When its value is set +# to zero, the latency monitor is turned off. +# +# By default latency monitoring is disabled since it is mostly not needed +# if you don't have latency issues, and collecting data has a performance +# impact, that while very small, can be measured under big load. Latency +# monitoring can easily be enabled at runtime using the command +# "CONFIG SET latency-monitor-threshold " if needed. +latency-monitor-threshold 0 + +############################# EVENT NOTIFICATION ############################## + +# Redis can notify Pub/Sub clients about events happening in the key space. +# This feature is documented at https://redis.io/topics/notifications +# +# For instance if keyspace events notification is enabled, and a client +# performs a DEL operation on key "foo" stored in the Database 0, two +# messages will be published via Pub/Sub: +# +# PUBLISH __keyspace@0__:foo del +# PUBLISH __keyevent@0__:del foo +# +# It is possible to select the events that Redis will notify among a set +# of classes. Every class is identified by a single character: +# +# K Keyspace events, published with __keyspace@__ prefix. +# E Keyevent events, published with __keyevent@__ prefix. +# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... 
+# $ String commands +# l List commands +# s Set commands +# h Hash commands +# z Sorted set commands +# x Expired events (events generated every time a key expires) +# e Evicted events (events generated when a key is evicted for maxmemory) +# t Stream commands +# d Module key type events +# m Key-miss events (Note: It is not included in the 'A' class) +# A Alias for g$lshzxetd, so that the "AKE" string means all the events +# (Except key-miss events which are excluded from 'A' due to their +# unique nature). +# +# The "notify-keyspace-events" takes as argument a string that is composed +# of zero or multiple characters. The empty string means that notifications +# are disabled. +# +# Example: to enable list and generic events, from the point of view of the +# event name, use: +# +# notify-keyspace-events Elg +# +# Example 2: to get the stream of the expired keys subscribing to channel +# name __keyevent@0__:expired use: +# +# notify-keyspace-events Ex +# +# By default all notifications are disabled because most users don't need +# this feature and the feature has some overhead. Note that if you don't +# specify at least one of K or E, no events will be delivered. +notify-keyspace-events "" + +############################### GOPHER SERVER ################################# + +# Redis contains an implementation of the Gopher protocol, as specified in +# the RFC 1436 (https://www.ietf.org/rfc/rfc1436.txt). +# +# The Gopher protocol was very popular in the late '90s. It is an alternative +# to the web, and the implementation both server and client side is so simple +# that the Redis server has just 100 lines of code in order to implement this +# support. +# +# What do you do with Gopher nowadays? Well Gopher never *really* died, and +# lately there is a movement in order for the Gopher more hierarchical content +# composed of just plain text documents to be resurrected. Some want a simpler +# internet, others believe that the mainstream internet became too much +# controlled, and it's cool to create an alternative space for people that +# want a bit of fresh air. +# +# Anyway for the 10nth birthday of the Redis, we gave it the Gopher protocol +# as a gift. +# +# --- HOW IT WORKS? --- +# +# The Redis Gopher support uses the inline protocol of Redis, and specifically +# two kind of inline requests that were anyway illegal: an empty request +# or any request that starts with "/" (there are no Redis commands starting +# with such a slash). Normal RESP2/RESP3 requests are completely out of the +# path of the Gopher protocol implementation and are served as usual as well. +# +# If you open a connection to Redis when Gopher is enabled and send it +# a string like "/foo", if there is a key named "/foo" it is served via the +# Gopher protocol. +# +# In order to create a real Gopher "hole" (the name of a Gopher site in Gopher +# talking), you likely need a script like the following: +# +# https://github.com/antirez/gopher2redis +# +# --- SECURITY WARNING --- +# +# If you plan to put Redis on the internet in a publicly accessible address +# to server Gopher pages MAKE SURE TO SET A PASSWORD to the instance. +# Once a password is set: +# +# 1. The Gopher server (when enabled, not by default) will still serve +# content via Gopher. +# 2. However other commands cannot be called before the client will +# authenticate. +# +# So use the 'requirepass' option to protect your instance. +# +# Note that Gopher is not currently supported when 'io-threads-do-reads' +# is enabled. 
+# +# To enable Gopher support, uncomment the following line and set the option +# from no (the default) to yes. +# +# gopher-enabled no + +############################### ADVANCED CONFIG ############################### + +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 + +# Lists are also encoded in a special way to save a lot of space. +# The number of entries allowed per internal list node can be specified +# as a fixed maximum size or a maximum number of elements. +# For a fixed maximum size, use -5 through -1, meaning: +# -5: max size: 64 Kb <-- not recommended for normal workloads +# -4: max size: 32 Kb <-- not recommended +# -3: max size: 16 Kb <-- probably not recommended +# -2: max size: 8 Kb <-- good +# -1: max size: 4 Kb <-- good +# Positive numbers mean store up to _exactly_ that number of elements +# per list node. +# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), +# but if your use case is unique, adjust the settings as necessary. +list-max-ziplist-size -2 + +# Lists may also be compressed. +# Compress depth is the number of quicklist ziplist nodes from *each* side of +# the list to *exclude* from compression. The head and tail of the list +# are always uncompressed for fast push/pop operations. Settings are: +# 0: disable all list compression +# 1: depth 1 means "don't start compressing until after 1 node into the list, +# going from either the head or tail" +# So: [head]->node->node->...->node->[tail] +# [head], [tail] will always be uncompressed; inner nodes will compress. +# 2: [head]->[next]->node->node->...->node->[prev]->[tail] +# 2 here means: don't compress head or head->next or tail->prev or tail, +# but compress all nodes between them. +# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] +# etc. +list-compress-depth 0 + +# Sets have a special encoding in just one case: when a set is composed +# of just strings that happen to be integers in radix 10 in the range +# of 64 bit signed integers. +# The following configuration setting sets the limit in the size of the +# set in order to use this special memory saving encoding. +set-max-intset-entries 512 + +# Similarly to hashes and lists, sorted sets are also specially encoded in +# order to save a lot of space. This encoding is only used when the length and +# elements of a sorted set are below the following limits: +zset-max-ziplist-entries 128 +zset-max-ziplist-value 64 + +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is converted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. The value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. +hll-sparse-max-bytes 3000 + +# Streams macro node max size / items. The stream data structure is a radix +# tree of big nodes that encode multiple items inside. 
Using this configuration +# it is possible to configure how big a single node can be in bytes, and the +# maximum number of items it may contain before switching to a new node when +# appending new stream entries. If any of the following settings are set to +# zero, the limit is ignored, so for instance it is possible to set just a +# max entries limit by setting max-bytes to 0 and max-entries to the desired +# value. +stream-node-max-bytes 4096 +stream-node-max-entries 100 + +# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in +# order to help rehashing the main Redis hash table (the one mapping top-level +# keys to values). The hash table implementation Redis uses (see dict.c) +# performs a lazy rehashing: the more operation you run into a hash table +# that is rehashing, the more rehashing "steps" are performed, so if the +# server is idle the rehashing is never complete and some more memory is used +# by the hash table. +# +# The default is to use this millisecond 10 times every second in order to +# actively rehash the main dictionaries, freeing memory when possible. +# +# If unsure: +# use "activerehashing no" if you have hard latency requirements and it is +# not a good thing in your environment that Redis can reply from time to time +# to queries with 2 milliseconds delay. +# +# use "activerehashing yes" if you don't have such hard requirements but +# want to free memory asap when possible. +activerehashing yes + +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them). +# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients including MONITOR clients +# replica -> replica clients +# pubsub -> clients subscribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously). +# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and replica clients, since +# subscribers and replicas receive data in a push fashion. +# +# Both the hard or the soft limit can be disabled by setting them to zero. +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit replica 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + +# Client query buffers accumulate new commands. They are limited to a fixed +# amount by default in order to avoid that a protocol desynchronization (for +# instance due to a bug in the client) will lead to unbound memory usage in +# the query buffer. 
However you can configure it here if you have very special +# needs, such us huge multi/exec requests or alike. +# +# client-query-buffer-limit 1gb + +# In the Redis protocol, bulk requests, that are, elements representing single +# strings, are normally limited to 512 mb. However you can change this limit +# here, but must be 1mb or greater +# +# proto-max-bulk-len 512mb + +# Redis calls an internal function to perform many background tasks, like +# closing connections of clients in timeout, purging expired keys that are +# never requested, and so forth. +# +# Not all tasks are performed with the same frequency, but Redis checks for +# tasks to perform according to the specified "hz" value. +# +# By default "hz" is set to 10. Raising the value will use more CPU when +# Redis is idle, but at the same time will make Redis more responsive when +# there are many keys expiring at the same time, and timeouts may be +# handled with more precision. +# +# The range is between 1 and 500, however a value over 100 is usually not +# a good idea. Most users should use the default of 10 and raise this up to +# 100 only in environments where very low latency is required. +hz 10 + +# Normally it is useful to have an HZ value which is proportional to the +# number of clients connected. This is useful in order, for instance, to +# avoid too many clients are processed for each background task invocation +# in order to avoid latency spikes. +# +# Since the default HZ value by default is conservatively set to 10, Redis +# offers, and enables by default, the ability to use an adaptive HZ value +# which will temporarily raise when there are many connected clients. +# +# When dynamic HZ is enabled, the actual configured HZ will be used +# as a baseline, but multiples of the configured HZ value will be actually +# used as needed once more clients are connected. In this way an idle +# instance will use very little CPU time while a busy instance will be +# more responsive. +dynamic-hz yes + +# When a child rewrites the AOF file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. +aof-rewrite-incremental-fsync yes + +# When redis saves RDB file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. +rdb-save-incremental-fsync yes + +# Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good +# idea to start with the default settings and only change them after investigating +# how to improve the performances and how the keys LFU change over time, which +# is possible to inspect via the OBJECT FREQ command. +# +# There are two tunable parameters in the Redis LFU implementation: the +# counter logarithm factor and the counter decay time. It is important to +# understand what the two parameters mean before changing them. +# +# The LFU counter is just 8 bits per key, it's maximum value is 255, so Redis +# uses a probabilistic increment with logarithmic behavior. Given the value +# of the old counter, when a key is accessed, the counter is incremented in +# this way: +# +# 1. A random number R between 0 and 1 is extracted. +# 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). +# 3. The counter is incremented only if R < P. +# +# The default lfu-log-factor is 10. 
This is a table of how the frequency +# counter changes with a different number of accesses with different +# logarithmic factors: +# +# +--------+------------+------------+------------+------------+------------+ +# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | +# +--------+------------+------------+------------+------------+------------+ +# | 0 | 104 | 255 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 1 | 18 | 49 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 10 | 10 | 18 | 142 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 100 | 8 | 11 | 49 | 143 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# +# NOTE: The above table was obtained by running the following commands: +# +# redis-benchmark -n 1000000 incr foo +# redis-cli object freq foo +# +# NOTE 2: The counter initial value is 5 in order to give new objects a chance +# to accumulate hits. +# +# The counter decay time is the time, in minutes, that must elapse in order +# for the key counter to be divided by two (or decremented if it has a value +# less <= 10). +# +# The default value for the lfu-decay-time is 1. A special value of 0 means to +# decay the counter every time it happens to be scanned. +# +# lfu-log-factor 10 +# lfu-decay-time 1 + +########################### ACTIVE DEFRAGMENTATION ####################### +# +# What is active defragmentation? +# ------------------------------- +# +# Active (online) defragmentation allows a Redis server to compact the +# spaces left between small allocations and deallocations of data in memory, +# thus allowing to reclaim back memory. +# +# Fragmentation is a natural process that happens with every allocator (but +# less so with Jemalloc, fortunately) and certain workloads. Normally a server +# restart is needed in order to lower the fragmentation, or at least to flush +# away all the data and create it again. However thanks to this feature +# implemented by Oran Agra for Redis 4.0 this process can happen at runtime +# in a "hot" way, while the server is running. +# +# Basically when the fragmentation is over a certain level (see the +# configuration options below) Redis will start to create new copies of the +# values in contiguous memory regions by exploiting certain specific Jemalloc +# features (in order to understand if an allocation is causing fragmentation +# and to allocate it in a better place), and at the same time, will release the +# old copies of the data. This process, repeated incrementally for all the keys +# will cause the fragmentation to drop back to normal values. +# +# Important things to understand: +# +# 1. This feature is disabled by default, and only works if you compiled Redis +# to use the copy of Jemalloc we ship with the source code of Redis. +# This is the default with Linux builds. +# +# 2. You never need to enable this feature if you don't have fragmentation +# issues. +# +# 3. Once you experience fragmentation, you can enable this feature when +# needed with the command "CONFIG SET activedefrag yes". +# +# The configuration parameters are able to fine tune the behavior of the +# defragmentation process. If you are not sure about what they mean it is +# a good idea to leave the defaults untouched. 
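The runtime toggle mentioned above can also be issued from a client library. A minimal sketch with redis-py, assuming a locally reachable instance and a Redis build compiled with the bundled Jemalloc so active defragmentation is available:

# Minimal sketch (assumptions: redis-py installed, server on localhost:6379,
# and Redis compiled with the bundled Jemalloc so activedefrag is supported).
import redis

r = redis.Redis(host="127.0.0.1", port=6379)
r.config_set("activedefrag", "yes")  # equivalent to: CONFIG SET activedefrag yes
print(r.config_get("activedefrag"))  # {'activedefrag': 'yes'}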
+ +# Enabled active defragmentation +# activedefrag no + +# Minimum amount of fragmentation waste to start active defrag +# active-defrag-ignore-bytes 100mb + +# Minimum percentage of fragmentation to start active defrag +# active-defrag-threshold-lower 10 + +# Maximum percentage of fragmentation at which we use maximum effort +# active-defrag-threshold-upper 100 + +# Minimal effort for defrag in CPU percentage, to be used when the lower +# threshold is reached +# active-defrag-cycle-min 1 + +# Maximal effort for defrag in CPU percentage, to be used when the upper +# threshold is reached +# active-defrag-cycle-max 25 + +# Maximum number of set/hash/zset/list fields that will be processed from +# the main dictionary scan +# active-defrag-max-scan-fields 1000 + +# Jemalloc background thread for purging will be enabled by default +jemalloc-bg-thread yes + +# It is possible to pin different threads and processes of Redis to specific +# CPUs in your system, in order to maximize the performances of the server. +# This is useful both in order to pin different Redis threads in different +# CPUs, but also in order to make sure that multiple Redis instances running +# in the same host will be pinned to different CPUs. +# +# Normally you can do this using the "taskset" command, however it is also +# possible to this via Redis configuration directly, both in Linux and FreeBSD. +# +# You can pin the server/IO threads, bio threads, aof rewrite child process, and +# the bgsave child process. The syntax to specify the cpu list is the same as +# the taskset command: +# +# Set redis server/io threads to cpu affinity 0,2,4,6: +# server_cpulist 0-7:2 +# +# Set bio threads to cpu affinity 1,3: +# bio_cpulist 1,3 +# +# Set aof rewrite child process to cpu affinity 8,9,10,11: +# aof_rewrite_cpulist 8-11 +# +# Set bgsave child process to cpu affinity 1,10,11 +# bgsave_cpulist 1,10-11 + +# In some cases redis will emit warnings and even refuse to start if it detects +# that the system is in bad state, it is possible to suppress these warnings +# by setting the following config which takes a space delimited list of warnings +# to suppress +# +# ignore-warnings ARM64-COW-BUG \ No newline at end of file diff --git a/merlin/server/server_commands.py b/merlin/server/server_commands.py new file mode 100644 index 000000000..8a570b70d --- /dev/null +++ b/merlin/server/server_commands.py @@ -0,0 +1,308 @@ +"""Main functions for instantiating and running Merlin server containers.""" + +############################################################################### +# Copyright (c) 2022, Lawrence Livermore National Security, LLC. +# Produced at the Lawrence Livermore National Laboratory +# Written by the Merlin dev team, listed in the CONTRIBUTORS file. +# +# +# LLNL-CODE-797170 +# All rights reserved. +# This file is part of Merlin, Version: 1.9.0. +# +# For details, see https://github.com/LLNL/merlin. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+###############################################################################
+
+import logging
+import os
+import socket
+import subprocess
+import time
+from argparse import Namespace
+
+from merlin.server.server_config import (
+    ServerStatus,
+    config_merlin_server,
+    create_server_config,
+    dump_process_file,
+    get_server_status,
+    parse_redis_output,
+    pull_process_file,
+    pull_server_config,
+    pull_server_image,
+)
+from merlin.server.server_util import AppYaml, RedisConfig, RedisUsers
+
+
+LOG = logging.getLogger("merlin")
+
+
+def init_server() -> None:
+    """
+    Initialize merlin server by checking and initializing the main configuration
+    directory and the local server configuration.
+    """
+
+    if not create_server_config():
+        LOG.info("Merlin server initialization failed.")
+        return
+    pull_server_image()
+
+    config_merlin_server()
+
+    LOG.info("Merlin server initialization successful.")
+
+
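config_server() below drives all of its edits off a single argparse Namespace. The hypothetical sketch that follows shows the attributes it reads; any option left as None is skipped by the corresponding setter:

# Illustrative only: the attribute names mirror the "merlin server config"
# options that config_server() reads. All values here are hypothetical.
from argparse import Namespace

args = Namespace(
    ipaddress="127.0.0.1",
    port=6379,
    password=None,          # None keeps the existing password
    directory=None,
    snapshot_seconds=300,
    snapshot_changes=100,
    snapshot_file=None,
    append_mode=None,
    append_file=None,
    add_user=None,          # e.g. ["alice", "hunter2"] to add a user
    remove_user=None,
)
config_server(args)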
+def config_server(args: Namespace) -> bool:
+    """
+    Process the merlin server config flags and apply the requested edits to the
+    appropriate configuration files.
+
+    :return:: True if the configuration was processed successfully and False if not.
+    """
+    server_config = pull_server_config()
+    if not server_config:
+        LOG.error('Try to run "merlin server init" again to reinitialize values.')
+        return False
+    redis_config = RedisConfig(server_config.container.get_config_path())
+
+    redis_config.set_ip_address(args.ipaddress)
+
+    redis_config.set_port(args.port)
+
+    redis_config.set_password(args.password)
+    if args.password is not None:
+        redis_users = RedisUsers(server_config.container.get_user_file_path())
+        redis_users.set_password("default", args.password)
+        redis_users.write()
+
+    redis_config.set_directory(args.directory)
+
+    redis_config.set_snapshot_seconds(args.snapshot_seconds)
+
+    redis_config.set_snapshot_changes(args.snapshot_changes)
+
+    redis_config.set_snapshot_file(args.snapshot_file)
+
+    redis_config.set_append_mode(args.append_mode)
+
+    redis_config.set_append_file(args.append_file)
+
+    if redis_config.changes_made():
+        redis_config.write()
+        LOG.info("Merlin server config has changed. Restart merlin server to apply new configuration.")
+        LOG.info("Run 'merlin server restart' to restart running merlin server")
+        LOG.info("Run 'merlin server start' to start merlin server instance.")
+    else:
+        LOG.info("No changes were applied to the config file or existing containers.")
+
+    server_config = pull_server_config()
+    if not server_config:
+        LOG.error('Try to run "merlin server init" again to reinitialize values.')
+        return False
+
+    # Read the user from the list of available users
+    redis_users = RedisUsers(server_config.container.get_user_file_path())
+    redis_config = RedisConfig(server_config.container.get_config_path())
+
+    if args.add_user is not None:
+        # Log the user in a file
+        if redis_users.add_user(user=args.add_user[0], password=args.add_user[1]):
+            redis_users.write()
+            LOG.info(f"Added user {args.add_user[0]} to merlin server")
+            # Create a new user in container
+            if get_server_status() == ServerStatus.RUNNING:
+                LOG.info("Adding user to current merlin server instance")
+                redis_users.apply_to_redis(redis_config.get_ip_address(), redis_config.get_port(), redis_config.get_password())
+        else:
+            LOG.error(f"User '{args.add_user[0]}' already exists within current users")
+
+    if args.remove_user is not None:
+        # Remove user from file
+        if redis_users.remove_user(args.remove_user):
+            redis_users.write()
+            LOG.info(f"Removed user {args.remove_user} from merlin server")
+            # Remove user from container
+            if get_server_status() == ServerStatus.RUNNING:
+                LOG.info("Removing user from current merlin server instance")
+                redis_users.apply_to_redis(redis_config.get_ip_address(), redis_config.get_port(), redis_config.get_password())
+        else:
+            LOG.error(f"User '{args.remove_user}' doesn't exist within current users.")
+
+    return True
+
+
+def status_server() -> None:
+    """
+    Get the server status of any currently running containers for merlin server.
+    """
+    current_status = get_server_status()
+    if current_status == ServerStatus.NOT_INITALIZED:
+        LOG.info("Merlin server has not been initialized.")
+        LOG.info("Please initialize the server by running 'merlin server init'")
+    elif current_status == ServerStatus.MISSING_CONTAINER:
+        LOG.info("Unable to find server image.")
+        LOG.info("Ensure there is a .sif file in merlin server directory.")
+    elif current_status == ServerStatus.NOT_RUNNING:
+        LOG.info("Merlin server is not running.")
+    elif current_status == ServerStatus.RUNNING:
+        LOG.info("Merlin server is running.")
+
+
+def start_server() -> bool:
+    """
+    Start a merlin server container using singularity.
+    :return:: True if server was successfully started and False if failed.
+    """
+    current_status = get_server_status()
+
+    if current_status == ServerStatus.NOT_INITALIZED or current_status == ServerStatus.MISSING_CONTAINER:
+        LOG.info("Merlin server has not been initialized.
Please run 'merlin server init' first.") + return False + + if current_status == ServerStatus.RUNNING: + LOG.info("Merlin server already running.") + LOG.info("Stop current server with 'merlin server stop' before attempting to start a new server.") + return False + + server_config = pull_server_config() + if not server_config: + LOG.error('Try to run "merlin server init" again to reinitialize values.') + return False + + image_path = server_config.container.get_image_path() + if not os.path.exists(image_path): + LOG.error("Unable to find image at " + image_path) + return False + + config_path = server_config.container.get_config_path() + if not os.path.exists(config_path): + LOG.error("Unable to find config file at " + config_path) + return False + + process = subprocess.Popen( + server_config.container_format.get_run_command() + .strip("\\") + .format( + command=server_config.container_format.get_command(), + home_dir=server_config.container.get_config_dir(), + image=image_path, + config=config_path, + ) + .split(), + start_new_session=True, + close_fds=True, + stdout=subprocess.PIPE, + ) + + time.sleep(1) + + redis_start, redis_out = parse_redis_output(process.stdout) + + if not redis_start: + LOG.error("Redis is unable to start") + LOG.error('Check to see if there is an unresponsive instance of redis with "ps -e"') + LOG.error(redis_out.strip("\n")) + return False + + redis_out["image_pid"] = redis_out.pop("pid") + redis_out["parent_pid"] = process.pid + redis_out["hostname"] = socket.gethostname() + if not dump_process_file(redis_out, server_config.container.get_pfile_path()): + LOG.error("Unable to create process file for container.") + return False + + if get_server_status() != ServerStatus.RUNNING: + LOG.error("Unable to start merlin server.") + return False + + LOG.info(f"Server started with PID {str(process.pid)}.") + LOG.info(f'Merlin server operating on "{redis_out["hostname"]}" and port "{redis_out["port"]}".') + + redis_users = RedisUsers(server_config.container.get_user_file_path()) + redis_config = RedisConfig(server_config.container.get_config_path()) + redis_users.apply_to_redis(redis_config.get_ip_address(), redis_config.get_port(), redis_config.get_password()) + + new_app_yaml = os.path.join(server_config.container.get_config_dir(), "app.yaml") + ay = AppYaml() + ay.apply_server_config(server_config=server_config) + ay.write(new_app_yaml) + LOG.info(f"New app.yaml written to {new_app_yaml}.") + LOG.info("Replace app.yaml in ~/.merlin/app.yaml to use merlin server as main configuration.") + LOG.info("To use for local runs, move app.yaml into the running directory.") + + return True + + +def stop_server(): + """ + Stop running merlin server containers. + :return:: True if server was stopped successfully and False if failed. 
+ """ + if get_server_status() != ServerStatus.RUNNING: + LOG.info("There is no instance of merlin server running.") + LOG.info("Start a merlin server first with 'merlin server start'") + return False + + server_config = pull_server_config() + if not server_config: + LOG.error('Try to run "merlin server init" again to reinitialize values.') + return False + + pf_data = pull_process_file(server_config.container.get_pfile_path()) + read_pid = pf_data["parent_pid"] + + process = subprocess.run( + server_config.process.get_status_command().strip("\\").format(pid=read_pid).split(), stdout=subprocess.PIPE + ) + if process.stdout == b"": + LOG.error("Unable to get the PID for the current merlin server.") + return False + + command = server_config.process.get_kill_command().strip("\\").format(pid=read_pid).split() + if server_config.container_format.get_stop_command() != "kill": + command = ( + server_config.container_format.get_stop_command() + .strip("\\") + .format(name=server_config.container.get_image_name) + .split() + ) + + LOG.info(f"Attempting to close merlin server PID {str(read_pid)}") + + subprocess.run(command, stdout=subprocess.PIPE) + time.sleep(1) + if get_server_status() == ServerStatus.RUNNING: + LOG.error("Unable to kill process.") + return False + + LOG.info("Merlin server terminated.") + return True + + +def restart_server() -> bool: + """ + Restart a running merlin server instance. + :return:: True if server was restarted successfully and False if failed. + """ + if get_server_status() != ServerStatus.RUNNING: + LOG.info("Merlin server is not currently running.") + LOG.info("Please start a merlin server instance first with 'merlin server start'") + return False + stop_server() + time.sleep(1) + start_server() + return True diff --git a/merlin/server/server_config.py b/merlin/server/server_config.py new file mode 100644 index 000000000..57cb5af22 --- /dev/null +++ b/merlin/server/server_config.py @@ -0,0 +1,387 @@ +############################################################################### +# Copyright (c) 2022, Lawrence Livermore National Security, LLC. +# Produced at the Lawrence Livermore National Laboratory +# Written by the Merlin dev team, listed in the CONTRIBUTORS file. +# +# +# LLNL-CODE-797170 +# All rights reserved. +# This file is part of Merlin, Version: 1.9.0. +# +# For details, see https://github.com/LLNL/merlin. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
diff --git a/merlin/server/server_config.py b/merlin/server/server_config.py
new file mode 100644
index 000000000..57cb5af22
--- /dev/null
+++ b/merlin/server/server_config.py
@@ -0,0 +1,387 @@
+###############################################################################
+# Copyright (c) 2022, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory
+# Written by the Merlin dev team, listed in the CONTRIBUTORS file.
+#
+#
+# LLNL-CODE-797170
+# All rights reserved.
+# This file is part of Merlin, Version: 1.9.0.
+#
+# For details, see https://github.com/LLNL/merlin.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+###############################################################################
+
+import enum
+import logging
+import os
+import random
+import shutil
+import string
+import subprocess
+from io import BufferedReader
+from typing import Tuple
+
+import yaml
+
+from merlin.server.server_util import (
+    CONTAINER_TYPES,
+    MERLIN_CONFIG_DIR,
+    MERLIN_SERVER_CONFIG,
+    MERLIN_SERVER_SUBDIR,
+    AppYaml,
+    RedisConfig,
+    RedisUsers,
+    ServerConfig,
+)
+
+
+LOG = logging.getLogger("merlin")
+
+# Default values for configuration
+CONFIG_DIR = os.path.abspath("./merlin_server/")
+IMAGE_NAME = "redis_latest.sif"
+PROCESS_FILE = "merlin_server.pf"
+CONFIG_FILE = "redis.conf"
+REDIS_URL = "docker://redis"
+LOCAL_APP_YAML = "./app.yaml"
+
+PASSWORD_LENGTH = 256
+
+
+class ServerStatus(enum.Enum):
+    """
+    Different states the server can be in.
+    """
+
+    RUNNING = 0
+    NOT_INITALIZED = 1
+    MISSING_CONTAINER = 2
+    NOT_RUNNING = 3
+    ERROR = 4
+
+
+def generate_password(length: int, pass_command: str = None) -> str:
+    """
+    Function for generating passwords for the redis container. If a command is
+    given, the password is generated by running that command. Otherwise a
+    password of the given length is created from a shuffled character set.
+
+    :return:: string value with given length
+    """
+    if pass_command:
+        # Run the user-supplied command through the shell and use its output.
+        process = subprocess.run(pass_command, shell=True, stdout=subprocess.PIPE)
+        return process.stdout.decode().strip()
+
+    characters = list(string.ascii_letters + string.digits + "!@#$%^&*()")
+
+    random.shuffle(characters)
+
+    password = []
+    for _ in range(length):
+        password.append(random.choice(characters))
+
+    random.shuffle(password)
+    return "".join(password)
+
+
+def parse_redis_output(redis_stdout: BufferedReader) -> Tuple[bool, str]:
+    """
+    Parse the redis output of the redis container. It collects all the necessary
+    information from the output and returns those values in a dictionary.
+
+    :return:: two values: is_successful, and a dictionary of values from the redis output
+    """
+    if redis_stdout is None:
+        return False, "None passed as redis output"
+    server_init = False
+    redis_config = {}
+    line = redis_stdout.readline()
+    # readline() returns empty bytes (b"") at end of stream, never None.
+    while line != b"":
+        if not server_init:
+            values = [ln for ln in line.split() if b"=" in ln]
+            for val in values:
+                key, value = val.split(b"=")
+                redis_config[key.decode("utf-8")] = value.strip(b",").strip(b".").decode("utf-8")
+            if b"Server initialized" in line:
+                server_init = True
+        if b"Ready to accept connections" in line:
+            return True, redis_config
+        if b"aborting" in line or b"Fatal error" in line:
+            return False, line.decode("utf-8")
+        line = redis_stdout.readline()
+    return False, "Reached end of redis output without a successful start"
+
+
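To make the parsing contract concrete, the sketch below feeds parse_redis_output() a fabricated startup banner; io.BytesIO stands in for the container's stdout reader, and every banner line is invented for illustration:

# Illustrative only: parse_redis_output() scans stdout for "key=value" tokens
# and the startup banner lines shown here.
import io

sample_output = io.BytesIO(
    b"pid=42, version=6.2.6\n"
    b"port=6379.\n"
    b"Server initialized\n"
    b"Ready to accept connections\n"
)
started, info = parse_redis_output(sample_output)
print(started)  # True
print(info)     # {'pid': '42', 'version': '6.2.6', 'port': '6379'}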
+def create_server_config() -> bool:
+    """
+    Create the main configuration file for merlin server in the merlin
+    configuration directory. If a configuration already exists, it is kept
+    rather than replaced.
+
+    :return:: True if success and False if fail
+    """
+    if not os.path.exists(MERLIN_CONFIG_DIR):
+        LOG.error("Unable to find main merlin configuration directory at " + MERLIN_CONFIG_DIR)
+        return False
+
+    config_dir = os.path.join(MERLIN_CONFIG_DIR, MERLIN_SERVER_SUBDIR)
+    if not os.path.exists(config_dir):
+        LOG.info("Unable to find existing server configuration.")
+        LOG.info(f"Creating default configuration in {config_dir}")
+        try:
+            os.mkdir(config_dir)
+        except OSError as err:
+            LOG.error(err)
+            return False
+
+    files = [i + ".yaml" for i in CONTAINER_TYPES]
+    for file in files:
+        file_path = os.path.join(config_dir, file)
+        if os.path.exists(file_path):
+            LOG.info(f"{file} already exists.")
+            continue
+        LOG.info(f"Copying file {file} to configuration directory.")
+        try:
+            shutil.copy(os.path.join(os.path.dirname(os.path.abspath(__file__)), file), config_dir)
+        except OSError:
+            LOG.error(f"Destination location {config_dir} is not writable.")
+            return False
+
+    # Load Merlin Server Configuration and apply it to app.yaml
+    with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), MERLIN_SERVER_CONFIG)) as f:
+        main_server_config = yaml.load(f, yaml.Loader)
+        filename = LOCAL_APP_YAML if os.path.exists(LOCAL_APP_YAML) else AppYaml.default_filename
+        merlin_app_yaml = AppYaml(filename)
+        merlin_app_yaml.update_data(main_server_config)
+        merlin_app_yaml.write(filename)
+
+    server_config = pull_server_config()
+    if not server_config:
+        LOG.error('Try to run "merlin server init" again to reinitialize values.')
+        return False
+
+    if not os.path.exists(server_config.container.get_config_dir()):
+        LOG.info("Creating merlin server directory.")
+        os.mkdir(server_config.container.get_config_dir())
+
+    return True
+
+
+def config_merlin_server() -> bool:
+    """
+    Configure the merlin server with settings such as the username and password.
+
+    :return:: True if success and False if fail
+    """
+
+    server_config = pull_server_config()
+    if not server_config:
+        LOG.error('Try to run "merlin server init" again to reinitialize values.')
+        return False
+
+    pass_file = server_config.container.get_pass_file_path()
+    if os.path.exists(pass_file):
+        LOG.info("Password file already exists. Skipping password generation step.")
+    else:
+        # if "pass_command" in server_config["container"]:
+        #     password = generate_password(PASSWORD_LENGTH, server_config["container"]["pass_command"])
+        # else:
+        password = generate_password(PASSWORD_LENGTH)
+
+        with open(pass_file, "w+") as f:
+            f.write(password)
+
+        LOG.info("Creating password file for merlin server container.")
+
+    user_file = server_config.container.get_user_file_path()
+    if os.path.exists(user_file):
+        LOG.info("User file already exists.")
+    else:
+        redis_users = RedisUsers(user_file)
+        redis_config = RedisConfig(server_config.container.get_config_path())
+        redis_config.set_password(server_config.container.get_container_password())
+        redis_users.add_user(user="default", password=server_config.container.get_container_password())
+        redis_users.add_user(user=os.environ.get("USER"), password=server_config.container.get_container_password())
+        redis_users.write()
+        redis_config.write()
+
+        LOG.info("User {} created in user file for merlin server container".format(os.environ.get("USER")))
+
+    return True
+
+
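For reference, pull_server_config() below checks the merged configuration for a container section, a matching format section with four command templates, and a process section with two command templates. A hypothetical dictionary with the minimal shape that passes those checks (and that ServerConfig, defined in server_util.py later in this diff, can wrap):

# Hypothetical: the smallest merged configuration satisfying the checks in
# pull_server_config() below. The command templates shown are the defaults.
minimal_config = {
    "container": {
        "format": "singularity",  # selects which <format>.yaml command file gets merged in
    },
    "singularity": {  # must provide all four command templates
        "command": "singularity",
        "run_command": "{command} run {image} {config}",
        "stop_command": "kill",
        "pull_command": "{command} pull {image} {url}",
    },
    "process": {  # must provide both process command templates
        "status": "pgrep -P {pid}",
        "kill": "kill {pid}",
    },
}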
+ """ + return_data = {} + format_needed_keys = ["command", "run_command", "stop_command", "pull_command"] + process_needed_keys = ["status", "kill"] + + merlin_app_yaml = AppYaml(LOCAL_APP_YAML) + server_config = merlin_app_yaml.get_data() + return_data.update(server_config) + + config_dir = os.path.join(MERLIN_CONFIG_DIR, MERLIN_SERVER_SUBDIR) + + if "container" in server_config: + if "format" in server_config["container"]: + format_file = os.path.join(config_dir, server_config["container"]["format"] + ".yaml") + with open(format_file, "r") as ff: + format_data = yaml.load(ff, yaml.Loader) + for key in format_needed_keys: + if key not in format_data[server_config["container"]["format"]]: + LOG.error(f'Unable to find necessary "{key}" value in format config file {format_file}') + return None + return_data.update(format_data) + else: + LOG.error(f'Unable to find "format" in {merlin_app_yaml.default_filename}') + return None + else: + LOG.error(f'Unable to find "container" object in {merlin_app_yaml.default_filename}') + return None + + # Checking for process values that are needed for main functions and defaults + if "process" not in server_config: + LOG.error(f"Process config not found in {merlin_app_yaml.default_filename}") + return None + + for key in process_needed_keys: + if key not in server_config["process"]: + LOG.error(f'Process necessary "{key}" command configuration not found in {merlin_app_yaml.default_filename}') + return None + + return ServerConfig(return_data) + + +def pull_server_image() -> bool: + """ + Fetch the server image using singularity. + + :return:: True if success and False if fail + """ + server_config = pull_server_config() + if not server_config: + LOG.error('Try to run "merlin server init" again to reinitialize values.') + return False + + config_dir = server_config.container.get_config_dir() + config_file = server_config.container.get_config_name() + image_url = server_config.container.get_image_url() + image_path = server_config.container.get_image_path() + + if not os.path.exists(image_path): + LOG.info(f"Fetching redis image from {image_url}") + subprocess.run( + server_config.container_format.get_pull_command() + .strip("\\") + .format(command=server_config.container_format.get_command(), image=image_path, url=image_url) + .split(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + else: + LOG.info(f"{image_path} already exists.") + + if not os.path.exists(os.path.join(config_dir, config_file)): + LOG.info("Copying default redis configuration file.") + try: + file_dir = os.path.dirname(os.path.abspath(__file__)) + shutil.copy(os.path.join(file_dir, config_file), config_dir) + except OSError: + LOG.error(f"Destination location {config_dir} is not writable.") + return False + else: + LOG.info("Redis configuration file already exist.") + + return True + + +def get_server_status(): + """ + Determine the status of the current server. + This function can be used to check if the servers + have been initalized, started, or stopped. + + :param `server_dir`: location of all server related files. + :param `image_name`: name of the image when fetched. + :return:: A enum value of ServerStatus describing its current state. 
+ """ + server_config = pull_server_config() + if not server_config: + return ServerStatus.NOT_INITALIZED + + if not os.path.exists(server_config.container.get_config_dir()): + return ServerStatus.NOT_INITALIZED + + if not os.path.exists(server_config.container.get_image_path()): + return ServerStatus.MISSING_CONTAINER + + if not os.path.exists(server_config.container.get_pfile_path()): + return ServerStatus.NOT_RUNNING + + pf_data = pull_process_file(server_config.container.get_pfile_path()) + parent_pid = pf_data["parent_pid"] + + check_process = subprocess.run( + server_config.process.get_status_command().strip("\\").format(pid=parent_pid).split(), + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) + + if check_process.stdout == b"": + return ServerStatus.NOT_RUNNING + + return ServerStatus.RUNNING + + +def check_process_file_format(data: dict) -> bool: + """ + Check to see if the process file has the correct format and contains the expected key values. + :return:: True if success and False if fail + """ + required_keys = ["parent_pid", "image_pid", "port", "hostname"] + for key in required_keys: + if key not in data: + return False + return True + + +def pull_process_file(file_path: str) -> dict: + """ + Pull the data from the process file. If one is found returns the data in a dictionary + if not returns None + :return:: Data containing in process file. + """ + with open(file_path, "r") as f: + data = yaml.load(f, yaml.Loader) + if check_process_file_format(data): + return data + return None + + +def dump_process_file(data: dict, file_path: str): + """ + Dump the process data from the dictionary to the specified file path. + :return:: True if success and False if fail + """ + if not check_process_file_format(data): + return False + with open(file_path, "w+") as f: + yaml.dump(data, f, yaml.Dumper) + return True diff --git a/merlin/server/server_util.py b/merlin/server/server_util.py new file mode 100644 index 000000000..e6eb6379d --- /dev/null +++ b/merlin/server/server_util.py @@ -0,0 +1,607 @@ +############################################################################### +# Copyright (c) 2022, Lawrence Livermore National Security, LLC. +# Produced at the Lawrence Livermore National Laboratory +# Written by the Merlin dev team, listed in the CONTRIBUTORS file. +# +# +# LLNL-CODE-797170 +# All rights reserved. +# This file is part of Merlin, Version: 1.9.0. +# +# For details, see https://github.com/LLNL/merlin. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+###############################################################################
+
+import hashlib
+import logging
+import os
+
+import redis
+import yaml
+
+import merlin.utils
+
+
+LOG = logging.getLogger("merlin")
+
+# Constants for main merlin server configuration values.
+CONTAINER_TYPES = ["singularity", "docker", "podman"]
+MERLIN_CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".merlin")
+MERLIN_SERVER_SUBDIR = "server/"
+MERLIN_SERVER_CONFIG = "merlin_server.yaml"
+
+
+def valid_ipv4(ip: str) -> bool:
+    """
+    Checks that a string is a valid IPv4 address.
+    """
+    if not ip:
+        return False
+
+    arr = ip.split(".")
+    if len(arr) != 4:
+        return False
+
+    for i in arr:
+        # Each octet must be a non-negative integer no greater than 255.
+        if not i.isdigit() or int(i) > 255:
+            return False
+
+    return True
+
+
+def valid_port(port: int) -> bool:
+    """
+    Checks that a number is a valid network port.
+    """
+    return 0 < port < 65536
+
+
+class ContainerConfig:
+    """
+    ContainerConfig provides an interface for parsing and interacting with the container value specified within
+    the merlin_server.yaml configuration file. A dictionary of the config values should be passed when initialized
+    to parse values. This can be done after parsing the yaml into a data dictionary.
+    If there are missing values within the configuration they will be populated with default values for a
+    singularity container.
+
+    Configuration contains values for setting up containers and storing values specific to each container.
+    Values that are stored consist of things within the local configuration directory as different runs
+    can have different configuration values.
+    """
+
+    # Default values for configuration
+    FORMAT = "singularity"
+    IMAGE_TYPE = "redis"
+    IMAGE_NAME = "redis_latest.sif"
+    REDIS_URL = "docker://redis"
+    CONFIG_FILE = "redis.conf"
+    CONFIG_DIR = os.path.abspath("./merlin_server/")
+    PROCESS_FILE = "merlin_server.pf"
+    PASSWORD_FILE = "redis.pass"
+    USERS_FILE = "redis.users"
+
+    format = FORMAT
+    image_type = IMAGE_TYPE
+    image = IMAGE_NAME
+    url = REDIS_URL
+    config = CONFIG_FILE
+    config_dir = CONFIG_DIR
+    pfile = PROCESS_FILE
+    pass_file = PASSWORD_FILE
+    user_file = USERS_FILE
+
+    def __init__(self, data: dict) -> None:
+        # Fall back to the singularity defaults for any missing key.
+        self.format = data.get("format", self.FORMAT)
+        self.image_type = data.get("image_type", self.IMAGE_TYPE)
+        self.image = data.get("image", self.IMAGE_NAME)
+        self.url = data.get("url", self.REDIS_URL)
+        self.config = data.get("config", self.CONFIG_FILE)
+        self.config_dir = os.path.abspath(data["config_dir"]) if "config_dir" in data else self.CONFIG_DIR
+        self.pfile = data.get("pfile", self.PROCESS_FILE)
+        self.pass_file = data.get("pass_file", self.PASSWORD_FILE)
+        self.user_file = data.get("user_file", self.USERS_FILE)
+
+    def get_format(self) -> str:
+        return self.format
+
+    def get_image_type(self) -> str:
+        return self.image_type
+
+    def get_image_name(self) -> str:
+        return self.image
+
+    def get_image_url(self) -> str:
+        return self.url
+
+    def get_image_path(self) -> str:
+        return os.path.join(self.config_dir, self.image)
+
+    def get_config_name(self) -> str:
+        return self.config
+
+    def get_config_path(self) -> str:
+        return os.path.join(self.config_dir, self.config)
+
+    def get_config_dir(self) -> str:
+        return self.config_dir
+
+    def get_pfile_name(self) -> str:
+        return self.pfile
+
+    def get_pfile_path(self) -> str:
+        return os.path.join(self.config_dir, self.pfile)
+
+    def get_pass_file_name(self) -> str:
+        return self.pass_file
+
+    def get_pass_file_path(self) -> str:
+        return os.path.join(self.config_dir, self.pass_file)
+
+    def get_user_file_name(self) -> str:
+        return self.user_file
+
+    def get_user_file_path(self) -> str:
+        return os.path.join(self.config_dir, self.user_file)
+
+    def get_container_password(self) -> str:
+        password = None
+        with open(self.get_pass_file_path(), "r") as f:
+            password = f.read()
+        return password
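A brief usage sketch of ContainerConfig (all values hypothetical): any key missing from the supplied data falls back to the singularity defaults above:

# Hypothetical usage: only two keys are supplied, so every other field
# falls back to the singularity defaults defined on the class.
cc = ContainerConfig({"config_dir": "/tmp/merlin_server", "image": "redis_custom.sif"})
print(cc.get_format())       # 'singularity'
print(cc.get_image_path())   # '/tmp/merlin_server/redis_custom.sif'
print(cc.get_config_path())  # '/tmp/merlin_server/redis.conf'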
+class ContainerConfig:
+    """
+    ContainerConfig provides an interface for parsing and interacting with the container
+    values specified within the merlin_server.yaml configuration file. A dictionary of the
+    config values (e.g. the result of parsing the yaml file) should be passed on
+    initialization. Any values missing from the configuration are populated with the
+    defaults for a singularity container.
+
+    The configuration contains values for setting up containers and storing values specific
+    to each container. Stored values point into the local configuration directory, since
+    different runs can have different configuration values.
+    """
+
+    # Default values for configuration
+    FORMAT = "singularity"
+    IMAGE_TYPE = "redis"
+    IMAGE_NAME = "redis_latest.sif"
+    REDIS_URL = "docker://redis"
+    CONFIG_FILE = "redis.conf"
+    CONFIG_DIR = os.path.abspath("./merlin_server/")
+    PROCESS_FILE = "merlin_server.pf"
+    PASSWORD_FILE = "redis.pass"
+    USERS_FILE = "redis.users"
+
+    format = FORMAT
+    image_type = IMAGE_TYPE
+    image = IMAGE_NAME
+    url = REDIS_URL
+    config = CONFIG_FILE
+    config_dir = CONFIG_DIR
+    pfile = PROCESS_FILE
+    pass_file = PASSWORD_FILE
+    user_file = USERS_FILE
+
+    def __init__(self, data: dict) -> None:
+        self.format = data["format"] if "format" in data else self.FORMAT
+        self.image_type = data["image_type"] if "image_type" in data else self.IMAGE_TYPE
+        self.image = data["image"] if "image" in data else self.IMAGE_NAME
+        self.url = data["url"] if "url" in data else self.REDIS_URL
+        self.config = data["config"] if "config" in data else self.CONFIG_FILE
+        self.config_dir = os.path.abspath(data["config_dir"]) if "config_dir" in data else self.CONFIG_DIR
+        self.pfile = data["pfile"] if "pfile" in data else self.PROCESS_FILE
+        self.pass_file = data["pass_file"] if "pass_file" in data else self.PASSWORD_FILE
+        self.user_file = data["user_file"] if "user_file" in data else self.USERS_FILE
+
+    def get_format(self) -> str:
+        return self.format
+
+    def get_image_type(self) -> str:
+        return self.image_type
+
+    def get_image_name(self) -> str:
+        return self.image
+
+    def get_image_url(self) -> str:
+        return self.url
+
+    def get_image_path(self) -> str:
+        return os.path.join(self.config_dir, self.image)
+
+    def get_config_name(self) -> str:
+        return self.config
+
+    def get_config_path(self) -> str:
+        return os.path.join(self.config_dir, self.config)
+
+    def get_config_dir(self) -> str:
+        return self.config_dir
+
+    def get_pfile_name(self) -> str:
+        return self.pfile
+
+    def get_pfile_path(self) -> str:
+        return os.path.join(self.config_dir, self.pfile)
+
+    def get_pass_file_name(self) -> str:
+        return self.pass_file
+
+    def get_pass_file_path(self) -> str:
+        return os.path.join(self.config_dir, self.pass_file)
+
+    def get_user_file_name(self) -> str:
+        return self.user_file
+
+    def get_user_file_path(self) -> str:
+        return os.path.join(self.config_dir, self.user_file)
+
+    def get_container_password(self) -> str:
+        password = None
+        with open(self.get_pass_file_path(), "r") as f:
+            password = f.read()
+        return password
+
+
+class ContainerFormatConfig:
+    """
+    ContainerFormatConfig provides an interface for parsing and interacting with
+    container-specific configuration files (e.g. singularity.yaml). These configuration
+    files contain the container-specific commands used to run containerizers such as
+    singularity, docker, and podman.
+    """
+
+    COMMAND = "singularity"
+    RUN_COMMAND = "{command} run {image} {config}"
+    STOP_COMMAND = "kill"
+    PULL_COMMAND = "{command} pull {image} {url}"
+
+    command = COMMAND
+    run_command = RUN_COMMAND
+    stop_command = STOP_COMMAND
+    pull_command = PULL_COMMAND
+
+    def __init__(self, data: dict) -> None:
+        self.command = data["command"] if "command" in data else self.COMMAND
+        self.run_command = data["run_command"] if "run_command" in data else self.RUN_COMMAND
+        self.stop_command = data["stop_command"] if "stop_command" in data else self.STOP_COMMAND
+        self.pull_command = data["pull_command"] if "pull_command" in data else self.PULL_COMMAND
+
+    def get_command(self) -> str:
+        return self.command
+
+    def get_run_command(self) -> str:
+        return self.run_command
+
+    def get_stop_command(self) -> str:
+        return self.stop_command
+
+    def get_pull_command(self) -> str:
+        return self.pull_command
+
+
+class ProcessConfig:
+    """
+    ProcessConfig provides an interface for parsing and interacting with the process config
+    specified in the merlin_server.yaml configuration. This configuration provides commands
+    for interacting with the host machine while the containers are running.
+    """
+
+    STATUS_COMMAND = "pgrep -P {pid}"
+    KILL_COMMAND = "kill {pid}"
+
+    status = STATUS_COMMAND
+    kill = KILL_COMMAND
+
+    def __init__(self, data: dict) -> None:
+        self.status = data["status"] if "status" in data else self.STATUS_COMMAND
+        self.kill = data["kill"] if "kill" in data else self.KILL_COMMAND
+
+    def get_status_command(self) -> str:
+        return self.status
+
+    def get_kill_command(self) -> str:
+        return self.kill
+
+
+class ServerConfig:
+    """
+    ServerConfig is an interface for storing all the necessary configuration for merlin
+    server. This configuration contains things such as ContainerConfig, ProcessConfig,
+    and ContainerFormatConfig.
+    """
+
+    container: ContainerConfig = None
+    process: ProcessConfig = None
+    container_format: ContainerFormatConfig = None
+
+    def __init__(self, data: dict) -> None:
+        if "container" in data:
+            self.container = ContainerConfig(data["container"])
+        if "process" in data:
+            self.process = ProcessConfig(data["process"])
+        # Only look up format-specific commands once a container config exists.
+        if self.container and self.container.get_format() in data:
+            self.container_format = ContainerFormatConfig(data[self.container.get_format()])
+
+
+class RedisConfig:
+    """
+    RedisConfig is an interface for parsing and interacting with the redis.conf file
+    provided by redis. It allows users to parse the given redis configuration, make edits,
+    and write those changes back out to a redis-readable config file.
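+
+    A hypothetical usage sketch (the path and port are illustrative):
+
+        rc = RedisConfig("merlin_server/redis.conf")
+        rc.set_port(6379)
+        if rc.changes_made():
+            rc.write()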
+ """ + + filename = "" + entry_order = [] + entries = {} + comments = {} + trailing_comments = "" + changed = False + + def __init__(self, filename) -> None: + self.filename = filename + self.changed = False + self.parse() + + def parse(self) -> None: + self.entries = {} + self.comments = {} + with open(self.filename, "r+") as f: + file_contents = f.read() + file_lines = file_contents.split("\n") + comments = "" + for line in file_lines: + if len(line) > 0 and line[0] != "#": + line_contents = line.split(maxsplit=1) + if line_contents[0] in self.entries: + sub_split = line_contents[1].split(maxsplit=1) + line_contents[0] += " " + sub_split[0] + line_contents[1] = sub_split[1] + self.entry_order.append(line_contents[0]) + self.entries[line_contents[0]] = line_contents[1] + self.comments[line_contents[0]] = comments + comments = "" + else: + comments += line + "\n" + self.trailing_comments = comments[:-1] + + def write(self) -> None: + with open(self.filename, "w") as f: + for entry in self.entry_order: + f.write(self.comments[entry]) + f.write(f"{entry} {self.entries[entry]}\n") + f.write(self.trailing_comments) + + def set_filename(self, filename: str) -> None: + self.filename = filename + + def set_config_value(self, key: str, value: str) -> bool: + if key not in self.entries: + return False + self.entries[key] = value + self.changed = True + return True + + def get_config_value(self, key: str) -> str: + if key in self.entries: + return self.entries[key] + return None + + def changes_made(self) -> bool: + return self.changed + + def get_ip_address(self) -> str: + return self.get_config_value("bind") + + def set_ip_address(self, ipaddress: str) -> bool: + if ipaddress is None: + return False + # Check if ipaddress is valid + if valid_ipv4(ipaddress): + # Set ip address in redis config + if not self.set_config_value("bind", ipaddress): + LOG.error("Unable to set ip address for redis config") + return False + else: + LOG.error("Invalid IPv4 address given.") + return False + LOG.info(f"Ipaddress is set to {ipaddress}") + return True + + def get_port(self) -> str: + return self.get_config_value("port") + + def set_port(self, port: str) -> bool: + if port is None: + return False + # Check if port is valid + if valid_port(port): + # Set port in redis config + if not self.set_config_value("port", port): + LOG.error("Unable to set port for redis config") + return False + else: + LOG.error("Invalid port given.") + return False + LOG.info(f"Port is set to {port}") + return True + + def set_password(self, password: str) -> bool: + if password is None: + return False + self.set_config_value("requirepass", password) + LOG.info("New password set") + return True + + def get_password(self) -> str: + return self.get_config_value("requirepass") + + def set_directory(self, directory: str) -> bool: + if directory is None: + return False + if not os.path.exists(directory): + os.mkdir(directory) + LOG.info(f"Created directory {directory}") + # Validate the directory input + if os.path.exists(directory): + # Set the save directory to the redis config + if not self.set_config_value("dir", directory): + LOG.error("Unable to set directory for redis config") + return False + else: + LOG.error(f"Directory {directory} given does not exist and could not be created.") + return False + LOG.info(f"Directory is set to {directory}") + return True + + def set_snapshot_seconds(self, seconds: int) -> bool: + if seconds is None: + return False + # Set the snapshot second in the redis config + value = 
self.get_config_value("save")
+        if value is None:
+            LOG.error("Unable to get existing parameter values for snapshot")
+            return False
+        else:
+            value = value.split()
+            value[0] = str(seconds)
+            value = " ".join(value)
+        if not self.set_config_value("save", value):
+            LOG.error("Unable to set snapshot value seconds")
+            return False
+        LOG.info(f"Snapshot wait time is set to {seconds} seconds")
+        return True
+
+    def set_snapshot_changes(self, changes: int) -> bool:
+        if changes is None:
+            return False
+        # Set the snapshot changes into the redis config
+        value = self.get_config_value("save")
+        if value is None:
+            LOG.error("Unable to get existing parameter values for snapshot")
+            return False
+        else:
+            value = value.split()
+            value[1] = str(changes)
+            value = " ".join(value)
+        if not self.set_config_value("save", value):
+            LOG.error("Unable to set snapshot value changes")
+            return False
+        LOG.info(f"Snapshot threshold is set to {changes} changes")
+        return True
+
+    def set_snapshot_file(self, file: str) -> bool:
+        if file is None:
+            return False
+        # Set the snapshot file in the redis config
+        if not self.set_config_value("dbfilename", file):
+            LOG.error("Unable to set snapshot_file name")
+            return False
+
+        LOG.info(f"Snapshot file is set to {file}")
+        return True
+
+    def set_append_mode(self, mode: str) -> bool:
+        if mode is None:
+            return False
+        valid_modes = ["always", "everysec", "no"]
+
+        # Validate the append mode (always, everysec, no)
+        if mode in valid_modes:
+            # Set the append mode in the redis config
+            if not self.set_config_value("appendfsync", mode):
+                LOG.error("Unable to set append_mode in redis config")
+                return False
+        else:
+            LOG.error("Not a valid append_mode (valid modes are always, everysec, no)")
+            return False
+
+        LOG.info(f"Append mode is set to {mode}")
+        return True
+
+    def set_append_file(self, file: str) -> bool:
+        if file is None:
+            return False
+        # Set the append file in the redis config
+        if not self.set_config_value("appendfilename", f'"{file}"'):
+            LOG.error("Unable to set append filename.")
+            return False
+        LOG.info(f"Append file is set to {file}")
+        return True
+
+
+class RedisUsers:
+    """
+    RedisUsers provides an interface for parsing and interacting with the redis.users
+    configuration file. It allows users and the merlin server to create, remove, and edit
+    users within the redis files. Changes can be synced and pushed to an existing redis
+    server if one is available.
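+
+    A hypothetical usage sketch (file name, user, and password are illustrative):
+
+        users = RedisUsers("merlin_server/redis.users")
+        users.add_user("new_user", password="new_password")
+        users.write()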
+ """ + + class User: + status = "on" + hash_password = hashlib.sha256(b"password").hexdigest() + keys = "*" + channels = "*" + commands = "@all" + + def __init__(self, status="on", keys="*", channels="*", commands="@all", password=None) -> None: + self.status = status + self.keys = keys + self.channels = channels + self.commands = commands + if password is not None: + self.set_password(password) + + def parse_dict(self, dict: dict) -> None: + self.status = dict["status"] + self.keys = dict["keys"] + self.channels = dict["channels"] + self.commands = dict["commands"] + self.hash_password = dict["hash_password"] + + def get_user_dict(self) -> dict: + self.status = "on" + return { + "status": self.status, + "hash_password": self.hash_password, + "keys": self.keys, + "channels": self.channels, + "commands": self.commands, + } + + def __repr__(self) -> str: + return str(self.get_user_dict()) + + def __str__(self) -> str: + return self.__repr__() + + def set_password(self, password: str) -> None: + self.hash_password = hashlib.sha256(bytes(password, "utf-8")).hexdigest() + + filename = "" + users = {} + + def __init__(self, filename) -> None: + self.filename = filename + if os.path.exists(self.filename): + self.parse() + + def parse(self) -> None: + with open(self.filename, "r") as f: + self.users = yaml.load(f, yaml.Loader) + for user in self.users: + new_user = self.User() + new_user.parse_dict(self.users[user]) + self.users[user] = new_user + + def write(self) -> None: + data = self.users.copy() + for key in data: + data[key] = self.users[key].get_user_dict() + with open(self.filename, "w") as f: + yaml.dump(data, f, yaml.Dumper) + + def add_user(self, user, status="on", keys="*", channels="*", commands="@all", password=None) -> bool: + if user in self.users: + return False + self.users[user] = self.User(status, keys, channels, commands, password) + return True + + def set_password(self, user: str, password: str): + if user not in self.users: + return False + self.users[user].set_password(password) + + def remove_user(self, user) -> bool: + if user in self.users: + del self.users[user] + return True + return False + + def apply_to_redis(self, host: str, port: int, password: str) -> None: + db = redis.Redis(host=host, port=port, password=password) + current_users = db.acl_users() + for user in self.users: + if user not in current_users: + data = self.users[user] + db.acl_setuser( + username=user, + hashed_passwords=[f"+{data.hash_password}"], + enabled=(data.status == "on"), + keys=data.keys, + channels=data.channels, + commands=[f"+{data.commands}"], + ) + + for user in current_users: + if user not in self.users: + db.acl_deluser(user) + + +class AppYaml: + """ + AppYaml allows for an structured way to interact with any app.yaml main merlin configuration file. + It helps to parse each component of the app.yaml and allow users to edit, configure and write the + file. 
+ """ + + default_filename = os.path.join(MERLIN_CONFIG_DIR, "app.yaml") + data = {} + broker_name = "broker" + results_name = "results_backend" + + def __init__(self, filename: str = default_filename) -> None: + if not os.path.exists(filename): + filename = self.default_filename + self.read(filename) + + def apply_server_config(self, server_config: ServerConfig): + rc = RedisConfig(server_config.container.get_config_path()) + + self.data[self.broker_name]["name"] = server_config.container.get_image_type() + self.data[self.broker_name]["username"] = "default" + self.data[self.broker_name]["password"] = server_config.container.get_pass_file_path() + self.data[self.broker_name]["server"] = rc.get_ip_address() + self.data[self.broker_name]["port"] = rc.get_port() + + self.data[self.results_name]["name"] = server_config.container.get_image_type() + self.data[self.results_name]["username"] = "default" + self.data[self.results_name]["password"] = server_config.container.get_pass_file_path() + self.data[self.results_name]["server"] = rc.get_ip_address() + self.data[self.results_name]["port"] = rc.get_port() + + def update_data(self, new_data: dict): + self.data.update(new_data) + + def get_data(self): + return self.data + + def read(self, filename: str = default_filename): + self.data = merlin.utils.load_yaml(filename) + + def write(self, filename: str = default_filename): + with open(filename, "w+") as f: + yaml.dump(self.data, f, yaml.Dumper) diff --git a/merlin/server/singularity.yaml b/merlin/server/singularity.yaml new file mode 100644 index 000000000..d2b34874e --- /dev/null +++ b/merlin/server/singularity.yaml @@ -0,0 +1,6 @@ +singularity: + command: singularity + # init_command: \{command} .. (optional or default) + run_command: \{command} run -H {home_dir} {image} {config} + stop_command: kill # \{command} (optional or kill default) + pull_command: \{command} pull {image} {url} diff --git a/merlin/spec/__init__.py b/merlin/spec/__init__.py index 13db3dccc..7155d0c5f 100644 --- a/merlin/spec/__init__.py +++ b/merlin/spec/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/spec/all_keys.py b/merlin/spec/all_keys.py index 9af27d456..950ceb253 100644 --- a/merlin/spec/all_keys.py +++ b/merlin/spec/all_keys.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # @@ -38,6 +38,7 @@ "shell", "flux_path", "flux_start_opts", + "flux_exec", "flux_exec_workers", "launch_pre", "launch_args", diff --git a/merlin/spec/defaults.py b/merlin/spec/defaults.py index 34b2113cd..1c0e9fa42 100644 --- a/merlin/spec/defaults.py +++ b/merlin/spec/defaults.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. 
# @@ -32,7 +32,7 @@ BATCH = {"batch": {"type": "local", "dry_run": False, "shell": "/bin/bash"}} -ENV = {"env": {"variables": {}, "sources": {}, "labels": {}, "dependencies": {}}} +ENV = {"env": {"variables": {}, "sources": [], "labels": {}, "dependencies": {}}} STUDY_STEP_RUN = {"task_queue": "merlin", "shell": "/bin/bash", "max_retries": 30} diff --git a/merlin/spec/expansion.py b/merlin/spec/expansion.py index 0be5b21b9..254ba80be 100644 --- a/merlin/spec/expansion.py +++ b/merlin/spec/expansion.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/spec/merlinspec.json b/merlin/spec/merlinspec.json new file mode 100644 index 000000000..47e738ee6 --- /dev/null +++ b/merlin/spec/merlinspec.json @@ -0,0 +1,285 @@ +{ + "DESCRIPTION": { + "type": "object", + "properties": { + "name": {"type": "string", "minLength": 1}, + "description": {"type": "string", "minLength": 1} + }, + "required": [ + "name", + "description" + ] + }, + "PARAM": { + "type": "object", + "properties": { + "values": { + "type": "array" + }, + "label": {"type": "string", "minLength": 1} + }, + "required": [ + "values", + "label" + ] + }, + "STUDY_STEP": { + "type": "object", + "properties": { + "name": {"type": "string", "minLength": 1}, + "description": {"type": "string", "minLength": 1}, + "run": { + "type": "object", + "properties": { + "cmd": {"type": "string", "minLength": 1}, + "depends": {"type": "array", "uniqueItems": true}, + "pre": {"type": "string", "minLength": 1}, + "post": {"type": "string", "minLength": 1}, + "restart": {"type": "string", "minLength": 1}, + "slurm": {"type": "string", "minLength": 1}, + "lsf": {"type": "string", "minLength": 1}, + "num resource set": {"type": "integer", "minimum": 1}, + "launch distribution": {"type": "string", "minLength": 1}, + "exit_on_error": {"type": "integer", "minimum": 0, "maximum": 1}, + "shell": {"type": "string", "minLength": 1}, + "flux": {"type": "string", "minLength": 1}, + "batch": { + "type": "object", + "properties": { + "type": {"type": "string", "minLength": 1} + } + }, + "gpus per task": {"type": "integer", "minimum": 1}, + "max_retries": {"type": "integer", "minimum": 1}, + "task_queue": {"type": "string", "minLength": 1}, + "nodes": { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ] + }, + "procs": { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "gpus": { + "anyOf": [ + {"type": "integer", "minimum": 0}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "cores per task": { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "tasks per rs": { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "rs per node": { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "cpus per rs": { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "bind": { + "anyOf": [ + {"type": "string", "minLength": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "bind gpus": { + "anyOf": [ + {"type": "string", "minLength": 1}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "walltime": { + "anyOf": [ + {"type": "string", 
"minLength": 1}, + {"type": "integer", "minimum": 0}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "reservation": {"type": "string", "minLength": 1}, + "exclusive": { + "anyOf": [ + {"type": "boolean"}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ]}, + "nested": {"type": "boolean"}, + "priority": { + "anyOf": [ + { + "type": "string", + "enum": [ + "HELD", "MINIMAL", "LOW", "MEDIUM", "HIGH", "EXPEDITED", + "held", "minimal", "low", "medium", "high", "expedited", + "Held", "Minimal", "Low", "Medium", "High", "Expedited" + ] + }, + {"type": "number", "minimum": 0.0, "maximum": 1.0} + ] + }, + "qos": {"type": "string", "minLength": 1} + }, + "required": [ + "cmd" + ] + } + }, + "required": [ + "name", + "description", + "run" + ] + }, + "ENV": { + "type": "object", + "properties": { + "variables": { + "type": "object", + "patternProperties": { + "^.*": { + "anyOf": [ + {"type": "string", "minLength": 1}, + {"type": "number"} + ] + } + } + }, + "labels": {"type": "object"}, + "sources": {"type": "array"}, + "dependencies": { + "type": "object", + "properties": { + "paths": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string", "minLength": 1}, + "path": {"type": "string", "minLength": 1} + }, + "required": [ + "name", + "path" + ] + } + }, + "git": { + "type": "array", + "items": { + "properties": { + "name": {"type": "string", "minLength": 1}, + "path": {"type": "string", "minLength": 1}, + "url": {"type": "string", "minLength": 1}, + "tag": {"type": "string", "minLength": 1} + }, + "required": [ + "name", + "path", + "url" + ] + } + }, + "spack": { + "type": "object", + "properties": { + "name": {"type": "string", "minLength": 1}, + "package_name": {"type": "string", "minLength": 1} + }, + "required": [ + "type", + "package_name" + ] + } + } + } + } + }, + "MERLIN": { + "type": "object", + "properties": { + "resources": { + "type": "object", + "properties": { + "task_server": {"type": "string", "minLength": 1}, + "overlap": {"type": "boolean"}, + "workers": { + "type": "object", + "patternProperties": { + "^.+": { + "type": "object", + "properties": { + "args": {"type": "string", "minLength": 1}, + "steps": {"type": "array", "uniqueItems": true}, + "nodes": { + "anyOf": [ + {"type": "null"}, + {"type": "integer", "minimum": 1} + ] + }, + "batch": { + "anyOf": [ + {"type": "null"}, + { + "type": "object", + "properties": { + "type": {"type": "string", "minLength": 1} + } + } + ] + }, + "machines": {"type": "array", "uniqueItems": true} + } + } + }, + "minProperties": 1 + } + } + }, + "samples": { + "anyOf": [ + {"type": "null"}, + { + "type": "object", + "properties": { + "generate": { + "type": "object", + "properties": { + "cmd": {"type": "string", "minLength": 1} + }, + "required": ["cmd"] + }, + "file": {"type": "string", "minLength": 1}, + "column_labels": {"type": "array", "uniqueItems": true}, + "level_max_dirs": {"type": "integer", "minimum": 1} + } + } + ] + } + } + }, + "BATCH": { + "type": "object", + "properties": { + "type": {"type": "string", "minLength": 1}, + "bank": {"type": "string", "minLength": 1}, + "queue": {"type": "string", "minLength": 1}, + "dry_run": {"type": "boolean"}, + "shell": {"type": "string", "minLength": 1}, + "flux_path": {"type": "string", "minLength": 1}, + "flux_start_opts": {"type": "string", "minLength": 1}, + "flux_exec_workers": {"type": "boolean"}, + "launch_pre": {"type": "string", "minLength": 1}, + "launch_args": {"type": "string", "minLength": 1}, + "worker_launch": {"type": 
"string", "minLength": 1}, + "nodes": {"type": "integer", "minimum": 1}, + "walltime": {"type": "string", "pattern": "^(?:(?:([0-9][0-9]|2[0-3]):)?([0-5][0-9]):)?([0-5][0-9])$"} + } + } +} diff --git a/merlin/spec/override.py b/merlin/spec/override.py index a3fbf281b..c4fcfee97 100644 --- a/merlin/spec/override.py +++ b/merlin/spec/override.py @@ -1,3 +1,33 @@ +############################################################################### +# Copyright (c) 2022, Lawrence Livermore National Security, LLC. +# Produced at the Lawrence Livermore National Laboratory +# Written by the Merlin dev team, listed in the CONTRIBUTORS file. +# +# +# LLNL-CODE-797170 +# All rights reserved. +# This file is part of Merlin, Version: 1.9.0. +# +# For details, see https://github.com/LLNL/merlin. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +############################################################################### + import logging from copy import deepcopy diff --git a/merlin/spec/specification.py b/merlin/spec/specification.py index 6bb612cb4..a6ecdae2a 100644 --- a/merlin/spec/specification.py +++ b/merlin/spec/specification.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # @@ -33,13 +33,14 @@ data from the Merlin specification file. To see examples of yaml specifications, run `merlin example`. 
""" +import json import logging import os import shlex from io import StringIO import yaml -from maestrowf.datastructures import YAMLSpecification +from maestrowf.specification import YAMLSpecification from merlin.spec import all_keys, defaults @@ -82,6 +83,7 @@ def yaml_sections(self): "study": self.study, "global.parameters": self.globals, "merlin": self.merlin, + "user": self.user, } @property @@ -97,27 +99,215 @@ def sections(self): "study": self.study, "globals": self.globals, "merlin": self.merlin, + "user": self.user, } + def __str__(self): + """Magic method to print an instance of our MerlinSpec class.""" + env = "" + globs = "" + merlin = "" + user = "" + if self.environment: + env = f"\n\tenvironment: \n\t\t{self.environment}" + if self.globals: + globs = f"\n\tglobals:\n\t\t{self.globals}" + if self.merlin: + merlin = f"\n\tmerlin:\n\t\t{self.merlin}" + if self.user is not None: + user = f"\n\tuser:\n\t\t{self.user}" + result = f"""MERLIN SPEC OBJECT:\n\tdescription:\n\t\t{self.description} + \n\tbatch:\n\t\t{self.batch}\n\tstudy:\n\t\t{self.study} + {env}{globs}{merlin}{user}""" + + return result + @classmethod def load_specification(cls, filepath, suppress_warning=True): - spec = super(MerlinSpec, cls).load_specification(filepath) - with open(filepath, "r") as f: - spec.merlin = MerlinSpec.load_merlin_block(f) + LOG.info("Loading specification from path: %s", filepath) + try: + # Load the YAML spec from the filepath + with open(filepath, "r") as data: + spec = cls.load_spec_from_string(data, needs_IO=False, needs_verification=True) + except Exception as e: + LOG.exception(e.args) + raise e + + # Path not set in _populate_spec because loading spec with string + # does not have a path so we set it here + spec.path = filepath spec.specroot = os.path.dirname(spec.path) - spec.process_spec_defaults() + if not suppress_warning: spec.warn_unrecognized_keys() return spec @classmethod - def load_spec_from_string(cls, string): - spec = super(MerlinSpec, cls).load_specification_from_stream(StringIO(string)) - spec.merlin = MerlinSpec.load_merlin_block(StringIO(string)) + def load_spec_from_string(cls, string, needs_IO=True, needs_verification=False): + LOG.debug("Creating Merlin spec object...") + # Create and populate the MerlinSpec object + data = StringIO(string) if needs_IO else string + spec = cls._populate_spec(data) spec.specroot = None spec.process_spec_defaults() + LOG.debug("Merlin spec object created.") + + # Verify the spec object + if needs_verification: + LOG.debug("Verifying Merlin spec...") + spec.verify() + LOG.debug("Merlin spec verified.") + return spec + @classmethod + def _populate_spec(cls, data): + """ + Helper method to load a study spec and populate it's fields. + + NOTE: This is basically a direct copy of YAMLSpecification's + load_specification method from Maestro just without the call to verify. + The verify method was breaking our code since we have no way of modifying + Maestro's schema that they use to verify yaml files. The work around + is to load the yaml file ourselves and create our own schema to verify + against. + + :param data: Raw text stream to study YAML spec data + :returns: A MerlinSpec object containing information from the path + """ + # Read in the spec file + try: + spec = yaml.load(data, yaml.FullLoader) + except AttributeError: + LOG.warn( + "PyYAML is using an unsafe version with a known " + "load vulnerability. Please upgrade your installation " + "to a more recent version!" 
+ ) + spec = yaml.load(data) + LOG.debug("Successfully loaded specification: \n%s", spec["description"]) + + # Load in the parts of the yaml that are the same as Maestro's + merlin_spec = cls() + merlin_spec.path = None + merlin_spec.description = spec.pop("description", {}) + merlin_spec.environment = spec.pop("env", {"variables": {}, "sources": [], "labels": {}, "dependencies": {}}) + merlin_spec.batch = spec.pop("batch", {}) + merlin_spec.study = spec.pop("study", []) + merlin_spec.globals = spec.pop("global.parameters", {}) + + # Reset the file pointer and load the merlin block + data.seek(0) + merlin_spec.merlin = MerlinSpec.load_merlin_block(data) + + # Reset the file pointer and load the user block + data.seek(0) + merlin_spec.user = MerlinSpec.load_user_block(data) + + return merlin_spec + + def verify(self): + """ + Verify the spec against a valid schema. Similar to YAMLSpecification's verify + method from Maestro but specific for Merlin yaml specs. + + NOTE: Maestro v2.0 may add the ability to customize the schema files it + compares against. If that's the case then we can convert this file back to + using Maestro's verification. + """ + # Load the MerlinSpec schema file + dir_path = os.path.dirname(os.path.abspath(__file__)) + schema_path = os.path.join(dir_path, "merlinspec.json") + with open(schema_path, "r") as json_file: + schema = json.load(json_file) + + # Use Maestro's verification methods for shared sections + self.verify_description(schema["DESCRIPTION"]) + self.verify_environment(schema["ENV"]) + self.verify_study(schema["STUDY_STEP"]) + self.verify_parameters(schema["PARAM"]) + + # Merlin specific verification + self.verify_merlin_block(schema["MERLIN"]) + self.verify_batch_block(schema["BATCH"]) + + def get_study_step_names(self): + """ + Get a list of the names of steps in our study. + + :returns: an unsorted list of study step names + """ + names = [] + for step in self.study: + names.append(step["name"]) + return names + + def _verify_workers(self): + """ + Helper method to verify the workers section located within the Merlin block + of our spec file. + """ + # Retrieve the names of the steps in our study + actual_steps = self.get_study_step_names() + + try: + # Verify that the steps in merlin block's worker section actually exist + for worker, worker_vals in self.merlin["resources"]["workers"].items(): + error_prefix = f"Problem in Merlin block with worker {worker} --" + for step in worker_vals["steps"]: + if step != "all" and step not in actual_steps: + error_msg = ( + f"{error_prefix} Step with the name {step}" + " is not defined in the study block of the yaml specification file" + ) + raise ValueError(error_msg) + + except Exception: + raise + + def verify_merlin_block(self, schema): + """ + Method to verify the merlin section of our spec file. + + :param schema: The section of the predefined schema (merlinspec.json) to check + our spec file against. + """ + # Validate merlin block against the json schema + YAMLSpecification.validate_schema("merlin", self.merlin, schema) + # Verify the workers section within merlin block + self._verify_workers() + + def verify_batch_block(self, schema): + """ + Method to verify the batch section of our spec file. + + :param schema: The section of the predefined schema (merlinspec.json) to check + our spec file against. 
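+
+        For example, walltime values such as "30", "05:30", or "01:05:30" should
+        pass verification, while values like "65" or "1:1:1" should be rejected.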
+ """ + # Validate batch block against the json schema + YAMLSpecification.validate_schema("batch", self.batch, schema) + + # Additional Walltime checks in case the regex from the schema bypasses an error + if "walltime" in self.batch: + if self.batch["type"] == "lsf": + LOG.warning("The walltime argument is not available in lsf.") + else: + try: + err_msg = "Walltime must be of the form SS, MM:SS, or HH:MM:SS." + walltime = self.batch["walltime"] + if len(walltime) > 2: + # Walltime must have : if it's not of the form SS + if ":" not in walltime: + raise ValueError(err_msg) + else: + # Walltime must have exactly 2 chars between : + time = walltime.split(":") + for section in time: + if len(section) != 2: + raise ValueError(err_msg) + except Exception: + raise + @staticmethod def load_merlin_block(stream): try: @@ -132,6 +322,14 @@ def load_merlin_block(stream): LOG.warning(warning_msg) return merlin_block + @staticmethod + def load_user_block(stream): + try: + user_block = yaml.safe_load(stream)["user"] + except KeyError: + user_block = {} + return user_block + def process_spec_defaults(self): for name, section in self.sections.items(): if section is None: @@ -161,6 +359,8 @@ def process_spec_defaults(self): if self.merlin["samples"] is not None: MerlinSpec.fill_missing_defaults(self.merlin["samples"], defaults.SAMPLES) + # no defaults for user block + @staticmethod def fill_missing_defaults(object_to_update, default_dict): """ @@ -185,6 +385,7 @@ def recurse(result, defaults): recurse(object_to_update, default_dict) + # ***Unsure if this method is still needed after adding json schema verification*** def warn_unrecognized_keys(self): # check description MerlinSpec.check_section("description", self.description, all_keys.DESCRIPTION) @@ -212,9 +413,14 @@ def warn_unrecognized_keys(self): if self.merlin["samples"]: MerlinSpec.check_section("merlin.samples", self.merlin["samples"], all_keys.SAMPLES) + # user block is not checked + @staticmethod def check_section(section_name, section, all_keys): diff = set(section.keys()).difference(all_keys) + + # TODO: Maybe add a check here for required keys + for extra in diff: LOG.warn(f"Unrecognized key '{extra}' found in spec section '{section_name}'.") @@ -268,7 +474,7 @@ def _process_dict_or_list(self, obj, string, key_stack, lvl, tab): list_offset = 2 * " " if isinstance(obj, list): n = len(obj) - use_hyphens = key_stack[-1] in ["paths", "sources", "git", "study"] + use_hyphens = key_stack[-1] in ["paths", "sources", "git", "study"] or key_stack[0] in ["user"] if not use_hyphens: string += "[" else: diff --git a/merlin/study/__init__.py b/merlin/study/__init__.py index 13db3dccc..7155d0c5f 100644 --- a/merlin/study/__init__.py +++ b/merlin/study/__init__.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/study/batch.py b/merlin/study/batch.py index 69c68856a..f395c5d80 100644 --- a/merlin/study/batch.py +++ b/merlin/study/batch.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. 
# @@ -175,7 +175,7 @@ def batch_worker_launch( flux_exec: str = "" if flux_exec_workers: - flux_exec = "flux exec" + flux_exec = get_yaml_var(batch, "flux_exec", "flux exec") if "/" in flux_path: flux_path += "/" diff --git a/merlin/study/celeryadapter.py b/merlin/study/celeryadapter.py index 67bae9d74..34393f967 100644 --- a/merlin/study/celeryadapter.py +++ b/merlin/study/celeryadapter.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # diff --git a/merlin/study/dag.py b/merlin/study/dag.py index 01f7aae91..838c14762 100644 --- a/merlin/study/dag.py +++ b/merlin/study/dag.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # @@ -44,11 +44,18 @@ class DAG: independent chains of tasks. """ - def __init__(self, maestro_dag, labels): + def __init__(self, maestro_adjacency_table, maestro_values, labels): """ - :param `maestro_dag`: A maestrowf ExecutionGraph. + :param `maestro_adjacency_table`: An ordered dict showing adjacency of nodes. Comes from a maestrowf ExecutionGraph. + :param `maestro_values`: An ordered dict of the values at each node. Comes from a maestrowf ExecutionGraph. + :param `labels`: A list of labels provided in the spec file. """ - self.dag = maestro_dag + # We used to store the entire maestro ExecutionGraph here but now it's + # unpacked so we're only storing the 2 attributes from it that we use: + # the adjacency table and the values. This had to happen to get pickle + # to work for Celery. + self.maestro_adjacency_table = maestro_adjacency_table + self.maestro_values = maestro_values self.backwards_adjacency = {} self.calc_backwards_adjacency() self.labels = labels @@ -59,7 +66,7 @@ def step(self, task_name): :param `task_name`: The task name. :return: A Merlin Step object. """ - return Step(self.dag.values[task_name]) + return Step(self.maestro_values[task_name]) def calc_depth(self, node, depths, current_depth=0): """Calculate the depth of the given node and its children. @@ -116,7 +123,7 @@ def children(self, task_name): :return: list of children of this task. """ - return self.dag.adjacency_table[task_name] + return self.maestro_adjacency_table[task_name] def num_children(self, task_name): """Find the number of children for the given task in the dag. @@ -156,8 +163,8 @@ def find_chain(task_name, list_of_groups_of_chains): def calc_backwards_adjacency(self): """initializes our backwards adjacency table""" - for parent in self.dag.adjacency_table: - for task_name in self.dag.adjacency_table[parent]: + for parent in self.maestro_adjacency_table: + for task_name in self.maestro_adjacency_table[parent]: if task_name in self.backwards_adjacency: self.backwards_adjacency[task_name].append(parent) else: diff --git a/merlin/study/script_adapter.py b/merlin/study/script_adapter.py index 23398e117..826c1d2e3 100644 --- a/merlin/study/script_adapter.py +++ b/merlin/study/script_adapter.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. 
# diff --git a/merlin/study/step.py b/merlin/study/step.py index 5b03bf2cb..5e7d89e43 100644 --- a/merlin/study/step.py +++ b/merlin/study/step.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # @@ -121,7 +121,7 @@ def clone_changing_workspace_and_cmd(self, new_cmd=None, cmd_replacement_pairs=N new_workspace = self.get_workspace() LOG.debug(f"cloned step with workspace {new_workspace}") study_step = StudyStep() - study_step.name = step_dict["name"] + study_step.name = step_dict["_name"] study_step.description = step_dict["description"] study_step.run = step_dict["run"] return Step(MerlinStepRecord(new_workspace, study_step)) @@ -218,7 +218,7 @@ def name(self): """ :return : The step name. """ - return self.mstep.step.__dict__["name"] + return self.mstep.step.__dict__["_name"] def execute(self, adapter_config): """ diff --git a/merlin/study/study.py b/merlin/study/study.py index efa43dd9f..b7f990a2a 100644 --- a/merlin/study/study.py +++ b/merlin/study/study.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # @@ -107,6 +107,20 @@ def __init__( "MERLIN_SOFT_FAIL": str(int(ReturnCode.SOFT_FAIL)), "MERLIN_HARD_FAIL": str(int(ReturnCode.HARD_FAIL)), "MERLIN_RETRY": str(int(ReturnCode.RETRY)), + # below will be substituted for sample values on execution + "MERLIN_SAMPLE_VECTOR": " ".join( + ["$({})".format(k) for k in self.get_sample_labels(from_spec=self.original_spec)] + ), + "MERLIN_SAMPLE_NAMES": " ".join(self.get_sample_labels(from_spec=self.original_spec)), + "MERLIN_SPEC_ORIGINAL_TEMPLATE": os.path.join( + self.info, self.original_spec.description["name"].replace(" ", "_") + ".orig.yaml" + ), + "MERLIN_SPEC_EXECUTED_RUN": os.path.join( + self.info, self.original_spec.description["name"].replace(" ", "_") + ".partial.yaml" + ), + "MERLIN_SPEC_ARCHIVED_COPY": os.path.join( + self.info, self.original_spec.description["name"].replace(" ", "_") + ".expanded.yaml" + ), } self.pgen_file = pgen_file @@ -182,6 +196,11 @@ def samples(self): return self.load_samples() return [] + def get_sample_labels(self, from_spec): + if from_spec.merlin["samples"]: + return from_spec.merlin["samples"]["column_labels"] + return [] + @property def sample_labels(self): """ @@ -197,9 +216,7 @@ def sample_labels(self): :return: list of labels (e.g. ["X0", "X1"] ) """ - if self.expanded_spec.merlin["samples"]: - return self.expanded_spec.merlin["samples"]["column_labels"] - return [] + return self.get_sample_labels(from_spec=self.expanded_spec) def load_samples(self): """ @@ -502,7 +519,8 @@ def load_dag(self): labels = [] if self.expanded_spec.merlin["samples"]: labels = self.expanded_spec.merlin["samples"]["column_labels"] - self.dag = DAG(maestro_dag, labels) + # To avoid pickling issues with _pass_detect_cycle from maestro, we unpack the dag here + self.dag = DAG(maestro_dag.adjacency_table, maestro_dag.values, labels) def get_adapter_config(self, override_type=None): adapter_config = dict(self.expanded_spec.batch) diff --git a/merlin/utils.py b/merlin/utils.py index b9f8742bb..2959b79e2 100644 --- a/merlin/utils.py +++ b/merlin/utils.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.5. +# This file is part of Merlin, Version: 1.9.0. 
# # For details, see https://github.com/LLNL/merlin. # diff --git a/requirements/release.txt b/requirements/release.txt index 4771b7a4c..821589c41 100644 --- a/requirements/release.txt +++ b/requirements/release.txt @@ -3,9 +3,10 @@ celery[redis,sqlalchemy]>=5.0.3 coloredlogs cryptography importlib_resources; python_version < '3.7' -maestrowf==1.1.7dev0 +maestrowf>=1.1.9dev1 numpy parse psutil>=5.1.0 pyyaml>=5.1.2 tabulate +redis>=4.3.4 \ No newline at end of file diff --git a/setup.py b/setup.py index f60536d30..9bf170126 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.4. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. # @@ -96,11 +96,11 @@ def extras_require(): long_description=readme(), long_description_content_type="text/markdown", classifiers=[ - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ], keywords="machine learning workflow", url="https://github.com/LLNL/merlin", diff --git a/tests/integration/conditions.py b/tests/integration/conditions.py index afafa0d99..3448ec61a 100644 --- a/tests/integration/conditions.py +++ b/tests/integration/conditions.py @@ -238,3 +238,65 @@ def passes(self): if self.negate: return not self.is_within() return self.is_within() + + +class PathExists(Condition): + """ + A condition for checking if a path to a file or directory exists + """ + + def __init__(self, pathname) -> None: + self.pathname = pathname + + def path_exists(self) -> bool: + return os.path.exists(self.pathname) + + def __str__(self) -> str: + return f"{__class__.__name__} expected to find file or directory at {self.pathname}" + + @property + def passes(self): + return self.path_exists() + + +class FileHasRegex(Condition): + """ + A condition that some body of text within a file + MUST match a given regular expression. + """ + + def __init__(self, filename, regex) -> None: + self.filename = filename + self.regex = regex + + def contains(self) -> bool: + try: + with open(self.filename, "r") as f: + filetext = f.read() + return self.is_within(filetext) + except Exception: + return False + + def is_within(self, text): + return search(self.regex, text) is not None + + def __str__(self) -> str: + return f"{__class__.__name__} expected to find {self.regex} regex match within {self.filename} file but no match was found" + + @property + def passes(self): + return self.contains() + + +class FileHasNoRegex(FileHasRegex): + """ + A condition that some body of text within a file + MUST NOT match a given regular expression. + """ + + def __str__(self) -> str: + return f"{__class__.__name__} expected to find {self.regex} regex to not match within {self.filename} file but a match was found" + + @property + def passes(self): + return not self.contains() diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 3a8038d06..bfd80eb83 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -6,7 +6,7 @@ # # LLNL-CODE-797170 # All rights reserved. -# This file is part of Merlin, Version: 1.8.4. +# This file is part of Merlin, Version: 1.9.0. # # For details, see https://github.com/LLNL/merlin. 
#
@@ -34,6 +34,7 @@
"""
import argparse
import shutil
+import sys
import time
from contextlib import suppress
from subprocess import PIPE, Popen

@@ -77,6 +78,12 @@ def run_single_test(name, test, test_label="", buffer_length=50):
            info["violated_condition"] = (condition, i, len(conditions))
            break

+    if len(test) == 4:
+        end_process = Popen(test[3], stdout=PIPE, stderr=PIPE, shell=True)
+        end_stdout, end_stderr = end_process.communicate()
+        info["end_stdout"] = end_stdout
+        info["end_stderr"] = end_stderr
+
    return passed, info

@@ -138,7 +145,11 @@ def run_tests(args, tests):
    n_to_run = 0
    selective = True
    for test_id, test in enumerate(tests.values()):
-        if len(test) == 3 and test[2] == "local":
+        # Ensures that test definitions are at least size 3.
+        # The 'local' marker is stored in the 3rd element of a test definition,
+        # but an optional 4th element can provide an ending command
+        # to be run after all checks have been made.
+        if len(test) >= 3 and test[2] == "local":
            args.ids.append(test_id + 1)
            n_to_run += 1

@@ -210,7 +221,7 @@ def main():
    clear_test_studies_dir()

    result = run_tests(args, tests)
-    return result
+    sys.exit(result)


if __name__ == "__main__":
diff --git a/tests/integration/test_definitions.py b/tests/integration/test_definitions.py
index dbdffd2cd..d23cbd57e 100644
--- a/tests/integration/test_definitions.py
+++ b/tests/integration/test_definitions.py
@@ -1,9 +1,19 @@
-from conditions import HasRegex, HasReturnCode, ProvenanceYAMLFileHasRegex, StepFileExists, StepFileHasRegex
+from conditions import (
+    FileHasNoRegex,
+    FileHasRegex,
+    HasRegex,
+    HasReturnCode,
+    PathExists,
+    ProvenanceYAMLFileHasRegex,
+    StepFileExists,
+    StepFileHasRegex,
+)

from merlin.utils import get_flux_cmd

OUTPUT_DIR = "cli_test_studies"
+CLEAN_MERLIN_SERVER = "rm -rf appendonly.aof dump.rdb merlin_server/"


def define_tests():
@@ -45,6 +55,81 @@ def define_tests():
            "local",
        ),
    }
+    server_basic_tests = {
+        "merlin server init": (
+            "merlin server init",
+            HasRegex(".*successful"),
+            "local",
+            CLEAN_MERLIN_SERVER,
+        ),
+        "merlin server start/stop": (
+            """merlin server init;
+            merlin server start;
+            merlin server status;
+            merlin server stop;""",
+            [
+                HasRegex("Server started with PID [0-9]*"),
+                HasRegex("Merlin server is running"),
+                HasRegex("Merlin server terminated"),
+            ],
+            "local",
+            CLEAN_MERLIN_SERVER,
+        ),
+        "merlin server restart": (
+            """merlin server init;
+            merlin server start;
+            merlin server restart;
+            merlin server status;
+            merlin server stop;""",
+            [
+                HasRegex("Server started with PID [0-9]*"),
+                HasRegex("Merlin server is running"),
+                HasRegex("Merlin server terminated"),
+            ],
+            "local",
+            CLEAN_MERLIN_SERVER,
+        ),
+    }
+    server_config_tests = {
+        "merlin server change config": (
+            """merlin server init;
+            merlin server config -p 8888 -pwd new_password -d ./config_dir -ss 80 -sc 8 -sf new_sf -am always -af new_af.aof;
+            merlin server start;
+            merlin server stop;""",
+            [
+                FileHasRegex("merlin_server/redis.conf", "port 8888"),
+                FileHasRegex("merlin_server/redis.conf", "requirepass new_password"),
+                FileHasRegex("merlin_server/redis.conf", "dir ./config_dir"),
+                FileHasRegex("merlin_server/redis.conf", "save 80 8"),
+                FileHasRegex("merlin_server/redis.conf", "dbfilename new_sf"),
+                FileHasRegex("merlin_server/redis.conf", "appendfsync always"),
+                FileHasRegex("merlin_server/redis.conf", 'appendfilename "new_af.aof"'),
+                PathExists("./config_dir/new_sf"),
+                PathExists("./config_dir/appendonlydir"),
+                HasRegex("Server started with PID [0-9]*"),
+                HasRegex("Merlin server 
terminated"), + ], + "local", + "rm -rf appendonly.aof dump.rdb merlin_server/ config_dir/", + ), + "merlin server config add/remove user": ( + """merlin server init; + merlin server start; + merlin server config --add-user new_user new_password; + merlin server stop; + scp ./merlin_server/redis.users ./merlin_server/redis.users_new + merlin server start; + merlin server config --remove-user new_user; + merlin server stop; + """, + [ + FileHasRegex("./merlin_server/redis.users_new", "new_user"), + FileHasNoRegex("./merlin_server/redis.users", "new_user"), + ], + "local", + CLEAN_MERLIN_SERVER, + ), + } examples_check = { "example list": ( "merlin example list", @@ -371,6 +456,8 @@ def define_tests(): all_tests = {} for test_dict in [ basic_checks, + server_basic_tests, + server_config_tests, examples_check, run_workers_echo_tests, wf_format_tests, diff --git a/tests/unit/spec/test_specification.py b/tests/unit/spec/test_specification.py index 6b3503fb5..e1fb09a6b 100644 --- a/tests/unit/spec/test_specification.py +++ b/tests/unit/spec/test_specification.py @@ -3,6 +3,8 @@ import tempfile import unittest +import yaml + from merlin.spec.specification import MerlinSpec @@ -80,6 +82,31 @@ label : N_NEW.%% """ +INVALID_MERLIN = """ +description: + name: basic_ensemble_invalid_merlin + description: Template yaml to ensure our custom merlin block verification works as intended + +batch: + type: local + +study: + - name: step1 + description: | + this won't actually run + run: + cmd: | + echo "if this is printed something is bad" + +merlin: + resources: + task_server: celery + overlap: false + workers: + worker1: + steps: [] +""" + class TestMerlinSpec(unittest.TestCase): """Test the logic for parsing the Merlin spec into a MerlinSpec.""" @@ -170,3 +197,75 @@ def test_default_merlin_block(self): self.assertEqual(self.spec.merlin["resources"]["workers"]["default_worker"]["batch"], None) self.assertEqual(self.spec.merlin["resources"]["workers"]["default_worker"]["nodes"], None) self.assertEqual(self.spec.merlin["samples"], None) + + +class TestCustomVerification(unittest.TestCase): + """ + Tests to make sure our custom verification on merlin specific parts of our + spec files is working as intended. Verification happens in + merlin/spec/specification.py + + NOTE: reset_spec() should be called at the end of each test to make sure the + test file is reset. + + CREATING A NEW VERIFICATION TEST: + 1. Read in the spec with self.read_spec() + 2. Modify the spec with an invalid value to test for (e.g. a bad step, a bad walltime, etc.) + 3. Update the spec file with self.update_spec(spec) + 4. Assert that the correct error is thrown + 5. 
Reset the spec file with self.reset_spec() + """ + + def setUp(self): + self.tmpdir = tempfile.mkdtemp() + self.merlin_spec_filepath = os.path.join(self.tmpdir, "merlin_verification.yaml") + self.write_spec(INVALID_MERLIN) + + def tearDown(self): + shutil.rmtree(self.tmpdir, ignore_errors=True) + + def reset_spec(self): + self.write_spec(INVALID_MERLIN) + + def write_spec(self, spec): + with open(self.merlin_spec_filepath, "w+") as _file: + _file.write(spec) + + def read_spec(self): + with open(self.merlin_spec_filepath, "r") as yamfile: + spec = yaml.load(yamfile, yaml.Loader) + return spec + + def update_spec(self, spec): + with open(self.merlin_spec_filepath, "w") as yamfile: + yaml.dump(spec, yamfile, yaml.Dumper) + + def test_invalid_step(self): + # Read in the existing spec and update it with our bad step + spec = self.read_spec() + spec["merlin"]["resources"]["workers"]["worker1"]["steps"].append("bad_step") + self.update_spec(spec) + + # Assert that the invalid format was caught + with self.assertRaises(ValueError): + MerlinSpec.load_specification(self.merlin_spec_filepath) + + # Reset the spec to the default value + self.reset_spec() + + def test_invalid_walltime(self): + # Read in INVALID_MERLIN spec + spec = self.read_spec() + + invalid_walltimes = ["2", "0:1", "111", "1:1:1", "65", "65:12", "66:77", ":02:12", "123:45:33", ""] + + # Loop through the invalid walltimes and make sure they're all caught + for time in invalid_walltimes: + spec["batch"]["walltime"] = time + self.update_spec(spec) + + with self.assertRaises(ValueError): + MerlinSpec.load_specification(self.merlin_spec_filepath) + + # Reset the spec + self.reset_spec() diff --git a/tox.ini b/tox.ini index 457a827ca..4dfe01e03 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py35, py36, py37 +envlist = py37, py38, py39, py310, py311 [testenv] deps =