From 38a6f06f1c7fdf24e0171ed6e8052f5f63b22bcf Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 9 Nov 2023 11:39:36 +0000 Subject: [PATCH 1/5] Initial implementation of remove_tagged_cells --- papermill/cli.py | 5 +++++ papermill/execute.py | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/papermill/cli.py b/papermill/cli.py index 07be8d44..830622d6 100755 --- a/papermill/cli.py +++ b/papermill/cli.py @@ -62,6 +62,9 @@ def print_papermill_version(ctx, param, value): @click.option( '--parameters_base64', '-b', multiple=True, help='Base64 encoded YAML string as parameters.' ) +@click.option( + '--remove-tagged-cells', type=str, help='Remove cells with the specified tag before execution.' +) @click.option( '--inject-input-path', is_flag=True, @@ -166,6 +169,7 @@ def papermill( parameters_file, parameters_yaml, parameters_base64, + remove_tagged_cells, inject_input_path, inject_output_path, inject_paths, @@ -259,6 +263,7 @@ def papermill( request_save_on_cell_execute=request_save_on_cell_execute, autosave_cell_every=autosave_cell_every, prepare_only=prepare_only, + remove_tagged_cells=remove_tagged_cells, kernel_name=kernel, language=language, progress_bar=progress_bar, diff --git a/papermill/execute.py b/papermill/execute.py index cb443a3c..6529e9c8 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -2,6 +2,7 @@ import nbformat from pathlib import Path +from copy import deepcopy from .log import logger from .exceptions import PapermillExecutionError @@ -19,6 +20,7 @@ def execute_notebook( engine_name=None, request_save_on_cell_execute=True, prepare_only=False, + remove_tagged_cells=None, kernel_name=None, language=None, progress_bar=True, @@ -28,7 +30,7 @@ def execute_notebook( start_timeout=60, report_mode=False, cwd=None, - **engine_kwargs + **engine_kwargs, ): """Executes a single notebook locally. 
@@ -48,6 +50,9 @@ def execute_notebook( How often in seconds to save in the middle of long cell executions prepare_only : bool, optional Flag to determine if execution should occur or not + remove_tagged_cells : str, optional + If specified, cells with the specified tag will be removed + before execution and will not be present in the output notebook. kernel_name : str, optional Name of kernel to execute the notebook against language : str, optional @@ -106,6 +111,8 @@ def execute_notebook( ) nb = prepare_notebook_metadata(nb, input_path, output_path, report_mode) + if remove_tagged_cells is not None: + nb = remove_tagged_cells_from_notebook(nb, remove_tagged_cells) # clear out any existing error markers from previous papermill runs nb = remove_error_markers(nb) @@ -127,7 +134,7 @@ def execute_notebook( start_timeout=start_timeout, stdout_file=stdout_file, stderr_file=stderr_file, - **engine_kwargs + **engine_kwargs, ) # Check for errors first (it saves on error before raising) @@ -167,6 +174,33 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False): return nb +def remove_tagged_cells_from_notebook(nb, tag): + """ + Remove cells with a matching tag. + + Parameters + ---------- + nb : NotebookNode + Executable notebook object + tag : str + Tag used to identify cells to remove. 
+ """ + + # Copy the notebook to avoid changing the input one + nb = deepcopy(nb) + + # Filter out cells containing the tag + cells = [] + for cell in nb.cells: + if hasattr(cell, 'metadata') and 'tags' in cell.metadata: + if tag not in cell.metadata['tags']: + cells.append(cell) + + nb.cells = cells + + return nb + + ERROR_MARKER_TAG = "papermill-error-cell-tag" ERROR_STYLE = ( From 7ad32a0fad727200274ce2b63454743b02e7a8c6 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 9 Nov 2023 11:55:36 +0000 Subject: [PATCH 2/5] Added test notebook --- .../tests/notebooks/simple_with_tags.ipynb | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 papermill/tests/notebooks/simple_with_tags.ipynb diff --git a/papermill/tests/notebooks/simple_with_tags.ipynb b/papermill/tests/notebooks/simple_with_tags.ipynb new file mode 100644 index 00000000..c99ba1ce --- /dev/null +++ b/papermill/tests/notebooks/simple_with_tags.ipynb @@ -0,0 +1,67 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a0730871-38e0-4f31-9466-1a117965e5a1", + "metadata": {}, + "source": [ + "### Markdown cell" + ] + }, + { + "cell_type": "raw", + "id": "00000b67-913e-459a-80dc-2520b2483d7d", + "metadata": {}, + "source": [ + "Raw cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7979d49a-abb1-4815-9534-ad76e4505b56", + "metadata": { + "tags": [ + "skipcell" + ] + }, + "outputs": [], + "source": [ + "a = 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "778581dd-f385-4039-be97-4050615fa271", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "print(a)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 332509e2e35ba13c7dc9f45be901f92e442d3a90 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 9 Nov 2023 12:13:30 +0000 Subject: [PATCH 3/5] Added test of remove_tagged_cells in execute_notebook --- .../tests/notebooks/simple_with_tags.ipynb | 6 ++- papermill/tests/test_execute.py | 43 +++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/papermill/tests/notebooks/simple_with_tags.ipynb b/papermill/tests/notebooks/simple_with_tags.ipynb index c99ba1ce..09e55981 100644 --- a/papermill/tests/notebooks/simple_with_tags.ipynb +++ b/papermill/tests/notebooks/simple_with_tags.ipynb @@ -22,7 +22,7 @@ "id": "7979d49a-abb1-4815-9534-ad76e4505b56", "metadata": { "tags": [ - "skipcell" + "assigncell" ] }, "outputs": [], @@ -35,7 +35,9 @@ "execution_count": null, "id": "778581dd-f385-4039-be97-4050615fa271", "metadata": { - "tags": [] + "tags": [ + "printcell" + ] }, "outputs": [], "source": [ diff --git a/papermill/tests/test_execute.py b/papermill/tests/test_execute.py index 253e55f5..6607265d 100644 --- a/papermill/tests/test_execute.py +++ b/papermill/tests/test_execute.py @@ -135,6 +135,49 @@ def test_prepare_only(self): ['# Parameters', r'foo = "do\\ not\\ crash"', ''], ) + def test_remove_tagged_cells(self): + notebook_name = 'simple_with_tags.ipynb' + + # Default case, no cells are skipped + nb_test_executed_fname = os.path.join(self.test_dir, 'output_{}'.format(notebook_name)) + execute_notebook(get_notebook_path(notebook_name), nb_test_executed_fname, {}) + output_nb = load_notebook_node(nb_test_executed_fname) + assert len(output_nb.cells) == 4 + + # If a nonexistent tag is specified, no cells are skipped + nb_test_executed_fname = os.path.join(self.test_dir, 'output_{}'.format(notebook_name)) + execute_notebook( + get_notebook_path(notebook_name), + nb_test_executed_fname, + {}, + remove_tagged_cells="nonexistent", + ) + output_nb = 
load_notebook_node(nb_test_executed_fname) + assert len(output_nb.cells) == 4 + + # If cells with the 'printcell' tag are skipped, the output notebook is missing one cell + nb_test_executed_fname = os.path.join(self.test_dir, 'output_{}'.format(notebook_name)) + execute_notebook( + get_notebook_path(notebook_name), + nb_test_executed_fname, + {}, + remove_tagged_cells="printcell", + ) + output_nb = load_notebook_node(nb_test_executed_fname) + assert len(output_nb.cells) == 3 + + # If cells with the 'assigncell' tag are skipped, the execution raises an error + nb_test_executed_fname = os.path.join(self.test_dir, 'output_{}'.format(notebook_name)) + with self.assertRaises(PapermillExecutionError): + execute_notebook( + get_notebook_path(notebook_name), + nb_test_executed_fname, + {}, + remove_tagged_cells="assigncell", + ) + output_nb = load_notebook_node(nb_test_executed_fname) + self.assertEqual(output_nb.cells[4].outputs[0]["evalue"], "name 'a' is not defined") + class TestBrokenNotebook1(unittest.TestCase): def setUp(self): From fcbbd845fff2e20d29ed8497f94f3687e453645e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:01:26 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- papermill/tests/test_execute.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/papermill/tests/test_execute.py b/papermill/tests/test_execute.py index 1a8ec9ae..385eb91c 100644 --- a/papermill/tests/test_execute.py +++ b/papermill/tests/test_execute.py @@ -136,13 +136,13 @@ def test_remove_tagged_cells(self): notebook_name = 'simple_with_tags.ipynb' # Default case, no cells are skipped - nb_test_executed_fname = os.path.join(self.test_dir, 'output_{}'.format(notebook_name)) + nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}') execute_notebook(get_notebook_path(notebook_name), 
nb_test_executed_fname, {}) output_nb = load_notebook_node(nb_test_executed_fname) assert len(output_nb.cells) == 4 # If a nonexistent tag is specified, no cells are skipped - nb_test_executed_fname = os.path.join(self.test_dir, 'output_{}'.format(notebook_name)) + nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}') execute_notebook( get_notebook_path(notebook_name), nb_test_executed_fname, @@ -153,7 +153,7 @@ def test_remove_tagged_cells(self): assert len(output_nb.cells) == 4 # If cells with the 'printcell' tag are skipped, the output notebook is missing one cell - nb_test_executed_fname = os.path.join(self.test_dir, 'output_{}'.format(notebook_name)) + nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}') execute_notebook( get_notebook_path(notebook_name), nb_test_executed_fname, @@ -164,7 +164,7 @@ def test_remove_tagged_cells(self): assert len(output_nb.cells) == 3 # If cells with the 'assigncell' tag are skipped, the execution raises an error - nb_test_executed_fname = os.path.join(self.test_dir, 'output_{}'.format(notebook_name)) + nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}') with self.assertRaises(PapermillExecutionError): execute_notebook( get_notebook_path(notebook_name), From 5a585ca3bacaea8d6d12a3b3cd2c0f50f2255c1e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 5 Oct 2024 15:32:38 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- papermill/cli.py | 4 +--- papermill/execute.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/papermill/cli.py b/papermill/cli.py index f8b2c03f..9413f1c0 100755 --- a/papermill/cli.py +++ b/papermill/cli.py @@ -45,9 +45,7 @@ def print_papermill_version(ctx, param, value): @click.option('--parameters_file', '-f', multiple=True, help='Path to YAML file containing 
parameters.') @click.option('--parameters_yaml', '-y', multiple=True, help='YAML string to be used as parameters.') @click.option('--parameters_base64', '-b', multiple=True, help='Base64 encoded YAML string as parameters.') -@click.option( - '--remove-tagged-cells', type=str, help='Remove cells with the specified tag before execution.' -) +@click.option('--remove-tagged-cells', type=str, help='Remove cells with the specified tag before execution.') @click.option( '--inject-input-path', is_flag=True, diff --git a/papermill/execute.py b/papermill/execute.py index ddcb1b85..4f647a8b 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -1,5 +1,5 @@ -from pathlib import Path from copy import deepcopy +from pathlib import Path import nbformat