diff --git a/papermill/cli.py b/papermill/cli.py index 0c1b54d0..9413f1c0 100755 --- a/papermill/cli.py +++ b/papermill/cli.py @@ -45,6 +45,7 @@ def print_papermill_version(ctx, param, value): @click.option('--parameters_file', '-f', multiple=True, help='Path to YAML file containing parameters.') @click.option('--parameters_yaml', '-y', multiple=True, help='YAML string to be used as parameters.') @click.option('--parameters_base64', '-b', multiple=True, help='Base64 encoded YAML string as parameters.') +@click.option('--remove-tagged-cells', type=str, help='Remove cells with the specified tag before execution.') @click.option( '--inject-input-path', is_flag=True, @@ -147,6 +148,7 @@ def papermill( parameters_file, parameters_yaml, parameters_base64, + remove_tagged_cells, inject_input_path, inject_output_path, inject_paths, @@ -240,6 +242,7 @@ def papermill( request_save_on_cell_execute=request_save_on_cell_execute, autosave_cell_every=autosave_cell_every, prepare_only=prepare_only, + remove_tagged_cells=remove_tagged_cells, kernel_name=kernel, language=language, progress_bar=progress_bar, diff --git a/papermill/execute.py b/papermill/execute.py index eb76cc34..4f647a8b 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -1,3 +1,4 @@ +from copy import deepcopy from pathlib import Path import nbformat @@ -18,6 +19,7 @@ def execute_notebook( engine_name=None, request_save_on_cell_execute=True, prepare_only=False, + remove_tagged_cells=None, kernel_name=None, language=None, progress_bar=True, @@ -47,6 +49,9 @@ def execute_notebook( How often in seconds to save in the middle of long cell executions prepare_only : bool, optional Flag to determine if execution should occur or not + remove_tagged_cells : str, optional + If specified, cells with the specified tag will be removed + before execution and will not be present in the output notebook. 
def _cell_has_tag(cell, tag):
    """Return True when ``cell`` lists ``tag`` in its ``metadata['tags']``."""
    metadata = getattr(cell, 'metadata', None)
    # A cell with no metadata, or with no 'tags' entry, cannot match any tag.
    if not metadata or 'tags' not in metadata:
        return False
    # Treat an explicit null/empty tag list as "no tags".
    return tag in (metadata['tags'] or ())


def remove_tagged_cells_from_notebook(nb, tag):
    """
    Remove cells with a matching tag.

    Only cells whose ``metadata['tags']`` contains ``tag`` are dropped.
    Cells without a ``tags`` entry (or without metadata) never match and
    are always kept.

    Parameters
    ----------
    nb : NotebookNode
        Executable notebook object
    tag : str
        Tag used to identify cells to remove.

    Returns
    -------
    NotebookNode
        A deep copy of ``nb`` without the matching cells. The notebook
        passed in is left unmodified.
    """

    # Copy the notebook to avoid changing the input one
    nb = deepcopy(nb)

    # Keep every cell except the ones carrying the tag; relative order of
    # the remaining cells is preserved.
    nb.cells = [cell for cell in nb.cells if not _cell_has_tag(cell, tag)]

    return nb
"execution_count": null, + "id": "778581dd-f385-4039-be97-4050615fa271", + "metadata": { + "tags": [ + "printcell" + ] + }, + "outputs": [], + "source": [ + "print(a)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/papermill/tests/test_execute.py b/papermill/tests/test_execute.py index 87816cd3..e114c496 100644 --- a/papermill/tests/test_execute.py +++ b/papermill/tests/test_execute.py @@ -124,6 +124,49 @@ def test_prepare_only(self): ['# Parameters', r'foo = "do\\ not\\ crash"', ''], ) + def test_remove_tagged_cells(self): + notebook_name = 'simple_with_tags.ipynb' + + # Default case, no cells are skipped + nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}') + execute_notebook(get_notebook_path(notebook_name), nb_test_executed_fname, {}) + output_nb = load_notebook_node(nb_test_executed_fname) + assert len(output_nb.cells) == 4 + + # If a nonexistent tag is specified, no cells are skipped + nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}') + execute_notebook( + get_notebook_path(notebook_name), + nb_test_executed_fname, + {}, + remove_tagged_cells="nonexistent", + ) + output_nb = load_notebook_node(nb_test_executed_fname) + assert len(output_nb.cells) == 4 + + # If cells with the 'printcell' tag are skipped, the output notebook is missing one cell + nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}') + execute_notebook( + get_notebook_path(notebook_name), + nb_test_executed_fname, + {}, + remove_tagged_cells="printcell", + ) + output_nb = load_notebook_node(nb_test_executed_fname) 
+ assert len(output_nb.cells) == 3 + + # If cells with the 'assigncell' tag are skipped, the execution raises an error + nb_test_executed_fname = os.path.join(self.test_dir, f'output_{notebook_name}') + with self.assertRaises(PapermillExecutionError): + execute_notebook( + get_notebook_path(notebook_name), + nb_test_executed_fname, + {}, + remove_tagged_cells="assigncell", + ) + output_nb = load_notebook_node(nb_test_executed_fname) + self.assertEqual(output_nb.cells[4].outputs[0]["evalue"], "name 'a' is not defined") + class TestBrokenNotebook1(unittest.TestCase): def setUp(self):