Skip to content

Commit

Permalink
Change docs to reflect keyword-only args in run
Browse files Browse the repository at this point in the history
  • Loading branch information
parejkoj committed Feb 21, 2025
1 parent 118edfa commit 392ab7a
Show file tree
Hide file tree
Showing 13 changed files with 37 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__(self, config: pexConfig.Config, *args, **kwargs):
self.outputSchema = afwTable.SourceTable.makeMinimalSchema()
self.apKey = self.outputSchema.addField("apFlux", type=np.float64, doc="Ap flux measured")

def run(self, exposure: afwImage.Exposure, inputCatalog: afwTable.SourceCatalog) -> pipeBase.Struct:
def run(self, *, exposure: afwImage.Exposure, inputCatalog: afwTable.SourceCatalog) -> pipeBase.Struct:
# set dimension cutouts to 3 times the apRad times 2 (for diameter)
dimensions = (3 * self.apRad * 2, 3 * self.apRad * 2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(self, config: pexConfig.Config, initInput: Mapping, *args, **kwargs
# matches an initOut so will be persisted
self.outputSchema = afwTable.SourceCatalog(self.schema)

def run(self, exposure: afwImage.Exposure, inputCatalog: afwTable.SourceCatalog) -> pipeBase.Struct:
def run(self, *, exposure: afwImage.Exposure, inputCatalog: afwTable.SourceCatalog) -> pipeBase.Struct:
# create the catalog in which new measurements will be stored
outputCatalog = afwTable.SourceCatalog(self.schema)
# Add in all the records from the input catalog into what will be the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def __init__(self, config: pexConfig.Config, initInput: Mapping, *args, **kwargs

def run(
self,
*,
exposure: afwImage.Exposure,
inputCatalog: afwTable.SourceCatalog,
background: afwMath.BackgroundList | None = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def __init__(self, config: pexConfig.Config, initInput: Mapping, *args, **kwargs

def run(
self,
*,
exposure: afwImage.Exposure,
inputCatalog: afwTable.SourceCatalog,
background: afwMath.BackgroundList | None = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def __init__(self, config: pexConfig.Config, initInput: Mapping, *args, **kwargs

def run(
self,
*,
exposure: afwImage.Exposure,
inputCatalog: afwTable.SourceCatalog,
areaMask: afwImage.Mask,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def __init__(self, config: pexConfig.Config, initInput: Mapping, *args, **kwargs

def run(
self,
*,
exposures: list[afwImage.Exposure],
inputCatalogs: list[afwTable.SourceCatalog],
areaMasks: list[afwImage.Mask],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def __init__(self, config: pexConfig.Config, initInput: Mapping, *args, **kwargs

def run(
self,
*,
exposures: list[afwImage.Exposure],
inputCatalogs: list[afwTable.SourceCatalog],
areaMasks: list[afwImage.Mask],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def __init__(self, config: pexConfig.Config, initInput: Mapping, *args, **kwargs

def run(
self,
*,
exposures: list[afwImage.Exposure],
inputCatalogs: list[afwTable.SourceCatalog],
areaMasks: list[afwImage.Mask],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def runQuantum(

def run(
self,
*,
exposures: list[afwImage.Exposure],
lengths: list[int],
areaMasks: list[afwImage.Mask],
Expand Down
47 changes: 18 additions & 29 deletions doc/lsst.pipe.base/creating-a-pipelinetask.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ Next, create a |Task| class that performs the measurements:
)
def run(
self, exposure: afwImage.Exposure, inputCatalog: afwTable.SourceCatalog
self, *, exposure: afwImage.Exposure, inputCatalog: afwTable.SourceCatalog
) -> pipeBase.Struct:
# set dimension cutouts to 3 times the apRad times 2 (for diameter)
dimensions = (3 * self.apRad * 2, 3 * self.apRad * 2)
Expand Down Expand Up @@ -177,26 +177,12 @@ look at what changes when you turn a Task into a PipelineTask.
class ApertureTask(pipeBase.PipelineTask):
...
def run(
self, exposure: afwImage.Exposure, inputCatalog: afwTable.SourceCatalog
) -> pipeBase.Struct:
...
return pipeBase.Struct(outputCatalog=self.outputCatalog)
In a simple PipelineTask like this, these are all the changes that need to be
made. Firstly the base class is changed to `PipelineTask`. This inheritance
In a simple PipelineTask like this, these are all the changes that need to be
made. The base class is changed to `PipelineTask`. This inheritance
provides all the base machinery that the middleware will need to run
this task. The second change you need to make a task into a `PipelineTask` is
to change the signature of the run method. A run method in a PipelineTask must
return a `lsst.pipe.base.Struct` object whose field names correspond to the
names of the outputs defined in the connection class. In our connection class
we defined the output collection with the identifier ``outputCatalog``, so
our returned `~lsst.pipe.base.Struct` has a field with that name as well.
Another thing worth highlighting, though it was not a change that was made, is
the names of the arguments to the run method. These names also must (and do)
correspond to the identifiers used for the input connections. The names of the
variables of the inputs and outputs are how the |PipelineTask| activator maps
connections into the in-memory data products that the algorithm requires.
this task.
Note that the ``run`` method already takes two `keyword-only arguments <https://peps.python.org/pep-3102/>`_ that match the input connections given above, and that it returns a `~lsst.pipe.base.Struct` containing a field matching the name of the output connection.
As noted in the :ref:`Task run documentation<task-run-method>`, returning a `~lsst.pipe.base.Struct` is good practice for any `~lsst.pipe.base.Task`, whether it is a `PipelineTask` or not.

The complete source integrating these changes can be used in :ref:`pipeline-appendix-a`.

Expand Down Expand Up @@ -248,7 +234,7 @@ is executed. Take a look what the connection class looks like.
self.schema = self.mapper.getOutputSchema()
def run(
self, exposure: afwImage.Exposure, inputCatalog: afwTable.SourceCatalog
self, *, exposure: afwImage.Exposure, inputCatalog: afwTable.SourceCatalog
) -> pipeBase.Struct:
# create the catalog in which new measurements will be stored
outputCatalog = afwTable.SourceCatalog(self.schema)
Expand Down Expand Up @@ -432,7 +418,7 @@ take into account that a background may or may not be supplied.
...
def run(
self,
self, *,
exposure: afwImage.Exposure,
inputCatalog: afwTable.SourceCatalog,
background: afwMath.BackgroundList | None = None,
Expand Down Expand Up @@ -725,7 +711,7 @@ code this behavior, but are done to demonstrate how to make use of the
...
def run(
self,
self, *,
exposures: List[afwImage.Exposure],
inputCatalogs: List[afwTable.SourceCatalog],
areaMasks: List[afwImage.Mask],
Expand Down Expand Up @@ -863,7 +849,7 @@ Take a look at how the ``run`` method changes to make use of this.
...
def run(
self,
self, *,
exposures: List[afwImage.Exposure],
inputCatalogs: List[afwTable.SourceCatalog],
areaMasks: List[afwImage.Mask],
Expand Down Expand Up @@ -1042,10 +1028,11 @@ with that single `lsst.daf.butler.DatasetRef`.
inputRefs: pipeBase.InputQuantizedConnection,
outputRefs: pipeBase.OutputQuantizedConnection,
):
inputs = {}
for name, refs in inputRefs:
inputs[name] = butlerQC.get(refs)
output = self.run(**inputs)
inputs = butlerQC.get(inputRefs)
exposure = inputs.pop("exposure")
inputCatalog = inputs.pop("inputCatalog")
assert not inputs, "runQuantum got more inputs than expected"
output = self.run(exposure=exposure, inputCatalog=inputCatalog)
butlerQC.put(output, outputRefs.OutputCatalog)
Overriding ``runQuantum`` also provides the opportunity to do a transformation
Expand All @@ -1054,6 +1041,8 @@ method to have a convenient interface for user interaction within a notebook
or shell, but still match the types of input `PipelineTask`\ s will get when
run by the middleware system.

TODO: the below explicitly violates the "stateless" convention for Tasks that we've tried to maintain since forever. Might be some work to redo it completely; is it worth keeping?

To demonstrate this, modify the ``runQuantum`` and ``run`` methods in such a
way that the output catalog of the task is already pre-populated with all of
the input catalogs. The user then only needs to supply the lengths of each of
Expand Down Expand Up @@ -1097,7 +1086,7 @@ demoing the concepts here.
def run(
self,
self, *,
exposures: List[afwImage.Exposure],
lengths: List[int],
areaMasks: List[afwImage.Mask],
Expand Down
4 changes: 3 additions & 1 deletion doc/lsst.pipe.base/creating-a-task.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,11 +190,13 @@ Task execution methods

For a detailed overview of creating a `~lsst.pipe.base.PipelineTask` see :doc:`creating-a-pipelinetask`.

.. _task-run-method:

The run method
^^^^^^^^^^^^^^

All tasks are required to have a ``run`` method which acts as their primary point of entry.
This method takes, as explicit arguments, everything that the task needs to perform one unit of execution (for example, processing a single image), and returns the result to the caller.
This method takes, as `keyword-only arguments <https://peps.python.org/pep-3102/>`_, everything that the task needs to perform one unit of execution (for example, processing a single image), and returns the result to the caller.
The ``run`` method should not perform I/O, and, in particular, should not be expected to have access to the Data Butler for storing and retrieving data.
Instead, results are returned as an `lsst.pipe.base.struct.Struct` object, with a named field for each item of data.
This is safer than returning a tuple of items, and allows adding fields without affecting existing code.
Expand Down
2 changes: 1 addition & 1 deletion doc/lsst.pipe.base/testing-a-pipeline-task.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ There is currently no test framework for the use of init-inputs in task construc
class OrTask(PipelineTask):
ConfigClass = OrConfig
def run(exp=None, cat=None):
def run(self, *, exp=None, cat=None):
...
Expand Down
12 changes: 6 additions & 6 deletions python/lsst/pipe/base/pipelineTask.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,14 @@ def run(self, **kwargs: Any) -> Struct: # type: ignore
"""Run task algorithm on in-memory data.
This method should be implemented in a subclass. This method will
receive keyword arguments whose names will be the same as names of
receive keyword-only arguments whose names will be the same as names of
connection fields describing input dataset types. Argument values will
be data objects retrieved from data butler. If a dataset type is
configured with ``multiple`` field set to ``True`` then the argument
value will be a list of objects, otherwise it will be a single object.
If the task needs to know its input or output DataIds then it has to
override `runQuantum` method instead.
If the task needs to know its input or output DataIds then it also has
to override the `runQuantum` method.
This method should return a `Struct` whose attributes share the same
name as the connection fields describing output dataset types.
Expand All @@ -149,9 +149,9 @@ def run(self, **kwargs: Any) -> Struct: # type: ignore
.. code-block:: python
def run(self, input, calib):
# "input", "calib", and "output" are the names of the config
# fields
def run(self, *, input, calib):
# "input", "calib", and "output" are the names of the
# connection fields.
# Assuming that input/calib datasets are `scalar` they are
# simple objects, do something with inputs and calibs, produce
Expand Down

0 comments on commit 392ab7a

Please sign in to comment.