From c7f3238ac509a8514dece376dc2a21470f67f710 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 4 Dec 2023 16:54:00 -0500 Subject: [PATCH 01/35] Remove the deprecated quantization tool (#53) Remove the deprecated quantization tool Signed-off-by: Jeremy Fowers --- docs/coverage.md | 1 - src/turnkeyml/build/export.py | 63 +---------------- src/turnkeyml/build/ignition.py | 65 ++--------------- src/turnkeyml/build/quantization_helpers.py | 78 --------------------- src/turnkeyml/build/sequences.py | 12 ---- src/turnkeyml/build_api.py | 11 --- src/turnkeyml/common/build.py | 34 +++------ 7 files changed, 17 insertions(+), 247 deletions(-) delete mode 100644 src/turnkeyml/build/quantization_helpers.py diff --git a/docs/coverage.md b/docs/coverage.md index 9cf1fc34..3bf9b898 100644 --- a/docs/coverage.md +++ b/docs/coverage.md @@ -41,7 +41,6 @@ Name Stmts Miss Branch BrPart Cover Mi -------------------------------------------------------------------------------------------------------- \turnkeyml\build\__init__.py 0 0 0 0 100% \turnkeyml\build\onnx_helpers.py 70 34 28 2 45% 15-21, 28-87, 92, 95-100 -\turnkeyml\build\quantization_helpers.py 29 20 18 0 19% 13-30, 35, 50-78 \turnkeyml\build\sequences.py 15 1 8 2 87% 62->61, 65 \turnkeyml\build\tensor_helpers.py 47 26 34 4 41% 17-44, 57, 61, 63-74, 78 \turnkeyml\build_api.py 31 9 8 3 64% 68-71, 120-125, 140-147 diff --git a/src/turnkeyml/build/export.py b/src/turnkeyml/build/export.py index 4a9f1572..2f4bc88d 100644 --- a/src/turnkeyml/build/export.py +++ b/src/turnkeyml/build/export.py @@ -15,7 +15,6 @@ import turnkeyml.common.build as build import turnkeyml.build.tensor_helpers as tensor_helpers import turnkeyml.build.onnx_helpers as onnx_helpers -import turnkeyml.build.quantization_helpers as quant_helpers import turnkeyml.common.filesystem as fs @@ -77,13 +76,6 @@ def converted_onnx_file(state: build.State): ) -def quantized_onnx_file(state: build.State): - return os.path.join( - onnx_dir(state), - f"{state.config.build_name}-op{state.config.onnx_opset}-opt-quantized_int8.onnx", - ) - - class ExportPlaceholder(stage.Stage): """ Placeholder Stage that should be replaced by a framework-specific export stage, @@ -571,9 +563,8 @@ def fire(self, state: build.State): inputs_file = state.original_inputs_file if os.path.isfile(inputs_file): inputs = np.load(inputs_file, allow_pickle=True) - to_downcast = False if state.quantization_samples else True inputs_converted = tensor_helpers.save_inputs( - inputs, inputs_file, downcast=to_downcast + inputs, inputs_file, downcast=True ) else: raise exp.StageError( @@ -621,58 +612,6 @@ def fire(self, state: build.State): return state -class QuantizeONNXModel(stage.Stage): - """ - Stage that takes an ONNX model and a dataset of quantization samples as inputs, - and performs static post-training quantization to the model to int8 precision. - - Expected inputs: - - state.model is a path to the ONNX model - - state.quantization_dataset is a dataset that is used for static quantization - - Outputs: - - A *_quantized.onnx file => the quantized onnx model. 
- """ - - def __init__(self): - super().__init__( - unique_name="quantize_onnx", - monitor_message="Quantizing ONNX model", - ) - - def fire(self, state: build.State): - input_path = state.intermediate_results[0] - output_path = quantized_onnx_file(state) - - quant_helpers.quantize( - input_file=input_path, - data=state.quantization_samples, - output_file=output_path, - ) - - # Check that the converted model is still valid - success_msg = "\tSuccess quantizing ONNX model to int8" - fail_msg = "\tFailed quantizing ONNX model to int8" - - if check_model(output_path, success_msg, fail_msg): - state.intermediate_results = [output_path] - - stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id) - stats.add_build_stat( - fs.Keys.ONNX_FILE, - output_path, - ) - else: - msg = f""" - Attempted to use {state.quantization_dataset} to statically quantize - model to int8 datatype, however this operation was not successful. - More information may be available in the log file at **{self.logfile_path}** - """ - raise exp.StageError(msg) - - return state - - class SuccessStage(stage.Stage): """ Stage that sets state.build_status = build.Status.SUCCESSFUL_BUILD, diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py index 18da5aaf..add22ec3 100644 --- a/src/turnkeyml/build/ignition.py +++ b/src/turnkeyml/build/ignition.py @@ -1,5 +1,4 @@ from typing import Optional, List, Tuple, Union, Dict, Any, Type, Callable -from collections.abc import Collection import sys import os import copy @@ -258,7 +257,6 @@ def load_or_make_state( monitor: bool, model: build.UnionValidModelInstanceTypes = None, inputs: Optional[Dict[str, Any]] = None, - quantization_samples: Optional[Collection] = None, state_type: Type = build.State, cache_validation_func: Callable = validate_cached_model, extra_state_args: Optional[Dict] = None, @@ -280,7 +278,6 @@ def load_or_make_state( "cache_dir": cache_dir, "config": config, "model_type": model_type, - "quantization_samples": quantization_samples, } # Ensure that `rebuild` has a valid value @@ -306,50 +303,6 @@ def load_or_make_state( state_type=state_type, ) - # if the previous build is using quantization while the current is not - # or vice versa - if state.quantization_samples and quantization_samples is None: - if rebuild == "never": - msg = ( - f"Model {config.build_name} was built in a previous call to " - "build_model() with post-training quantization sample enabled." - "However, post-training quantization is not enabled in the " - "current build. Rebuild is necessary but currently the rebuild" - "policy is set to 'never'. " - ) - raise exp.CacheError(msg) - - msg = ( - f"Model {config.build_name} was built in a previous call to " - "build_model() with post-training quantization sample enabled." - "However, post-training quantization is not enabled in the " - "current build. Starting a fresh build." - ) - - printing.log_info(msg) - return _begin_fresh_build(state_args, state_type) - - if not state.quantization_samples and quantization_samples is not None: - if rebuild == "never": - msg = ( - f"Model {config.build_name} was built in a previous call to " - "build_model() with post-training quantization sample disabled." - "However, post-training quantization is enabled in the " - "current build. Rebuild is necessary but currently the rebuild" - "policy is set to 'never'. 
" - ) - raise exp.CacheError(msg) - - msg = ( - f"Model {config.build_name} was built in a previous call to " - "build_model() with post-training quantization sample disabled." - "However, post-training quantization is enabled in the " - "current build. Starting a fresh build." - ) - - printing.log_info(msg) - return _begin_fresh_build(state_args, state_type) - except exp.StateError as e: problem = ( "- build_model() failed to load " @@ -500,7 +453,6 @@ def model_intake( user_model, user_inputs, user_sequence: Optional[stage.Sequence], - user_quantization_samples: Optional[Collection] = None, ) -> Tuple[Any, Any, stage.Sequence, build.ModelType, str]: # Model intake structure options: # user_model @@ -550,18 +502,11 @@ def model_intake( sequence = copy.deepcopy(user_sequence) if sequence is None: - if user_quantization_samples: - if model_type != build.ModelType.PYTORCH: - raise exp.IntakeError( - "Currently, post training quantization only supports Pytorch models." - ) - sequence = sequences.pytorch_with_quantization - else: - sequence = stage.Sequence( - "top_level_sequence", - "Top Level Sequence", - [sequences.onnx_fp32], - ) + sequence = stage.Sequence( + "top_level_sequence", + "Top Level Sequence", + [sequences.onnx_fp32], + ) # If there is an ExportPlaceholder Stage in the sequence, replace it with # a framework-specific export Stage. diff --git a/src/turnkeyml/build/quantization_helpers.py b/src/turnkeyml/build/quantization_helpers.py deleted file mode 100644 index e945bb2f..00000000 --- a/src/turnkeyml/build/quantization_helpers.py +++ /dev/null @@ -1,78 +0,0 @@ -import os -import numpy as np - -import onnx -import onnxruntime -from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType - - -class DataReader(CalibrationDataReader): - """Wrapper class around calibration data, which is used to quantize an onnx model.""" - - def __init__(self, input_file, samples, input_shapes=None, pack_inputs=False): - session = onnxruntime.InferenceSession(input_file, None) - input_names = [inp.name for inp in session.get_inputs()] - - if pack_inputs: - expand_each = lambda data: [np.expand_dims(d, axis=0) for d in data] - self.enum_data_dicts = iter( - [ - dict(zip(input_names, expand_each(sample_inputs))) - for sample_inputs in zip(*samples) - ] - ) - else: - if input_shapes: - self.samples = samples.reshape(-1, len(input_shapes), *input_shapes[0]) - else: - self.samples = samples - - self.enum_data_dicts = iter( - [dict(zip(input_names, sample)) for sample in self.samples] - ) - - def get_next(self): - return next(self.enum_data_dicts, None) - - -def quantize( - input_file, - data, - input_shapes=None, - pack_inputs=False, - verbose=False, - output_file=None, -): - """ - Given an onnx file and calibration data on which to quantize, - computes and saves quantized onnx model to a local file. 
- """ - data_reader = DataReader( - input_file, - samples=data, - input_shapes=input_shapes, - pack_inputs=pack_inputs, - ) - - if not output_file: - output_file = input_file[:-5] + "_quantized.onnx" - - quantize_static( - model_input=input_file, - model_output=output_file, - calibration_data_reader=data_reader, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - op_types_to_quantize=["Conv", "MatMul", "Relu"], - extra_options={"ActivationSymmetric": False, "WeightSymmetric": True}, - ) - - onnx.save(onnx.shape_inference.infer_shapes(onnx.load(output_file)), output_file) - - if os.path.isfile("augmented_model.onnx"): - os.remove("augmented_model.onnx") - - if verbose: - print("Calibrated and quantized model saved.") - - return output_file diff --git a/src/turnkeyml/build/sequences.py b/src/turnkeyml/build/sequences.py index bc39a0db..7e90ead3 100644 --- a/src/turnkeyml/build/sequences.py +++ b/src/turnkeyml/build/sequences.py @@ -35,18 +35,6 @@ enable_model_validation=True, ) -pytorch_with_quantization = stage.Sequence( - "pytorch_export_sequence_with_quantization", - "Exporting PyTorch Model and Quantizing Exported ONNX", - [ - export.ExportPytorchModel(), - export.OptimizeOnnxModel(), - export.QuantizeONNXModel(), - export.SuccessStage(), - ], - enable_model_validation=True, -) - # Plugin interface for sequences discovered_plugins = plugins.discover() diff --git a/src/turnkeyml/build_api.py b/src/turnkeyml/build_api.py index 5cf681b2..7322a15c 100644 --- a/src/turnkeyml/build_api.py +++ b/src/turnkeyml/build_api.py @@ -1,6 +1,5 @@ import os from typing import Optional, List, Dict, Any -from collections.abc import Collection import turnkeyml.build.ignition as ignition import turnkeyml.build.stage as stage import turnkeyml.common.printing as printing @@ -17,7 +16,6 @@ def build_model( monitor: Optional[bool] = None, rebuild: Optional[str] = None, sequence: Optional[List[stage.Stage]] = None, - quantization_samples: Collection = None, onnx_opset: Optional[int] = None, device: Optional[str] = None, ) -> build.State: @@ -48,11 +46,6 @@ def build_model( - None: Falls back to default sequence: Override the default sequence of build stages. Power users only. - quantization_samples: If set, performs post-training quantization - on the ONNX model using the provided samplesIf the previous build used samples - that are different to the samples used in current build, the "rebuild" - argument needs to be manually set to "always" in the current build - in order to create a new ONNX file. onnx_opset: ONNX opset to use during ONNX export. device: Specific device target to take into account during the build sequence. 
Use the format "device_family", "device_family::part", or @@ -96,7 +89,6 @@ def build_model( model, inputs, sequence, - user_quantization_samples=quantization_samples, ) # Get the state of the model from the cache if a valid build is available @@ -109,7 +101,6 @@ def build_model( monitor=monitor_setting, model=model_locked, inputs=inputs_locked, - quantization_samples=quantization_samples, ) # Return a cached build if possible, otherwise prepare the model State for @@ -124,8 +115,6 @@ def build_model( return state - state.quantization_samples = quantization_samples - sequence_locked.show_monitor(config, state.monitor) state = sequence_locked.launch(state) diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index a224d1ab..9f8ac382 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -8,7 +8,6 @@ import subprocess import enum from typing import Optional, Any, List, Dict, Union, Type -from collections.abc import Collection import dataclasses import hashlib import pkg_resources @@ -259,8 +258,6 @@ class State: # Results of a successful build results: Any = None - quantization_samples: Optional[Collection] = None - def __post_init__(self): if self.uid is None: self.uid = unique_id() @@ -309,16 +306,6 @@ def prepare_state_dict(self) -> Dict: state_dict["model_type"] = self.model_type.value state_dict["build_status"] = self.build_status.value - # During actual execution, quantization_samples in the state - # stores the actual quantization samples. - # However, we do not save quantization samples - # Instead, we save a boolean to indicate whether the model - # stored has been quantized by some samples. - if self.quantization_samples: - state_dict["quantization_samples"] = True - else: - state_dict["quantization_samples"] = False - return state_dict def save_yaml(self, state_dict: Dict): @@ -524,7 +511,7 @@ def get_system_info(): # Get OS Version try: info_dict["OS Version"] = platform.platform() - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except info_dict["Error OS Version"] = str(e) if os_type == "Windows": @@ -537,7 +524,7 @@ def get_system_info(): .strip() ) info_dict["Processor"] = proc_info - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except info_dict["Error Processor"] = str(e) # Get OEM System Information @@ -549,7 +536,7 @@ def get_system_info(): .strip() ) info_dict["OEM System"] = oem_info - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except info_dict["Error OEM System"] = str(e) # Get Physical Memory in GB @@ -564,7 +551,7 @@ def get_system_info(): ) mem_info_gb = round(int(mem_info_bytes) / (1024**3), 2) info_dict["Physical Memory"] = f"{mem_info_gb} GB" - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except info_dict["Error Physical Memory"] = str(e) elif os_type == "Linux": @@ -586,7 +573,7 @@ def get_system_info(): .strip() ) info_dict["OEM System"] = oem_info - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except info_dict["Error OEM System (WSL)"] = str(e) else: @@ -602,7 +589,7 @@ def get_system_info(): .replace("\n", " ") ) info_dict["OEM System"] = oem_info - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except info_dict["Error OEM System"] = str(e) # Get CPU Information @@ -612,7 
+599,7 @@ def get_system_info(): if "Model name:" in line: info_dict["Processor"] = line.split(":")[1].strip() break - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except info_dict["Error Processor"] = str(e) # Get Memory Information @@ -625,7 +612,7 @@ def get_system_info(): ) mem_info_gb = round(int(mem_info) / 1024, 2) info_dict["Memory Info"] = f"{mem_info_gb} GB" - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except info_dict["Error Memory Info"] = str(e) else: @@ -635,9 +622,10 @@ def get_system_info(): try: installed_packages = pkg_resources.working_set info_dict["Python Packages"] = [ - f"{i.key}=={i.version}" for i in installed_packages # pylint: disable=not-an-iterable + f"{i.key}=={i.version}" + for i in installed_packages # pylint: disable=not-an-iterable ] - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except info_dict["Error Python Packages"] = str(e) return info_dict From d5390fb453d752290adeef974dcb23231ec5046b Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 4 Dec 2023 17:25:09 -0500 Subject: [PATCH 02/35] Stop saving labels files in the cache (#54) Signed-off-by: Jeremy Fowers --- models/readme.md | 2 +- src/turnkeyml/analyze/script.py | 12 +++++------- src/turnkeyml/common/filesystem.py | 2 ++ src/turnkeyml/common/labels.py | 31 ------------------------------ test/analysis.py | 9 +++++---- test/cli.py | 7 ++++--- 6 files changed, 17 insertions(+), 46 deletions(-) diff --git a/models/readme.md b/models/readme.md index 2917b57b..c726f8e3 100644 --- a/models/readme.md +++ b/models/readme.md @@ -114,7 +114,7 @@ Example: # labels: author::google test_group::daily,monthly ``` -Labels are saved in your cache directory and can later be retrieved using the function `turnkey.common.labels.load_from_cache()`, which receives the `cache_dir` and `build_name` as inputs and returns the labels as a dictionary. +Labels are saved in your cache directory in the `turnkey_stats.yaml` file under the "labels" key. ### Parameters diff --git a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py index 27c13efb..3c213bb2 100644 --- a/src/turnkeyml/analyze/script.py +++ b/src/turnkeyml/analyze/script.py @@ -138,11 +138,6 @@ def explore_invocation( inputs[all_args[i]] = args[i] invocation_info.inputs = inputs - # Save model labels - if model_info.model_type != build.ModelType.ONNX_FILE: - tracer_args.labels["class"] = [f"{type(model_info.model).__name__}"] - labels.save_to_cache(tracer_args.cache_dir, build_name, tracer_args.labels) - # If the user has not provided a specific runtime, select the runtime # based on the device provided. 
if tracer_args.runtime is None: @@ -182,13 +177,16 @@ def explore_invocation( fs.Keys.PARAMETERS, model_info.params, ) + if model_info.model_type != build.ModelType.ONNX_FILE: + stats.save_stat(fs.Keys.CLASS, type(model_info.model).__name__) if fs.Keys.AUTHOR in tracer_args.labels: stats.save_stat(fs.Keys.AUTHOR, tracer_args.labels[fs.Keys.AUTHOR][0]) - if fs.Keys.CLASS in tracer_args.labels: - stats.save_stat(fs.Keys.CLASS, tracer_args.labels[fs.Keys.CLASS][0]) if fs.Keys.TASK in tracer_args.labels: stats.save_stat(fs.Keys.TASK, tracer_args.labels[fs.Keys.TASK][0]) + # Save all of the lables in one place + stats.save_stat(fs.Keys.LABELS, tracer_args.labels) + # If the input script is a built-in TurnkeyML model, make a note of # which one if os.path.abspath(fs.MODELS_DIR) in os.path.abspath(tracer_args.input): diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index 082ecef0..2767d15e 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -333,6 +333,8 @@ class Keys: MODEL_NAME = "model_name" # References the per-build stats section BUILDS = "builds" + # Catch-all for storing a file's labels + LABELS = "labels" # Author of the model AUTHOR = "author" # Class type of the model diff --git a/src/turnkeyml/common/labels.py b/src/turnkeyml/common/labels.py index 812962e0..be3d2e10 100644 --- a/src/turnkeyml/common/labels.py +++ b/src/turnkeyml/common/labels.py @@ -1,4 +1,3 @@ -import os from typing import Dict, List import turnkeyml.common.printing as printing @@ -44,36 +43,6 @@ def load_from_file(file_path: str) -> Dict[str, List[str]]: return {} -def load_from_cache(cache_dir: str, build_name: str) -> Dict[str, List[str]]: - """ - Loads labels from the cache directory - """ - # Open file - file_path = os.path.join(cache_dir, "labels", f"{build_name}.txt") - with open(file_path, encoding="utf-8") as f: - first_line = f.readline() - - # Return label dict - label_list = first_line.replace("\n", "").split(" ") - return to_dict(label_list) - - -def save_to_cache(cache_dir: str, build_name: str, label_dict: Dict[str, List[str]]): - """ - Save labels as a stand-alone file as part of the cache directory - """ - labels_list = [f"{k}::{','.join(label_dict[k])}" for k in label_dict.keys()] - - # Create labels folder if it doesn't exist - labels_dir = os.path.join(cache_dir, "labels") - os.makedirs(labels_dir, exist_ok=True) - - # Save labels to cache - file_path = os.path.join(labels_dir, f"{build_name}.txt") - with open(file_path, "w", encoding="utf8") as fp: - fp.write(" ".join(labels_list)) - - def is_subset(label_dict_a: Dict[str, List[str]], label_dict_b: Dict[str, List[str]]): """ This function returns True if label_dict_a is a subset of label_dict_b. 
diff --git a/test/analysis.py b/test/analysis.py index 8d6581e1..598d3e79 100644 --- a/test/analysis.py +++ b/test/analysis.py @@ -36,8 +36,7 @@ # filesystem access test_scripts_dot_py = { - "linear_pytorch.py": """ -# labels: test_group::selftest license::mit framework::pytorch tags::selftest,small + "linear_pytorch.py": """# labels: test_group::selftest license::mit framework::pytorch tags::selftest,small import torch import argparse @@ -235,8 +234,10 @@ def test_05_cache(self): ] ) build_name = f"linear_pytorch_{model_hash}" - labels_found = labels.load_from_cache(cache_dir, build_name) != {} - assert cache_is_lean(cache_dir, build_name) and labels_found + labels_found = filesystem.Stats(cache_dir, build_name).stats[ + filesystem.Keys.LABELS + ] + assert cache_is_lean(cache_dir, build_name) and labels_found != {}, labels_found def test_06_generic_args(self): output = run_cli( diff --git a/test/cli.py b/test/cli.py index e6ce7080..3cf38353 100644 --- a/test/cli.py +++ b/test/cli.py @@ -311,9 +311,10 @@ def test_021_cli_report(self): ] linear_summary = summary[1] assert len(summary) == len(test_scripts) - assert all( - elem in linear_summary for elem in expected_cols - ), f"Looked for each of {expected_cols} in {linear_summary.keys()}" + for elem in expected_cols: + assert ( + elem in linear_summary + ), f"Couldn't find expected key {elem} in results spreadsheet" # Check whether all rows we expect to be populated are actually populated assert ( From f8f8093f4f99925b317cec7d729abeceda1bcb7d Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Tue, 5 Dec 2023 09:36:37 -0500 Subject: [PATCH 03/35] Better naming for Stats methods and members (#56) --- docs/contribute.md | 2 +- .../turnkeyml_plugin_example_rt/runtime.py | 4 +- src/turnkeyml/analyze/script.py | 62 ++++++++++--------- src/turnkeyml/analyze/status.py | 2 +- src/turnkeyml/build/export.py | 30 ++++++--- src/turnkeyml/build/hummingbird.py | 5 +- src/turnkeyml/build/ignition.py | 4 +- src/turnkeyml/build/stage.py | 6 +- src/turnkeyml/build_api.py | 6 +- src/turnkeyml/cli/report.py | 38 ++++++------ src/turnkeyml/common/build.py | 2 +- src/turnkeyml/common/filesystem.py | 39 +++++------- src/turnkeyml/model_api.py | 6 +- src/turnkeyml/run/tensorrt/runtime.py | 7 ++- test/cli.py | 8 +-- test/helpers/common.py | 15 ++--- 16 files changed, 121 insertions(+), 115 deletions(-) diff --git a/docs/contribute.md b/docs/contribute.md index d1e88dca..fa6596a1 100644 --- a/docs/contribute.md +++ b/docs/contribute.md @@ -87,7 +87,7 @@ To add a runtime to a plugin: - `"RuntimeClass": `, where `` is a unique name for a Python class that inherits `BaseRT` and implements the runtime. - For example, `"RuntimeClass": ExampleRT` implements the `example` runtime. - The interface for the runtime class is defined in [Runtime Class](#runtime-class) below. - - (Optional) `"status_stats": List[str]`: a list of keys from the build stats that should be printed out at the end of benchmarking in the CLI's `Status` output. These keys, and corresponding values, must be set in the runtime class using `self.stats.add_build_stat(key, value)`. + - (Optional) `"status_stats": List[str]`: a list of keys from the build stats that should be printed out at the end of benchmarking in the CLI's `Status` output. These keys, and corresponding values, must be set in the runtime class using `self.stats.save_model_eval_stat(key, value)`. - (Optional) `"requirement_check": Callable`: a callable that runs before each benchmark. 
This may be used to check whether the device selected is available and functional before each benchmarking run. Exceptions raised during this callable will halt the benchmark of all selected files. 1. Populate the package with the following files (see [Plugin Directory Layout](#plugin-directory-layout)): diff --git a/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py b/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py index 7846a2a9..2dc117d6 100644 --- a/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py +++ b/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py @@ -51,8 +51,8 @@ def benchmark(self) -> MeasuredPerformance: # Assign values to the stats that will be printed # out by the CLI when status is reported - self.stats.add_build_stat("magic_perf_points", 42) - self.stats.add_build_stat("super_runtime_points", 100) + self.stats.save_model_eval_stat("magic_perf_points", 42) + self.stats.save_model_eval_stat("super_runtime_points", 100) return MeasuredPerformance( mean_latency=self.mean_latency, diff --git a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py index 3c213bb2..cebf7999 100644 --- a/src/turnkeyml/analyze/script.py +++ b/src/turnkeyml/analyze/script.py @@ -152,40 +152,40 @@ def explore_invocation( invocation_info.stats_keys = [] # Create an ID for the build stats by combining the device and runtime. - # We don't need more info in the stats_id because changes to benchmark_model() + # We don't need more info in the evaluation_id because changes to benchmark_model() # arguments (e.g., sequence) will trigger a rebuild, which is intended to replace the # build stats so long as the device and runtime have not changed. - stats_id = f"{tracer_args.device}_{selected_runtime}" + evaluation_id = f"{tracer_args.device}_{selected_runtime}" stats = fs.Stats( tracer_args.cache_dir, build_name, - stats_id, + evaluation_id, ) invocation_info.stats = stats # Stats that apply to the model, regardless of build - stats.save_stat( + stats.save_model_stat( fs.Keys.HASH, model_info.hash, ) - stats.save_stat( + stats.save_model_stat( fs.Keys.MODEL_NAME, tracer_args.script_name, ) - stats.save_stat( + stats.save_model_stat( fs.Keys.PARAMETERS, model_info.params, ) if model_info.model_type != build.ModelType.ONNX_FILE: - stats.save_stat(fs.Keys.CLASS, type(model_info.model).__name__) + stats.save_model_stat(fs.Keys.CLASS, type(model_info.model).__name__) if fs.Keys.AUTHOR in tracer_args.labels: - stats.save_stat(fs.Keys.AUTHOR, tracer_args.labels[fs.Keys.AUTHOR][0]) + stats.save_model_stat(fs.Keys.AUTHOR, tracer_args.labels[fs.Keys.AUTHOR][0]) if fs.Keys.TASK in tracer_args.labels: - stats.save_stat(fs.Keys.TASK, tracer_args.labels[fs.Keys.TASK][0]) + stats.save_model_stat(fs.Keys.TASK, tracer_args.labels[fs.Keys.TASK][0]) # Save all of the lables in one place - stats.save_stat(fs.Keys.LABELS, tracer_args.labels) + stats.save_model_stat(fs.Keys.LABELS, tracer_args.labels) # If the input script is a built-in TurnkeyML model, make a note of # which one @@ -203,18 +203,18 @@ def explore_invocation( fs.MODELS_DIR, f"https://github.com/onnx/turnkeyml/tree/{git_hash}/models", ).replace("\\", "/") - stats.save_stat(fs.Keys.MODEL_SCRIPT, relative_path) + stats.save_model_stat(fs.Keys.MODEL_SCRIPT, relative_path) # Build-specific stats - stats.add_build_stat( + stats.save_model_eval_stat( fs.Keys.DEVICE_TYPE, tracer_args.device, ) - stats.add_build_stat( + stats.save_model_eval_stat( fs.Keys.RUNTIME, 
selected_runtime, ) - stats.add_build_stat( + stats.save_model_eval_stat( fs.Keys.ITERATIONS, tracer_args.iterations, ) @@ -233,12 +233,14 @@ def explore_invocation( # we will try to catch the exception and note it in the stats. # If a concluded build still has a status of "running", this means # there was an uncaught exception. - stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.RUNNING) + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.RUNNING + ) perf = benchmark_model( model_info.model, inputs, - stats_id=stats_id, + evaluation_id=evaluation_id, device=tracer_args.device, runtime=selected_runtime, build_name=build_name, @@ -263,7 +265,7 @@ def explore_invocation( invocation_info.status_message = f"Build Error: {e}" invocation_info.status_message_color = printing.Colors.WARNING - stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) + stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) _store_traceback(invocation_info) @@ -275,14 +277,14 @@ def explore_invocation( ) invocation_info.status_message_color = printing.Colors.WARNING - stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.KILLED) + stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.KILLED) except exp.ArgError as e: # ArgError indicates that some argument to benchmark_model() was # illegal. In that case we want to halt execution so that users can # fix their arguments. - stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) + stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) raise e @@ -290,7 +292,7 @@ def explore_invocation( invocation_info.status_message = f"Error: {e}." invocation_info.status_message_color = printing.Colors.WARNING - stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) + stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) _store_traceback(invocation_info) @@ -300,19 +302,21 @@ def explore_invocation( invocation_info.status_message = f"Unknown turnkey error: {e}" invocation_info.status_message_color = printing.Colors.WARNING - stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) + stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) _store_traceback(invocation_info) else: # If there was no exception then we consider the build to be a success - stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.SUCCESSFUL) + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.SUCCESSFUL + ) finally: # Ensure that stdout/stderr is not being forwarded before updating status util.stop_logger_forward() system_info = build.get_system_info() - stats.save_stat( + stats.save_model_stat( fs.Keys.SYSTEM_INFO, system_info, ) @@ -324,11 +328,11 @@ def explore_invocation( # ONNX stats that we want to save into the build's turnkey_stats.yaml file # so that they can be easily accessed by the report command later - if fs.Keys.ONNX_FILE in stats.build_stats.keys(): + if fs.Keys.ONNX_FILE in stats.evaluation_stats.keys(): # Just in case the ONNX file was generated on a different machine: # strip the state's cache dir, then prepend the current cache dir final_onnx_file = fs.rebase_cache_dir( - stats.build_stats[fs.Keys.ONNX_FILE], + stats.evaluation_stats[fs.Keys.ONNX_FILE], build_name, tracer_args.cache_dir, ) @@ -337,22 +341,22 @@ def explore_invocation( onnx_model_info = util.populate_onnx_model_info(final_onnx_file) onnx_input_dimensions 
= util.onnx_input_dimensions(final_onnx_file) - stats.save_stat( + stats.save_model_stat( fs.Keys.ONNX_OPS_COUNTER, onnx_ops_counter, ) - stats.save_stat( + stats.save_model_stat( fs.Keys.ONNX_MODEL_INFO, onnx_model_info, ) - stats.save_stat( + stats.save_model_stat( fs.Keys.ONNX_INPUT_DIMENSIONS, onnx_input_dimensions, ) if perf: for key, value in vars(perf).items(): - stats.add_build_stat( + stats.save_model_eval_stat( key=key, value=value, ) diff --git a/src/turnkeyml/analyze/status.py b/src/turnkeyml/analyze/status.py index c94542e7..fae91f96 100644 --- a/src/turnkeyml/analyze/status.py +++ b/src/turnkeyml/analyze/status.py @@ -198,7 +198,7 @@ def print_invocation( if unique_invocation.stats_keys is not None: for key in unique_invocation.stats_keys: nice_key = _pretty_print_key(key) - value = unique_invocation.stats.build_stats[key] + value = unique_invocation.stats.evaluation_stats[key] printing.logn(f"{ident}\t\t\t{nice_key}:\t{value}") print() else: diff --git a/src/turnkeyml/build/export.py b/src/turnkeyml/build/export.py index 2f4bc88d..6d32f052 100644 --- a/src/turnkeyml/build/export.py +++ b/src/turnkeyml/build/export.py @@ -181,8 +181,10 @@ def fire(self, state: build.State): if check_model(output_path, success_msg, fail_msg): state.intermediate_results = [output_path] - stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id) - stats.add_build_stat( + stats = fs.Stats( + state.cache_dir, state.config.build_name, state.evaluation_id + ) + stats.save_model_eval_stat( fs.Keys.ONNX_FILE, output_path, ) @@ -307,8 +309,10 @@ def fire(self, state: build.State): if check_model(output_path, success_msg, fail_msg): state.intermediate_results = [output_path] - stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id) - stats.add_build_stat( + stats = fs.Stats( + state.cache_dir, state.config.build_name, state.evaluation_id + ) + stats.save_model_eval_stat( fs.Keys.ONNX_FILE, output_path, ) @@ -428,8 +432,10 @@ def fire(self, state: build.State): if check_model(output_path, success_msg, fail_msg): state.intermediate_results = [output_path] - stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id) - stats.add_build_stat( + stats = fs.Stats( + state.cache_dir, state.config.build_name, state.evaluation_id + ) + stats.save_model_eval_stat( fs.Keys.ONNX_FILE, output_path, ) @@ -492,8 +498,10 @@ def fire(self, state: build.State): if check_model(output_path, success_msg, fail_msg): state.intermediate_results = [output_path] - stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id) - stats.add_build_stat( + stats = fs.Stats( + state.cache_dir, state.config.build_name, state.evaluation_id + ) + stats.save_model_eval_stat( fs.Keys.ONNX_FILE, output_path, ) @@ -596,8 +604,10 @@ def fire(self, state: build.State): if check_model(output_path, success_msg, fail_msg): state.intermediate_results = [output_path] - stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id) - stats.add_build_stat( + stats = fs.Stats( + state.cache_dir, state.config.build_name, state.evaluation_id + ) + stats.save_model_eval_stat( fs.Keys.ONNX_FILE, output_path, ) diff --git a/src/turnkeyml/build/hummingbird.py b/src/turnkeyml/build/hummingbird.py index 21d5f1a6..5540482a 100644 --- a/src/turnkeyml/build/hummingbird.py +++ b/src/turnkeyml/build/hummingbird.py @@ -217,9 +217,8 @@ def fire(self, state: build.State): np.save(state.original_inputs_file, state.inputs) state.intermediate_results = [output_path] - stats = 
fs.Stats(state.cache_dir, state.config.build_name) - stats.add_sub_stat( - state.stats_id, + stats = fs.Stats(state.cache_dir, state.config.build_name, state.evaluation_id) + stats.save_model_eval_stat( fs.Keys.ONNX_FILE, output_path, ) diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py index add22ec3..11184a12 100644 --- a/src/turnkeyml/build/ignition.py +++ b/src/turnkeyml/build/ignition.py @@ -250,7 +250,7 @@ def _rebuild_if_needed( def load_or_make_state( config: build.Config, - stats_id: str, + evaluation_id: str, cache_dir: str, rebuild: str, model_type: build.ModelType, @@ -274,7 +274,7 @@ def load_or_make_state( "inputs": inputs, "monitor": monitor, "rebuild": rebuild, - "stats_id": stats_id, + "evaluation_id": evaluation_id, "cache_dir": cache_dir, "config": config, "model_type": model_type, diff --git a/src/turnkeyml/build/stage.py b/src/turnkeyml/build/stage.py index cd2ec537..0267995e 100644 --- a/src/turnkeyml/build/stage.py +++ b/src/turnkeyml/build/stage.py @@ -273,8 +273,8 @@ def launch(self, state: build.State) -> build.State: raise exp.Error(msg) # Collect telemetry for the build - stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id) - stats.add_build_stat( + stats = fs.Stats(state.cache_dir, state.config.build_name, state.evaluation_id) + stats.save_model_eval_stat( fs.Keys.ALL_BUILD_STAGES, self.get_names(), ) @@ -292,7 +292,7 @@ def launch(self, state: build.State) -> build.State: # Collect telemetry about the stage execution_time = time.time() - start_time - stats.add_build_sub_stat( + stats.save_model_eval_sub_stat( parent_key=fs.Keys.COMPLETED_BUILD_STAGES, key=stage.unique_name, value=execution_time, diff --git a/src/turnkeyml/build_api.py b/src/turnkeyml/build_api.py index 7322a15c..349044e9 100644 --- a/src/turnkeyml/build_api.py +++ b/src/turnkeyml/build_api.py @@ -11,7 +11,7 @@ def build_model( model: build.UnionValidModelInstanceTypes = None, inputs: Optional[Dict[str, Any]] = None, build_name: Optional[str] = None, - stats_id: Optional[str] = "build", + evaluation_id: Optional[str] = "build", cache_dir: str = filesystem.DEFAULT_CACHE_DIR, monitor: Optional[bool] = None, rebuild: Optional[str] = None, @@ -30,7 +30,7 @@ def build_model( build_name: Unique name for the model that will be used to store the ONNX file and build state on disk. Defaults to the name of the file that calls build_model(). - stats_id: Unique name for build statistics that should persist across multiple + evaluation_id: Unique name for evaluation statistics that should persist across multiple builds of the same model. cache_dir: Directory to use as the cache for this build. 
Output files from this build will be stored at cache_dir/build_name/ @@ -94,7 +94,7 @@ def build_model( # Get the state of the model from the cache if a valid build is available state = ignition.load_or_make_state( config=config, - stats_id=stats_id, + evaluation_id=evaluation_id, cache_dir=parsed_cache_dir, rebuild=rebuild or build.DEFAULT_REBUILD_POLICY, model_type=model_type, diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index 6f778f70..ec49a9fc 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -47,7 +47,7 @@ def summary_spreadsheets(args) -> None: Path(report_dir).mkdir(parents=True, exist_ok=True) report: List[Dict] = [] - all_build_stats = [] + all_evaluation_stats = [] # Add results from all user-provided cache folders for cache_dir in cache_dirs: @@ -68,13 +68,13 @@ def summary_spreadsheets(args) -> None: model_stats = yaml.load(stream, Loader=yaml.FullLoader) # create a separate dict for each build - for build in model_stats[fs.Keys.BUILDS].values(): - build_stats = {} + for build in model_stats[fs.Keys.EVALUATIONS].values(): + evaluation_stats = {} # Copy all of the stats for the model that are common across builds for key, value in model_stats.items(): - if key != fs.Keys.BUILDS: - build_stats[key] = value + if key != fs.Keys.EVALUATIONS: + evaluation_stats[key] = value # Copy the build-specific stats for key, value in build.items(): @@ -82,7 +82,7 @@ def summary_spreadsheets(args) -> None: # to make analysis easier if key == fs.Keys.COMPLETED_BUILD_STAGES: for subkey, subvalue in value.items(): - build_stats[subkey] = subvalue + evaluation_stats[subkey] = subvalue # If a build is still marked as "running" at reporting time, it # must have been killed by a time out, out-of-memory (OOM), or some @@ -93,30 +93,30 @@ def summary_spreadsheets(args) -> None: ): value = fs.BenchmarkStatus.KILLED - build_stats[key] = value + evaluation_stats[key] = value - all_build_stats.append(build_stats) + all_evaluation_stats.append(evaluation_stats) except yaml.scanner.ScannerError: continue # Scan the build stats to determine the set of columns for the CSV file. # The CSV will have one column for every key in any build stats dict. column_headers = [] - for build_stats in all_build_stats: + for evaluation_stats in all_evaluation_stats: # Add any key that isn't already in column_headers - for header in build_stats.keys(): + for header in evaluation_stats.keys(): if header not in column_headers: column_headers.append(header) # Add each build to the report - for build_stats in all_build_stats: + for evaluation_stats in all_evaluation_stats: # Start with a dictionary where all of the values are "-". 
If a build # has a value for each key we will fill it in, and otherwise the "-" # will indicate that no value was available result = {k: "-" for k in column_headers} for key in column_headers: - result[key] = _good_get(build_stats, key) + result[key] = _good_get(evaluation_stats, key) report.append(result) @@ -133,13 +133,13 @@ def summary_spreadsheets(args) -> None: # Save the unique errors and counts to a file errors = [] - for build_stats in all_build_stats: + for evaluation_stats in all_evaluation_stats: if ( - "compilation_error" in build_stats.keys() - and "compilation_error_id" in build_stats.keys() + "compilation_error" in evaluation_stats.keys() + and "compilation_error_id" in evaluation_stats.keys() ): - error = build_stats["compilation_error"] - id = build_stats["compilation_error_id"] + error = evaluation_stats["compilation_error"] + id = evaluation_stats["compilation_error_id"] if id != "": unique_error = True for reported_error in errors: @@ -148,13 +148,13 @@ def summary_spreadsheets(args) -> None: reported_error["count"] = reported_error["count"] + 1 reported_error["models_impacted"] = reported_error[ "models_impacted" - ] + [build_stats["model_name"]] + ] + [evaluation_stats["model_name"]] if unique_error: reported_error = { "id": id, "count": 1, - "models_impacted": [build_stats["model_name"]], + "models_impacted": [evaluation_stats["model_name"]], "example": error, } errors.append(reported_error) diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index 9f8ac382..cf768917 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -221,7 +221,7 @@ class State: monitor: bool = False rebuild: str = "" cache_dir: str = "" - stats_id: str = "" + evaluation_id: str = "" # User-provided args that will not be saved as part of state.yaml model: UnionValidModelInstanceTypes = None diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index 2767d15e..961cb6bd 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -331,8 +331,8 @@ class Keys: DEVICE_TYPE = "device_type" # Name of the model MODEL_NAME = "model_name" - # References the per-build stats section - BUILDS = "builds" + # References the per-evaluation stats section + EVALUATIONS = "evaluations" # Catch-all for storing a file's labels LABELS = "labels" # Author of the model @@ -359,15 +359,15 @@ class BenchmarkStatus: class Stats: - def __init__(self, cache_dir: str, build_name: str, stats_id: str = None): + def __init__(self, cache_dir: str, build_name: str, evaluation_id: str = None): output_dir = build.output_dir(cache_dir, build_name) self.file = os.path.join(output_dir, "turnkey_stats.yaml") - self.stats_id = stats_id + self.evaluation_id = evaluation_id os.makedirs(output_dir, exist_ok=True) if not os.path.exists(self.file): - initial = {Keys.BUILDS: {}} + initial = {Keys.EVALUATIONS: {}} _save_yaml(initial, self.file) @property @@ -391,7 +391,7 @@ def _set_key(self, dict, keys: List["str"], value): self._set_key(dict[keys[0]], keys[1:], value) - def save_stat(self, key: str, value): + def save_model_stat(self, key: str, value): """ Save statistics to an yaml file in the build directory """ @@ -402,36 +402,25 @@ def save_stat(self, key: str, value): _save_yaml(stats_dict, self.file) - def add_sub_stat(self, parent_key: str, key: str, value): - """ - Save nested statistics to an yaml file in the build directory - - stats[parent_key][key] = value - """ - - stats_dict = self.stats - - self._set_key(stats_dict, 
[parent_key, key], value) - - _save_yaml(stats_dict, self.file) - - def add_build_stat(self, key: str, value): + def save_model_eval_stat(self, key: str, value): stats_dict = self.stats - self._set_key(stats_dict, [Keys.BUILDS, self.stats_id, key], value) + self._set_key(stats_dict, [Keys.EVALUATIONS, self.evaluation_id, key], value) _save_yaml(stats_dict, self.file) - def add_build_sub_stat(self, parent_key: str, key: str, value): + def save_model_eval_sub_stat(self, parent_key: str, key: str, value): stats_dict = self.stats - self._set_key(stats_dict, [Keys.BUILDS, self.stats_id, parent_key, key], value) + self._set_key( + stats_dict, [Keys.EVALUATIONS, self.evaluation_id, parent_key, key], value + ) _save_yaml(stats_dict, self.file) @property - def build_stats(self): - return self.stats[Keys.BUILDS][self.stats_id] + def evaluation_stats(self): + return self.stats[Keys.EVALUATIONS][self.evaluation_id] def print_cache_dir(_=None): diff --git a/src/turnkeyml/model_api.py b/src/turnkeyml/model_api.py index 6b1213b7..d7cb155e 100644 --- a/src/turnkeyml/model_api.py +++ b/src/turnkeyml/model_api.py @@ -21,7 +21,7 @@ def benchmark_model( inputs: Dict[str, Any], build_name: str, iterations: int = 100, - stats_id: str = "build", + evaluation_id: str = "build", cache_dir: str = filesystem.DEFAULT_CACHE_DIR, device: str = "x86", runtime: Optional[str] = None, @@ -88,7 +88,7 @@ def benchmark_model( build_model( model=model, inputs=inputs, - stats_id=stats_id, + evaluation_id=evaluation_id, build_name=build_name, cache_dir=cache_dir, rebuild=rebuild, @@ -105,7 +105,7 @@ def benchmark_model( rt_args_to_use = rt_args printing.log_info(f"Benchmarking on {device}...") - stats = filesystem.Stats(cache_dir, build_name, stats_id) + stats = filesystem.Stats(cache_dir, build_name, evaluation_id) model_handle = runtime_info["RuntimeClass"]( cache_dir=cache_dir, build_name=build_name, diff --git a/src/turnkeyml/run/tensorrt/runtime.py b/src/turnkeyml/run/tensorrt/runtime.py index b6ef9a9b..df1270ca 100644 --- a/src/turnkeyml/run/tensorrt/runtime.py +++ b/src/turnkeyml/run/tensorrt/runtime.py @@ -13,6 +13,7 @@ average_power_and_utilization, ) + def _get_nvidia_driver_version(): try: output = subprocess.check_output(["nvidia-smi"], text=True) @@ -23,10 +24,12 @@ def _get_nvidia_driver_version(): # Extract and return the driver version return line.split(":")[1].strip().split()[0] - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except return str(e) return "Driver not found" + + class TensorRT(BaseRT): def __init__( self, @@ -88,7 +91,7 @@ def _execute( # Add the GPU driver version to the stats file before execution gpu_driver_version = _get_nvidia_driver_version() - self.stats.add_build_stat("gpu_driver_version", gpu_driver_version) + self.stats.save_model_eval_stat("gpu_driver_version", gpu_driver_version) power_thread.start() run( diff --git a/test/cli.py b/test/cli.py index 3cf38353..e555bc7f 100644 --- a/test/cli.py +++ b/test/cli.py @@ -149,7 +149,7 @@ def assert_success_of_builds( stats = filesystem.Stats( build_state.cache_dir, build_state.config.build_name, - build_state.stats_id, + build_state.evaluation_id, ) assert build_state.build_status == build.Status.SUCCESSFUL_BUILD script_build_found = True @@ -161,11 +161,11 @@ def assert_success_of_builds( ), f"{build_state.info.__dict__[info_property[0]]} == {info_property[1]}" if check_perf: - assert stats.build_stats["mean_latency"] > 0 - assert stats.build_stats["throughput"] > 0 + assert 
stats.evaluation_stats["mean_latency"] > 0 + assert stats.evaluation_stats["throughput"] > 0 if check_iteration_count: - iterations = stats.build_stats["iterations"] + iterations = stats.evaluation_stats["iterations"] assert iterations == check_iteration_count if check_opset: diff --git a/test/helpers/common.py b/test/helpers/common.py index d5b402ee..4a86782a 100644 --- a/test/helpers/common.py +++ b/test/helpers/common.py @@ -1,4 +1,3 @@ - import os import shutil from typing import Dict @@ -101,13 +100,13 @@ def forward(self, x): } -def create_test_dir(key:str, test_scripts: Dict = None): +def create_test_dir(key: str, test_scripts: Dict = None): # Define paths to be used base_dir = os.path.dirname(os.path.abspath(__file__)) cache_dir = os.path.join(base_dir, "generated", f"{key}_cache_dir") corpus_dir = os.path.join(base_dir, "generated", f"test_corpus") - - # Delete folders if they exist and + + # Delete folders if they exist and if os.path.isdir(cache_dir): shutil.rmtree(cache_dir) if os.path.isdir(corpus_dir): @@ -124,9 +123,11 @@ def create_test_dir(key:str, test_scripts: Dict = None): return cache_dir, corpus_dir + def strip_dot_py(test_script_file: str) -> str: return test_script_file.split(".")[0] + def get_stats_and_state( test_script: str, cache_dir: str, @@ -141,8 +142,8 @@ def get_stats_and_state( stats = filesystem.Stats( build_state.cache_dir, build_state.config.build_name, - build_state.stats_id, + build_state.evaluation_id, ) - return stats.build_stats, build_state + return stats.evaluation_stats, build_state - raise Exception(f"Stats not found for {test_script}") \ No newline at end of file + raise Exception(f"Stats not found for {test_script}") From 47248e192cc24e9f0f76506cd88b57a29799921c Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Tue, 5 Dec 2023 10:30:57 -0500 Subject: [PATCH 04/35] Refactor build_model() out of benchmark_model() (#48) * Refactor build out of benchmarking Signed-off-by: Jeremy Fowers --- .github/workflows/publish-to-test-pypi.yml | 9 +- .github/workflows/test_turnkey.yml | 3 - docs/code.md | 11 +- docs/readme.md | 2 +- docs/tools_user_guide.md | 58 ++--- .../turnkeyml_plugin_example_seq/sequence.py | 2 +- examples/model_api/hello_world.py | 62 ----- examples/readme.md | 1 - src/turnkeyml/__init__.py | 1 - src/turnkeyml/analyze/script.py | 224 ++++++++++-------- src/turnkeyml/analyze/util.py | 34 ++- src/turnkeyml/build/ignition.py | 11 +- src/turnkeyml/build_api.py | 19 +- src/turnkeyml/cli/report.py | 10 +- src/turnkeyml/common/filesystem.py | 21 +- src/turnkeyml/model_api.py | 138 ----------- src/turnkeyml/run/devices.py | 65 ++++- src/turnkeyml/version.py | 2 +- test/cli.py | 7 +- test/model_api.py | 171 ------------- 20 files changed, 286 insertions(+), 565 deletions(-) delete mode 100644 examples/model_api/hello_world.py delete mode 100644 src/turnkeyml/model_api.py delete mode 100644 test/model_api.py diff --git a/.github/workflows/publish-to-test-pypi.yml b/.github/workflows/publish-to-test-pypi.yml index 0808cdb6..09c9aac7 100644 --- a/.github/workflows/publish-to-test-pypi.yml +++ b/.github/workflows/publish-to-test-pypi.yml @@ -5,6 +5,7 @@ on: branches: ["main", "canary"] tags: - v* + - RC* pull_request: branches: ["main", "canary"] @@ -33,7 +34,13 @@ jobs: models=$(turnkey models location --quiet) turnkey $models/selftest/linear.py - name: Publish distribution package to PyPI - if: startsWith(github.ref, 'refs/tags') + if: startsWith(github.ref, 'refs/tags/v') uses: 
pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.PYPI_API_TOKEN }} + - name: Publish distribution package to Test PyPI + if: startsWith(github.ref, 'refs/tags/RC') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository_url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/test_turnkey.yml b/.github/workflows/test_turnkey.yml index 915bc47a..1ed8168e 100644 --- a/.github/workflows/test_turnkey.yml +++ b/.github/workflows/test_turnkey.yml @@ -53,8 +53,6 @@ jobs: # turnkey examples # Note: we clear the default cache location prior to each example run rm -rf ~/.cache/turnkey - python examples/model_api/hello_world.py - rm -rf ~/.cache/turnkey python examples/files_api/onnx_opset.py --onnx-opset 15 rm -rf ~/.cache/turnkey turnkey examples/cli/scripts/hello_world.py @@ -71,7 +69,6 @@ jobs: cd test/ python cli.py python analysis.py - python model_api.py - name: Test example plugins shell: bash -el {0} run: | diff --git a/docs/code.md b/docs/code.md index e9fdc7a0..28663677 100644 --- a/docs/code.md +++ b/docs/code.md @@ -11,8 +11,8 @@ The TurnkeyML source code has a few major top-level directories: - `models`: the corpora of models that makes up the TurnkeyML models (see [the models readme](https://github.com/onnx/turnkeyml/blob/main/models/readme.md)). - Each subdirectory under `models` represents a corpus of models pulled from somewhere on the internet. For example, `models/torch_hub` is a corpus of models from [Torch Hub](https://github.com/pytorch/hub). - `src/turnkey`: source code for the TurnkeyML tools (see [Benchmarking Tools](#benchmarking-tools) for a description of how the code is used). - - `src/turnkeyml/analyze`: functions for profiling a model script, discovering model instances, and invoking `benchmark_model()` on those instances. - - `src/turnkeyml/run`: implements the runtime and device plugin APIs and the built-in runtimes and devices. + - `src/turnkeyml/analyze`: functions for profiling a model script, discovering model instances, and invoking `build_model()` and/or `BaseRT.benchmark()` on those instances. + - `src/turnkeyml/run`: implements `BaseRT`, an abstract base class that defines TurnkeyML's vendor-agnostic benchmarking functionality. This module also includes the runtime and device plugin APIs and the built-in runtimes and devices. - `src/turnkeyml/cli`: implements the `turnkey` CLI and reporting tool. - `src/turnkeyml/common`: functions common to the other modules. - `src/turnkeyml/version.py`: defines the package version number. @@ -29,10 +29,9 @@ TurnkeyML provides two main tools, the `turnkey` CLI and benchmarking APIs. Inst 1. The default command for `turnkey` CLI runs the `benchmark_files()` API, which is implemented in [files_api.py](https://github.com/onnx/turnkeyml/blob/main/src/turnkeyml/files_api.py). - Other CLI commands are also implemented in `cli/`, for example the `report` command is implemented in `cli/report.py`. 1. The `benchmark_files()` API takes in a set of scripts, each of which should invoke at least one model instance, to evaluate and passes each into the `evaluate_script()` function for analysis, which is implemented in [analyze/script.py](https://github.com/onnx/turnkeyml/blob/main/src/turnkeyml/analyze/script.py). -1. 
`evaluate_script()` uses a profiler to discover the model instances in the script, and passes each into the `benchmark_model()` API, which is defined in [model_api.py](https://github.com/onnx/turnkeyml/blob/main/src/turnkeyml/model_api.py). -1. The `benchmark_model()` API prepares the model for benchmarking (e.g., exporting and optimizing an ONNX file), which creates an instance of a `*Model` class, where `*` can be CPU, GPU, etc. The `*Model` classes are defined in [run/](https://github.com/onnx/turnkeyml/blob/main/src/turnkeyml/run/). -1. The `*Model` classes provide a `.benchmark()` method that benchmarks the model on the device and returns an instance of the `MeasuredPerformance` class, which includes the performance statistics acquired during benchmarking. -1. `benchmark_model()` and the `*Model` classes are built using [`build_model()`](#model-build-tool) +1. `evaluate_script()` uses a profiler to discover the model instances in the script, and passes each into the `build_model()` API, which is defined in [build_api.py](https://github.com/onnx/turnkeyml/blob/main/src/turnkeyml/build_api.py). +1. The `build_model()` API prepares the model for benchmarking (e.g., exporting and optimizing an ONNX file). +1. `evaluate_script()` passes the build into `BaseRT.benchmark()` to benchmarks the model on the device and returns an instance of the `MeasuredPerformance` class, which includes the performance statistics acquired during benchmarking. # Model Build Tool diff --git a/docs/readme.md b/docs/readme.md index 55ec96c0..5b26c33c 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -3,7 +3,7 @@ This directory contains documentation for the TurnkeyML project: - [code.md](https://github.com/onnx/turnkeyml/blob/main/docs/code.md): Code organization for the benchmark and tools. - [install.md](https://github.com/onnx/turnkeyml/blob/main/docs/install.md): Installation instructions for the tools. -- [tools_user_guide.md](https://github.com/onnx/turnkeyml/blob/main/docs/tools_user_guide.md): User guide for the tools: `turnkey` CLI, `benchmark_files()`, and `benchmark_model()`. +- [tools_user_guide.md](https://github.com/onnx/turnkeyml/blob/main/docs/tools_user_guide.md): User guide for the tools: the `turnkey` CLI and the `benchmark_files()` and `build_model()` APIs. - [versioning.md](https://github.com/onnx/turnkeyml/blob/main/docs/versioning.md): Defines the semantic versioning rules for the `turnkey` package. There is more useful documentation available in: diff --git a/docs/tools_user_guide.md b/docs/tools_user_guide.md index d9819979..53d24635 100644 --- a/docs/tools_user_guide.md +++ b/docs/tools_user_guide.md @@ -51,8 +51,8 @@ Where `your_script.py` is a Python script that instantiates and executes a PyTor The `turnkey` CLI performs the following steps: 1. [Analysis](#analysis): profile the Python script to identify the PyTorch models within -2. [Build](#build): call the `benchmark_files()` [API](#the-turnkey-api) to prepare each model for benchmarking -3. [Benchmark](#benchmark): call the `benchmark_model()` [API](#the-turnkey-api) on each model to gather performance statistics +2. [Build](#build): call the `build_models()` [API](#the-turnkey-api) to prepare each model for benchmarking +3. [Benchmark](#benchmark): call the `BaseRT.benchmark()` method on each model to gather performance statistics _Note_: The benchmarking methodology is defined [here](#benchmark). 
If you are looking for more detailed instructions on how to install turnkey, you can find that [here](https://github.com/onnx/turnkeyml/blob/main/docs/install.md). @@ -64,31 +64,11 @@ _Note_: The benchmarking methodology is defined [here](#benchmark). If you are l Most of the functionality provided by the `turnkey` CLI is also available in the the API: - `turnkey.benchmark_files()` provides the same benchmarking functionality as the `turnkey` CLI: it takes a list of files and target device, and returns performance results. -- `turnkey.benchmark_model()` provides a subset of this functionality: it takes a model and its inputs, and returns performance results. - - The main difference is that `benchmark_model()` does not include the [Analysis](#analysis) feature, and `benchmark_files()` does. - `turnkey.build_model(model, inputs)` is used to programmatically [build](#build) a model instance through a sequence of model-to-model transformations (e.g., starting with an fp32 PyTorch model and ending with an fp16 ONNX model). -Generally speaking, the `turnkey` CLI is a command line interface for the `benchmark_files()` API, which internally calls `benchmark_model()`, which in turn calls `build_model()`. You can read more about this code organization [here](https://github.com/onnx/turnkeyml/blob/main/docs/code.md). +Generally speaking, the `turnkey` CLI is a command line interface for the `benchmark_files()` API which in turn calls `build_model()` and then performs benchmarking using `BaseRT.benchmark()`. You can read more about this code organization [here](https://github.com/onnx/turnkeyml/blob/main/docs/code.md). -For an example of `benchmark_model()`, the following script: - -```python -from turnkeyml import benchmark_model - -model = YourModel() # Instantiate a torch.nn.module -results = model(**inputs) -perf = benchmark_model(model, inputs) -``` - -Will print an output like this: - -``` -> Performance of YourModel on device Intel® Xeon® Platinum 8380 is: -> latency: 0.033 ms -> throughput: 21784.8 ips -``` - -`benchmark_model()` returns a `MeasuredPerformance` object that includes members: +`BaseRT.benchmark()` returns a `MeasuredPerformance` object that includes members: - `latency_units`: unit of time used for measuring latency, which is set to `milliseconds (ms)`. - `mean_latency`: average benchmarking latency, measured in `latency_units`. - `throughput_units`: unit used for measuring throughput, which is set to `inferences per second (IPS)`. @@ -135,7 +115,7 @@ A **runtime** is a piece of software that executes a model on a device. **Analysis** is the process by which `benchmark_files()` inspects a Python script or ONNX file and identifies the models within. -`benchmark_files()` performs analysis by running and profiling your file(s). When a model object (see [Model](#model) is encountered, it is inspected to gather statistics (such as the number of parameters in the model) and/or pass it to the `benchmark_model()` API for benchmarking. +`benchmark_files()` performs analysis by running and profiling your file(s). When a model object (see [Model](#model) is encountered, it is inspected to gather statistics (such as the number of parameters in the model) and/or passed to the build and benchmark APIs. > _Note_: the `turnkey` CLI and `benchmark_files()` API both run your entire python script(s) whenever python script(s) are passed as input files. Please ensure that these scripts are safe to run, especially if you got them from the internet. 
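With `benchmark_model()` removed, the two public entry points described in this guide are `benchmark_files()` and `build_model()`. The sketch below is illustrative only and is not part of this patch: the keyword names are taken from the "Also available as API arguments" lists later in the guide, the file path and build name are placeholders, and the exact signatures should be verified against the installed `turnkeyml` package.

```python
# Illustrative sketch only (not part of this patch). Argument names follow the
# API-argument lists in this guide; the script path and build name are placeholders.
import torch
from turnkeyml import benchmark_files, build_model


class SmallModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(10, 5)

    def forward(self, x):
        return self.fc(x)


# Programmatic build: run the build sequence (e.g., export to optimized ONNX)
state = build_model(
    SmallModel(),
    {"x": torch.rand(10)},
    build_name="hello_build",  # placeholder build name
    rebuild="if_needed",
)

# File-based benchmarking: analysis + build + benchmark for every model found
benchmark_files(
    ["your_script.py"],  # placeholder script path
    device="x86",
    iterations=100,
)
```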
@@ -205,12 +185,14 @@ The *build cache* is a location on disk that holds all of the artifacts from you ## Benchmark -*Benchmark* is the process by which the `benchmark_model()` API collects performance statistics about a [model](#model). Specifically, `benchmark_model()` takes a [build](#build) of a model and executes it on a target device using target runtime software (see [Devices and Runtimes](#devices-and-runtimes)). +*Benchmark* is the process by which `BaseRT.benchmark()` collects performance statistics about a [model](#model). `BaseRT` is an abstract base class that defines the common benchmarking infrastructure that TurnkeyML provides across devices and runtimes. + +Specifically, `BaseRT.benchmark()` takes a [build](#build) of a model and executes it on a target device using target runtime software (see [Devices and Runtimes](#devices-and-runtimes)). -By default, `benchmark_model()` will run the model 100 times to collect the following statistics: +By default, `BaseRT.benchmark()` will run the model 100 times to collect the following statistics: 1. Mean Latency, in milliseconds (ms): the average time it takes the runtime/device combination to execute the model/inputs combination once. This includes the time spent invoking the device and transferring the model's inputs and outputs between host memory and the device (when applicable). 1. Throughput, in inferences per second (IPS): the number of times the model/inputs combination can be executed on the runtime/device combination per second. - > - _Note_: `benchmark_model()` is not aware of whether `inputs` is a single input or a batch of inputs. If your `inputs` is actually a batch of inputs, you should multiply `benchmark_model()`'s reported IPS by the batch size. + > - _Note_: `BaseRT.benchmark()` is not aware of whether `inputs` is a single input or a batch of inputs. If your `inputs` is actually a batch of inputs, you should multiply `BaseRT.benchmark()`'s reported IPS by the batch size. # Devices and Runtimes @@ -226,7 +208,7 @@ If you are using a remote machine, it must: - include the target device - have `miniconda`, `python>=3.8`, and `docker>=20.10` installed -When you call `turnkey` CLI or `benchmark_model()`, the following actions are performed on your behalf: +When you call `turnkey` CLI or `benchmark_files()`, the following actions are performed on your behalf: 1. Perform a `build`, which exports all models from the script to ONNX and prepares for benchmarking. 1. Set up the benchmarking environment by loading a container and/or setting up a conda environment. 1. Run the benchmarks. @@ -253,7 +235,6 @@ Valid values of `TYPE` include: Also available as API arguments: - `benchmark_files(device=...)` -- `benchmark_model(device=...)`. > For a detailed example, see the [CLI Nvidia tutorial](https://github.com/onnx/turnkeyml/blob/main/examples/cli/readme.md#nvidia-benchmarking). @@ -274,9 +255,8 @@ Each device type has its own default runtime, as indicated below. This feature is also be available as an API argument: - `benchmark_files(runtime=[...])` -- `benchmark_model(runtime=...)` -> _Note_: Inputs to `torch-eager` and `torch-compiled` are not downcasted to FP16 by default. Downcast inputs before benchmarking for a fair comparison between runtimes. +> _Note_: Inputs to `torch-eager` and `torch-compiled` are not downcasted to FP16 by default. You must perform your own downcast or quantization of inputs if needed for apples-to-apples comparisons with other runtimes. 
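For instance, a minimal sketch of doing that downcast by hand for a dictionary of PyTorch inputs, assuming every floating-point tensor in the dictionary should become fp16:

```python
import torch

# Hypothetical inputs dict; downcast float tensors to fp16 by hand before
# benchmarking, since torch-eager / torch-compiled will not do it for you.
inputs = {"x": torch.rand(1000)}
inputs_fp16 = {
    name: t.half() if t.is_floating_point() else t
    for name, t in inputs.items()
}
```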
# Additional Commands and Options @@ -381,7 +361,6 @@ Process isolation mode applies a timeout to each subprocess. The default timeout Also available as API arguments: - `benchmark_files(cache_dir=...)` -- `benchmark_model(cache_dir=...)` - `build_model(cache_dir=...)` > See the [Cache Directory tutorial](https://github.com/onnx/turnkeyml/blob/main/examples/cli/cache.md#cache-directory) for a detailed example. @@ -392,7 +371,6 @@ Also available as API arguments: Also available as API arguments: - `benchmark_files(lean_cache=True/False, ...)` (default False) -- `benchmark_model(lean_cache=True/False, ...)` (default False) > _Note_: useful for benchmarking many models, since the `build` artifacts from the models can take up a significant amount of hard drive space. @@ -409,7 +387,6 @@ Takes one of the following values: Also available as API arguments: - `benchmark_files(rebuild=...)` -- `benchmark_model(rebuild=...)` - `build_model(rebuild=...)` ### Sequence @@ -421,7 +398,6 @@ Usage: Also available as API arguments: - `benchmark_files(sequence=...)` -- `benchmark_model(sequence=...)` - `build_model(sequence=...)` ### Set Script Arguments @@ -460,7 +436,6 @@ Usage: Also available as API arguments: - `benchmark_files(onnx_opset=...)` -- `benchmark_model(onnx_opset=...)` - `build_model(onnx_opset=...)` > _Note_: ONNX opset can also be set by an environment variable. The --onnx-opset argument takes precedence over the environment variable. See [TURNKEY_ONNX_OPSET](#set-the-onnx-opset). @@ -474,11 +449,10 @@ Usage: Also available as API arguments: - `benchmark_files(iterations=...)` -- `benchmark_model(iterations=...)` ### Analyze Only -Instruct `turnkey` or `benchmark_model()` to only run the [Analysis](#analysis) phase of the `benchmark` command. +Instruct `turnkey` or `benchmark_files()` to only run the [Analysis](#analysis) phase of the `benchmark` command. Usage: - `turnkey benchmark INPUT_FILES --analyze-only` @@ -493,7 +467,7 @@ Also available as an API argument: ### Build Only -Instruct `turnkey`, `benchmark_files()`, or `benchmark_model()` to only run the [Analysis](#analysis) and [Build](#build) phases of the `benchmark` command. +Instruct `turnkey` or `benchmark_files()` to only run the [Analysis](#analysis) and [Build](#build) phases of the `benchmark` command. Usage: - `turnkey benchmark INPUT_FILES --build-only` @@ -503,7 +477,6 @@ Usage: Also available as API arguments: - `benchmark_files(build_only=True/False)` (default False) -- `benchmark_model(build_only=True/False)` (default False) > See the [Build Only tutorial](https://github.com/onnx/turnkeyml/blob/main/examples/cli/build.md#build-only) for a detailed example. @@ -515,7 +488,6 @@ None of the built-in runtimes support such arguments, however plugin contributor Also available as API arguments: - `benchmark_files(rt_args=Dict)` (default None) -- `benchmark_model(rt_args=Dict)` (default None) ## Cache Commands @@ -635,7 +607,7 @@ export TURNKEY_DEBUG=True ### Set the ONNX Opset -By default, `turnkey`, `benchmark_files()`, and `benchmark_model()` will use the default ONNX opset defined in `turnkey.common.build.DEFAULT_ONNX_OPSET`. You can set a different default ONNX opset by setting the `TURNKEY_ONNX_OPSET` environment variable. +By default, `turnkey`, `benchmark_files()`, and `build_model()` will use the default ONNX opset defined in `turnkey.common.build.DEFAULT_ONNX_OPSET`. You can set a different default ONNX opset by setting the `TURNKEY_ONNX_OPSET` environment variable. 
For example: diff --git a/examples/cli/plugins/example_seq/turnkeyml_plugin_example_seq/sequence.py b/examples/cli/plugins/example_seq/turnkeyml_plugin_example_seq/sequence.py index 6c1a1229..350f2a76 100644 --- a/examples/cli/plugins/example_seq/turnkeyml_plugin_example_seq/sequence.py +++ b/examples/cli/plugins/example_seq/turnkeyml_plugin_example_seq/sequence.py @@ -1,7 +1,7 @@ """ This script is an example of a sequence.py file for Sequence Plugin. Such a sequence.py can be used to redefine the build phase of the turnkey CLI, benchmark_files(), -and benchmark_model() to have any custom behavior. +and build_model() to have any custom behavior. In this example sequence.py file we are setting the build sequence to simply export from pytorch to ONNX. This differs from the default build sequence, which diff --git a/examples/model_api/hello_world.py b/examples/model_api/hello_world.py deleted file mode 100644 index 6a5a4a6f..00000000 --- a/examples/model_api/hello_world.py +++ /dev/null @@ -1,62 +0,0 @@ -import argparse -import torch -from turnkeyml import benchmark_model - -torch.manual_seed(0) - - -# Define model class -class SmallModel(torch.nn.Module): - def __init__(self, input_size, output_size): - super(SmallModel, self).__init__() - self.fc = torch.nn.Linear(input_size, output_size) - - def forward(self, x): - output = self.fc(x) - return output - - -# Instantiate model and generate inputs -input_size = 1000 -output_size = 500 -pytorch_model = SmallModel(input_size, output_size) -inputs = {"x": torch.rand(input_size)} - - -def main(): - # Define the argument parser - parser = argparse.ArgumentParser( - description="Benchmark a PyTorch model on a specified device." - ) - - # Add the arguments - parser.add_argument( - "--device", - type=str, - choices=["x86", "nvidia"], - default="x86", - help="The device to benchmark on (x86 or nvidia)", - ) - - # Parse the arguments - args = parser.parse_args() - - # Instantiate model and generate inputs - torch.manual_seed(0) - input_size = 1000 - output_size = 500 - pytorch_model = SmallModel(input_size, output_size) - inputs = {"x": torch.rand(input_size)} - - # Benchmark the model on the specified device - print(f"Benchmarking on {args.device}...") - benchmark_model( - pytorch_model, - inputs, - build_name="hello_api_world", - device=args.device, - ) - - -if __name__ == "__main__": - main() diff --git a/examples/readme.md b/examples/readme.md index 8f0c2120..12ae689f 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -2,6 +2,5 @@ This directory contains examples to help you learn how to use the tools. The examples are split up into two sub-directories: 1. `examples/cli`: a tutorial series for the `turnkey` CLI. This is the recommended starting point. -1. `examples/model_api`: scripts that demonstrate how to use the `turnkey.benchmark_model()` API. 1. `examples/files_api`: scripts that demonstrate how to use the `turnkey.benchmark_files()` API. 1. `examples/build_api`: scripts that demonstrate how to use the `turnkey.build_model()` API. 
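The deleted `hello_world.py` walkthrough relied on `benchmark_model()`; the equivalent flow now goes through `build_model()` plus the selected runtime's `RuntimeClass`, as the `explore_invocation()` changes below show. The following is a rough sketch of that flow, assuming the default x86/ONNX Runtime plugin is installed; the names mirror the code in this patch, but the snippet itself is illustrative rather than a supported public API.

```python
# Rough sketch of the build-then-benchmark flow that replaces benchmark_model().
# Illustrative only: the runtime-class constructor is a plugin interface, not a
# supported public API, and the build name here is a placeholder.
import torch
import turnkeyml.common.filesystem as fs
import turnkeyml.run.devices as devices
from turnkeyml import build_model

model = torch.nn.Linear(10, 5)
inputs = {"x": torch.rand(10)}
device, build_name = "x86", "sketch"

# Pick the runtime, its plugin info, and the build sequence it expects
selected_runtime, runtime_info, sequence = devices.select_runtime_and_sequence(
    device, None, None  # default runtime, default sequence
)

# Build: export and optimize the model (an ONNX file for ORT-style runtimes)
build_state = build_model(
    model=model,
    inputs=inputs,
    build_name=build_name,
    sequence=sequence,
    device=device,
    cache_dir=fs.DEFAULT_CACHE_DIR,
)

# Benchmark: instantiate the runtime plugin's class and call .benchmark()
stats = fs.Stats(fs.DEFAULT_CACHE_DIR, build_name, f"{device}_{selected_runtime}")
model_handle = runtime_info["RuntimeClass"](
    cache_dir=fs.DEFAULT_CACHE_DIR,
    build_name=build_name,
    stats=stats,
    iterations=100,
    model=build_state.results[0],
    inputs=inputs,
    device_type=device,
    runtime=selected_runtime,
)
perf = model_handle.benchmark()
perf.print()
```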
diff --git a/src/turnkeyml/__init__.py b/src/turnkeyml/__init__.py index 175c5891..0430c4ce 100644 --- a/src/turnkeyml/__init__.py +++ b/src/turnkeyml/__init__.py @@ -1,7 +1,6 @@ from turnkeyml.version import __version__ from .files_api import benchmark_files -from .model_api import benchmark_model from .cli.cli import main as turnkeycli from .build_api import build_model from .common.build import load_state diff --git a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py index cebf7999..3c288909 100644 --- a/src/turnkeyml/analyze/script.py +++ b/src/turnkeyml/analyze/script.py @@ -22,13 +22,9 @@ import turnkeyml.analyze.util as util import turnkeyml.common.tf_helpers as tf_helpers import turnkeyml.common.labels as labels -from turnkeyml.model_api import benchmark_model +from turnkeyml.build_api import build_model import turnkeyml.common.filesystem as fs -from turnkeyml.run.devices import ( - DEVICE_RUNTIME_MAP, - DEFAULT_RUNTIME, - SUPPORTED_RUNTIMES, -) +import turnkeyml.run.devices as plugins class Action(Enum): @@ -88,6 +84,15 @@ def _store_traceback(invocation_info: util.UniqueInvocationInfo): invocation_info.status_message = " ".join(invocation_info.status_message.split()) +def set_status_on_exception(build_state: build.State, stats: fs.Stats): + # We get `state` when the build tool succeeds, so we can use that to identify + # whether the exception was thrown during build or benchmark + if not build_state: + stats.save_model_eval_stat(fs.Keys.BUILD_STATUS, fs.FunctionStatus.FAILED) + else: + stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.FunctionStatus.FAILED) + + def explore_invocation( model_inputs: dict, model_info: util.ModelInfo, @@ -138,21 +143,28 @@ def explore_invocation( inputs[all_args[i]] = args[i] invocation_info.inputs = inputs + # Create a build directory in the cache + fs.make_build_dir(tracer_args.cache_dir, build_name) + # If the user has not provided a specific runtime, select the runtime # based on the device provided. - if tracer_args.runtime is None: - selected_runtime = DEVICE_RUNTIME_MAP[tracer_args.device][DEFAULT_RUNTIME] - else: - selected_runtime = tracer_args.runtime + ( + selected_runtime, + runtime_info, + sequence_selected, + ) = plugins.select_runtime_and_sequence( + tracer_args.device, + tracer_args.runtime, + tracer_args.sequence, + ) - runtime_info = SUPPORTED_RUNTIMES[selected_runtime] if "status_stats" in runtime_info.keys(): invocation_info.stats_keys = runtime_info["status_stats"] else: invocation_info.stats_keys = [] # Create an ID for the build stats by combining the device and runtime. - # We don't need more info in the evaluation_id because changes to benchmark_model() + # We don't need more info in the evaluation_id because changes to build_model() # arguments (e.g., sequence) will trigger a rebuild, which is intended to replace the # build stats so long as the device and runtime have not changed. 
evaluation_id = f"{tracer_args.device}_{selected_runtime}" @@ -184,6 +196,13 @@ def explore_invocation( if fs.Keys.TASK in tracer_args.labels: stats.save_model_stat(fs.Keys.TASK, tracer_args.labels[fs.Keys.TASK][0]) + # Save the system information used for this evaluation + system_info = build.get_system_info() + stats.save_model_stat( + fs.Keys.SYSTEM_INFO, + system_info, + ) + # Save all of the lables in one place stats.save_model_stat(fs.Keys.LABELS, tracer_args.labels) @@ -219,72 +238,120 @@ def explore_invocation( tracer_args.iterations, ) + if model_info.model_type == build.ModelType.PYTORCH_COMPILED: + invocation_info.status_message = ( + "Skipping model compiled using torch.compile(). " + "turnkey requires models to be in eager mode " + "(regardless of what runtime you have selected)." + ) + invocation_info.status_message_color = printing.Colors.WARNING + + return + + build_state = None perf = None try: - if model_info.model_type == build.ModelType.PYTORCH_COMPILED: - invocation_info.status_message = ( - "Skipping model compiled using torch.compile(). " - "turnkey requires models to be in eager mode " - "(regardless of what runtime you have selected)." - ) - invocation_info.status_message_color = printing.Colors.WARNING - else: - # Indicate that the benchmark is running. If the build fails for any reason, + # Run the build tool (if needed by the runtime) + if runtime_info["build_required"]: + # Indicate that the build is running. If the build fails for any reason, # we will try to catch the exception and note it in the stats. # If a concluded build still has a status of "running", this means # there was an uncaught exception. - stats.save_model_eval_stat( - fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.RUNNING - ) + stats.save_model_eval_stat(fs.Keys.BUILD_STATUS, fs.FunctionStatus.RUNNING) - perf = benchmark_model( - model_info.model, - inputs, + build_state = build_model( + model=model_info.model, + inputs=inputs, evaluation_id=evaluation_id, - device=tracer_args.device, - runtime=selected_runtime, build_name=build_name, - iterations=tracer_args.iterations, cache_dir=tracer_args.cache_dir, - build_only=Action.BENCHMARK not in tracer_args.actions, - lean_cache=tracer_args.lean_cache, - sequence=tracer_args.sequence, - onnx_opset=tracer_args.onnx_opset, rebuild=tracer_args.rebuild, - rt_args=tracer_args.rt_args, + sequence=sequence_selected, + onnx_opset=tracer_args.onnx_opset, + device=tracer_args.device, + ) + + stats.save_model_eval_stat( + fs.Keys.BUILD_STATUS, fs.FunctionStatus.SUCCESSFUL ) - if Action.BENCHMARK in tracer_args.actions: - invocation_info.status_message = "Model successfully benchmarked!" - invocation_info.performance = perf - invocation_info.status_message_color = printing.Colors.OKGREEN + + model_to_benchmark = build_state.results[0] + + # Analyze the onnx file (if any) and save statistics + util.analyze_onnx( + build_name=build_name, + cache_dir=tracer_args.cache_dir, + stats=stats, + ) + else: + model_to_benchmark = model_info.model + + # Run the benchmark tool (if requested by the user) + if Action.BENCHMARK in tracer_args.actions: + if tracer_args.rt_args is None: + rt_args_to_use = {} else: - invocation_info.status_message = "Model successfully built!" 
- invocation_info.status_message_color = printing.Colors.OKGREEN + rt_args_to_use = tracer_args.rt_args + + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, fs.FunctionStatus.RUNNING + ) + + model_handle = runtime_info["RuntimeClass"]( + cache_dir=tracer_args.cache_dir, + build_name=build_name, + stats=stats, + iterations=tracer_args.iterations, + model=model_to_benchmark, + inputs=inputs, + device_type=tracer_args.device, + runtime=selected_runtime, + **rt_args_to_use, + ) + perf = model_handle.benchmark() + + for key, value in vars(perf).items(): + stats.save_model_eval_stat( + key=key, + value=value, + ) + + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, fs.FunctionStatus.SUCCESSFUL + ) + + invocation_info.status_message = "Model successfully benchmarked!" + invocation_info.performance = perf + invocation_info.status_message_color = printing.Colors.OKGREEN + else: + invocation_info.status_message = "Model successfully built!" + invocation_info.status_message_color = printing.Colors.OKGREEN except exp.StageError as e: invocation_info.status_message = f"Build Error: {e}" invocation_info.status_message_color = printing.Colors.WARNING - stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) + set_status_on_exception(build_state, stats) _store_traceback(invocation_info) except exp.SkipBuild: # SkipBuild is an exception that the build_model() API will raise # when it is skipping a previously-failed build when rebuild=never is set + + # NOTE: skipping a build should never update build or benchmark status + invocation_info.status_message = ( "Build intentionally skipped because rebuild=never" ) invocation_info.status_message_color = printing.Colors.WARNING - stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.KILLED) - except exp.ArgError as e: - # ArgError indicates that some argument to benchmark_model() was + # ArgError indicates that some argument to build_model() or BaseRT was # illegal. In that case we want to halt execution so that users can # fix their arguments. - stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) + set_status_on_exception(build_state, stats) raise e @@ -292,7 +359,7 @@ def explore_invocation( invocation_info.status_message = f"Error: {e}." invocation_info.status_message_color = printing.Colors.WARNING - stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) + set_status_on_exception(build_state, stats) _store_traceback(invocation_info) @@ -302,66 +369,19 @@ def explore_invocation( invocation_info.status_message = f"Unknown turnkey error: {e}" invocation_info.status_message_color = printing.Colors.WARNING - stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED) + set_status_on_exception(build_state, stats) _store_traceback(invocation_info) - else: - # If there was no exception then we consider the build to be a success - stats.save_model_eval_stat( - fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.SUCCESSFUL - ) finally: # Ensure that stdout/stderr is not being forwarded before updating status util.stop_logger_forward() - system_info = build.get_system_info() - stats.save_model_stat( - fs.Keys.SYSTEM_INFO, - system_info, - ) - if model_info.model_type != build.ModelType.PYTORCH_COMPILED: - # We have this if-block because torch-compiled model instances - # are not legal input to this function. So when we encounter one, - # we want to exit the function as quickly as possible, without - # doing any of the logic that follows this comment. 
- - # ONNX stats that we want to save into the build's turnkey_stats.yaml file - # so that they can be easily accessed by the report command later - if fs.Keys.ONNX_FILE in stats.evaluation_stats.keys(): - # Just in case the ONNX file was generated on a different machine: - # strip the state's cache dir, then prepend the current cache dir - final_onnx_file = fs.rebase_cache_dir( - stats.evaluation_stats[fs.Keys.ONNX_FILE], - build_name, - tracer_args.cache_dir, - ) - - onnx_ops_counter = util.get_onnx_ops_list(final_onnx_file) - onnx_model_info = util.populate_onnx_model_info(final_onnx_file) - onnx_input_dimensions = util.onnx_input_dimensions(final_onnx_file) - - stats.save_model_stat( - fs.Keys.ONNX_OPS_COUNTER, - onnx_ops_counter, - ) - stats.save_model_stat( - fs.Keys.ONNX_MODEL_INFO, - onnx_model_info, - ) - stats.save_model_stat( - fs.Keys.ONNX_INPUT_DIMENSIONS, - onnx_input_dimensions, - ) - - if perf: - for key, value in vars(perf).items(): - stats.save_model_eval_stat( - key=key, - value=value, - ) + status.update(tracer_args.models_found, build_name, tracer_args.cache_dir) - status.update(tracer_args.models_found, build_name, tracer_args.cache_dir) + if tracer_args.lean_cache: + printing.log_info("Removing build artifacts...") + fs.clean_output_dir(tracer_args.cache_dir, build_name) def get_model_hash( @@ -610,7 +630,7 @@ def forward_spy(*args, **kwargs): ) invocation_info.executed = invocation_info.executed + 1 - # Call benchmark_model() if this is the first time the model is being executed + # Call explore_invocation() if this is the first time the model is being executed # and this model has been selected by the user if ( invocation_info.executed == 1 @@ -623,7 +643,7 @@ def forward_spy(*args, **kwargs): invocation_info=invocation_info, tracer_args=tracer_args, ) - # Ensure that benchmark_model() doesn't interfere with our execution count + # Ensure that explore_invocation() doesn't interfere with our execution count model_info.executed = 1 build_name = fs.get_build_name( @@ -795,7 +815,7 @@ def evaluate_script(tracer_args: TracerArgs) -> Dict[str, util.ModelInfo]: "torch.jit.script(", "torch.jit.script() is not supported by turnkey CLI and benchmark_files() API, " "however torch.jit.script() is being called in your script." - "You can try passing your model instance into the benchmark_model() API instead. ", + "You can try passing your model instance into the build_model() API instead. 
", ) ] ): diff --git a/src/turnkeyml/analyze/util.py b/src/turnkeyml/analyze/util.py index 27e96594..305af95c 100644 --- a/src/turnkeyml/analyze/util.py +++ b/src/turnkeyml/analyze/util.py @@ -8,7 +8,7 @@ from turnkeyml.common import printing import turnkeyml.common.build as build from turnkeyml.common.performance import MeasuredPerformance -from turnkeyml.common.filesystem import Stats +import turnkeyml.common.filesystem as fs class AnalysisException(Exception): @@ -37,7 +37,7 @@ class UniqueInvocationInfo: status_message_color: printing.Colors = printing.Colors.ENDC traceback_message_color: printing.Colors = printing.Colors.FAIL stats_keys: Optional[List[str]] = None - stats: Stats = None + stats: fs.Stats = None @dataclass @@ -162,3 +162,33 @@ def stop_logger_forward() -> None: sys.stdout = sys.stdout.terminal if hasattr(sys.stderr, "terminal_err"): sys.stderr = sys.stderr.terminal_err + + +def analyze_onnx(build_name: str, cache_dir: str, stats: fs.Stats): + # ONNX stats that we want to save into the build's turnkey_stats.yaml file + # so that they can be easily accessed by the report command later + if fs.Keys.ONNX_FILE in stats.evaluation_stats.keys(): + # Just in case the ONNX file was generated on a different machine: + # strip the state's cache dir, then prepend the current cache dir + final_onnx_file = fs.rebase_cache_dir( + stats.evaluation_stats[fs.Keys.ONNX_FILE], + build_name, + cache_dir, + ) + + onnx_ops_counter = get_onnx_ops_list(final_onnx_file) + onnx_model_info = populate_onnx_model_info(final_onnx_file) + input_dimensions = onnx_input_dimensions(final_onnx_file) + + stats.save_model_stat( + fs.Keys.ONNX_OPS_COUNTER, + onnx_ops_counter, + ) + stats.save_model_stat( + fs.Keys.ONNX_MODEL_INFO, + onnx_model_info, + ) + stats.save_model_stat( + fs.Keys.ONNX_INPUT_DIMENSIONS, + input_dimensions, + ) diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py index 11184a12..86c51c94 100644 --- a/src/turnkeyml/build/ignition.py +++ b/src/turnkeyml/build/ignition.py @@ -222,9 +222,16 @@ def _begin_fresh_build( # start with a fresh State. stats = filesystem.Stats(state_args["cache_dir"], state_args["config"].build_name) + build_dir = build.output_dir( + state_args["cache_dir"], state_args["config"].build_name + ) + filesystem.rmdir( - build.output_dir(state_args["cache_dir"], state_args["config"].build_name), - exclude=stats.file, + build_dir, + excludes=[ + stats.file, + os.path.join(build_dir, filesystem.BUILD_MARKER), + ], ) state = state_type(**state_args) state.save() diff --git a/src/turnkeyml/build_api.py b/src/turnkeyml/build_api.py index 349044e9..b9a6aed6 100644 --- a/src/turnkeyml/build_api.py +++ b/src/turnkeyml/build_api.py @@ -118,19 +118,8 @@ def build_model( sequence_locked.show_monitor(config, state.monitor) state = sequence_locked.launch(state) - if state.build_status == build.Status.SUCCESSFUL_BUILD: - printing.log_success( - f"\n Saved to **{build.output_dir(state.cache_dir, config.build_name)}**" - ) - - return state + printing.log_success( + f"\n Saved to **{build.output_dir(state.cache_dir, config.build_name)}**" + ) - else: - printing.log_success( - f"Build Sequence {sequence_locked.unique_name} completed successfully" - ) - msg = """ - build_model() only returns a Model instance if the Sequence includes a Stage - that sets state.build_status=turnkey.common.build.Status.SUCCESSFUL_BUILD. 
- """ - printing.log_warning(msg) + return state diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index ec49a9fc..2c2f4281 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -84,14 +84,14 @@ def summary_spreadsheets(args) -> None: for subkey, subvalue in value.items(): evaluation_stats[subkey] = subvalue - # If a build is still marked as "running" at reporting time, it + # If a build or benchmark is still marked as "running" at + # reporting time, it # must have been killed by a time out, out-of-memory (OOM), or some # other uncaught exception if ( - key == fs.Keys.BENCHMARK_STATUS - and value == fs.BenchmarkStatus.RUNNING - ): - value = fs.BenchmarkStatus.KILLED + key == fs.Keys.BUILD_STATUS or fs.Keys.BENCHMARK_STATUS + ) and value == fs.FunctionStatus.RUNNING: + value = fs.FunctionStatus.KILLED evaluation_stats[key] = value diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index 961cb6bd..d2190336 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -39,22 +39,29 @@ MODELS_DIR = importlib.util.find_spec("turnkeyml_models").submodule_search_locations[0] -def rmdir(folder, exclude: Optional[str] = None): +def rmdir(folder, excludes: Optional[List[str]] = None): """ Remove the contents of a directory from the filesystem. - If `exclude=`, the directory itself and the file named + If `` is in `excludes`, the directory itself and the file named are kept. Otherwise, the entire directory is removed. """ + + # Use an empty list by default + if excludes: + excludes_to_use = excludes + else: + excludes_to_use = [] + if os.path.isdir(folder): for filename in os.listdir(folder): file_path = os.path.join(folder, filename) - if file_path != exclude: + if file_path not in excludes_to_use: if os.path.isfile(file_path) or os.path.islink(file_path): os.unlink(file_path) elif os.path.isdir(file_path): shutil.rmtree(file_path) - if exclude is None: + if excludes is None: shutil.rmtree(folder) return True @@ -347,11 +354,13 @@ class Keys: SYSTEM_INFO = "system_info" # Path to the built-in model script used as input MODEL_SCRIPT = "builtin_model_script" - # Indicates a benchmark's status: running, successful, failed, or killed + # Indicates status of the most recent build tool run: FunctionStatus + BUILD_STATUS = "build_status" + # Indicates status of the most recent benchmark tool run: FunctionStatus BENCHMARK_STATUS = "benchmark_status" -class BenchmarkStatus: +class FunctionStatus: RUNNING = "running" SUCCESSFUL = "successful" FAILED = "failed" diff --git a/src/turnkeyml/model_api.py b/src/turnkeyml/model_api.py deleted file mode 100644 index d7cb155e..00000000 --- a/src/turnkeyml/model_api.py +++ /dev/null @@ -1,138 +0,0 @@ -from typing import Any, Dict, Optional, Union, List -from turnkeyml.build_api import build_model -from turnkeyml.build.stage import Sequence -import turnkeyml.common.printing as printing -import turnkeyml.common.filesystem as filesystem -from turnkeyml.common.performance import MeasuredPerformance -from turnkeyml.run.devices import ( - SUPPORTED_DEVICES, - SUPPORTED_RUNTIMES, - DEVICE_RUNTIME_MAP, - apply_default_runtime, -) -import turnkeyml.build.sequences as sequences -import turnkeyml.common.exceptions as exp - -TURNKEY_DEFAULT_REBUILD_POLICY = "if_needed" - - -def benchmark_model( - model: Any, - inputs: Dict[str, Any], - build_name: str, - iterations: int = 100, - evaluation_id: str = "build", - cache_dir: str = filesystem.DEFAULT_CACHE_DIR, - device: str = 
"x86", - runtime: Optional[str] = None, - build_only: bool = False, - lean_cache: bool = False, - rebuild: str = TURNKEY_DEFAULT_REBUILD_POLICY, - onnx_opset: Optional[int] = None, - sequence: Sequence = None, - rt_args: Optional[Dict[str, Union[str, List[str]]]] = None, -) -> MeasuredPerformance: - """ - Benchmark a model against some inputs on target hardware - """ - - selected_runtime = apply_default_runtime(device, runtime) - - # Build and benchmark the model - try: - # Validate device and runtime selections - if device not in SUPPORTED_DEVICES: - raise exp.ArgError( - f"Device argument '{device}' is not one of the available " - f"supported devices {SUPPORTED_DEVICES}\n" - f"You may need to check the spelling of '{device}', install a " - "plugin, or update the turnkeyml package." - ) - else: - if selected_runtime not in DEVICE_RUNTIME_MAP[device]: - raise exp.ArgError( - f"Runtime argument '{selected_runtime}' is not one of the available " - f"runtimes supported for device '{device}': {DEVICE_RUNTIME_MAP[device]}\n" - f"You may need to check the spelling of '{selected_runtime}', install a " - "plugin, or update the turnkeyml package." - ) - - # Get the plugin module for the selected runtime - runtime_info = SUPPORTED_RUNTIMES[selected_runtime] - - # Perform a build, if necessary - if runtime_info["build_required"]: - # Get the build sequence that will be used for the model - if sequence is None: - # Automatically choose a Sequence based on what the runtime expects - sequence_selected = runtime_info["default_sequence"] - else: - # User-specified Sequence - if isinstance(sequence, str): - # Sequence is defined by a plugin - if sequence in sequences.SUPPORTED_SEQUENCES.keys(): - sequence_selected = sequences.SUPPORTED_SEQUENCES[sequence] - else: - raise ValueError( - f"Sequence argument {sequence} is not one of the " - "available sequences installed: " - f"{sequences.SUPPORTED_SEQUENCES.keys()} \n" - f"You may need to check the spelling of `{sequence}`, " - "install a plugin, or update the turnkeyml package." 
- ) - - elif isinstance(sequence, Sequence): - # Sequence is a user-defined instance of Sequence - sequence_selected = sequence - - build_model( - model=model, - inputs=inputs, - evaluation_id=evaluation_id, - build_name=build_name, - cache_dir=cache_dir, - rebuild=rebuild, - sequence=sequence_selected, - onnx_opset=onnx_opset, - device=device, - ) - - # Perform benchmarking, if requested - if not build_only: - if rt_args is None: - rt_args_to_use = {} - else: - rt_args_to_use = rt_args - - printing.log_info(f"Benchmarking on {device}...") - stats = filesystem.Stats(cache_dir, build_name, evaluation_id) - model_handle = runtime_info["RuntimeClass"]( - cache_dir=cache_dir, - build_name=build_name, - stats=stats, - iterations=iterations, - model=model, - inputs=inputs, - device_type=device, - runtime=selected_runtime, - **rt_args_to_use, - ) - perf = model_handle.benchmark() - - finally: - # Make sure the build and cache dirs exist and have the proper marker files - # NOTE: We would do this at the top of the file, however - # there are conditions where build_model() will wipe the build dir, - # which would eliminate our marker file - filesystem.make_build_dir(cache_dir, build_name) - - # Clean cache if needed - if lean_cache: - printing.log_info("Removing build artifacts...") - filesystem.clean_output_dir(cache_dir, build_name) - - if not build_only: - perf.print() - return perf - else: - return None diff --git a/src/turnkeyml/run/devices.py b/src/turnkeyml/run/devices.py index a5411f37..c48d81b5 100644 --- a/src/turnkeyml/run/devices.py +++ b/src/turnkeyml/run/devices.py @@ -1,9 +1,12 @@ from typing import Optional -from typing import List, Dict +from typing import List, Dict, Tuple import turnkeyml.run.onnxrt as onnxrt import turnkeyml.run.tensorrt as tensorrt import turnkeyml.run.torchrt as torchrt import turnkeyml.common.plugins as plugins +from turnkeyml.build.stage import Sequence +import turnkeyml.build.sequences as sequences +import turnkeyml.common.exceptions as exp def supported_devices_list(data: Dict, parent_key: str = "") -> List: @@ -72,3 +75,63 @@ def apply_default_runtime(device: str, runtime: Optional[str] = None): return DEVICE_RUNTIME_MAP[device][DEFAULT_RUNTIME] else: return runtime + + +def _check_suggestion(value: str): + return ( + f"You may need to check the spelling of '{value}', install a " + "plugin, or update the turnkeyml package." 
+ ) + + +def select_runtime_and_sequence( + device: str, runtime: Optional[str], sequence: Optional[Sequence] +) -> Tuple[str, str, Sequence]: + selected_runtime = apply_default_runtime(device, runtime) + + # Validate device and runtime selections + if device not in SUPPORTED_DEVICES: + raise exp.ArgError( + f"Device argument '{device}' is not one of the available " + f"supported devices {SUPPORTED_DEVICES}\n" + f"{_check_suggestion(device)}" + ) + if selected_runtime not in DEVICE_RUNTIME_MAP[device]: + raise exp.ArgError( + f"Runtime argument '{selected_runtime}' is not one of the available " + f"runtimes supported for device '{device}': {DEVICE_RUNTIME_MAP[device]}\n" + f"{_check_suggestion(selected_runtime)}" + ) + + # Get the plugin module for the selected runtime + runtime_info = SUPPORTED_RUNTIMES[selected_runtime] + + # Perform a build, if necessary + if runtime_info["build_required"]: + # Get the build sequence that will be used for the model + if sequence is None: + # Automatically choose a Sequence based on what the runtime expects + sequence_selected = runtime_info["default_sequence"] + else: + # User-specified Sequence + if isinstance(sequence, str): + # Sequence is defined by a plugin + if sequence in sequences.SUPPORTED_SEQUENCES.keys(): + sequence_selected = sequences.SUPPORTED_SEQUENCES[sequence] + else: + raise ValueError( + f"Sequence argument {sequence} is not one of the " + "available sequences installed: " + f"{sequences.SUPPORTED_SEQUENCES.keys()} \n" + f"{_check_suggestion(sequence)}" + ) + + elif isinstance(sequence, Sequence): + # Sequence is a user-defined instance of Sequence + sequence_selected = sequence + + else: + # Sequence is only needed for builds + sequence_selected = None + + return selected_runtime, runtime_info, sequence_selected diff --git a/src/turnkeyml/version.py b/src/turnkeyml/version.py index 493f7415..6a9beea8 100644 --- a/src/turnkeyml/version.py +++ b/src/turnkeyml/version.py @@ -1 +1 @@ -__version__ = "0.3.0" +__version__ = "0.4.0" diff --git a/test/cli.py b/test/cli.py index e555bc7f..d5731b23 100644 --- a/test/cli.py +++ b/test/cli.py @@ -389,7 +389,7 @@ def test_005_cli_list(self): for test_script in common.test_scripts_dot_py.keys(): script_name = common.strip_dot_py(test_script) - assert script_name in f.getvalue() + assert script_name in f.getvalue(), f"{script_name} {f.getvalue()}" def test_006_cli_delete(self): # NOTE: this is not a unit test, it relies on other command @@ -976,6 +976,7 @@ def test_028_cli_timeout(self): "--process-isolation", "--timeout", "10", + "--build-only", ] with patch.object(sys, "argv", flatten(testargs)): turnkeycli() @@ -1000,8 +1001,8 @@ def test_028_cli_timeout(self): try: timeout_summary = summary[0] - assert timeout_summary["benchmark_status"] == "killed", timeout_summary[ - "benchmark_status" + assert timeout_summary["build_status"] == "killed", timeout_summary[ + "build_status" ] except IndexError: # Edge case where the CSV is empty because the build timed out before diff --git a/test/model_api.py b/test/model_api.py deleted file mode 100644 index 6822da93..00000000 --- a/test/model_api.py +++ /dev/null @@ -1,171 +0,0 @@ -import os -import unittest -import torch -import shutil -import onnx -import platform -import turnkeyml.build.stage as stage -import turnkeyml.common.filesystem as filesystem -import turnkeyml.build.export as export -import turnkeyml.common.build as build -from turnkeyml import benchmark_model -from helpers import common - - -class SmallPytorchModel(torch.nn.Module): - def 
__init__(self): - super(SmallPytorchModel, self).__init__() - self.fc = torch.nn.Linear(10, 5) - - def forward(self, x): - output = self.fc(x) - return output - - -class AnotherSimplePytorchModel(torch.nn.Module): - def __init__(self): - super(AnotherSimplePytorchModel, self).__init__() - self.relu = torch.nn.ReLU() - - def forward(self, x): - output = self.relu(x) - return output - - -# Define pytorch model and inputs -pytorch_model = SmallPytorchModel() -tiny_pytorch_model = AnotherSimplePytorchModel() -inputs = {"x": torch.rand(10)} -inputs_2 = {"x": torch.rand(5)} -input_tensor = torch.rand(10) - -# Create a test directory -cache_dir, _ = common.create_test_dir("cli") - - -def get_build_state(cache_dir, build_name): - return build.load_state(cache_dir=cache_dir, build_name=build_name) - - -class Testing(unittest.TestCase): - def setUp(self) -> None: - filesystem.rmdir(cache_dir) - return super().setUp() - - def test_001_build_pytorch_model(self): - build_name = "build_pytorch_model" - benchmark_model( - pytorch_model, - inputs, - build_name=build_name, - rebuild="always", - build_only=True, - cache_dir=cache_dir, - runtime="ort", - ) - state = get_build_state(cache_dir, build_name) - assert state.build_status == build.Status.SUCCESSFUL_BUILD - - def test_002_custom_stage(self): - build_name = "custom_stage" - - class MyCustomStage(stage.Stage): - def __init__(self, funny_saying): - super().__init__( - unique_name="funny_stage", - monitor_message="Funny Stage", - ) - - self.funny_saying = funny_saying - - def fire(self, state): - print(f"funny message: {self.funny_saying}") - state.build_status = build.Status.SUCCESSFUL_BUILD - return state - - my_custom_stage = MyCustomStage( - funny_saying="Is a fail whale a fail at all if it makes you smile?" - ) - my_sequence = stage.Sequence( - unique_name="my_sequence", - monitor_message="Running My Sequence", - stages=[ - export.ExportPytorchModel(), - export.OptimizeOnnxModel(), - my_custom_stage, - ], - ) - - benchmark_model( - pytorch_model, - inputs, - build_name=build_name, - rebuild="always", - sequence=my_sequence, - build_only=True, - cache_dir=cache_dir, - runtime="ort", - ) - - state = get_build_state(cache_dir, build_name) - return state.build_status == build.Status.SUCCESSFUL_BUILD - - # TODO: Investigate why this test is only failing on Windows CI failing - @unittest.skipIf(platform.system() == "Windows", "Windows CI only failure") - def test_003_local_benchmark(self): - build_name = "local_benchmark" - perf = benchmark_model( - pytorch_model, - inputs, - device="x86", - build_name=build_name, - rebuild="always", - cache_dir=cache_dir, - lean_cache=True, - runtime="ort", - ) - state = get_build_state(cache_dir, build_name) - assert state.build_status == build.Status.SUCCESSFUL_BUILD - assert os.path.isfile( - os.path.join(cache_dir, build_name, "x86_benchmark/outputs.json") - ) - assert perf.mean_latency > 0 - assert perf.throughput > 0 - - # TODO: Investigate why this test is only failing on Windows CI failing - @unittest.skipIf(platform.system() == "Windows", "Windows CI only issue") - def test_004_onnx_opset(self): - """ - Make sure we can successfully benchmark a model with a user-defined ONNX opset - """ - - build_name = "onnx_opset" - - user_opset = 15 - assert user_opset != build.DEFAULT_ONNX_OPSET - - perf = benchmark_model( - pytorch_model, - inputs, - device="x86", - build_name=build_name, - rebuild="always", - cache_dir=cache_dir, - onnx_opset=user_opset, - runtime="ort", - ) - state = get_build_state(cache_dir, 
build_name) - assert state.build_status == build.Status.SUCCESSFUL_BUILD - assert os.path.isfile( - os.path.join(cache_dir, build_name, "x86_benchmark/outputs.json") - ) - assert perf.mean_latency > 0 - assert perf.throughput > 0 - - onnx_model = onnx.load(state.results[0]) - model_opset = getattr(onnx_model.opset_import[0], "version", None) - assert user_opset == model_opset - - -if __name__ == "__main__": - unittest.main() From 6214ab398b89027cf40d0862f211ac700d95e30d Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Tue, 5 Dec 2023 13:34:59 -0500 Subject: [PATCH 05/35] Remove the SetSuccess stage (and the need for it) (#59) Signed-off-by: Jeremy Fowers --- examples/build_api/sequence.py | 1 - .../sequence.py | 1 - .../turnkeyml_plugin_example_seq/sequence.py | 1 - src/turnkeyml/build/export.py | 20 ----------- src/turnkeyml/build/sequences.py | 3 -- src/turnkeyml/build/stage.py | 27 ++++++++++---- test/build_model.py | 36 ------------------- 7 files changed, 20 insertions(+), 69 deletions(-) diff --git a/examples/build_api/sequence.py b/examples/build_api/sequence.py index 29eff72b..b25c4f0b 100644 --- a/examples/build_api/sequence.py +++ b/examples/build_api/sequence.py @@ -44,7 +44,6 @@ def forward(self, x): export.ExportPytorchModel(), export.OptimizeOnnxModel(), # export.ConvertOnnxToFp16(), #<-- This is the step we want to skip - export.SuccessStage(), ], enable_model_validation=True, ) diff --git a/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/sequence.py b/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/sequence.py index 55ba2083..70f2d1ca 100644 --- a/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/sequence.py +++ b/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/sequence.py @@ -27,7 +27,6 @@ def fire(self, state: build.State): stages=[ export.ExportPlaceholder(), CombinedExampleStage(), - export.SuccessStage(), ], enable_model_validation=True, ) diff --git a/examples/cli/plugins/example_seq/turnkeyml_plugin_example_seq/sequence.py b/examples/cli/plugins/example_seq/turnkeyml_plugin_example_seq/sequence.py index 350f2a76..d3ef72f2 100644 --- a/examples/cli/plugins/example_seq/turnkeyml_plugin_example_seq/sequence.py +++ b/examples/cli/plugins/example_seq/turnkeyml_plugin_example_seq/sequence.py @@ -43,7 +43,6 @@ def fire(self, state: build.State): stages=[ export.ExportPlaceholder(), ExampleStage(), - export.SuccessStage(), ], enable_model_validation=True, ) diff --git a/src/turnkeyml/build/export.py b/src/turnkeyml/build/export.py index 6d32f052..eee8a306 100644 --- a/src/turnkeyml/build/export.py +++ b/src/turnkeyml/build/export.py @@ -620,23 +620,3 @@ def fire(self, state: build.State): raise exp.StageError(msg) return state - - -class SuccessStage(stage.Stage): - """ - Stage that sets state.build_status = build.Status.SUCCESSFUL_BUILD, - indicating that the build sequence has completed all of the requested build stages. 
- """ - - def __init__(self): - super().__init__( - unique_name="set_success", - monitor_message="Finishing up", - ) - - def fire(self, state: build.State): - state.build_status = build.Status.SUCCESSFUL_BUILD - - state.results = copy.deepcopy(state.intermediate_results) - - return state diff --git a/src/turnkeyml/build/sequences.py b/src/turnkeyml/build/sequences.py index 7e90ead3..abeb159c 100644 --- a/src/turnkeyml/build/sequences.py +++ b/src/turnkeyml/build/sequences.py @@ -9,7 +9,6 @@ export.ExportPlaceholder(), export.OptimizeOnnxModel(), export.ConvertOnnxToFp16(), - export.SuccessStage(), ], enable_model_validation=True, ) @@ -20,7 +19,6 @@ [ export.ExportPlaceholder(), export.OptimizeOnnxModel(), - export.SuccessStage(), ], enable_model_validation=True, ) @@ -30,7 +28,6 @@ "Base Sequence", [ export.ExportPlaceholder(), - export.SuccessStage(), ], enable_model_validation=True, ) diff --git a/src/turnkeyml/build/stage.py b/src/turnkeyml/build/stage.py index 0267995e..daad45c0 100644 --- a/src/turnkeyml/build/stage.py +++ b/src/turnkeyml/build/stage.py @@ -2,6 +2,7 @@ import sys import time import os +import copy from typing import List, Tuple from multiprocessing import Process import psutil @@ -108,8 +109,7 @@ def fire_helper(self, state: build.State) -> Tuple[build.State, int]: # Set the build status to BUILD_RUNNING to indicate that a Stage # started running. This allows us to test whether the Stage exited - # unexpectedly, before it was able to set FAILED_BUILD, SUCCESSFUL_BUILD, - # or PARTIAL_BUILD + # unexpectedly, before it was able to set FAILED_BUILD state.build_status = build.Status.BUILD_RUNNING self.logfile_path = os.path.join( @@ -137,11 +137,16 @@ def fire_helper(self, state: build.State) -> Tuple[build.State, int]: else: self.status_line(successful=True, verbosity=state.monitor) - # Set the build status PARTIAL_BUILD, indicating that the stage - # ran successfully, unless the stage set SUCCESSFUL_BUILD, in which - # case leave the build status alone. - if state.build_status != build.Status.SUCCESSFUL_BUILD: - state.build_status = build.Status.PARTIAL_BUILD + # Stages should not set build.Status.SUCCESSFUL_BUILD, as that is + # reserved for Sequence.launch() + if state.build_status == build.Status.SUCCESSFUL_BUILD: + raise exp.StageError( + "TurnkeyML Stages are not allowed to set " + "`state.build_status == build.Status.SUCCESSFUL_BUILD`, " + "however that has happened. If you are a plugin developer, " + "do not do this. If you are a user, please file an issue at " + "https://github.com/onnx/turnkeyml/issues." + ) finally: if state.monitor: @@ -314,6 +319,14 @@ def launch(self, state: build.State) -> build.State: else: state.current_build_stage = None + state.build_status = build.Status.SUCCESSFUL_BUILD + + # We use a deepcopy here because the Stage framework supports + # intermediate_results of any type, including model objects in memory. + # The deepcopy ensures that we are providing a result that users + # are free to take any action with. 
+ state.results = copy.deepcopy(state.intermediate_results) + return state def status_line(self, successful, verbosity): diff --git a/test/build_model.py b/test/build_model.py index 3e1b9ae6..4503294e 100644 --- a/test/build_model.py +++ b/test/build_model.py @@ -232,38 +232,6 @@ def scriptmodule_functional_check(): return state.build_status == build.Status.SUCCESSFUL_BUILD -def full_compile_individual_stages(): - build_name = "full_compile_individual_stages" - build_model( - pytorch_model, - inputs, - build_name=build_name, - rebuild="always", - monitor=False, - sequence=stage.Sequence( - "ExportPytorchModel_seq", "", [export.ExportPytorchModel()] - ), - cache_dir=cache_location, - ) - build_model( - build_name=build_name, - sequence=stage.Sequence("OptimizeModel_seq", "", [export.OptimizeOnnxModel()]), - cache_dir=cache_location, - ) - build_model( - build_name=build_name, - sequence=stage.Sequence("Fp16Conversion_seq", "", [export.ConvertOnnxToFp16()]), - cache_dir=cache_location, - ) - state = build_model( - build_name=build_name, - sequence=stage.Sequence("SuccessStage_seq", "", [export.SuccessStage()]), - cache_dir=cache_location, - ) - - return state.build_status == build.Status.SUCCESSFUL_BUILD - - def custom_stage(): build_name = "custom_stage" @@ -299,7 +267,6 @@ def fire(self, state): export.ExportPytorchModel(), export.OptimizeOnnxModel(), my_custom_stage, - export.SuccessStage(), ], ) @@ -537,9 +504,6 @@ def test_006_full_compilation_hummingbird_rf(self): def test_007_full_compilation_hummingbird_xgb(self): assert full_compilation_hummingbird_xgb() - def test_008_full_compile_individual_stages(self): - assert full_compile_individual_stages() - def test_009_custom_stage(self): assert custom_stage() From 4d612eb4a4ea41fa3090b2d29de92a18978e4833 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Tue, 5 Dec 2023 11:19:22 -0800 Subject: [PATCH 06/35] Show stages completed following all stages order --- src/turnkeyml/cli/report.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index 2c2f4281..af576420 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -81,8 +81,17 @@ def summary_spreadsheets(args) -> None: # Break each value in "completed build stages" into its own column # to make analysis easier if key == fs.Keys.COMPLETED_BUILD_STAGES: - for subkey, subvalue in value.items(): - evaluation_stats[subkey] = subvalue + for stage in build[fs.Keys.ALL_BUILD_STAGES]: + column_name = f"stage_duration-{stage}" + if stage in build[fs.Keys.COMPLETED_BUILD_STAGES]: + evaluation_stats[column_name] = build[ + fs.Keys.COMPLETED_BUILD_STAGES + ][stage] + else: + evaluation_stats[column_name] = "INCOMPLETE" + + # Do not add the raw version of COMPLETED_BUILD_STAGES to the report + continue # If a build or benchmark is still marked as "running" at # reporting time, it From aaf08a21a1fea1175debc7b6e2350f2d9287f39d Mon Sep 17 00:00:00 2001 From: Daniel Holanda Date: Tue, 5 Dec 2023 12:52:52 -0800 Subject: [PATCH 07/35] Fix CI testing numbering of `cli.py` (#60) * Remove the SetSuccess stage (and the need for it) Signed-off-by: Jeremy Fowers * Add a comment about deepcopy Signed-off-by: Jeremy Fowers * Fix CI testing order * Move large test to bottom --------- Signed-off-by: Jeremy Fowers Co-authored-by: Jeremy Fowers --- test/cli.py | 238 ++++++++++++++++++++++++++-------------------------- 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/test/cli.py b/test/cli.py 
index d5731b23..a1bba925 100644 --- a/test/cli.py +++ b/test/cli.py @@ -261,104 +261,7 @@ def test_003_cli_build_dir(self): assert_success_of_builds(test_scripts, cache_dir) - def test_021_cli_report(self): - # NOTE: this is not a unit test, it relies on other command - # If this test is failing, make sure the following tests are passing: - # - test_cli_corpus - - test_scripts = common.test_scripts_dot_py.keys() - - # Build the test corpus so we have builds to report - testargs = [ - "turnkey", - "benchmark", - bash(f"{corpus_dir}/*.py"), - "--cache-dir", - cache_dir, - ] - with patch.object(sys, "argv", flatten(testargs)): - turnkeycli() - - testargs = [ - "turnkey", - "cache", - "report", - "--cache-dir", - cache_dir, - ] - with patch.object(sys, "argv", testargs): - turnkeycli() - - # Read generated CSV file - summary_csv_path = report.get_report_name() - with open(summary_csv_path, "r", encoding="utf8") as summary_csv: - summary = list(csv.DictReader(summary_csv)) - - # Check if csv file contains all expected rows and columns - expected_cols = [ - "model_name", - "author", - "class", - "parameters", - "hash", - "runtime", - "device_type", - "device", - "mean_latency", - "throughput", - "all_build_stages", - "completed_build_stages", - ] - linear_summary = summary[1] - assert len(summary) == len(test_scripts) - for elem in expected_cols: - assert ( - elem in linear_summary - ), f"Couldn't find expected key {elem} in results spreadsheet" - - # Check whether all rows we expect to be populated are actually populated - assert ( - linear_summary["model_name"] == "linear2" - ), f"Wrong model name found {linear_summary['model_name']}" - assert ( - linear_summary["author"] == "turnkey" - ), f"Wrong author name found {linear_summary['author']}" - assert ( - linear_summary["class"] == "TwoLayerModel" - ), f"Wrong class found {linear_summary['model_class']}" - assert ( - linear_summary["hash"] == "80b93950" - ), f"Wrong hash found {linear_summary['hash']}" - assert ( - linear_summary["runtime"] == "ort" - ), f"Wrong runtime found {linear_summary['runtime']}" - assert ( - linear_summary["device_type"] == "x86" - ), f"Wrong device type found {linear_summary['device_type']}" - assert ( - float(linear_summary["mean_latency"]) > 0 - ), f"latency must be >0, got {linear_summary['x86_latency']}" - assert ( - float(linear_summary["throughput"]) > 100 - ), f"throughput must be >100, got {linear_summary['throughput']}" - - # Make sure the report.get_dict() API works - result_dict = report.get_dict( - summary_csv_path, ["all_build_stages", "completed_build_stages"] - ) - for result in result_dict.values(): - # All of the models should have exported to ONNX, so the "onnx_exported" value - # should be True for all of them - assert "export_pytorch" in yaml.safe_load(result["all_build_stages"]) - assert ( - "export_pytorch" - in yaml.safe_load(result["completed_build_stages"]).keys() - ) - assert ( - yaml.safe_load(result["completed_build_stages"])["export_pytorch"] > 0 - ) - - def test_005_cli_list(self): + def test_004_cli_list(self): # NOTE: this is not a unit test, it relies on other command # If this test is failing, make sure the following tests are passing: # - test_cli_corpus @@ -391,7 +294,7 @@ def test_005_cli_list(self): script_name = common.strip_dot_py(test_script) assert script_name in f.getvalue(), f"{script_name} {f.getvalue()}" - def test_006_cli_delete(self): + def test_005_cli_delete(self): # NOTE: this is not a unit test, it relies on other command # If this test is failing, make sure the 
following tests are passing: # - test_cli_corpus @@ -453,7 +356,7 @@ def test_006_cli_delete(self): script_name = common.strip_dot_py(test_script) assert script_name not in f.getvalue() - def test_007_cli_stats(self): + def test_006_cli_stats(self): # NOTE: this is not a unit test, it relies on other command # If this test is failing, make sure the following tests are passing: # - test_cli_corpus @@ -531,7 +434,7 @@ def test_007_cli_stats(self): ] assert isinstance(stats_dict["task"], str), stats_dict["task"] - def test_008_cli_version(self): + def test_007_cli_version(self): # Get the version number with redirect_stdout(io.StringIO()) as f: testargs = [ @@ -544,7 +447,7 @@ def test_008_cli_version(self): # Make sure we get back a 3-digit number assert len(f.getvalue().split(".")) == 3 - def test_009_cli_turnkey_args(self): + def test_008_cli_turnkey_args(self): # NOTE: this is not a unit test, it relies on other command # If this test is failing, make sure the following tests are passing: # - test_cli_single @@ -570,7 +473,7 @@ def test_009_cli_turnkey_args(self): # TODO: Investigate why this test is failing only on Windows CI failing @unittest.skipIf(platform.system() == "Windows", "Windows CI only failure") - def test_011_cli_benchmark(self): + def test_009_cli_benchmark(self): # Test the first model in the corpus test_script = list(common.test_scripts_dot_py.keys())[0] @@ -588,7 +491,7 @@ def test_011_cli_benchmark(self): # TODO: Investigate why this test is non-deterministically failing @unittest.skip("Flaky test") - def test_013_cli_labels(self): + def test_010_cli_labels(self): # Only build models labels with test_group::a testargs = [ "turnkey", @@ -638,7 +541,7 @@ def test_013_cli_labels(self): assert state_files == ["linear_d5b1df11_state", "linear2_80b93950_state"] @unittest.skip("Needs re-implementation") - def test_014_report_on_failed_build(self): + def test_011_report_on_failed_build(self): testargs = [ "turnkey", bash(f"{corpus_dir}/linear.py"), @@ -680,7 +583,7 @@ def test_014_report_on_failed_build(self): ), "Wrong number of parameters found in report" assert summary[0]["hash"] == "d5b1df11", "Wrong hash found in report" - def test_015_runtimes(self): + def test_012_runtimes(self): # Attempt to benchmark using an invalid runtime with self.assertRaises(exceptions.ArgError): testargs = [ @@ -729,7 +632,7 @@ def test_015_runtimes(self): # TODO: Investigate why this test is only failing on Windows CI @unittest.skipIf(platform.system() == "Windows", "Windows CI only failure") - def test_016_cli_onnx_opset(self): + def test_013_cli_onnx_opset(self): # Test the first model in the corpus test_script = list(common.test_scripts_dot_py.keys())[0] @@ -752,7 +655,7 @@ def test_016_cli_onnx_opset(self): [test_script], cache_dir, None, check_perf=True, check_opset=user_opset ) - def test_016_cli_iteration_count(self): + def test_014_cli_iteration_count(self): # Test the first model in the corpus test_script = list(common.test_scripts_dot_py.keys())[0] @@ -777,7 +680,7 @@ def test_016_cli_iteration_count(self): check_iteration_count=test_iterations, ) - def test_017_cli_process_isolation(self): + def test_015_cli_process_isolation(self): # Test the first model in the corpus test_script = list(common.test_scripts_dot_py.keys())[0] @@ -799,7 +702,7 @@ def test_017_cli_process_isolation(self): "Skipping, as torch.compile is not supported on Windows" "Revisit when torch.compile for Windows is supported", ) - def test_018_skip_compiled(self): + def test_016_skip_compiled(self): test_script = 
"compiled.py" testargs = [ "turnkey", @@ -817,14 +720,14 @@ def test_018_skip_compiled(self): # One of those is compiled and should be skipped. assert builds_found == 1 - def test_019_invalid_file_type(self): + def test_017_invalid_file_type(self): # Ensure that we get an error when running turnkey with invalid input_files with self.assertRaises(exceptions.ArgError): testargs = ["turnkey", "gobbledegook"] with patch.object(sys, "argv", flatten(testargs)): turnkeycli() - def test_020_cli_export_only(self): + def test_018_cli_export_only(self): # Test the first model in the corpus test_script = list(common.test_scripts_dot_py.keys())[0] @@ -842,7 +745,7 @@ def test_020_cli_export_only(self): assert_success_of_builds([test_script], cache_dir, check_onnx_file_count=1) - def test_022_cli_onnx_model(self): + def test_019_cli_onnx_model(self): """ Manually export an ONNX file, then feed it into the CLI """ @@ -871,7 +774,7 @@ def test_022_cli_onnx_model(self): assert_success_of_builds([build_name], cache_dir) - def test_023_cli_onnx_model_opset(self): + def test_020_cli_onnx_model_opset(self): """ Manually export an ONNX file with a non-defualt opset, then feed it into the CLI """ @@ -904,7 +807,7 @@ def test_023_cli_onnx_model_opset(self): assert_success_of_builds([build_name], cache_dir) - def test_024_args_encode_decode(self): + def test_021_args_encode_decode(self): """ Test the encoding and decoding of arguments that follow the ["arg1::[value1,value2]","arg2::value1","flag_arg"]' format @@ -916,7 +819,7 @@ def test_024_args_encode_decode(self): reencoded_value == encoded_value ), f"input: {encoded_value}, decoded: {decoded_value}, reencoded_value: {reencoded_value}" - def test_025_benchmark_non_existent_file(self): + def test_022_benchmark_non_existent_file(self): # Ensure we get an error when benchmarking a non existent file with self.assertRaises(exceptions.ArgError): filename = "thou_shall_not_exist.py" @@ -925,7 +828,7 @@ def test_025_benchmark_non_existent_file(self): with patch.object(sys, "argv", testargs): turnkeycli() - def test_026_benchmark_non_existent_file_prefix(self): + def test_023_benchmark_non_existent_file_prefix(self): # Ensure we get an error when benchmarking a non existent file with self.assertRaises(exceptions.ArgError): file_prefix = "non_existent_prefix_*.py" @@ -934,7 +837,7 @@ def test_026_benchmark_non_existent_file_prefix(self): with patch.object(sys, "argv", testargs): turnkeycli() - def test_027_input_text_file(self): + def test_024_input_text_file(self): """ Ensure that we can intake .txt files """ @@ -955,7 +858,7 @@ def test_027_input_text_file(self): builds_found == 3 ), f"Expected 3 builds (1 for linear.py, 2 for linear2.py), but got {builds_found}." - def test_028_cli_timeout(self): + def test_025_cli_timeout(self): """ Make sure that the --timeout option and its associated reporting features work. 
@@ -1009,6 +912,103 @@ def test_028_cli_timeout(self): # the stats.yaml was created, which in turn means the CSV is empty pass + def test_026_cli_report(self): + # NOTE: this is not a unit test, it relies on other command + # If this test is failing, make sure the following tests are passing: + # - test_cli_corpus + + test_scripts = common.test_scripts_dot_py.keys() + + # Build the test corpus so we have builds to report + testargs = [ + "turnkey", + "benchmark", + bash(f"{corpus_dir}/*.py"), + "--cache-dir", + cache_dir, + ] + with patch.object(sys, "argv", flatten(testargs)): + turnkeycli() + + testargs = [ + "turnkey", + "cache", + "report", + "--cache-dir", + cache_dir, + ] + with patch.object(sys, "argv", testargs): + turnkeycli() + + # Read generated CSV file + summary_csv_path = report.get_report_name() + with open(summary_csv_path, "r", encoding="utf8") as summary_csv: + summary = list(csv.DictReader(summary_csv)) + + # Check if csv file contains all expected rows and columns + expected_cols = [ + "model_name", + "author", + "class", + "parameters", + "hash", + "runtime", + "device_type", + "device", + "mean_latency", + "throughput", + "all_build_stages", + "completed_build_stages", + ] + linear_summary = summary[1] + assert len(summary) == len(test_scripts) + for elem in expected_cols: + assert ( + elem in linear_summary + ), f"Couldn't find expected key {elem} in results spreadsheet" + + # Check whether all rows we expect to be populated are actually populated + assert ( + linear_summary["model_name"] == "linear2" + ), f"Wrong model name found {linear_summary['model_name']}" + assert ( + linear_summary["author"] == "turnkey" + ), f"Wrong author name found {linear_summary['author']}" + assert ( + linear_summary["class"] == "TwoLayerModel" + ), f"Wrong class found {linear_summary['model_class']}" + assert ( + linear_summary["hash"] == "80b93950" + ), f"Wrong hash found {linear_summary['hash']}" + assert ( + linear_summary["runtime"] == "ort" + ), f"Wrong runtime found {linear_summary['runtime']}" + assert ( + linear_summary["device_type"] == "x86" + ), f"Wrong device type found {linear_summary['device_type']}" + assert ( + float(linear_summary["mean_latency"]) > 0 + ), f"latency must be >0, got {linear_summary['x86_latency']}" + assert ( + float(linear_summary["throughput"]) > 100 + ), f"throughput must be >100, got {linear_summary['throughput']}" + + # Make sure the report.get_dict() API works + result_dict = report.get_dict( + summary_csv_path, ["all_build_stages", "completed_build_stages"] + ) + for result in result_dict.values(): + # All of the models should have exported to ONNX, so the "onnx_exported" value + # should be True for all of them + assert "export_pytorch" in yaml.safe_load(result["all_build_stages"]) + assert ( + "export_pytorch" + in yaml.safe_load(result["completed_build_stages"]).keys() + ) + assert ( + yaml.safe_load(result["completed_build_stages"])["export_pytorch"] > 0 + ) + if __name__ == "__main__": unittest.main() From d749c5cb682d331430d01fe892b8ff810e18c097 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Tue, 5 Dec 2023 14:43:34 -0800 Subject: [PATCH 08/35] Add NOT STARTED --- test/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/cli.py b/test/cli.py index d5731b23..64e9069d 100644 --- a/test/cli.py +++ b/test/cli.py @@ -307,7 +307,6 @@ def test_021_cli_report(self): "mean_latency", "throughput", "all_build_stages", - "completed_build_stages", ] linear_summary = summary[1] assert len(summary) == len(test_scripts) From 
acae5bb57b7c0e26dcd0c001e156f033be04b0c6 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Tue, 5 Dec 2023 14:54:03 -0800 Subject: [PATCH 09/35] Add not started and sort alphabetically --- src/turnkeyml/cli/report.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index af576420..68190b42 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -81,14 +81,18 @@ def summary_spreadsheets(args) -> None: # Break each value in "completed build stages" into its own column # to make analysis easier if key == fs.Keys.COMPLETED_BUILD_STAGES: + previous_stave_incomplete = False for stage in build[fs.Keys.ALL_BUILD_STAGES]: column_name = f"stage_duration-{stage}" if stage in build[fs.Keys.COMPLETED_BUILD_STAGES]: evaluation_stats[column_name] = build[ fs.Keys.COMPLETED_BUILD_STAGES ][stage] - else: + elif not previous_stave_incomplete: + previous_stave_incomplete = True evaluation_stats[column_name] = "INCOMPLETE" + else: + evaluation_stats[column_name] = "NOT STARTED" # Do not add the raw version of COMPLETED_BUILD_STAGES to the report continue @@ -116,6 +120,7 @@ def summary_spreadsheets(args) -> None: for header in evaluation_stats.keys(): if header not in column_headers: column_headers.append(header) + column_headers = sorted(column_headers) # Add each build to the report for evaluation_stats in all_evaluation_stats: From 4f891d8b858460c03265f0fa8d547c3e8acec449 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Tue, 5 Dec 2023 15:13:03 -0800 Subject: [PATCH 10/35] Fix CI --- test/cli.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/test/cli.py b/test/cli.py index 64e9069d..ce6279e7 100644 --- a/test/cli.py +++ b/test/cli.py @@ -342,20 +342,12 @@ def test_021_cli_report(self): ), f"throughput must be >100, got {linear_summary['throughput']}" # Make sure the report.get_dict() API works - result_dict = report.get_dict( - summary_csv_path, ["all_build_stages", "completed_build_stages"] - ) + result_dict = report.get_dict(summary_csv_path, ["all_build_stages"]) for result in result_dict.values(): # All of the models should have exported to ONNX, so the "onnx_exported" value # should be True for all of them assert "export_pytorch" in yaml.safe_load(result["all_build_stages"]) - assert ( - "export_pytorch" - in yaml.safe_load(result["completed_build_stages"]).keys() - ) - assert ( - yaml.safe_load(result["completed_build_stages"])["export_pytorch"] > 0 - ) + assert float(yaml.safe_load(result["stage_duration-export_pytorch"])) > 0 def test_005_cli_list(self): # NOTE: this is not a unit test, it relies on other command From 9ea9537d09cc2584eff311be141cb4482a8a0b71 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Tue, 5 Dec 2023 15:15:22 -0800 Subject: [PATCH 11/35] Fix CI --- test/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/cli.py b/test/cli.py index 15e47606..dbecd6ef 100644 --- a/test/cli.py +++ b/test/cli.py @@ -993,7 +993,9 @@ def test_026_cli_report(self): ), f"throughput must be >100, got {linear_summary['throughput']}" # Make sure the report.get_dict() API works - result_dict = report.get_dict(summary_csv_path, ["all_build_stages"]) + result_dict = report.get_dict( + summary_csv_path, ["all_build_stages", "stage_duration-export_pytorch"] + ) for result in result_dict.values(): # All of the models should have exported to ONNX, so the "onnx_exported" value # should be True for all of them From 
f3f0584b84b58ac3f4e96a02412902b7bebbd634 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Tue, 5 Dec 2023 16:38:28 -0800 Subject: [PATCH 12/35] Split into multiple columns for clarity --- src/turnkeyml/cli/report.py | 33 +++++++++++++++++++++--------- src/turnkeyml/common/filesystem.py | 4 ++-- test/cli.py | 12 ++++++++--- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index 68190b42..afb1548f 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -78,21 +78,31 @@ def summary_spreadsheets(args) -> None: # Copy the build-specific stats for key, value in build.items(): - # Break each value in "completed build stages" into its own column - # to make analysis easier + # Break each value in "completed build stages" into state and duration + # to make the analysis of this data easier if key == fs.Keys.COMPLETED_BUILD_STAGES: - previous_stave_incomplete = False + previous_state_incomplete = False for stage in build[fs.Keys.ALL_BUILD_STAGES]: - column_name = f"stage_duration-{stage}" + duration_column_name = f"stage_duration: {stage}" + state_column_name = f"stage_state: {stage}" if stage in build[fs.Keys.COMPLETED_BUILD_STAGES]: - evaluation_stats[column_name] = build[ + evaluation_stats[ + state_column_name + ] = "COMPLETED" + evaluation_stats[duration_column_name] = build[ fs.Keys.COMPLETED_BUILD_STAGES ][stage] - elif not previous_stave_incomplete: - previous_stave_incomplete = True - evaluation_stats[column_name] = "INCOMPLETE" + elif not previous_state_incomplete: + previous_state_incomplete = True + evaluation_stats[ + state_column_name + ] = "INCOMPLETE" + evaluation_stats[duration_column_name] = "-" else: - evaluation_stats[column_name] = "NOT STARTED" + evaluation_stats[ + state_column_name + ] = "NOT STARTED" + evaluation_stats[duration_column_name] = "-" # Do not add the raw version of COMPLETED_BUILD_STAGES to the report continue @@ -106,7 +116,8 @@ def summary_spreadsheets(args) -> None: ) and value == fs.FunctionStatus.RUNNING: value = fs.FunctionStatus.KILLED - evaluation_stats[key] = value + # Add stats ensuring that those are all in lower case + evaluation_stats[key.lower()] = value all_evaluation_stats.append(evaluation_stats) except yaml.scanner.ScannerError: @@ -120,6 +131,8 @@ def summary_spreadsheets(args) -> None: for header in evaluation_stats.keys(): if header not in column_headers: column_headers.append(header) + + # Sort all columns alphabetically column_headers = sorted(column_headers) # Add each build to the report diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index d2190336..f53bbb8b 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -322,10 +322,10 @@ class Keys: # ONNX model input tensor dimensions ONNX_INPUT_DIMENSIONS = "onnx_input_dimensions" # List of all build stages in the Sequence - ALL_BUILD_STAGES = "all_build_stages" + ALL_BUILD_STAGES = "selected_stages" # Map of build stages that completed successfully to the # execution time for that stage. We can figure out if any build - # stages failed if all_build_stages != completed_build_stages.keys(). + # stages failed if selected_stages != completed_build_stages.keys(). COMPLETED_BUILD_STAGES = "completed_build_stages" # Location of the most up-to-date ONNX file for this build. If the # build completed successfully, this is the final ONNX file. 
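For readers following this change, here is a minimal sketch of how the new per-stage columns could be consumed once a report CSV exists. The file name and column values below are illustrative only; the real report is produced by the turnkey cache report command, and its naming is not shown in this patch.

    import csv

    # Illustrative report path; substitute the CSV produced by `turnkey cache report`
    report_path = "report.csv"

    with open(report_path, encoding="utf8") as f:
        for row in csv.DictReader(f):
            # After this patch each selected stage contributes two columns:
            #   "stage_duration: <stage>" and "stage_state: <stage>"
            per_stage = {k: v for k, v in row.items() if k.startswith("stage_")}
            print(row.get("model_name"), per_stage)
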
diff --git a/test/cli.py b/test/cli.py index dbecd6ef..58dc6aa7 100644 --- a/test/cli.py +++ b/test/cli.py @@ -994,13 +994,19 @@ def test_026_cli_report(self): # Make sure the report.get_dict() API works result_dict = report.get_dict( - summary_csv_path, ["all_build_stages", "stage_duration-export_pytorch"] + summary_csv_path, + [ + "stages_selected", + "stage_duration: export_pytorch", + "stage_state: export_pytorch", + ], ) for result in result_dict.values(): # All of the models should have exported to ONNX, so the "onnx_exported" value # should be True for all of them - assert "export_pytorch" in yaml.safe_load(result["all_build_stages"]) - assert float(yaml.safe_load(result["stage_duration-export_pytorch"])) > 0 + assert "export_pytorch" in yaml.safe_load(result["stages_selected"]) + assert yaml.safe_load(result["stage_state: export_pytorch"]) == "COMPLETED" + assert yaml.safe_load(result["stage_duration: export_pytorch"]) > 0 if __name__ == "__main__": From d85c9dea5f54553f9d91b32d9655529e1fc288f9 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Tue, 5 Dec 2023 16:46:59 -0800 Subject: [PATCH 13/35] Look for correct column --- test/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cli.py b/test/cli.py index 58dc6aa7..2fb53b94 100644 --- a/test/cli.py +++ b/test/cli.py @@ -957,7 +957,7 @@ def test_026_cli_report(self): "device", "mean_latency", "throughput", - "all_build_stages", + "selected_stages", ] linear_summary = summary[1] assert len(summary) == len(test_scripts) From 00d96f177888db5ac6985568a08bbb8e8b4132e5 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Tue, 5 Dec 2023 16:56:32 -0800 Subject: [PATCH 14/35] stages_selected vs selected_stages --- test/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cli.py b/test/cli.py index 2fb53b94..74041e27 100644 --- a/test/cli.py +++ b/test/cli.py @@ -996,7 +996,7 @@ def test_026_cli_report(self): result_dict = report.get_dict( summary_csv_path, [ - "stages_selected", + "selected_stages", "stage_duration: export_pytorch", "stage_state: export_pytorch", ], @@ -1004,7 +1004,7 @@ def test_026_cli_report(self): for result in result_dict.values(): # All of the models should have exported to ONNX, so the "onnx_exported" value # should be True for all of them - assert "export_pytorch" in yaml.safe_load(result["stages_selected"]) + assert "export_pytorch" in yaml.safe_load(result["selected_stages"]) assert yaml.safe_load(result["stage_state: export_pytorch"]) == "COMPLETED" assert yaml.safe_load(result["stage_duration: export_pytorch"]) > 0 From eaa135824d4fe8ba9c43825073ddd17ef5741efb Mon Sep 17 00:00:00 2001 From: danielholanda Date: Tue, 5 Dec 2023 17:12:48 -0800 Subject: [PATCH 15/35] Fix CI --- test/cli.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/cli.py b/test/cli.py index 74041e27..f28fe39a 100644 --- a/test/cli.py +++ b/test/cli.py @@ -1002,11 +1002,10 @@ def test_026_cli_report(self): ], ) for result in result_dict.values(): - # All of the models should have exported to ONNX, so the "onnx_exported" value - # should be True for all of them - assert "export_pytorch" in yaml.safe_load(result["selected_stages"]) - assert yaml.safe_load(result["stage_state: export_pytorch"]) == "COMPLETED" - assert yaml.safe_load(result["stage_duration: export_pytorch"]) > 0 + # All of the models should have exported to ONNX + assert "export_pytorch" in result["selected_stages"] + assert result["stage_state: export_pytorch"] == "COMPLETED" + assert 
result["stage_duration: export_pytorch"] > 0 if __name__ == "__main__": From 607a7b075d58867f733a50fbcddddf7d8b996d79 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Thu, 7 Dec 2023 15:40:58 -0800 Subject: [PATCH 16/35] suggested changes --- src/turnkeyml/build/stage.py | 2 +- src/turnkeyml/cli/report.py | 6 +++--- src/turnkeyml/common/filesystem.py | 4 ++-- test/cli.py | 10 +++++----- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/turnkeyml/build/stage.py b/src/turnkeyml/build/stage.py index daad45c0..5987898a 100644 --- a/src/turnkeyml/build/stage.py +++ b/src/turnkeyml/build/stage.py @@ -280,7 +280,7 @@ def launch(self, state: build.State) -> build.State: # Collect telemetry for the build stats = fs.Stats(state.cache_dir, state.config.build_name, state.evaluation_id) stats.save_model_eval_stat( - fs.Keys.ALL_BUILD_STAGES, + fs.Keys.SELECTED_SEQUENCE_OF_STAGES, self.get_names(), ) diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index afb1548f..9dcd26f4 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -78,13 +78,13 @@ def summary_spreadsheets(args) -> None: # Copy the build-specific stats for key, value in build.items(): - # Break each value in "completed build stages" into state and duration + # Break each value in "completed build stages" into status and duration # to make the analysis of this data easier if key == fs.Keys.COMPLETED_BUILD_STAGES: previous_state_incomplete = False - for stage in build[fs.Keys.ALL_BUILD_STAGES]: + for stage in build[fs.Keys.SELECTED_SEQUENCE_OF_STAGES]: duration_column_name = f"stage_duration: {stage}" - state_column_name = f"stage_state: {stage}" + state_column_name = f"stage_status: {stage}" if stage in build[fs.Keys.COMPLETED_BUILD_STAGES]: evaluation_stats[ state_column_name diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index f53bbb8b..1c291373 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -322,10 +322,10 @@ class Keys: # ONNX model input tensor dimensions ONNX_INPUT_DIMENSIONS = "onnx_input_dimensions" # List of all build stages in the Sequence - ALL_BUILD_STAGES = "selected_stages" + SELECTED_SEQUENCE_OF_STAGES = "selected_sequence_of_stages" # Map of build stages that completed successfully to the # execution time for that stage. We can figure out if any build - # stages failed if selected_stages != completed_build_stages.keys(). + # stages failed if selected_sequence_of_stages != completed_build_stages.keys(). COMPLETED_BUILD_STAGES = "completed_build_stages" # Location of the most up-to-date ONNX file for this build. If the # build completed successfully, this is the final ONNX file. 
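A note on how the renamed selected_sequence_of_stages column reads back: the cell holds a serialized list of stage names. Earlier revisions in this series parsed it with yaml.safe_load, while the updated tests fall back to a plain substring check. A tiny, self-contained illustration of both, with a made-up cell value in the assumed shape:

    import yaml

    # Made-up cell contents in the shape the report is assumed to store
    cell = "['export_pytorch', 'optimize_onnx', 'fp16_conversion']"

    # Substring check, as the updated tests do
    assert "export_pytorch" in cell

    # Or parse it into a real list first
    assert "export_pytorch" in yaml.safe_load(cell)
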
diff --git a/test/cli.py b/test/cli.py index f28fe39a..e32c1355 100644 --- a/test/cli.py +++ b/test/cli.py @@ -957,7 +957,7 @@ def test_026_cli_report(self): "device", "mean_latency", "throughput", - "selected_stages", + "selected_sequence_of_stages", ] linear_summary = summary[1] assert len(summary) == len(test_scripts) @@ -996,15 +996,15 @@ def test_026_cli_report(self): result_dict = report.get_dict( summary_csv_path, [ - "selected_stages", + "selected_sequence_of_stages", "stage_duration: export_pytorch", - "stage_state: export_pytorch", + "stage_status: export_pytorch", ], ) for result in result_dict.values(): # All of the models should have exported to ONNX - assert "export_pytorch" in result["selected_stages"] - assert result["stage_state: export_pytorch"] == "COMPLETED" + assert "export_pytorch" in result["selected_sequence_of_stages"] + assert result["stage_status: export_pytorch"] == "COMPLETED" assert result["stage_duration: export_pytorch"] > 0 From 7e7401a3c560498fba8a89954a8fd210db47e419 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Thu, 7 Dec 2023 17:12:57 -0800 Subject: [PATCH 17/35] lint --- src/turnkeyml/cli/report.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index 5add5abe..9dcd26f4 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -81,7 +81,6 @@ def summary_spreadsheets(args) -> None: # Break each value in "completed build stages" into status and duration # to make the analysis of this data easier if key == fs.Keys.COMPLETED_BUILD_STAGES: -<<<<<<< HEAD previous_state_incomplete = False for stage in build[fs.Keys.SELECTED_SEQUENCE_OF_STAGES]: duration_column_name = f"stage_duration: {stage}" @@ -108,11 +107,6 @@ def summary_spreadsheets(args) -> None: # Do not add the raw version of COMPLETED_BUILD_STAGES to the report continue -======= - for subkey, subvalue in value.items(): - evaluation_stats[subkey] = subvalue - ->>>>>>> main # If a build or benchmark is still marked as "running" at # reporting time, it # must have been killed by a time out, out-of-memory (OOM), or some @@ -122,12 +116,8 @@ def summary_spreadsheets(args) -> None: ) and value == fs.FunctionStatus.RUNNING: value = fs.FunctionStatus.KILLED -<<<<<<< HEAD # Add stats ensuring that those are all in lower case evaluation_stats[key.lower()] = value -======= - evaluation_stats[key] = value ->>>>>>> main all_evaluation_stats.append(evaluation_stats) except yaml.scanner.ScannerError: From 38fd493ac4df8449c0cbda92962e5399cd0a0109 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Mon, 18 Dec 2023 15:39:38 -0800 Subject: [PATCH 18/35] Update branch --- test/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/cli.py b/test/cli.py index b668af38..30fd84ea 100644 --- a/test/cli.py +++ b/test/cli.py @@ -958,7 +958,6 @@ def test_026_cli_report(self): "mean_latency", "throughput", "selected_sequence_of_stages", - "completed_build_stages", ] linear_summary = summary[1] assert len(summary) == len(test_scripts) From f443f68fb7ce88b3eaf4ea5d1f5bcacecd7ebb43 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Sun, 7 Jan 2024 12:01:39 -0800 Subject: [PATCH 19/35] Unified terminology --- src/turnkeyml/analyze/script.py | 4 ++-- src/turnkeyml/build/ignition.py | 2 +- src/turnkeyml/build/stage.py | 10 +++++----- src/turnkeyml/build_api.py | 2 +- src/turnkeyml/common/build.py | 2 +- src/turnkeyml/common/filesystem.py | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git 
a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py index 3c288909..77976558 100644 --- a/src/turnkeyml/analyze/script.py +++ b/src/turnkeyml/analyze/script.py @@ -272,7 +272,7 @@ def explore_invocation( ) stats.save_model_eval_stat( - fs.Keys.BUILD_STATUS, fs.FunctionStatus.SUCCESSFUL + fs.Keys.BUILD_STATUS, fs.FunctionStatus.COMPLETED ) model_to_benchmark = build_state.results[0] @@ -317,7 +317,7 @@ def explore_invocation( ) stats.save_model_eval_stat( - fs.Keys.BENCHMARK_STATUS, fs.FunctionStatus.SUCCESSFUL + fs.Keys.BENCHMARK_STATUS, fs.FunctionStatus.COMPLETED ) invocation_info.status_message = "Model successfully benchmarked!" diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py index 86c51c94..b98b1ee3 100644 --- a/src/turnkeyml/build/ignition.py +++ b/src/turnkeyml/build/ignition.py @@ -324,7 +324,7 @@ def load_or_make_state( if ( model_type == build.ModelType.UNKNOWN - and state.build_status == build.Status.SUCCESSFUL_BUILD + and state.build_status == build.Status.COMPLETED_BUILD ): msg = ( "Model caching is disabled for successful builds against custom Sequences. " diff --git a/src/turnkeyml/build/stage.py b/src/turnkeyml/build/stage.py index 5987898a..73266d99 100644 --- a/src/turnkeyml/build/stage.py +++ b/src/turnkeyml/build/stage.py @@ -137,12 +137,12 @@ def fire_helper(self, state: build.State) -> Tuple[build.State, int]: else: self.status_line(successful=True, verbosity=state.monitor) - # Stages should not set build.Status.SUCCESSFUL_BUILD, as that is + # Stages should not set build.Status.COMPLETED_BUILD, as that is # reserved for Sequence.launch() - if state.build_status == build.Status.SUCCESSFUL_BUILD: + if state.build_status == build.Status.COMPLETED_BUILD: raise exp.StageError( "TurnkeyML Stages are not allowed to set " - "`state.build_status == build.Status.SUCCESSFUL_BUILD`, " + "`state.build_status == build.Status.COMPLETED_BUILD`, " "however that has happened. If you are a plugin developer, " "do not do this. If you are a user, please file an issue at " "https://github.com/onnx/turnkeyml/issues." @@ -268,7 +268,7 @@ def launch(self, state: build.State) -> build.State: if state.build_status == build.Status.NOT_STARTED: state.build_status = build.Status.PARTIAL_BUILD - elif state.build_status == build.Status.SUCCESSFUL_BUILD: + elif state.build_status == build.Status.COMPLETED_BUILD: msg = """ build_model() is running a build on a model that already built successfully, which should not happen because the build should have loaded from cache or rebuilt from scratch. @@ -319,7 +319,7 @@ def launch(self, state: build.State) -> build.State: else: state.current_build_stage = None - state.build_status = build.Status.SUCCESSFUL_BUILD + state.build_status = build.Status.COMPLETED_BUILD # We use a deepcopy here because the Stage framework supports # intermediate_results of any type, including model objects in memory. 
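To orient readers around this rename, below is a rough sketch of how calling code checks the result at this point in the series, mirroring the pattern used throughout test/build_model.py. The import of build_model from the package root and the toy Linear model are assumptions made for the example, not part of this patch.

    import torch
    import turnkeyml.common.build as build
    from turnkeyml import build_model  # assumed import path for the build API

    # Toy stand-in model and inputs, only for illustration
    pytorch_model = torch.nn.Linear(10, 5)
    inputs = {"input": torch.rand(1, 10)}

    state = build_model(pytorch_model, inputs, build_name="example_build", monitor=False)

    # After this patch the success status is named COMPLETED_BUILD
    assert state.build_status == build.Status.COMPLETED_BUILD
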
diff --git a/src/turnkeyml/build_api.py b/src/turnkeyml/build_api.py index b9a6aed6..206a7794 100644 --- a/src/turnkeyml/build_api.py +++ b/src/turnkeyml/build_api.py @@ -105,7 +105,7 @@ def build_model( # Return a cached build if possible, otherwise prepare the model State for # a build - if state.build_status == build.Status.SUCCESSFUL_BUILD: + if state.build_status == build.Status.COMPLETED_BUILD: # Successful builds can be loaded from cache and returned with # no additional steps additional_msg = " (build_name auto-selected)" if config.auto_name else "" diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index e4a1f6fc..fc1b0f1f 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -141,7 +141,7 @@ class Status(enum.Enum): NOT_STARTED = "not_started" PARTIAL_BUILD = "partial_build" BUILD_RUNNING = "build_running" - SUCCESSFUL_BUILD = "successful_build" + COMPLETED_BUILD = "completed_build" FAILED_BUILD = "failed_build" diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index 1c291373..86a68133 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -362,7 +362,7 @@ class Keys: class FunctionStatus: RUNNING = "running" - SUCCESSFUL = "successful" + COMPLETED = "completed" FAILED = "failed" KILLED = "killed" From a7e86aef2e9fdbd334d1836267d10097545ec9f5 Mon Sep 17 00:00:00 2001 From: danielholanda Date: Sun, 7 Jan 2024 13:10:56 -0800 Subject: [PATCH 20/35] Suggested implementation --- src/turnkeyml/build/stage.py | 26 ++++++++++++++++++++++---- src/turnkeyml/cli/report.py | 29 ----------------------------- src/turnkeyml/common/filesystem.py | 4 ---- test/build_model.py | 26 +++++++++++++------------- test/cli.py | 4 ++-- test/plugins.py | 8 ++++++-- 6 files changed, 43 insertions(+), 54 deletions(-) diff --git a/src/turnkeyml/build/stage.py b/src/turnkeyml/build/stage.py index 73266d99..1598bce5 100644 --- a/src/turnkeyml/build/stage.py +++ b/src/turnkeyml/build/stage.py @@ -3,6 +3,7 @@ import time import os import copy +import enum from typing import List, Tuple from multiprocessing import Process import psutil @@ -50,6 +51,11 @@ def _name_is_file_safe(name: str): class Stage(abc.ABC): + class Status(enum.Enum): + NOT_STARTED = "not_started" + COMPLETED = "completed" + INCOMPLETE = "incomplete" + def status_line(self, successful, verbosity): """ Print a line of status information for this Stage into the monitor. 
@@ -83,6 +89,8 @@ def __init__( _name_is_file_safe(unique_name) self.unique_name = unique_name + self.status_key = f"stage_status:{unique_name}" + self.duration_key = f"stage_duration:{unique_name}" self.monitor_message = monitor_message self.progress = None self.logfile_path = None @@ -284,9 +292,19 @@ def launch(self, state: build.State) -> build.State: self.get_names(), ) + # At the beginning of a sequence no stage has started + for stage in self.stages: + stats.save_model_eval_stat(stage.status_key, Stage.Status.NOT_STARTED.value) + stats.save_model_eval_stat(stage.duration_key, "-") + # Run the build try: for stage in self.stages: + # Set status as incomplete, since stage just started + stats.save_model_eval_stat( + stage.status_key, Stage.Status.INCOMPLETE.value + ) + # Collect telemetry about the stage state.current_build_stage = stage.unique_name start_time = time.time() @@ -297,11 +315,11 @@ def launch(self, state: build.State) -> build.State: # Collect telemetry about the stage execution_time = time.time() - start_time - stats.save_model_eval_sub_stat( - parent_key=fs.Keys.COMPLETED_BUILD_STAGES, - key=stage.unique_name, - value=execution_time, + # Set status as completed + stats.save_model_eval_stat( + stage.status_key, Stage.Status.COMPLETED.value ) + stats.save_model_eval_stat(stage.duration_key, execution_time) except exp.StageError as e: # Advance the cursor below the monitor so diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index 9dcd26f4..ccce025a 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -78,35 +78,6 @@ def summary_spreadsheets(args) -> None: # Copy the build-specific stats for key, value in build.items(): - # Break each value in "completed build stages" into status and duration - # to make the analysis of this data easier - if key == fs.Keys.COMPLETED_BUILD_STAGES: - previous_state_incomplete = False - for stage in build[fs.Keys.SELECTED_SEQUENCE_OF_STAGES]: - duration_column_name = f"stage_duration: {stage}" - state_column_name = f"stage_status: {stage}" - if stage in build[fs.Keys.COMPLETED_BUILD_STAGES]: - evaluation_stats[ - state_column_name - ] = "COMPLETED" - evaluation_stats[duration_column_name] = build[ - fs.Keys.COMPLETED_BUILD_STAGES - ][stage] - elif not previous_state_incomplete: - previous_state_incomplete = True - evaluation_stats[ - state_column_name - ] = "INCOMPLETE" - evaluation_stats[duration_column_name] = "-" - else: - evaluation_stats[ - state_column_name - ] = "NOT STARTED" - evaluation_stats[duration_column_name] = "-" - - # Do not add the raw version of COMPLETED_BUILD_STAGES to the report - continue - # If a build or benchmark is still marked as "running" at # reporting time, it # must have been killed by a time out, out-of-memory (OOM), or some diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index 86a68133..46a6b27e 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -323,10 +323,6 @@ class Keys: ONNX_INPUT_DIMENSIONS = "onnx_input_dimensions" # List of all build stages in the Sequence SELECTED_SEQUENCE_OF_STAGES = "selected_sequence_of_stages" - # Map of build stages that completed successfully to the - # execution time for that stage. We can figure out if any build - # stages failed if selected_sequence_of_stages != completed_build_stages.keys(). - COMPLETED_BUILD_STAGES = "completed_build_stages" # Location of the most up-to-date ONNX file for this build. 
If the # build completed successfully, this is the final ONNX file. ONNX_FILE = "onnx_file" diff --git a/test/build_model.py b/test/build_model.py index 4503294e..4d590a5b 100644 --- a/test/build_model.py +++ b/test/build_model.py @@ -105,7 +105,7 @@ def full_compilation_pytorch_model(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD def full_compilation_keras_subclass_model(): @@ -118,7 +118,7 @@ def full_compilation_keras_subclass_model(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD def full_compilation_keras_sequential_model(): @@ -131,7 +131,7 @@ def full_compilation_keras_sequential_model(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD def full_compilation_onnx_model(): @@ -152,7 +152,7 @@ def full_compilation_onnx_model(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD def full_compilation_hummingbird_rf(): @@ -167,7 +167,7 @@ def full_compilation_hummingbird_rf(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD def full_compilation_hummingbird_xgb(): @@ -182,7 +182,7 @@ def full_compilation_hummingbird_xgb(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD def full_compilation_hummingbird_lgbm(): @@ -197,7 +197,7 @@ def full_compilation_hummingbird_lgbm(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD def full_compilation_hummingbird_kn(): @@ -212,7 +212,7 @@ def full_compilation_hummingbird_kn(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD def scriptmodule_functional_check(): @@ -229,7 +229,7 @@ def scriptmodule_functional_check(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD def custom_stage(): @@ -280,7 +280,7 @@ def fire(self, state): cache_dir=cache_location, ) - return state.build_status == build.Status.SUCCESSFUL_BUILD + return state.build_status == build.Status.COMPLETED_BUILD class FullyCustomStage(stage.Stage): @@ -580,7 +580,7 @@ def test_013_set_onnx_opset(self): sequence=sequences.optimize_fp16, ) - assert state.build_status == build.Status.SUCCESSFUL_BUILD + assert state.build_status == build.Status.COMPLETED_BUILD onnx_model = onnx.load(state.results[0]) model_opset = getattr(onnx_model.opset_import[0], "version", None) @@ -599,7 +599,7 @@ def test_014_export_only(self): sequence=sequences.onnx_fp32, ) - assert state.build_status == build.Status.SUCCESSFUL_BUILD + assert state.build_status == build.Status.COMPLETED_BUILD assert os.path.exists(export.base_onnx_file(state)) assert not os.path.exists(export.opt_onnx_file(state)) @@ -635,7 +635,7 @@ def test_015_receive_onnx(self): ) # Make sure the build was successful - assert 
state.build_status == build.Status.SUCCESSFUL_BUILD
+        assert state.build_status == build.Status.COMPLETED_BUILD
 
         # Get ONNX file's opset
         onnx_model = onnx.load(onnx_file)
diff --git a/test/cli.py b/test/cli.py
index 30fd84ea..7bc3cbd1 100644
--- a/test/cli.py
+++ b/test/cli.py
@@ -151,7 +151,7 @@ def assert_success_of_builds(
                     build_state.config.build_name,
                     build_state.evaluation_id,
                 )
-                assert build_state.build_status == build.Status.SUCCESSFUL_BUILD
+                assert build_state.build_status == build.Status.COMPLETED_BUILD
                 script_build_found = True
                 builds_found += 1
 
@@ -555,7 +555,7 @@ def test_011_report_on_failed_build(self):
 
         # Ensure test failed
         build_state = build.load_state(state_path=filesystem.get_all(cache_dir)[0])
-        assert build_state.build_status != build.Status.SUCCESSFUL_BUILD
+        assert build_state.build_status != build.Status.COMPLETED_BUILD
 
         # Generate report
         testargs = [
diff --git a/test/plugins.py b/test/plugins.py
index 49dd52dd..859dda0e 100644
--- a/test/plugins.py
+++ b/test/plugins.py
@@ -14,6 +14,7 @@
 # Create a cache directory a directory with test models
 cache_dir, corpus_dir = common.create_test_dir("plugins")
 
+
 class Testing(unittest.TestCase):
     def setUp(self) -> None:
         filesystem.rmdir(cache_dir)
@@ -41,11 +42,14 @@ def test_001_device_naming(self):
         _, build_state = common.get_stats_and_state(test_script, cache_dir)
 
         # Check if build was successful
-        assert build_state.build_status == build.Status.SUCCESSFUL_BUILD
+        assert build_state.build_status == build.Status.COMPLETED_BUILD
 
         # Check if default part and config were assigned
         expected_device = "example_family::part1::config1"
-        assert build_state.config.device == expected_device, f"Got {build_state.config.device}, expected {expected_device}"
+        assert (
+            build_state.config.device == expected_device
+        ), f"Got {build_state.config.device}, expected {expected_device}"
+
 
 if __name__ == "__main__":
     unittest.main()

From 2d2d8fa1e797f4e7751cff2a251ffef268d61b9a Mon Sep 17 00:00:00 2001
From: danielholanda
Date: Sun, 7 Jan 2024 13:32:07 -0800
Subject: [PATCH 21/35] Release notes

---
 docs/release_notes.md        | 3 ++-
 src/turnkeyml/build/stage.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/release_notes.md b/docs/release_notes.md
index c9246136..e8ea6faf 100644
--- a/docs/release_notes.md
+++ b/docs/release_notes.md
@@ -30,7 +30,8 @@ None
 
 ### Developer Breaking Changes
 
-None
+- `build.Status.SUCCESSFUL_BUILD` is now called `build.Status.COMPLETED_BUILD`
+- `COMPLETED_BUILD_STAGES` column in the report was removed.
# Version 1.0.0 diff --git a/src/turnkeyml/build/stage.py b/src/turnkeyml/build/stage.py index 1598bce5..d8af78d7 100644 --- a/src/turnkeyml/build/stage.py +++ b/src/turnkeyml/build/stage.py @@ -325,7 +325,7 @@ def launch(self, state: build.State) -> build.State: # Advance the cursor below the monitor so # we can print an error message stage_depth_in_sequence = self.get_depth() - self.get_names().index( - stage.unique_name + stage.unique_name # pylint: disable=undefined-loop-variable ) stdout_lines_to_advance = stage_depth_in_sequence - 2 cursor_down = "\n" * stdout_lines_to_advance From 95bd5deeadb9e87e7e02e494405e950627146bab Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Tue, 9 Jan 2024 16:15:43 -0500 Subject: [PATCH 22/35] Unify all build/benchmark/stage status to one enum --- src/turnkeyml/analyze/script.py | 14 ++++---- src/turnkeyml/build/ignition.py | 14 ++++---- src/turnkeyml/build/stage.py | 54 +++++++++++++++--------------- src/turnkeyml/build_api.py | 2 +- src/turnkeyml/cli/report.py | 7 ++-- src/turnkeyml/common/build.py | 34 +++++++++++++++---- src/turnkeyml/common/filesystem.py | 7 ---- test/build_model.py | 26 +++++++------- test/cli.py | 4 +-- test/plugins.py | 2 +- 10 files changed, 89 insertions(+), 75 deletions(-) diff --git a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py index 77976558..50052197 100644 --- a/src/turnkeyml/analyze/script.py +++ b/src/turnkeyml/analyze/script.py @@ -88,9 +88,9 @@ def set_status_on_exception(build_state: build.State, stats: fs.Stats): # We get `state` when the build tool succeeds, so we can use that to identify # whether the exception was thrown during build or benchmark if not build_state: - stats.save_model_eval_stat(fs.Keys.BUILD_STATUS, fs.FunctionStatus.FAILED) + stats.save_model_eval_stat(fs.Keys.BUILD_STATUS, build.FunctionStatus.ERROR) else: - stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.FunctionStatus.FAILED) + stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.ERROR) def explore_invocation( @@ -257,7 +257,9 @@ def explore_invocation( # we will try to catch the exception and note it in the stats. # If a concluded build still has a status of "running", this means # there was an uncaught exception. - stats.save_model_eval_stat(fs.Keys.BUILD_STATUS, fs.FunctionStatus.RUNNING) + stats.save_model_eval_stat( + fs.Keys.BUILD_STATUS, build.FunctionStatus.INCOMPLETE + ) build_state = build_model( model=model_info.model, @@ -272,7 +274,7 @@ def explore_invocation( ) stats.save_model_eval_stat( - fs.Keys.BUILD_STATUS, fs.FunctionStatus.COMPLETED + fs.Keys.BUILD_STATUS, build.FunctionStatus.SUCCESSFUL ) model_to_benchmark = build_state.results[0] @@ -294,7 +296,7 @@ def explore_invocation( rt_args_to_use = tracer_args.rt_args stats.save_model_eval_stat( - fs.Keys.BENCHMARK_STATUS, fs.FunctionStatus.RUNNING + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.INCOMPLETE ) model_handle = runtime_info["RuntimeClass"]( @@ -317,7 +319,7 @@ def explore_invocation( ) stats.save_model_eval_stat( - fs.Keys.BENCHMARK_STATUS, fs.FunctionStatus.COMPLETED + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.SUCCESSFUL ) invocation_info.status_message = "Model successfully benchmarked!" 
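The hunk above is easier to read once the underlying pattern is spelled out: the status is written as "incomplete" before the work starts, so a crash or an external kill leaves that marker behind, and only a clean finish overwrites it with "successful" (or "error" for a caught failure). A generic, standalone sketch of the same idea, independent of the turnkey APIs:

    # `record` stands in for Stats.save_model_eval_stat; the strings mirror
    # the FunctionStatus values introduced by this patch.
    def run_with_status(record, key, work):
        record(key, "incomplete")   # survives if the process dies mid-work
        try:
            result = work()
        except Exception:
            record(key, "error")    # caught failure
            raise
        record(key, "successful")   # only reached on a clean finish
        return result

    if __name__ == "__main__":
        log = {}
        run_with_status(lambda k, v: log.update({k: v}), "build_status", lambda: 42)
        print(log)  # {'build_status': 'successful'}
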
diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py index b98b1ee3..ebf43958 100644 --- a/src/turnkeyml/build/ignition.py +++ b/src/turnkeyml/build/ignition.py @@ -157,10 +157,7 @@ def validate_cached_model( ) if build_conditions_changed: # Show an error if build_name is not specified for different models on the same script - if ( - state.uid == build.unique_id() - and state.build_status != build.Status.PARTIAL_BUILD - ): + if state.uid == build.unique_id(): msg = ( "You are building multiple different models in the same script " "without specifying a unique build_model(..., build_name=) for each build." @@ -201,8 +198,9 @@ def validate_cached_model( result.append(msg) else: if ( - state.build_status == build.Status.FAILED_BUILD - or state.build_status == build.Status.BUILD_RUNNING + state.build_status == build.FunctionStatus.ERROR + or state.build_status == build.FunctionStatus.INCOMPLETE + or state.build_status == build.FunctionStatus.KILLED ) and turnkey_version == state.turnkey_version: msg = ( "build_model() has detected that you already attempted building " @@ -324,7 +322,7 @@ def load_or_make_state( if ( model_type == build.ModelType.UNKNOWN - and state.build_status == build.Status.COMPLETED_BUILD + and state.build_status == build.FunctionStatus.SUCCESSFUL ): msg = ( "Model caching is disabled for successful builds against custom Sequences. " @@ -335,7 +333,7 @@ def load_or_make_state( return _begin_fresh_build(state_args, state_type) elif ( model_type == build.ModelType.UNKNOWN - and state.build_status == build.Status.PARTIAL_BUILD + and state.build_status == build.FunctionStatus.INCOMPLETE ): msg = ( f"Model {config.build_name} was partially built in a previous call to " diff --git a/src/turnkeyml/build/stage.py b/src/turnkeyml/build/stage.py index d8af78d7..aa91c739 100644 --- a/src/turnkeyml/build/stage.py +++ b/src/turnkeyml/build/stage.py @@ -3,7 +3,6 @@ import time import os import copy -import enum from typing import List, Tuple from multiprocessing import Process import psutil @@ -51,11 +50,6 @@ def _name_is_file_safe(name: str): class Stage(abc.ABC): - class Status(enum.Enum): - NOT_STARTED = "not_started" - COMPLETED = "completed" - INCOMPLETE = "incomplete" - def status_line(self, successful, verbosity): """ Print a line of status information for this Stage into the monitor. @@ -115,10 +109,10 @@ def fire_helper(self, state: build.State) -> Tuple[build.State, int]: including in the event of an exception """ - # Set the build status to BUILD_RUNNING to indicate that a Stage + # Set the build status to INCOMPLETE to indicate that a Stage # started running. 
This allows us to test whether the Stage exited - # unexpectedly, before it was able to set FAILED_BUILD - state.build_status = build.Status.BUILD_RUNNING + # unexpectedly, before it was able to set ERROR + state.build_status = build.FunctionStatus.INCOMPLETE self.logfile_path = os.path.join( build.output_dir(state.cache_dir, state.config.build_name), @@ -139,18 +133,18 @@ def fire_helper(self, state: build.State) -> Tuple[build.State, int]: successful=False, verbosity=state.monitor, ) - state.build_status = build.Status.FAILED_BUILD + state.build_status = build.FunctionStatus.ERROR raise else: self.status_line(successful=True, verbosity=state.monitor) - # Stages should not set build.Status.COMPLETED_BUILD, as that is - # reserved for Sequence.launch() - if state.build_status == build.Status.COMPLETED_BUILD: + # Stages should not set build.FunctionStatus.SUCCESSFUL for the whole build, + # as that is reserved for Sequence.launch() + if state.build_status == build.FunctionStatus.SUCCESSFUL: raise exp.StageError( "TurnkeyML Stages are not allowed to set " - "`state.build_status == build.Status.COMPLETED_BUILD`, " + "`state.build_status == build.FunctionStatus.SUCCESSFUL`, " "however that has happened. If you are a plugin developer, " "do not do this. If you are a user, please file an issue at " "https://github.com/onnx/turnkeyml/issues." @@ -274,9 +268,7 @@ def launch(self, state: build.State) -> build.State: can include both Stages and Sequences (ie, sequences can be nested). """ - if state.build_status == build.Status.NOT_STARTED: - state.build_status = build.Status.PARTIAL_BUILD - elif state.build_status == build.Status.COMPLETED_BUILD: + if state.build_status == build.FunctionStatus.SUCCESSFUL: msg = """ build_model() is running a build on a model that already built successfully, which should not happen because the build should have loaded from cache or rebuilt from scratch. 
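Because fire_helper() and Sequence.launch() now own every build_status transition, a plugin Stage only does its work and leaves the status alone. A minimal sketch of such a Stage, patterned after the custom-stage tests in test/build_model.py; the names here are illustrative:

    import turnkeyml.build.stage as stage

    class MyCustomStage(stage.Stage):
        def __init__(self):
            super().__init__(
                unique_name="my_custom_stage",
                monitor_message="Running my custom stage",
            )

        def fire(self, state):
            # Do the stage's work here (e.g., transform state.intermediate_results),
            # but never set state.build_status: fire_helper() marks it
            # INCOMPLETE or ERROR, and only Sequence.launch() marks success.
            return state
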
@@ -294,32 +286,30 @@ def launch(self, state: build.State) -> build.State: # At the beginning of a sequence no stage has started for stage in self.stages: - stats.save_model_eval_stat(stage.status_key, Stage.Status.NOT_STARTED.value) + stats.save_model_eval_stat( + stage.status_key, build.FunctionStatus.NOT_STARTED.value + ) stats.save_model_eval_stat(stage.duration_key, "-") # Run the build + start_time = time.time() try: for stage in self.stages: # Set status as incomplete, since stage just started stats.save_model_eval_stat( - stage.status_key, Stage.Status.INCOMPLETE.value + stage.status_key, build.FunctionStatus.INCOMPLETE.value ) # Collect telemetry about the stage state.current_build_stage = stage.unique_name - start_time = time.time() # Run the stage state = stage.fire_helper(state) - # Collect telemetry about the stage - execution_time = time.time() - start_time - - # Set status as completed + # Set status as successful stats.save_model_eval_stat( - stage.status_key, Stage.Status.COMPLETED.value + stage.status_key, build.FunctionStatus.SUCCESSFUL.value ) - stats.save_model_eval_stat(stage.duration_key, execution_time) except exp.StageError as e: # Advance the cursor below the monitor so @@ -333,11 +323,16 @@ def launch(self, state: build.State) -> build.State: print(cursor_down) printing.log_error(e) + + stats.save_model_eval_stat( + stage.status_key, build.FunctionStatus.ERROR.value + ) + raise else: state.current_build_stage = None - state.build_status = build.Status.COMPLETED_BUILD + state.build_status = build.FunctionStatus.SUCCESSFUL # We use a deepcopy here because the Stage framework supports # intermediate_results of any type, including model objects in memory. @@ -347,6 +342,11 @@ def launch(self, state: build.State) -> build.State: return state + finally: + # Collect telemetry about the stage + execution_time = time.time() - start_time + stats.save_model_eval_stat(stage.duration_key, execution_time) + def status_line(self, successful, verbosity): """ This override of status_line simply propagates status_line() diff --git a/src/turnkeyml/build_api.py b/src/turnkeyml/build_api.py index 206a7794..95272661 100644 --- a/src/turnkeyml/build_api.py +++ b/src/turnkeyml/build_api.py @@ -105,7 +105,7 @@ def build_model( # Return a cached build if possible, otherwise prepare the model State for # a build - if state.build_status == build.Status.COMPLETED_BUILD: + if state.build_status == build.FunctionStatus.SUCCESSFUL: # Successful builds can be loaded from cache and returned with # no additional steps additional_msg = " (build_name auto-selected)" if config.auto_name else "" diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index ccce025a..f4694b84 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -7,6 +7,7 @@ import pandas as pd import turnkeyml.common.printing as printing import turnkeyml.common.filesystem as fs +import turnkeyml.common.build as bd def get_report_name(prefix: str = "") -> str: @@ -78,14 +79,14 @@ def summary_spreadsheets(args) -> None: # Copy the build-specific stats for key, value in build.items(): - # If a build or benchmark is still marked as "running" at + # If a build or benchmark is still marked as "incomplete" at # reporting time, it # must have been killed by a time out, out-of-memory (OOM), or some # other uncaught exception if ( key == fs.Keys.BUILD_STATUS or fs.Keys.BENCHMARK_STATUS - ) and value == fs.FunctionStatus.RUNNING: - value = fs.FunctionStatus.KILLED + ) and value == 
bd.FunctionStatus.INCOMPLETE: + value = bd.FunctionStatus.KILLED # Add stats ensuring that those are all in lower case evaluation_stats[key.lower()] = value diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index fc1b0f1f..cdf7b8f2 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -137,12 +137,32 @@ def hash_model(model, model_type: ModelType, hash_params: bool = True): raise ValueError(msg) -class Status(enum.Enum): +class FunctionStatus(enum.Enum): + # INCOMPLETE indicates stage/build is either running or was killed; + # if you know the process ended then it was killed; + # if the process is still running, stage/build is still running + INCOMPLETE = "incomplete" + # NOT_STARTED applies to stages that didnt start because + # the build errored out or was killed prior to stage starting NOT_STARTED = "not_started" - PARTIAL_BUILD = "partial_build" - BUILD_RUNNING = "build_running" - COMPLETED_BUILD = "completed_build" - FAILED_BUILD = "failed_build" + # SUCCESSFUL means the build/stage completed successfully + SUCCESSFUL = "successful" + # ERROR means the build/stage failed and threw some error that + # was caught by turnkey. You should proceed by looking at the build + # logs to see what happened. + ERROR = "error" + # KILLED means the build failed because the system killed it. This can + # happen because of an out-of-memory (OOM), timeout, system shutdown, etc. + # You should proceed by re-running the build and keeping an eye on it to observe + # why it is being killed (e.g., watch the RAM utilization to diagnose an OOM). + KILLED = "killed" # you should reproduce and observe + + # TODO: REMOVE! + # NOT_STARTED = "not_started" + # PARTIAL_BUILD = "partial_build" + # BUILD_RUNNING = "build_running" + # COMPLETED_BUILD = "completed_build" + # FAILED_BUILD = "failed_build" # Create a unique ID from this run by hashing pid + process start time @@ -242,7 +262,7 @@ class State: model_type: ModelType = ModelType.UNKNOWN uid: Optional[int] = None model_hash: Optional[int] = None - build_status: Status = Status.NOT_STARTED + build_status: FunctionStatus = FunctionStatus.NOT_STARTED expected_input_shapes: Optional[Dict[str, list]] = None expected_input_dtypes: Optional[Dict[str, list]] = None expected_output_names: Optional[List] = None @@ -346,7 +366,7 @@ def load_state( try: # Special case for loading enums state_dict["model_type"] = ModelType(state_dict["model_type"]) - state_dict["build_status"] = Status(state_dict["build_status"]) + state_dict["build_status"] = FunctionStatus(state_dict["build_status"]) state_dict["config"] = config_type(**state_dict["config"]) state = state_type(**state_dict) diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index 46a6b27e..5eb27b32 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -356,13 +356,6 @@ class Keys: BENCHMARK_STATUS = "benchmark_status" -class FunctionStatus: - RUNNING = "running" - COMPLETED = "completed" - FAILED = "failed" - KILLED = "killed" - - class Stats: def __init__(self, cache_dir: str, build_name: str, evaluation_id: str = None): output_dir = build.output_dir(cache_dir, build_name) diff --git a/test/build_model.py b/test/build_model.py index 4d590a5b..7171820f 100644 --- a/test/build_model.py +++ b/test/build_model.py @@ -105,7 +105,7 @@ def full_compilation_pytorch_model(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == 
build.FunctionStatus.SUCCESSFUL def full_compilation_keras_subclass_model(): @@ -118,7 +118,7 @@ def full_compilation_keras_subclass_model(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == build.FunctionStatus.SUCCESSFUL def full_compilation_keras_sequential_model(): @@ -131,7 +131,7 @@ def full_compilation_keras_sequential_model(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == build.FunctionStatus.SUCCESSFUL def full_compilation_onnx_model(): @@ -152,7 +152,7 @@ def full_compilation_onnx_model(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == build.FunctionStatus.SUCCESSFUL def full_compilation_hummingbird_rf(): @@ -167,7 +167,7 @@ def full_compilation_hummingbird_rf(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == build.FunctionStatus.SUCCESSFUL def full_compilation_hummingbird_xgb(): @@ -182,7 +182,7 @@ def full_compilation_hummingbird_xgb(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == build.FunctionStatus.SUCCESSFUL def full_compilation_hummingbird_lgbm(): @@ -197,7 +197,7 @@ def full_compilation_hummingbird_lgbm(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == build.FunctionStatus.SUCCESSFUL def full_compilation_hummingbird_kn(): @@ -212,7 +212,7 @@ def full_compilation_hummingbird_kn(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == build.FunctionStatus.SUCCESSFUL def scriptmodule_functional_check(): @@ -229,7 +229,7 @@ def scriptmodule_functional_check(): monitor=False, cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == build.FunctionStatus.SUCCESSFUL def custom_stage(): @@ -280,7 +280,7 @@ def fire(self, state): cache_dir=cache_location, ) - return state.build_status == build.Status.COMPLETED_BUILD + return state.build_status == build.FunctionStatus.SUCCESSFUL class FullyCustomStage(stage.Stage): @@ -580,7 +580,7 @@ def test_013_set_onnx_opset(self): sequence=sequences.optimize_fp16, ) - assert state.build_status == build.Status.COMPLETED_BUILD + assert state.build_status == build.FunctionStatus.SUCCESSFUL onnx_model = onnx.load(state.results[0]) model_opset = getattr(onnx_model.opset_import[0], "version", None) @@ -599,7 +599,7 @@ def test_014_export_only(self): sequence=sequences.onnx_fp32, ) - assert state.build_status == build.Status.COMPLETED_BUILD + assert state.build_status == build.FunctionStatus.SUCCESSFUL assert os.path.exists(export.base_onnx_file(state)) assert not os.path.exists(export.opt_onnx_file(state)) @@ -635,7 +635,7 @@ def test_015_receive_onnx(self): ) # Make sure the build was successful - assert state.build_status == build.Status.COMPLETED_BUILD + assert state.build_status == build.FunctionStatus.SUCCESSFUL # Get ONNX file's opset onnx_model = onnx.load(onnx_file) diff --git a/test/cli.py b/test/cli.py index 7bc3cbd1..14b1c1fa 100644 --- a/test/cli.py +++ b/test/cli.py @@ -151,7 +151,7 @@ def assert_success_of_builds( build_state.config.build_name, build_state.evaluation_id, ) - assert 
build_state.build_status == build.Status.COMPLETED_BUILD + assert build_state.build_status == build.FunctionStatus.SUCCESSFUL script_build_found = True builds_found += 1 @@ -555,7 +555,7 @@ def test_011_report_on_failed_build(self): # Ensure test failed build_state = build.load_state(state_path=filesystem.get_all(cache_dir)[0]) - assert build_state.build_status != build.Status.COMPLETED_BUILD + assert build_state.build_status != build.FunctionStatus.SUCCESSFUL # Generate report testargs = [ diff --git a/test/plugins.py b/test/plugins.py index 859dda0e..84d25b39 100644 --- a/test/plugins.py +++ b/test/plugins.py @@ -42,7 +42,7 @@ def test_001_device_naming(self): _, build_state = common.get_stats_and_state(test_script, cache_dir) # Check if build was successful - assert build_state.build_status == build.Status.COMPLETED_BUILD + assert build_state.build_status == build.FunctionStatus.SUCCESSFUL # Check if default part and config were assigned expected_device = "example_family::part1::config1" From 1d802ff441b28f0bde932265b61a8535bd794104 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Tue, 9 Jan 2024 16:17:00 -0500 Subject: [PATCH 23/35] cleanup --- src/turnkeyml/common/build.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index cdf7b8f2..317b4d1f 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -138,31 +138,24 @@ def hash_model(model, model_type: ModelType, hash_params: bool = True): class FunctionStatus(enum.Enum): - # INCOMPLETE indicates stage/build is either running or was killed; + # INCOMPLETE indicates stage/build/benchmark is either running or was killed; # if you know the process ended then it was killed; - # if the process is still running, stage/build is still running + # if the process is still running, stage/build/benchmark is still running. INCOMPLETE = "incomplete" # NOT_STARTED applies to stages that didnt start because - # the build errored out or was killed prior to stage starting + # the build errored out or was killed prior to stage starting. NOT_STARTED = "not_started" - # SUCCESSFUL means the build/stage completed successfully + # SUCCESSFUL means the stage/build/benchmark completed successfully SUCCESSFUL = "successful" - # ERROR means the build/stage failed and threw some error that + # ERROR means the stage/build/benchmark failed and threw some error that # was caught by turnkey. You should proceed by looking at the build # logs to see what happened. ERROR = "error" - # KILLED means the build failed because the system killed it. This can + # KILLED means the build/benchmark failed because the system killed it. This can # happen because of an out-of-memory (OOM), timeout, system shutdown, etc. # You should proceed by re-running the build and keeping an eye on it to observe # why it is being killed (e.g., watch the RAM utilization to diagnose an OOM). - KILLED = "killed" # you should reproduce and observe - - # TODO: REMOVE! 
- # NOT_STARTED = "not_started" - # PARTIAL_BUILD = "partial_build" - # BUILD_RUNNING = "build_running" - # COMPLETED_BUILD = "completed_build" - # FAILED_BUILD = "failed_build" + KILLED = "killed" # Create a unique ID from this run by hashing pid + process start time From 45e666d854dc62c80fdc7a343c7adcb05c55b2fb Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Tue, 9 Jan 2024 16:29:55 -0500 Subject: [PATCH 24/35] fix bugs --- src/turnkeyml/analyze/script.py | 16 ++++++++++------ src/turnkeyml/cli/report.py | 4 ++-- test/cli.py | 4 ++-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py index 50052197..aaf1bdc4 100644 --- a/src/turnkeyml/analyze/script.py +++ b/src/turnkeyml/analyze/script.py @@ -88,9 +88,13 @@ def set_status_on_exception(build_state: build.State, stats: fs.Stats): # We get `state` when the build tool succeeds, so we can use that to identify # whether the exception was thrown during build or benchmark if not build_state: - stats.save_model_eval_stat(fs.Keys.BUILD_STATUS, build.FunctionStatus.ERROR) + stats.save_model_eval_stat( + fs.Keys.BUILD_STATUS, build.FunctionStatus.ERROR.value + ) else: - stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.ERROR) + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.ERROR.value + ) def explore_invocation( @@ -258,7 +262,7 @@ def explore_invocation( # If a concluded build still has a status of "running", this means # there was an uncaught exception. stats.save_model_eval_stat( - fs.Keys.BUILD_STATUS, build.FunctionStatus.INCOMPLETE + fs.Keys.BUILD_STATUS, build.FunctionStatus.INCOMPLETE.value ) build_state = build_model( @@ -274,7 +278,7 @@ def explore_invocation( ) stats.save_model_eval_stat( - fs.Keys.BUILD_STATUS, build.FunctionStatus.SUCCESSFUL + fs.Keys.BUILD_STATUS, build.FunctionStatus.SUCCESSFUL.value ) model_to_benchmark = build_state.results[0] @@ -296,7 +300,7 @@ def explore_invocation( rt_args_to_use = tracer_args.rt_args stats.save_model_eval_stat( - fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.INCOMPLETE + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.INCOMPLETE.value ) model_handle = runtime_info["RuntimeClass"]( @@ -319,7 +323,7 @@ def explore_invocation( ) stats.save_model_eval_stat( - fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.SUCCESSFUL + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.SUCCESSFUL.value ) invocation_info.status_message = "Model successfully benchmarked!" 
diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index f4694b84..216ec73d 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -85,8 +85,8 @@ def summary_spreadsheets(args) -> None: # other uncaught exception if ( key == fs.Keys.BUILD_STATUS or fs.Keys.BENCHMARK_STATUS - ) and value == bd.FunctionStatus.INCOMPLETE: - value = bd.FunctionStatus.KILLED + ) and value == bd.FunctionStatus.INCOMPLETE.value: + value = bd.FunctionStatus.KILLED.value # Add stats ensuring that those are all in lower case evaluation_stats[key.lower()] = value diff --git a/test/cli.py b/test/cli.py index 14b1c1fa..689cc315 100644 --- a/test/cli.py +++ b/test/cli.py @@ -997,8 +997,8 @@ def test_026_cli_report(self): summary_csv_path, [ "selected_sequence_of_stages", - "stage_duration: export_pytorch", - "stage_status: export_pytorch", + "stage_duration:export_pytorch", + "stage_status:export_pytorch", ], ) for result in result_dict.values(): From e4d5f700172b050b3363805ff2fab819f8c0af7b Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Tue, 9 Jan 2024 16:38:28 -0500 Subject: [PATCH 25/35] Improved patch notes --- docs/release_notes.md | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/docs/release_notes.md b/docs/release_notes.md index e8ea6faf..c043fe7a 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -16,11 +16,39 @@ This version focuses on improving the clarity of the telemetry reported. ### User Improvements -- Report splits `stages_completed` into stage status and duration. +- Stats and report CSV files split `stages_completed` into stage status and duration. +- Build, benchmark, and stage status values in the stat and report files now use the same terminology values: + +``` +class FunctionStatus(enum.Enum): + # INCOMPLETE indicates stage/build/benchmark is either running or was killed; + # if you know the process ended then it was killed; + # if the process is still running, stage/build/benchmark is still running. + INCOMPLETE = "incomplete" + # NOT_STARTED applies to stages that didnt start because + # the build errored out or was killed prior to stage starting. + NOT_STARTED = "not_started" + # SUCCESSFUL means the stage/build/benchmark completed successfully + SUCCESSFUL = "successful" + # ERROR means the stage/build/benchmark failed and threw some error that + # was caught by turnkey. You should proceed by looking at the build + # logs to see what happened. + ERROR = "error" + # KILLED means the build/benchmark failed because the system killed it. This can + # happen because of an out-of-memory (OOM), timeout, system shutdown, etc. + # You should proceed by re-running the build and keeping an eye on it to observe + # why it is being killed (e.g., watch the RAM utilization to diagnose an OOM). + KILLED = "killed" +``` + +- The CLI help page for the `benchmark` command has been reorganized for clarity (try `turnkey benchmark -h`). +- The CLI now provides more helpful errors when the user provides arguments incorrectly. + ## User Breaking Changes -None. +- Previous turnkey caches are not compatible with this version and must be rebuilt. +- The status terminology changes documented above mean that stats/reports from pre-v1.1.0 builds are not directly comparable to post-v1.1.0 builds. ## Developers @@ -30,8 +58,7 @@ None ### Developer Breaking Changes -- `build.Status.COMPLETED_BUILD` is now called `build.Status.COMPLETED_BUILD` -- `COMPLETED_BUILD_STAGES` column in the report was removed. 
+- `build.Status` and `filesystem.FunctionStatus` have both been removed, and replaced with `build.FunctionStatus` which is the union of those two Enums. # Version 1.0.0 From cfd8ce0a520c8dcfd41f001dbef0a7d82cbbafb3 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 10:25:25 -0500 Subject: [PATCH 26/35] Add release note for #78 --- docs/release_notes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/release_notes.md b/docs/release_notes.md index c043fe7a..24d0761b 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -16,6 +16,7 @@ This version focuses on improving the clarity of the telemetry reported. ### User Improvements +- ONNX files exported from PyTorch models now have a `torch_export_verified` key in their stats/report files that indicates whether the `torch.onnx.verification.find_mismatch()` API could find any issue with the exported ONNX file. - Stats and report CSV files split `stages_completed` into stage status and duration. - Build, benchmark, and stage status values in the stat and report files now use the same terminology values: From f49821cd01e7b926867d5b9e481a9158f582c861 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 10:26:08 -0500 Subject: [PATCH 27/35] fix test --- test/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cli.py b/test/cli.py index 689cc315..3fdfd139 100644 --- a/test/cli.py +++ b/test/cli.py @@ -1004,8 +1004,8 @@ def test_026_cli_report(self): for result in result_dict.values(): # All of the models should have exported to ONNX assert "export_pytorch" in result["selected_sequence_of_stages"] - assert result["stage_status: export_pytorch"] == "COMPLETED" - assert result["stage_duration: export_pytorch"] > 0 + assert result["stage_status:export_pytorch"] == "COMPLETED" + assert result["stage_duration:export_pytorch"] > 0 if __name__ == "__main__": From 8441018e11e149301ab79470fa58f99abdeea5bd Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 10:32:03 -0500 Subject: [PATCH 28/35] zzzz --- test/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cli.py b/test/cli.py index 3fdfd139..a1335dc4 100644 --- a/test/cli.py +++ b/test/cli.py @@ -1004,7 +1004,7 @@ def test_026_cli_report(self): for result in result_dict.values(): # All of the models should have exported to ONNX assert "export_pytorch" in result["selected_sequence_of_stages"] - assert result["stage_status:export_pytorch"] == "COMPLETED" + assert result["stage_status:export_pytorch"] == "SUCCESSFUL" assert result["stage_duration:export_pytorch"] > 0 From 01fe625fdce8cd4e4ad37981dfe1b74205a45bba Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 10:38:46 -0500 Subject: [PATCH 29/35] print info on test fail --- test/cli.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/cli.py b/test/cli.py index a1335dc4..88fb9b30 100644 --- a/test/cli.py +++ b/test/cli.py @@ -1004,8 +1004,12 @@ def test_026_cli_report(self): for result in result_dict.values(): # All of the models should have exported to ONNX assert "export_pytorch" in result["selected_sequence_of_stages"] - assert result["stage_status:export_pytorch"] == "SUCCESSFUL" - assert result["stage_duration:export_pytorch"] > 0 + assert result["stage_status:export_pytorch"] == "SUCCESSFUL", [ + "stage_status:export_pytorch" + ] + assert result["stage_duration:export_pytorch"] > 0, [ + "stage_duration:export_pytorch" + ] if __name__ == "__main__": From 
ce7ae52edaeab6f77fada022528ea00f65125f51 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 10:39:13 -0500 Subject: [PATCH 30/35] fix typo --- test/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cli.py b/test/cli.py index 88fb9b30..7dfe227e 100644 --- a/test/cli.py +++ b/test/cli.py @@ -1004,10 +1004,10 @@ def test_026_cli_report(self): for result in result_dict.values(): # All of the models should have exported to ONNX assert "export_pytorch" in result["selected_sequence_of_stages"] - assert result["stage_status:export_pytorch"] == "SUCCESSFUL", [ + assert result["stage_status:export_pytorch"] == "SUCCESSFUL", result[ "stage_status:export_pytorch" ] - assert result["stage_duration:export_pytorch"] > 0, [ + assert result["stage_duration:export_pytorch"] > 0, result[ "stage_duration:export_pytorch" ] From 25bce6984dce16e7034e81e339623e139473ba84 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 10:45:11 -0500 Subject: [PATCH 31/35] fix case --- test/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cli.py b/test/cli.py index 7dfe227e..4a744c2e 100644 --- a/test/cli.py +++ b/test/cli.py @@ -1004,7 +1004,7 @@ def test_026_cli_report(self): for result in result_dict.values(): # All of the models should have exported to ONNX assert "export_pytorch" in result["selected_sequence_of_stages"] - assert result["stage_status:export_pytorch"] == "SUCCESSFUL", result[ + assert result["stage_status:export_pytorch"] == "successful", result[ "stage_status:export_pytorch" ] assert result["stage_duration:export_pytorch"] > 0, result[ From 5bee2dba71b56022ccad3b9c2c5080788d557039 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 11:23:25 -0500 Subject: [PATCH 32/35] Fix test bug. 
Properly initialize build and benchmark status --- src/turnkeyml/analyze/script.py | 8 ++++++++ test/cli.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py index aaf1bdc4..2cc233fb 100644 --- a/src/turnkeyml/analyze/script.py +++ b/src/turnkeyml/analyze/script.py @@ -252,6 +252,14 @@ def explore_invocation( return + # Initialize build and benchmark status to "not started" + stats.save_model_eval_stat( + fs.Keys.BUILD_STATUS, build.FunctionStatus.NOT_STARTED.value + ) + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.NOT_STARTED.value + ) + build_state = None perf = None try: diff --git a/test/cli.py b/test/cli.py index 4a744c2e..85a69ca9 100644 --- a/test/cli.py +++ b/test/cli.py @@ -1007,9 +1007,14 @@ def test_026_cli_report(self): assert result["stage_status:export_pytorch"] == "successful", result[ "stage_status:export_pytorch" ] - assert result["stage_duration:export_pytorch"] > 0, result[ - "stage_duration:export_pytorch" - ] + try: + assert int(result["stage_duration:export_pytorch"]) > 0, result[ + "stage_duration:export_pytorch" + ] + except ValueError: + # Catch the case where the value is "-" and therefore can't be + # converted to an int + assert result["stage_duration:export_pytorch"] == "-" if __name__ == "__main__": From 32098b1a715efa093b28a006ae9cf77d962fb68b Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 15:15:37 -0500 Subject: [PATCH 33/35] wordsmithing --- docs/release_notes.md | 41 +++++++++++++++++++++++++++-------- src/turnkeyml/common/build.py | 36 +++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/docs/release_notes.md b/docs/release_notes.md index 24d0761b..c940fbf3 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -16,30 +16,53 @@ This version focuses on improving the clarity of the telemetry reported. ### User Improvements -- ONNX files exported from PyTorch models now have a `torch_export_verified` key in their stats/report files that indicates whether the `torch.onnx.verification.find_mismatch()` API could find any issue with the exported ONNX file. +- ONNX files exported from PyTorch models now have a `torch_export_validity` key in their stats/report files that indicates whether the `torch.onnx.verification.find_mismatch()` API could find any issue with the exported ONNX file. Possible values: + - `valid`: passed verification. + - `invalid`: failed verification. + - `unverified`: turnkey was unable to complete the verification process. - Stats and report CSV files split `stages_completed` into stage status and duration. - Build, benchmark, and stage status values in the stat and report files now use the same terminology values: ``` class FunctionStatus(enum.Enum): - # INCOMPLETE indicates stage/build/benchmark is either running or was killed; - # if you know the process ended then it was killed; - # if the process is still running, stage/build/benchmark is still running. - INCOMPLETE = "incomplete" - # NOT_STARTED applies to stages that didnt start because - # the build errored out or was killed prior to stage starting. - NOT_STARTED = "not_started" - # SUCCESSFUL means the stage/build/benchmark completed successfully + """ + Status values that are assigned to stages, builds, benchmarks, and other + functionality to help the user understand whether that function completed + successfully or not. 
+ """ + + # SUCCESSFUL means the stage/build/benchmark completed successfully. SUCCESSFUL = "successful" + # ERROR means the stage/build/benchmark failed and threw some error that # was caught by turnkey. You should proceed by looking at the build # logs to see what happened. + ERROR = "error" + # KILLED means the build/benchmark failed because the system killed it. This can # happen because of an out-of-memory (OOM), timeout, system shutdown, etc. # You should proceed by re-running the build and keeping an eye on it to observe # why it is being killed (e.g., watch the RAM utilization to diagnose an OOM). KILLED = "killed" + + # The NOT_STARTED status is applied to all stages/builds/benchmarks at startup. + # It will be replaced by one of the other status values if the stage/build/benchmark + # has a chance to start running. + # A value of NOT_STARTED in the report CSV indicates that the stage/build/benchmark + # never had a chance to start because turnkey exited before that functionality had + # a chance to start running. + NOT_STARTED = "not_started" + + # INCOMPLETE indicates that a stage/build/benchmark started running and did not complete. + # Each stage, build, and benchmark are marked as INCOMPLETE when they start running. + # If you open the turnkey_stats.yaml file while the stage/build/benchmark + # is still running, the status will show as INCOMPLETE. If the stage/build/benchmark + # is killed without the chance to do any stats cleanup, the status will continue to + # show as INCOMPLETE in turnkey_stats.yaml. + # When the report CSV is created, any instance of an INCOMPLETE stage/build/benchmark + # status will be replaced by KILLED. + INCOMPLETE = "incomplete" ``` - The CLI help page for the `benchmark` command has been reorganized for clarity (try `turnkey benchmark -h`). diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index 317b4d1f..6ed3f339 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -138,25 +138,45 @@ def hash_model(model, model_type: ModelType, hash_params: bool = True): class FunctionStatus(enum.Enum): - # INCOMPLETE indicates stage/build/benchmark is either running or was killed; - # if you know the process ended then it was killed; - # if the process is still running, stage/build/benchmark is still running. - INCOMPLETE = "incomplete" - # NOT_STARTED applies to stages that didnt start because - # the build errored out or was killed prior to stage starting. - NOT_STARTED = "not_started" - # SUCCESSFUL means the stage/build/benchmark completed successfully + """ + Status values that are assigned to stages, builds, benchmarks, and other + functionality to help the user understand whether that function completed + successfully or not. + """ + + # SUCCESSFUL means the stage/build/benchmark completed successfully. SUCCESSFUL = "successful" + # ERROR means the stage/build/benchmark failed and threw some error that # was caught by turnkey. You should proceed by looking at the build # logs to see what happened. + ERROR = "error" + # KILLED means the build/benchmark failed because the system killed it. This can # happen because of an out-of-memory (OOM), timeout, system shutdown, etc. # You should proceed by re-running the build and keeping an eye on it to observe # why it is being killed (e.g., watch the RAM utilization to diagnose an OOM). KILLED = "killed" + # The NOT_STARTED status is applied to all stages/builds/benchmarks at startup. 
+ # It will be replaced by one of the other status values if the stage/build/benchmark + # has a chance to start running. + # A value of NOT_STARTED in the report CSV indicates that the stage/build/benchmark + # never had a chance to start because turnkey exited before that functionality had + # a chance to start running. + NOT_STARTED = "not_started" + + # INCOMPLETE indicates that a stage/build/benchmark started running and did not complete. + # Each stage, build, and benchmark are marked as INCOMPLETE when they start running. + # If you open the turnkey_stats.yaml file while the stage/build/benchmark + # is still running, the status will show as INCOMPLETE. If the stage/build/benchmark + # is killed without the chance to do any stats cleanup, the status will continue to + # show as INCOMPLETE in turnkey_stats.yaml. + # When the report CSV is created, any instance of an INCOMPLETE stage/build/benchmark + # status will be replaced by KILLED. + INCOMPLETE = "incomplete" + # Create a unique ID from this run by hashing pid + process start time def unique_id(): From a5d3545ca412924ab08ac51fc9de330cfcb5ffcb Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 15:38:13 -0500 Subject: [PATCH 34/35] Report incomplete stages as killed --- src/turnkeyml/build/stage.py | 4 ++-- src/turnkeyml/cli/report.py | 27 +++++++++++++++------------ src/turnkeyml/common/filesystem.py | 7 +++++++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/turnkeyml/build/stage.py b/src/turnkeyml/build/stage.py index aa91c739..83658de2 100644 --- a/src/turnkeyml/build/stage.py +++ b/src/turnkeyml/build/stage.py @@ -83,8 +83,8 @@ def __init__( _name_is_file_safe(unique_name) self.unique_name = unique_name - self.status_key = f"stage_status:{unique_name}" - self.duration_key = f"stage_duration:{unique_name}" + self.status_key = f"{fs.Keys.STAGE_STATUS}:{unique_name}" + self.duration_key = f"{fs.Keys.STAGE_DURATION}:{unique_name}" self.monitor_message = monitor_message self.progress = None self.logfile_path = None diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py index f81da672..be5defc9 100644 --- a/src/turnkeyml/cli/report.py +++ b/src/turnkeyml/cli/report.py @@ -68,23 +68,26 @@ def summary_spreadsheets(args) -> None: # load the yaml into a dict model_stats = yaml.load(stream, Loader=yaml.FullLoader) - # create a separate dict for each build - for build in model_stats[fs.Keys.EVALUATIONS].values(): + # create a separate dict for each evaluation + for evaluation in model_stats[fs.Keys.EVALUATIONS].values(): evaluation_stats = {} - # Copy all of the stats for the model that are common across builds + # Copy all of the stats for the model that are common across evaluation for key, value in model_stats.items(): if key != fs.Keys.EVALUATIONS: evaluation_stats[key] = value - # Copy the build-specific stats - for key, value in build.items(): + # Copy the evaluation-specific stats + for key, value in evaluation.items(): # If a build or benchmark is still marked as "incomplete" at - # reporting time, it - # must have been killed by a time out, out-of-memory (OOM), or some - # other uncaught exception + # reporting time, it must have been killed by a time out, + # out-of-memory (OOM), or some other uncaught exception if ( - key == fs.Keys.BUILD_STATUS or fs.Keys.BENCHMARK_STATUS + ( + key == fs.Keys.BUILD_STATUS + or fs.Keys.BENCHMARK_STATUS + ) + or fs.Keys.STAGE_STATUS in key ) and value == bd.FunctionStatus.INCOMPLETE.value: value = bd.FunctionStatus.KILLED.value @@ -114,10 
+117,10 @@ def summary_spreadsheets(args) -> None: # will indicate that no value was available result = {k: "-" for k in column_headers} - for key in column_headers: - result[key] = _good_get(evaluation_stats, key) + for key in column_headers: + result[key] = _good_get(evaluation_stats, key) - report.append(result) + report.append(result) # Populate results spreadsheet with open(report_path, "w", newline="", encoding="utf8") as spreadsheet: diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index 496957bd..e0faf83a 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -357,6 +357,13 @@ class Keys: # Indicates the match between the TorchScript IR graph and # the exported onnx model (verified with torch.onnx.verification) TORCH_ONNX_EXPORT_VALIDITY = "torch_export_validity" + # Prefix for reporting the execution duration of a stage + # In the report this will look like stage_duration:STAGE_NAME + STAGE_DURATION = "stage_duration" + # Prefix for reporting the execution status of a stage + # In the report this will look like stage_status:STAGE_NAME + STAGE_STATUS = "stage_status" + class Stats: def __init__(self, cache_dir: str, build_name: str, evaluation_id: str = None): From d6a878aaaa1bd5594ad765c229de8601f902bd22 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers Date: Wed, 10 Jan 2024 15:41:37 -0500 Subject: [PATCH 35/35] one more release note --- docs/release_notes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/release_notes.md b/docs/release_notes.md index c940fbf3..913dcc0a 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -67,6 +67,7 @@ class FunctionStatus(enum.Enum): - The CLI help page for the `benchmark` command has been reorganized for clarity (try `turnkey benchmark -h`). - The CLI now provides more helpful errors when the user provides arguments incorrectly. +- Fixed a bug where multi-cache reporting could repeat entries in the report CSV file. ## User Breaking Changes