diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1085f7abc..9ac4a0559 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -18,7 +18,7 @@ check_python_flake8:
   script:
     - python -m pip install --upgrade pip
     - pip install flake8
-    - flake8 ./coremltools --count --select=E9,F63,F7,F82 --show-source --statistics
+    - flake8 ./coremltools --count --select=E9,F5,F63,F7,F82 --show-source --statistics
 
 ########################################################################
 #
@@ -198,18 +198,6 @@ test_macos11_py37_tf1:
     TEST_PACKAGE: coremltools.converters.mil.frontend.tensorflow
     WHEEL_PATH: build/dist/*cp37*10_15*
 
-test_macos11_py37_tf2:
-  <<: *test_macos_pkg_with_reqs
-  tags:
-    - macos11
-  dependencies:
-    - build_wheel_macos_py37
-  variables:
-    PYTHON: "3.7"
-    REQUIREMENTS: reqs/test_tf2.pip
-    TEST_PACKAGE: coremltools.converters.mil.frontend.tensorflow2
-    WHEEL_PATH: build/dist/*cp37*10_15*
-
 test_macos11_py37_mil:
   <<: *test_macos_pkg
   tags:
@@ -218,7 +206,7 @@ test_macos11_py37_mil:
     - build_wheel_macos_py37
   variables:
     PYTHON: "3.7"
-    TEST_PACKAGE: coremltools.converters.mil
+    TEST_PACKAGE: coremltools.converters.mil.mil
     WHEEL_PATH: build/dist/*cp37*10_15*
 
 #########################################################################
diff --git a/coremltools/_deps/__init__.py b/coremltools/_deps/__init__.py
index 31ce1eec2..19796155d 100644
--- a/coremltools/_deps/__init__.py
+++ b/coremltools/_deps/__init__.py
@@ -100,7 +100,7 @@ def __get_sklearn_version(version):
 _TF_1_MIN_VERSION = "1.12.0"
 _TF_1_MAX_VERSION = "1.15.0"
 _TF_2_MIN_VERSION = "2.1.0"
-_TF_2_MAX_VERSION = "2.5.0"
+_TF_2_MAX_VERSION = "2.6.2"
 
 try:
     import tensorflow
@@ -147,7 +147,7 @@ def __get_sklearn_version(version):
 _HAS_KERAS_TF = True
 _HAS_KERAS2_TF = True
 _KERAS_MIN_VERSION = "1.2.2"
-_KERAS_MAX_VERSION = "2.2.4"
+_KERAS_MAX_VERSION = "2.6.0"
 MSG_KERAS1_NOT_FOUND = "Keras 1 not found."
 MSG_KERAS2_NOT_FOUND = "Keras 2 not found."
 
@@ -218,7 +218,7 @@ def __get_sklearn_version(version):
 
 # ---------------------------------------------------------------------------------------
 _HAS_TORCH = True
-_TORCH_MAX_VERSION = "1.9.1"
+_TORCH_MAX_VERSION = "1.10.2"
 try:
     import torch
     _warn_if_above_max_supported_version("Torch", torch.__version__, _TORCH_MAX_VERSION)
diff --git a/coremltools/converters/_converters_entry.py b/coremltools/converters/_converters_entry.py
index 0fbed79aa..26ed93251 100644
--- a/coremltools/converters/_converters_entry.py
+++ b/coremltools/converters/_converters_entry.py
@@ -2,13 +2,21 @@
 #
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
-import gc
+
 import collections
+import gc
+import os
 import warnings
 
-from coremltools import ComputeUnit as _ComputeUnit
-from coremltools.converters.mil.mil.passes.quantization_passes import AbstractQuantizationPass, FP16ComputePrecision
-from coremltools.converters.mil.mil.passes.quantization_passes import ComputePrecision as precision
+from coremltools import (
+    ComputeUnit as _ComputeUnit,
+    __version__ as _ct_version
+)
+from coremltools.converters.mil.mil.passes.quantization_passes import (
+    AbstractQuantizationPass,
+    ComputePrecision as precision,
+    FP16ComputePrecision
+)
 from coremltools.converters.mil.input_types import InputType, ClassifierConfig
 from coremltools.converters.mil.converter import mil_convert
 from coremltools.converters.mil.mil import Program
@@ -16,6 +24,7 @@
 from coremltools.converters._profile_utils import _profile
 
 from coremltools.models import _METADATA_VERSION, _METADATA_SOURCE
+from coremltools.models.utils import _MLPACKAGE_EXTENSION
 from coremltools.converters.mil._deployment_compatibility import (
     AvailableTarget,
     check_deployment_compatibility,
@@ -250,10 +259,10 @@ def convert(
         - if False, identical to setting compute_units to `coremltools.ComputeUnit.ALL``
 
     package_dir : str
-        Post conversion, the model is compiled to form the MLModel object ready for prediction.
-        This requires a temporary directory to hold the mlmodelc archive.
-        - if not None, must be a path to a directory that is used for
-          temporarily storing the compiled model assets. If None, a temporary directory is created.
+        Post conversion, the model is saved at a temporary location and
+        loaded to form the MLModel object ready for prediction.
+        If package_dir is provided, the model is saved at that location instead of in a temporary directory.
+        - if not None, must be a path to a directory with extension .mlpackage
 
     debug : bool
         This flag should generally be False except for debugging purposes
@@ -312,8 +321,6 @@ def convert(
     See `neural-network-conversion <https://coremltools.readme.io/docs/neural-network-conversion>`_ for
     more advanced options.
     """
-    from coremltools import __version__ as ct_version
-
     _check_deployment_target(minimum_deployment_target)
     exact_source = _determine_source(model, source, outputs)
     exact_target = _determine_target(convert_to, minimum_deployment_target)
@@ -337,6 +344,11 @@ def convert(
     else:
         raise ValueError("Invalid value of the argument 'compute_precision'")
 
+    if package_dir is not None:
+        _, ext = os.path.splitext(package_dir)
+        if ext != _MLPACKAGE_EXTENSION:
+            raise Exception("If package_dir is provided, it must have extension {} (not {})".format(_MLPACKAGE_EXTENSION, ext))
+
     mlmodel = mil_convert(
         model,
         convert_from=exact_source,
@@ -363,8 +375,7 @@ def convert(
 
     gc.collect()
 
-    mlmodel = _record_src_version(mlmodel, exact_source)
-    mlmodel.user_defined_metadata[_METADATA_VERSION] = ct_version
+    mlmodel = _record_build_metadata(mlmodel, exact_source)
 
     return mlmodel
 
@@ -553,16 +564,27 @@ def _determine_target(convert_to, minimum_deployment_target):
         else:
             return "mlprogram"
 
-def _record_src_version(mlmodel, exact_source):
+def _record_build_metadata(mlmodel, exact_source):
     # recording metadata: coremltools version, source framework and version
+    src_pkg, pkg_ver = None, None
     if exact_source in {"tensorflow", "tensorflow2"} and (_HAS_TF_1 or _HAS_TF_2):
+        src_pkg, pkg_ver = "tensorflow", tf.__version__
         src_pkg_version = "tensorflow=={0}".format(tf.__version__)
     elif exact_source == "pytorch" and _HAS_TORCH:
+        src_pkg, pkg_ver = "pytorch", torch.__version__
         src_pkg_version = "torch=={0}".format(torch.__version__)
     elif exact_source == 'milinternal':
+        src_pkg, pkg_ver = "milinternal", ""
         src_pkg_version = "milinternal"
     else:
         raise ValueError('Unsupported source {}'.format(exact_source))
 
     mlmodel.user_defined_metadata[_METADATA_SOURCE] = src_pkg_version
+    mlmodel.user_defined_metadata[_METADATA_VERSION] = _ct_version
+
+    build_info = {'coremltools-version': _ct_version}
+    if src_pkg is not None and pkg_ver is not None:
+        build_info['coremltools-component-' + src_pkg] = str(pkg_ver)
+    mlmodel._set_build_info_mil_attributes(build_info)
+
     return mlmodel
diff --git a/coremltools/converters/keras/_topology2.py b/coremltools/converters/keras/_topology2.py
index 37ad09711..705dc301d 100644
--- a/coremltools/converters/keras/_topology2.py
+++ b/coremltools/converters/keras/_topology2.py
@@ -1,3 +1,8 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 import keras as _keras
 import numpy as _np
 
diff --git a/coremltools/converters/mil/_deployment_compatibility.py b/coremltools/converters/mil/_deployment_compatibility.py
index b92dfbab6..d979462a1 100644
--- a/coremltools/converters/mil/_deployment_compatibility.py
+++ b/coremltools/converters/mil/_deployment_compatibility.py
@@ -1,6 +1,15 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 from enum import Enum
-from coremltools import _SPECIFICATION_VERSION_IOS_13, _SPECIFICATION_VERSION_IOS_14, \
-                        _SPECIFICATION_VERSION_IOS_15
+
+from coremltools import (
+    _SPECIFICATION_VERSION_IOS_13,
+    _SPECIFICATION_VERSION_IOS_14,
+    _SPECIFICATION_VERSION_IOS_15
+)
 
 
 class AvailableTarget(Enum):
@@ -15,7 +24,7 @@ class AvailableTarget(Enum):
     macOS10_15 = _SPECIFICATION_VERSION_IOS_13
     macOS10_16 = _SPECIFICATION_VERSION_IOS_14
     macOS11 = _SPECIFICATION_VERSION_IOS_14
-    macOS12 = _SPECIFICATION_VERSION_IOS_14
+    macOS12 = _SPECIFICATION_VERSION_IOS_15
 
     # watchOS versions (aliases of iOS versions)
     watchOS6 = _SPECIFICATION_VERSION_IOS_13
diff --git a/coremltools/converters/mil/backend/mil/helper.py b/coremltools/converters/mil/backend/mil/helper.py
index 3437af8d3..138ffa992 100644
--- a/coremltools/converters/mil/backend/mil/helper.py
+++ b/coremltools/converters/mil/backend/mil/helper.py
@@ -3,16 +3,15 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-import numpy as np
 import os
-import re
+
+import numpy as np
 
 from coremltools.converters.mil.mil import types
 from coremltools.converters.mil.mil.types import builtin_to_proto_types
-from coremltools.models.model import _WEIGHTS_DIR_NAME, _WEIGHTS_FILE_NAME
+from coremltools.models.utils import _WEIGHTS_DIR_NAME, _WEIGHTS_FILE_NAME
 import coremltools.proto.FeatureTypes_pb2 as ft
 import coremltools.proto.MIL_pb2 as pm
-
 from coremltools.converters.mil.mil.types import (
     type_to_builtin_type,
     numpy_type_to_builtin_type,
@@ -318,4 +317,3 @@ def cast_to_framework_io_dtype(var, is_output):
         ioname2 = "outputs" if is_output else "inputs"
         raise NotImplementedError(ioname + var.name + " has data type " + builtin_to_string(var.dtype) + \
                                   ". ML Program models only support fp32 and int32 " + ioname2 + ".")
-
diff --git a/coremltools/converters/mil/backend/mil/load.py b/coremltools/converters/mil/backend/mil/load.py
index 964457c29..1c16855f9 100644
--- a/coremltools/converters/mil/backend/mil/load.py
+++ b/coremltools/converters/mil/backend/mil/load.py
@@ -32,9 +32,8 @@
     any_variadic,
     is_symbolic,
 )
-from coremltools.converters.mil.mil.types.type_mapping import types_int64
 from coremltools.libmilstoragepython import _BlobStorageWriter as BlobWriter
-from coremltools.models.model import _WEIGHTS_FILE_NAME
+from coremltools.models.utils import _WEIGHTS_FILE_NAME
 from coremltools.models.neural_network.flexible_shape_utils import (
     add_enumerated_image_sizes,
     add_multiarray_ndshape_enumeration,
@@ -105,8 +104,7 @@ def translate_generic_op(op, parameters, blob_writer, literal_params=[]):
     ]
     blocks = None
     if len(op.blocks) > 0:
-        blocks = [create_block(b, parameters, blob_writer) \
-                  for b in op.blocks]
+        blocks = [create_block(b, parameters, blob_writer) for b in op.blocks]
 
     op_type = op.op_type
     attr_dict = {}
@@ -253,8 +251,7 @@ def load(prog, weights_dir, resume_on_errors=False, **kwargs):
 
     function_protos = {}
     for func_name, func in prog.functions.items():
-        function_protos[func_name] = convert_function(func, prog.parameters,
-            blob_writer)
+        function_protos[func_name] = convert_function(func, prog.parameters, blob_writer)
 
     proto = pm.Program(
         version=1,
diff --git a/coremltools/converters/mil/backend/mil/passes/adjust_io_to_supported_types.py b/coremltools/converters/mil/backend/mil/passes/adjust_io_to_supported_types.py
index 668d29548..84c179824 100644
--- a/coremltools/converters/mil/backend/mil/passes/adjust_io_to_supported_types.py
+++ b/coremltools/converters/mil/backend/mil/passes/adjust_io_to_supported_types.py
@@ -1,16 +1,14 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2021, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil import types as types
+import warnings as _warnings
+
+from coremltools.converters.mil.mil import Builder as mb, types as types
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 
-import warnings as _warnings
 
 @register_pass(namespace="mil_backend")
 class adjust_io_to_supported_types(AbstractGraphPass):
@@ -164,13 +162,11 @@ def _adjust_var(var):
 
 def _adjust_func_inputs(func):
     for input_name, input_var in func.inputs.items():
-       _adjust_var(input_var)
-
+        _adjust_var(input_var)
 
 def _adjust_block_inputs(block):
     for input_var in block.inputs:
-       _adjust_var(input_var)
-
+        _adjust_var(input_var)
 
 def _adjust_ops(block):
     len_block = len(block.operations)
diff --git a/coremltools/converters/mil/backend/mil/passes/insert_image_preprocessing_op.py b/coremltools/converters/mil/backend/mil/passes/insert_image_preprocessing_op.py
index f73ab7f8d..772ad02c2 100644
--- a/coremltools/converters/mil/backend/mil/passes/insert_image_preprocessing_op.py
+++ b/coremltools/converters/mil/backend/mil/passes/insert_image_preprocessing_op.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
diff --git a/coremltools/converters/mil/backend/mil/passes/sanitize_name_strings.py b/coremltools/converters/mil/backend/mil/passes/sanitize_name_strings.py
index 24d65375a..aecab54cb 100644
--- a/coremltools/converters/mil/backend/mil/passes/sanitize_name_strings.py
+++ b/coremltools/converters/mil/backend/mil/passes/sanitize_name_strings.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
@@ -17,10 +15,7 @@ class sanitize_name_strings(AbstractGraphPass):
     of the format [a-zA-Z_][a-zA-Z0-9_]*
     """
     def apply(self, prog):
-	    sanitizer_vars = NameSanitizer(prefix="var_")
-	    sanitizer_ops = NameSanitizer(prefix="op_")
-
-	    for f in prog.functions.values():
-	        sanitize_block(f, sanitizer_vars, sanitizer_ops, prog.main_input_types)
-
-
+        for f in prog.functions.values():
+            sanitizer_vars = NameSanitizer(prefix="var_")
+            sanitizer_ops = NameSanitizer(prefix="op_")
+            sanitize_block(f, sanitizer_vars, sanitizer_ops, prog.main_input_types)
\ No newline at end of file
diff --git a/coremltools/converters/mil/backend/mil/passes/test_passes.py b/coremltools/converters/mil/backend/mil/passes/test_passes.py
index fd2900936..d2886cb2f 100644
--- a/coremltools/converters/mil/backend/mil/passes/test_passes.py
+++ b/coremltools/converters/mil/backend/mil/passes/test_passes.py
@@ -405,10 +405,10 @@ def prog(x):
             return z
 
         prog.main_input_types = (ct.ImageType(name='x',
-                                             shape=[1, 1, 20, 20],
-                                             bias=2.0,
-                                             color_layout="G",
-                                             channel_first=True),)
+                                              shape=[1, 1, 20, 20],
+                                              bias=2.0,
+                                              color_layout="G",
+                                              channel_first=True),)
 
         prev_prog, prev_block, block = apply_pass_and_basic_check(
             prog, "mil_backend::insert_image_preprocessing_ops"
@@ -715,6 +715,58 @@ def prog(x):
         assert block.find_ops(op_type="relu")[1].name == "op_1"
         assert block.find_ops(op_type="add")[0].name == "op_3"
 
+    def test_sanitize_var_names_with_two_functions(self):
+        """Check that the same raw name used in two functions is sanitized identically in both.
+
+        Input:
+            main(%x: (1, 3, 20, fp32)(Tensor)) {
+              block0() {
+                %var_1!: (1, 3, 20, fp32)(Tensor) = relu(x=%x, name="var_1!")
+              } -> (%var_1!)
+            }
+
+            main_2(%x: (1, 3, 20, fp32)(Tensor)) {
+              block0() {
+                %var_1!: (1, 3, 20, fp32)(Tensor) = relu(x=%x, name="var_1!")
+              } -> (%var_1!)
+            }
+
+        Output:
+            main(%x: (1, 3, 20, fp32)(Tensor)) {
+              block0() {
+                %var_1_: (1, 3, 20, fp32)(Tensor) = relu(x=%x, name="var_1_")
+              } -> (%var_1_)
+            }
+
+            main_2(%x: (1, 3, 20, fp32)(Tensor)) {
+              block0() {
+                %var_1_: (1, 3, 20, fp32)(Tensor) = relu(x=%x, name="var_1_")
+              } -> (%var_1_)
+            }
+
+        """
+
+        @mb.program(input_specs=[mb.TensorSpec(shape=(1, 3, 20))])
+        def prog(x):
+            z = mb.relu(x=x, name="var_1!")
+            return z
+
+        @mb.program(input_specs=[mb.TensorSpec(shape=(1, 3, 20))])
+        def prog_2(x):
+            z = mb.relu(x=x, name="var_1!")
+            return z
+
+        prog.add_function("main_2", prog_2.functions["main"])
+        PASS_REGISTRY["mil_backend::sanitize_name_strings"](prog)
+        block = prog.functions["main"]
+        assert block.find_ops(op_type="relu")[0].outputs[0].name == "var_1_"
+        assert prog["main"].outputs[0].name == "var_1_"
+        assert block.find_ops(op_type="relu")[0].name == "var_1_"
+        block = prog.functions["main_2"]
+        assert block.find_ops(op_type="relu")[0].outputs[0].name == "var_1_"
+        assert prog["main_2"].outputs[0].name == "var_1_"  # check main_2's own output, not main's
+        assert block.find_ops(op_type="relu")[0].name == "var_1_"
+
 
 class TestPassFuseActivationSiLU:
     """
diff --git a/coremltools/converters/mil/backend/mil/test_helper.py b/coremltools/converters/mil/backend/mil/test_helper.py
index 603eb002a..19f329856 100644
--- a/coremltools/converters/mil/backend/mil/test_helper.py
+++ b/coremltools/converters/mil/backend/mil/test_helper.py
@@ -19,9 +19,8 @@ def test_name_sanitizer(self):
                                       ("dense_2_1/BiasAdd", "dense_2_1_BiasAdd"),
                                       ("dense_2_1-BiasAdd", "dense_2_1_BiasAdd"),
                                       ("key:0", "key_0"),
-                                    ]
+        ]
 
         for i, in_and_out_str in enumerate(input_and_expected_strings):
             out = _NameSanitizer().sanitize_name(in_and_out_str[0])
             assert out == in_and_out_str[1]
-
diff --git a/coremltools/converters/mil/backend/mil/test_model_input_params.py b/coremltools/converters/mil/backend/mil/test_model_input_params.py
index 569561615..5847e1726 100644
--- a/coremltools/converters/mil/backend/mil/test_model_input_params.py
+++ b/coremltools/converters/mil/backend/mil/test_model_input_params.py
@@ -3,15 +3,15 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 import numpy as np
-import pytest
 
 import coremltools as ct
 from coremltools.converters.mil.mil.builder import Builder as mb
+from coremltools.converters.mil.mil.program import Symbol
 from coremltools.models.utils import _macos_version
 
 
 class TestMILFlexibleShapes:
-    from coremltools.converters.mil.mil.program import Symbol
+
     @mb.program(
         input_specs = [
             mb.TensorSpec(shape=[1, 3, Symbol("H"), Symbol("W")])
diff --git a/coremltools/converters/mil/backend/nn/load.py b/coremltools/converters/mil/backend/nn/load.py
index 9e1cd2d67..6bc2e82ff 100644
--- a/coremltools/converters/mil/backend/nn/load.py
+++ b/coremltools/converters/mil/backend/nn/load.py
@@ -1,15 +1,10 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-
-from collections import defaultdict
 import coremltools as ct
 from coremltools.converters.mil.input_types import (
-    ClassifierConfig,
     ImageType,
     EnumeratedShapes,
     Shape,
@@ -23,17 +18,17 @@
     is_symbolic,
 )
 from coremltools.converters._profile_utils import _profile
-from coremltools.models import MLModel
-from coremltools.models import neural_network as neural_network
+from coremltools.models import (
+    MLModel,
+    neural_network as neural_network
+)
 from coremltools.models.datatypes import Array
 from coremltools.models.neural_network import flexible_shape_utils
 from coremltools.models.neural_network.flexible_shape_utils import (
-    update_image_size_range,
     add_enumerated_image_sizes,
     set_multiarray_ndshape_range,
     add_multiarray_ndshape_enumeration,
 )
-import logging
 from .op_mapping import convert_ops
 from .passes.nn_passes import nn_backend_passes
 
@@ -81,7 +76,7 @@ def _set_user_inputs(proto, inputs):
         shape = input_type.shape
         if isinstance(shape, EnumeratedShapes):
             if isinstance(input_type, ImageType):
-                default_height , default_width = 0, 0
+                default_height, default_width = 0, 0
                 for inp in proto.description.input:
                     if inp.name == input_type.name:
                         default_height = inp.type.imageType.height
diff --git a/coremltools/converters/mil/backend/nn/op_mapping.py b/coremltools/converters/mil/backend/nn/op_mapping.py
index 6866f9a5e..d3679ca2e 100644
--- a/coremltools/converters/mil/backend/nn/op_mapping.py
+++ b/coremltools/converters/mil/backend/nn/op_mapping.py
@@ -3,8 +3,9 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-import numpy as _np
 import logging as _logging
+
+import numpy as _np
 from tqdm import tqdm as _tqdm
 
 from .mil_to_nn_mapping_registry import MIL_TO_NN_MAPPING_REGISTRY, register_mil_to_nn_mapping
@@ -319,29 +320,134 @@ def batch_norm(const_context, builder, op):
     x_name = make_input(const_context, builder, op.x)
     out_name = op.outputs[0].name
 
-    if op.x.rank == 3:
+    is_batchnorm_1d = op.x.rank == 3
+    is_batchnorm_2d = op.x.rank == 4
+    is_batchnorm_3d = op.x.rank == 5
+
+    if is_batchnorm_1d:
         x_name = op.name + "_expanded"
         builder.add_expand_dims(
             name=x_name, input_name=op.x.name, output_name=x_name, axes=[-2],
         )
         out_name += "_batch_norm"
 
-    builder.add_batchnorm(
-        name=op.name,
-        channels=channels,
-        gamma=gamma,
-        beta=beta,
-        mean=op.mean.val,
-        variance=op.variance.val,
-        input_name=x_name,
-        output_name=out_name,
-        compute_mean_var=False,
-        instance_normalization=False,
-        epsilon=op.epsilon.val,
-    )
+    if is_batchnorm_1d or is_batchnorm_2d:
+        # batch norm 1d / 2d
+        builder.add_batchnorm(
+            name=op.name,
+            channels=channels,
+            gamma=gamma,
+            beta=beta,
+            mean=op.mean.val,
+            variance=op.variance.val,
+            input_name=x_name,
+            output_name=out_name,
+            compute_mean_var=False,
+            instance_normalization=False,
+            epsilon=op.epsilon.val,
+        )
+    elif is_batchnorm_3d:
+        # batch norm 3d
+        batch_size, channel, height, width, depth = op.x.shape
+        assert not is_symbolic(channel), "Channel dimension must be known for batchnorm layer."
+        symbolic_num = sum([is_symbolic(x) for x in op.x.shape])
+
+        if symbolic_num > 1:
+            gamma_expand = _np.expand_dims(gamma, axis=(0, 2, 3, 4))
+            beta_expand = _np.expand_dims(beta, axis=(0, 2, 3, 4))
+            mean_expand = _np.expand_dims(op.mean.val, axis=(0, 2, 3, 4))
+            var_expand = _np.expand_dims(op.variance.val, axis=(0, 2, 3, 4))
+
+            # compute batch norm 3d by decomposing it into elementwise operations
+            negative_mean_name = op.name + "_negative_mean"
+            add_const(const_context, builder, negative_mean_name, -mean_expand)
+
+            numerator_name = op.name + "_numerator"
+            builder.add_add_broadcastable(
+                name=numerator_name,
+                input_names=[x_name, negative_mean_name],
+                output_name=numerator_name,
+            )
+
+            var_expand = var_expand + op.epsilon.val
+            denominator = _np.sqrt(var_expand)
+            gamma_expand = gamma_expand / denominator
+            gamma_name = op.name + "_gamma"
+            add_const(const_context, builder, gamma_name, gamma_expand)
+
+            mul_name = op.name + "_mul"
+            builder.add_multiply_broadcastable(
+                name=mul_name,
+                input_names=[numerator_name, gamma_name],
+                output_name=mul_name,
+            )
+
+            beta_name = op.name + "_beta"
+            add_const(const_context, builder, beta_name, beta_expand)
+
+            builder.add_add_broadcastable(
+                name=out_name,
+                input_names=[mul_name, beta_name],
+                output_name=out_name,
+            )
+        else:
+            is_batch_symbloic = is_symbolic(batch_size)
+            is_height_symbolic = is_symbolic(height)
+            is_width_symbolic = is_symbolic(width)
+            is_depth_symbolic = is_symbolic(depth)
+
+            if is_batch_symbloic:
+                shape1 = [-1, channel, height * width, depth]
+                shape2 = [-1, channel, height, width, depth]
+
+            elif is_height_symbolic:
+                shape1 = [batch_size, channel, -1, width*depth]
+                shape2 = [batch_size, channel, -1, width, depth]
+
+            elif is_width_symbolic:
+                shape1 = [batch_size, channel, -1, height*depth]
+                shape2 = [batch_size, channel, height, -1, depth]
+
+            elif is_depth_symbolic:
+                shape1 = [batch_size, channel, height * width, -1]
+                shape2 = [batch_size, channel, height, width, -1]
+
+            else:
+                shape1 = [batch_size, channel, height*width, depth]
+                shape2 = [batch_size, channel, height, width, depth]
+
+            reshape_4d_name = op.name + "_reshape_4d"
+            builder.add_reshape_static(
+                name=reshape_4d_name,
+                input_name=x_name,
+                output_name=reshape_4d_name,
+                output_shape=shape1,
+            )
+
+            batchnorm_name = op.name + "_batchnorm_4d"
+            builder.add_batchnorm(
+                name=batchnorm_name,
+                channels=channels,
+                gamma=gamma,
+                beta=beta,
+                mean=op.mean.val,
+                variance=op.variance.val,
+                input_name=reshape_4d_name,
+                output_name=batchnorm_name,
+                compute_mean_var=False,
+                instance_normalization=False,
+                epsilon=op.epsilon.val,
+            )
+
+            builder.add_reshape_static(
+                name=out_name,
+                input_name=batchnorm_name,
+                output_name=out_name,
+                output_shape=shape2,
+            )
 
     # Squeeze added `Width` dimension for 1d case
-    if op.x.rank == 3:
+    if is_batchnorm_1d:
         x_name = op.name + "_squeeze"
         builder.add_squeeze(
             name=x_name,
@@ -3361,7 +3467,6 @@ def _realloc_list(const_context, builder, ls_var, index_var, value_var, mode):
     else:
         add_const(const_context, builder, value_elem_shape_name, _np.array(elem_shape))
 
-
     # if elem_shape is runtime-determined, check if we need to re-initialize the array
 
     if has_dynamic_shape:
diff --git a/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py b/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py
index b82767d9d..8235056a6 100644
--- a/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py
+++ b/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
@@ -9,6 +7,7 @@
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 
+
 def _commingle_loop_vars_block(block):
     for op in list(block.operations):
         for b in op.blocks:
@@ -18,17 +17,18 @@ def _commingle_loop_vars_block(block):
             continue
 
         for block in op.blocks:
-          for v_out, vx_in in zip(op.outputs, block.inputs):
-              # Disable check as v_out is not visible in block.
-              block.replace_uses_of_var_after_op(
-                  anchor_op=None,
-                  old_var=vx_in,
-                  new_var=v_out,
-                  no_check_var_visibility=True,
-              )
+            for v_out, vx_in in zip(op.outputs, block.inputs):
+                # Disable check as v_out is not visible in block.
+                block.replace_uses_of_var_after_op(
+                    anchor_op=None,
+                    old_var=vx_in,
+                    new_var=v_out,
+                    no_check_var_visibility=True,
+                )
+
+            # replace block inputs
+            block._block_inputs = op.outputs
 
-          # replace block inputs
-          block._block_inputs = op.outputs
 
 @register_pass(namespace="nn_backend")
 class commingle_loop_vars(AbstractGraphPass):
diff --git a/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py b/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py
index a49922d7d..dffb3d32c 100644
--- a/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py
+++ b/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py
@@ -1,15 +1,13 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 
+
 def _handle_unused_inputs_func(f):
     unused_inputs = [v for v_name, v in f.inputs.items() if len(v.child_ops) == 0]
 
@@ -18,6 +16,7 @@ def _handle_unused_inputs_func(f):
             # copy the input
             v_tmp = mb.identity(x=v, name=v.name + "_tmp")
 
+
 @register_pass(namespace="nn_backend")
 class handle_unused_inputs(AbstractGraphPass):
     """
diff --git a/coremltools/converters/mil/backend/nn/passes/mlmodel_passes.py b/coremltools/converters/mil/backend/nn/passes/mlmodel_passes.py
index d5d9c7c7b..9ab855e9e 100644
--- a/coremltools/converters/mil/backend/nn/passes/mlmodel_passes.py
+++ b/coremltools/converters/mil/backend/nn/passes/mlmodel_passes.py
@@ -1,12 +1,8 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-from itertools import permutations
-
 
 def _get_nn_spec(spec):
     if spec.WhichOneof("Type") == "neuralNetwork":
@@ -455,7 +451,7 @@ def solve_dp(layers):
             # Get layers to delete using sol_bt
             cursor = 0
             while cursor < len(dp):
-                if sol_bt[cursor] == None:
+                if sol_bt[cursor] is None:
                     break
                 cursor = sol_bt[cursor]
                 tmp = [layers[i] for i in range(cursor, cursor + dp[cursor])]
diff --git a/coremltools/converters/mil/backend/nn/passes/test_mlmodel_passes.py b/coremltools/converters/mil/backend/nn/passes/test_mlmodel_passes.py
index 6c7412e58..3c8669c6f 100644
--- a/coremltools/converters/mil/backend/nn/passes/test_mlmodel_passes.py
+++ b/coremltools/converters/mil/backend/nn/passes/test_mlmodel_passes.py
@@ -4,16 +4,18 @@
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 import copy
-import pytest
-import numpy as np
-import unittest
 from sys import platform
+import unittest
+
+import numpy as np
 
 from coremltools._deps import _IS_MACOS
 import coremltools.models.datatypes as datatypes
 from coremltools.models.utils import _macos_version
-from coremltools.models import neural_network as neural_network
-from coremltools.models import MLModel
+from coremltools.models import (
+    neural_network as neural_network,
+    MLModel
+)
 from coremltools.models.neural_network.printer import print_network_spec
 from coremltools.converters.mil.backend.nn.passes.mlmodel_passes import (
     remove_disconnected_layers,
@@ -21,6 +23,7 @@
     remove_redundant_transposes,
 )
 
+
 DEBUG = False
 np.random.seed(10)
 
@@ -338,7 +341,7 @@ def _test_builder(self, builder, input_shape, expected_layer_num=None):
         remove_redundant_transposes(builder.spec)
 
         layers = builder.spec.neuralNetwork.layers
-        if expected_layer_num == None:
+        if expected_layer_num is None:
             self.assertTrue(len(layers) < num_layers_before)
         else:
             self.assertEqual(len(layers), expected_layer_num)
@@ -1029,7 +1032,6 @@ def _build_and_test_network(input_size, transpose_layers, expected_layers):
         builder = neural_network.NeuralNetworkBuilder(
             [("data", datatypes.Array(2, 4, 8))], [("out", None)]
         )
-        last_layer = "data"
         builder.add_transpose(
             name="t1", axes=[0, 2, 1], input_name="data", output_name="t1"
         )
diff --git a/coremltools/converters/mil/backend/nn/passes/test_passes.py b/coremltools/converters/mil/backend/nn/passes/test_passes.py
index efc86069d..ae4753e07 100644
--- a/coremltools/converters/mil/backend/nn/passes/test_passes.py
+++ b/coremltools/converters/mil/backend/nn/passes/test_passes.py
@@ -4,11 +4,15 @@
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 import copy
+
 import pytest
+
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY
-from coremltools.converters.mil.testing_utils import assert_model_is_valid
-from coremltools.converters.mil.testing_utils import assert_same_output_names
+from coremltools.converters.mil.testing_utils import (
+    assert_model_is_valid,
+    assert_same_output_names
+)
 
 
 def test_commingle_loop_vars():
@@ -61,8 +65,7 @@ def prog(a, b):
     assert prog["main"].outputs[1].op is not None  # output comes from an op
     assert prog["main"].outputs[1].op.op_type == "identity"
 
-    with pytest.raises(ValueError,
-            match='used both as function\'s input and output'):
+    with pytest.raises(ValueError, match='used both as function\'s input and output'):
         # prog has input and output names 'b' that refer to different vars
         # This program can pass if we disable 'dedup_op_and_var_names' pass
         assert_model_is_valid(prog, {"a": (1, 2), "b": (1, 2)})
diff --git a/coremltools/converters/mil/converter.py b/coremltools/converters/mil/converter.py
index 1d4c573ab..99aabafd6 100644
--- a/coremltools/converters/mil/converter.py
+++ b/coremltools/converters/mil/converter.py
@@ -2,6 +2,13 @@
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import os as _os
+import shutil as _shutil
+import stat as _stat
+import tempfile as _tempfile
+import warnings as _warnings
+
 from . import InputType, ImageType
 from .mil.passes.apply_common_pass_pipeline import apply_common_pass_pipeline
 from coremltools.converters._profile_utils import _profile
@@ -9,7 +16,7 @@
 from coremltools.converters.mil.mil.passes.quantization_passes import AbstractQuantizationPass
 from coremltools.converters.mil.mil.types.symbolic import k_used_symbols, k_num_internal_syms
 from coremltools.models import MLModel
-from coremltools.models.model import _MODEL_FILE_NAME, _WEIGHTS_DIR_NAME
+from coremltools.models.model import _create_mlpackage
 from coremltools.models.utils import _MLMODEL_EXTENSION, _MLPACKAGE_EXTENSION
 
 try:
@@ -17,12 +24,6 @@
 except ModuleNotFoundError:
     pass
 
-import os as _os
-import shutil as _shutil
-import stat as _stat
-import tempfile as _tempfile
-import warnings as _warnings
-
 
 class ConverterRegistry:
     frontends = {}
@@ -220,35 +221,7 @@ def _mil_convert(
         return mil_program # mil program
 
     elif convert_to == 'mlprogram':
-        # Save proto to disk
-        proto_spec_str = proto.SerializeToString()
-        spec_file = _tempfile.NamedTemporaryFile(suffix=_MLMODEL_EXTENSION)
-        spec_file.write(proto_spec_str)
-        spec_file.flush()
-
-        # To make sure everyone can read this file
-        _os.chmod(spec_file.name, _stat.S_IRUSR | _stat.S_IWUSR | _stat.S_IRGRP | _stat.S_IROTH)
-
-        # If package directory is already provided, use that
-        package_path = kwargs.get("package_dir")
-        if not package_path:
-            package_path = _tempfile.mkdtemp(suffix=_MLPACKAGE_EXTENSION)
-
-        if _os.path.exists(package_path):
-            _shutil.rmtree(package_path)
-
-        package = ModelPackage(package_path)
-
-        # Root model file is copied into the model package.
-        package.setRootModel(spec_file.name, _MODEL_FILE_NAME, "com.apple.CoreML", "CoreML Model Specification");
-        spec_file.close() # clean up spec file now that it is part of the model package
-
-        # Weights bundle is copied into the model package. Changes to in-memory JSON is commited to disk when package goes out of scope.
-        package.addItem(weights_dir, _WEIGHTS_DIR_NAME, "com.apple.CoreML", "CoreML Model Weights")
-        _shutil.rmtree(weights_dir) # clean up weights now that it is part of the model package
-
-        package = None
-
+        package_path = _create_mlpackage(proto, weights_dir, kwargs.get("package_dir"))
         return modelClass(package_path,
                           is_temp_package=not kwargs.get('package_dir'),
                           mil_program=mil_program,
diff --git a/coremltools/converters/mil/experimental/__init__.py b/coremltools/converters/mil/experimental/__init__.py
index 0d5ea4a16..545ac7e58 100644
--- a/coremltools/converters/mil/experimental/__init__.py
+++ b/coremltools/converters/mil/experimental/__init__.py
@@ -1,4 +1,4 @@
 #  Copyright (c) 2021, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
-#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
\ No newline at end of file
+#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
diff --git a/coremltools/converters/mil/experimental/passes/__init__.py b/coremltools/converters/mil/experimental/passes/__init__.py
index 0d5ea4a16..545ac7e58 100644
--- a/coremltools/converters/mil/experimental/passes/__init__.py
+++ b/coremltools/converters/mil/experimental/passes/__init__.py
@@ -1,4 +1,4 @@
 #  Copyright (c) 2021, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
-#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
\ No newline at end of file
+#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
diff --git a/coremltools/converters/mil/experimental/passes/generic_conv_batchnorm_fusion.py b/coremltools/converters/mil/experimental/passes/generic_conv_batchnorm_fusion.py
index 8b40aa2ee..8f1d09ba3 100644
--- a/coremltools/converters/mil/experimental/passes/generic_conv_batchnorm_fusion.py
+++ b/coremltools/converters/mil/experimental/passes/generic_conv_batchnorm_fusion.py
@@ -1,11 +1,8 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2021, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-
 import os
 import numpy as np
 
@@ -68,7 +65,7 @@ def transform_pattern(pattern):
 
     # get type of the conv layer
     is_deconv = pattern.conv.op_type == 'conv_transpose'
-    is_conv_1d  = len(conv_weight.shape) == 3
+    is_conv_1d = len(conv_weight.shape) == 3
 
     # D_in denotes the spatial dimensions for conv kernel weight
     # for conv_transpose, conv_weight has shape [Cin, Cout / groups, *D_in]
@@ -167,4 +164,4 @@ def transform_pattern(pattern):
         transform_pattern=transform_pattern,
         pass_name="fuse_conv_batchnorm",
         namespace="common",
-    )
\ No newline at end of file
+    )
diff --git a/coremltools/converters/mil/experimental/passes/generic_conv_bias_fusion.py b/coremltools/converters/mil/experimental/passes/generic_conv_bias_fusion.py
index 39de8622d..2e1e5bd36 100644
--- a/coremltools/converters/mil/experimental/passes/generic_conv_bias_fusion.py
+++ b/coremltools/converters/mil/experimental/passes/generic_conv_bias_fusion.py
@@ -1,19 +1,17 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-
-import os
-import numpy as np
 import logging
+import numpy as np
+import os
 
 from coremltools.converters.mil import Builder as mb
 from coremltools.converters.mil.experimental.passes.generic_pass_infrastructure import register_generic_pass
 from coremltools.converters.mil.mil import types
 
+
 """
 Fold add/sub into bias of conv and conv_transpose
 That is, convert conv + add/sub to conv, when add/sub is adding a constant
@@ -144,9 +142,6 @@ def transform_pattern(pattern):
     if not isinstance(bias_value, np.ndarray):
         is_bias_scalar = True
 
-    # find rank of the conv input
-    rank = pattern.conv.x.rank
-
     bias_value = np.array([bias_value]) if is_bias_scalar else np.squeeze(bias_value)
 
     if pattern.add_or_sub.op_type == "sub":
@@ -179,7 +174,7 @@ def transform_pattern(pattern):
         weight_np_type = types.nptype_from_builtin(pattern.conv.inputs["weight"].sym_type.get_primitive())
         logging.warning("conv_bias_fusion pass: casting bias "
                         "from {} to {} to match the dtype of the weight of the conv layer".format(
-                        new_bias_value.dtype, weight_np_type
+                            new_bias_value.dtype, weight_np_type
                         )
         )
         new_bias_value = new_bias_value.astype(weight_np_type)
@@ -266,7 +261,7 @@ def transform_transpose_pattern(pattern):
     pattern.block.remove_ops(pattern.op_list())
 
 def _bias_mod_and_validity(bias, Cout, pattern):
- # check if the bias is compatible for fusion
+    # check if the bias is compatible for fusion
     is_bias_scalar = True
     if isinstance(bias, np.ndarray):
         if bias.shape == ():
diff --git a/coremltools/converters/mil/experimental/passes/generic_conv_scale_fusion.py b/coremltools/converters/mil/experimental/passes/generic_conv_scale_fusion.py
index d0da48e00..9d3f732e7 100644
--- a/coremltools/converters/mil/experimental/passes/generic_conv_scale_fusion.py
+++ b/coremltools/converters/mil/experimental/passes/generic_conv_scale_fusion.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2021, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
diff --git a/coremltools/converters/mil/experimental/passes/generic_gelu_tanh_approximation_fusion.py b/coremltools/converters/mil/experimental/passes/generic_gelu_tanh_approximation_fusion.py
deleted file mode 100644
index 9671262f8..000000000
--- a/coremltools/converters/mil/experimental/passes/generic_gelu_tanh_approximation_fusion.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# -*- coding: utf-8 -*-
-
-#  Copyright (c) 2020, Apple Inc. All rights reserved.
-#
-#  Use of this source code is governed by a BSD-3-clause license that can be
-#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
-
-import os
-
-from coremltools.converters.mil import Builder as mb
-from coremltools.converters.mil.mil.passes.helper import _check_var_scalar_value
-from coremltools.converters.mil.experimental.passes.generic_pass_infrastructure import register_generic_pass
-
-if os.getenv("ENABLE_EXPERIMENTAL_PASSES") == "1":
-    @mb.program(input_specs=[mb.TensorSpec(shape=([1, 1024, 4096])), ])
-    def gelu_to_detect_1(x):
-        # MIL operation takes named inputs (instead of positional inputs).
-        # Here `name` argument is MANDATORY.
-        pow = mb.pow(x=x, y=3.0, name="pow")
-        mul_1 = mb.mul(x=0.044714998453855515, y=pow, name="mul_1")
-        add = mb.add(x=x, y=mul_1, name="add")
-        mul_2 = mb.mul(x=0.7978845834732056, y=add, name="mul_2")
-        tanh = mb.tanh(x=mul_2, name="tanh")
-        add_1 = mb.add(x=1.0, y=tanh, name="add_1")
-        mul = mb.mul(x=0.5, y=add_1, name="mul")
-        mul_3 = mb.mul(x=mul, y=x, name="mul_3")
-        return mul_3
-    """
-    y = x * (0.5 * (tanh(((.0447)x^3 + x ) * sqrt(2/pi)) + 1))
-    
-    
-    [...] -----> pow (3) ----> mul (.044715) ---> add -----> mul (sqrt(2/pi)) ---> tanh ----> add (1) ----> mul (0.5) -----> mul ---> [...]
-      |                                            ^                                                                          ^
-      |                                            |                                                                          |
-      |------------------------------------------------------------------------------------------------------------------------
-    
-    """
-
-if os.getenv("ENABLE_EXPERIMENTAL_PASSES") == "1":
-    # In this pattern, 0.5 is first multiplied with the input which is then multiplied with the tanh term.
-    # In pattern1, 0.5 is first multiplied with the tanh term, and then multiplied with input
-    @mb.program(input_specs=[mb.TensorSpec(shape=([1, 1024, 4096])), ])
-    def gelu_to_detect_2(x):
-        pow = mb.pow(x=x, y=3.0, name ="pow")
-        mul_1 = mb.mul(x=0.044714998453855515, y=pow, name="mul_1")
-        add = mb.add(x=x, y=mul_1, name="add")
-        mul_2 = mb.mul(x=0.7978845834732056, y=add, name="mul_2")
-        tanh = mb.tanh(x=mul_2, name="tanh")
-        add_1 = mb.add(x=1.0, y=tanh, name="add_1")
-        mul = mb.mul(x = 0.5, y=x, name="mul")
-        mul_3 = mb.mul(x=mul, y=add_1, name="mul_3")
-        return mul_3
-
-    """
-    y = (0.5 * x) * (tanh(((.0447)x^3 + x ) * sqrt(2/pi)) + 1)
-    
-                    ---------------------------------------------------------------------------------------------------------
-                    ^                                                                                                       |
-                    |                                                                                                       V
-     [...] -----> mul(0.5)    pow (3) ----> mul (.044715) ---> add -----> mul (sqrt(2/pi)) ---> tanh ----> add (1) -----> mul ---> [...]
-      |                         ^                               ^
-      |                         |                               |
-      |------------------------------------------------------------
-    """
-
-def var_constraints(pattern):
-    passed = True
-
-    passed = passed and (_check_var_scalar_value(pattern.mul.y, 0.5) or _check_var_scalar_value(pattern.mul.x, 0.5))
-    passed = passed and _check_var_scalar_value(pattern.pow.y, 3.0)
-
-    passed = passed and (
-                        _check_var_scalar_value(pattern.mul_1.y, 0.044715) or
-                        _check_var_scalar_value(pattern.mul_1.x,  0.044715)
-                        )
-
-    passed = passed and (
-                        _check_var_scalar_value(pattern.mul_2.y, 0.79788) or
-                        _check_var_scalar_value(pattern.mul_2.x, 0.79788)
-                        )
-
-    passed = passed and (
-                        _check_var_scalar_value(pattern.add_1.y, 1) or
-                        _check_var_scalar_value(pattern.add_1.x, 1)
-                        )
-
-    return passed
-
-def transform_pattern(pattern):
-    # remove all the ops, and replace with a gelu op
-    out_name = pattern.mul_3.outputs[0].name
-    x = mb.gelu(x=pattern.root_var, mode="TANH_APPROXIMATION", name=out_name, before_op=pattern.mul)
-
-    pattern.mul_3.enclosing_block.replace_uses_of_var_after_op(
-        anchor_op=pattern.mul_3, old_var=pattern.mul_3.outputs[0], new_var=x
-    )
-
-    # Remove all the ops at once
-    pattern.block.remove_ops(pattern.op_list())
-
-if os.getenv('ENABLE_EXPERIMENTAL_PASSES') == '1':
-    register_generic_pass(ops_arrangement=gelu_to_detect_1, var_constraints=var_constraints,
-                            transform_pattern = transform_pattern, pass_name="fuse_gelu_tanh_approximation", namespace="common")
-
-    register_generic_pass(ops_arrangement=gelu_to_detect_2, var_constraints = var_constraints,
-                        transform_pattern = transform_pattern, pass_name="fuse_gelu_tanh_approximation", namespace="common")
\ No newline at end of file
diff --git a/coremltools/converters/mil/experimental/passes/generic_layernorm_instancenorm_pattern_fusion.py b/coremltools/converters/mil/experimental/passes/generic_layernorm_instancenorm_pattern_fusion.py
index 959f7f157..35db761b5 100644
--- a/coremltools/converters/mil/experimental/passes/generic_layernorm_instancenorm_pattern_fusion.py
+++ b/coremltools/converters/mil/experimental/passes/generic_layernorm_instancenorm_pattern_fusion.py
@@ -1,16 +1,12 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2021, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-import operator
 import os
 import numpy as np
 
 from coremltools.converters.mil import Builder as mb
-from coremltools.converters.mil.mil.passes.helper import _check_var_scalar_value
 from coremltools.converters.mil.experimental.passes.generic_pass_infrastructure import register_generic_pass
 from coremltools.converters.mil.mil import get_new_symbol
 
@@ -341,7 +337,8 @@ def _instancenorm_constraints(pattern):
     passed = passed and _check_axes_and_var_shape(negative_axes, pattern.beta_var.shape)
 
     requires_rank4_transpose = False
-    if negative_axes == [-3, -2]: requires_rank4_transpose = True
+    if negative_axes == [-3, -2]:
+        requires_rank4_transpose = True
     pattern.add_attribute("requires_rank4_transpose", requires_rank4_transpose)
     pattern.add_attribute("is_instancenorm", True)
     return passed
diff --git a/coremltools/converters/mil/experimental/passes/generic_linear_bias_fusion.py b/coremltools/converters/mil/experimental/passes/generic_linear_bias_fusion.py
index afe6ada7e..79ab164b3 100644
--- a/coremltools/converters/mil/experimental/passes/generic_linear_bias_fusion.py
+++ b/coremltools/converters/mil/experimental/passes/generic_linear_bias_fusion.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2021, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
@@ -15,8 +13,8 @@
 if os.getenv("ENABLE_EXPERIMENTAL_PASSES") == "1":
     arbitrary_shape = (get_new_symbol(), get_new_symbol())
     np.random.seed()
-    arbitrary_weight = np.random.rand(4,3)
-    arbitrary_bias =  np.random.rand(4)
+    arbitrary_weight = np.random.rand(4, 3)
+    arbitrary_bias = np.random.rand(4)
 
 if os.getenv("ENABLE_EXPERIMENTAL_PASSES") == "1":
     @mb.program(input_specs=[mb.TensorSpec(shape=arbitrary_shape)])
@@ -87,8 +85,10 @@ def transform_pattern(pattern):
     linear_bias, bias, Dout = _get_linear_bias_bias_Dout(pattern, is_first_input)
     bias = np.reshape(bias, (Dout,))
 
-    if is_sub and is_first_input: bias = -bias
-    if is_sub and not is_first_input: linear_bias = -linear_bias
+    if is_sub and is_first_input:
+        bias = -bias
+    if is_sub and not is_first_input:
+        linear_bias = -linear_bias
 
     new_bias = linear_bias + bias
 
@@ -128,4 +128,4 @@ def transform_pattern(pattern):
         transform_pattern=transform_pattern,
         pass_name="fuse_linear_bias",
         namespace="common",
-    )
\ No newline at end of file
+    )
diff --git a/coremltools/converters/mil/experimental/passes/generic_pass_infrastructure.py b/coremltools/converters/mil/experimental/passes/generic_pass_infrastructure.py
index 7d44985c1..63ed80d99 100644
--- a/coremltools/converters/mil/experimental/passes/generic_pass_infrastructure.py
+++ b/coremltools/converters/mil/experimental/passes/generic_pass_infrastructure.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
@@ -10,7 +8,7 @@
 
 from ...mil.passes import pass_registry
 
-# IMPORTANT: List of Asssumptions we are making about the problem
+# IMPORTANT: List of assumptions we are making about the problem
 # 1) The user defined pattern has exactly one root variable, and one final output operation. As such, we will be searching for a singlular
 #    root variable in the larger program, and using that root variable as a starting point for our pattern matching.
 #    And, we will only match one of the final operations for the larger program.
@@ -19,7 +17,6 @@
 # 3) The outputs of an operation are stored in identical, predictable order. The child operations of an operation are stored in a random order.
 
 
-
 class Pattern:
 
     """This class will have references to all the ops that we have captured in the main, larger program.
@@ -69,9 +66,9 @@ def set_final_op(self, op_name, final_op):
     def add_attribute(self, attribute_name, attribute):
         if attribute_name in self.attribute_set:
             raise NameError("Pattern " + attribute_name + " is being overwritten. "
-                "Make sure every operation in your MIL pattern to detect "
-                "has a unique name, and that no operation in it or an attribute you are setting is named "
-                "root_var, block, final_op, op_set, or attribute_set.")
+                            "Make sure every operation in your MIL pattern to detect "
+                            "has a unique name, and that no operation in it or an attribute you are setting is named "
+                            "root_var, block, final_op, op_set, or attribute_set.")
         setattr(self, attribute_name, attribute)
 
     def add_op(self, op_name, op):
@@ -83,7 +80,7 @@ def op_list(self):
 
 def _lists_op_equality(oplist1, oplist2):
     if (len(oplist1) != len(oplist2)):
-        return False;
+        return False
 
     for i in range(len(oplist1)):
         if oplist1[i].op_type != oplist2[i].op_type:
@@ -133,7 +130,7 @@ def _pattern_detected(pattern, program_op, pattern_op, program_root_var, pattern
                     output_same = True
                     break
 
-        if output_same == False:
+        if output_same is False:
             return False
 
     if pattern_op is not None:
@@ -158,9 +155,9 @@ def _detect_pattern(program_op, ops_arrangement_root_var, block):
             # (except the last one)
             for op in pattern.op_list():
                if op is not pattern.final_op:
-                    for out in op.outputs:
-                        if out in pattern.block.outputs:
-                            return False, None
+                   for out in op.outputs:
+                       if out in pattern.block.outputs:
+                           return False, None
 
             return True, pattern
 
@@ -189,7 +186,7 @@ def _fuse_one_block(block, ops_arrangement, var_constraints, transform_pattern):
     return fusion_status
 
 
-def _fuse_all_blocks(ops_arrangement, var_constraints, transform_pattern, prog):
+def fuse_all_blocks(ops_arrangement, var_constraints, transform_pattern, prog):
     for f in prog.functions.values():
         block_changed = True
         while block_changed:
@@ -213,10 +210,11 @@ def add(self, pass_function):
         self.passes.append(pass_function)
 
 def register_generic_pass(ops_arrangement, var_constraints, transform_pattern, pass_name, namespace):
-    pass_function = partial(_fuse_all_blocks, ops_arrangement, var_constraints, transform_pattern)
+    pass_function = partial(fuse_all_blocks, ops_arrangement, var_constraints, transform_pattern)
 
     pass_id = namespace + "::" + pass_name
     if pass_id not in pass_registry.PASS_REGISTRY or not isinstance(pass_registry.PASS_REGISTRY[pass_id], PassContainer):
         pass_registry.PASS_REGISTRY.passes[pass_id] = PassContainer(pass_name)
 
     pass_registry.PASS_REGISTRY[pass_id].add(pass_function)
+
diff --git a/coremltools/converters/mil/frontend/_utils.py b/coremltools/converters/mil/frontend/_utils.py
index dec3891d8..7dd4defc4 100644
--- a/coremltools/converters/mil/frontend/_utils.py
+++ b/coremltools/converters/mil/frontend/_utils.py
@@ -65,7 +65,7 @@ def _swap(a, b):
         x = mb.matmul(x=a_var, y=b_var, transpose_x=False, transpose_y=False, name=name)
     elif parsed_vectors == ([0,1,2],[2,3,4],[0,1,3,4]) or parsed_vectors_rev == ([0,1,2],[2,3,4],[0,1,3,4]): # equation == "abc,cde->abde"
         if parsed_vectors_rev == ([0,1,2],[2,3,4],[0,1,3,4]):
-             a_var, b_var = _swap(a_var, b_var)
+            a_var, b_var = _swap(a_var, b_var)
         x_1 = mb.reshape(x=a_var, shape=[a_var.shape[0] * a_var.shape[1], a_var.shape[2]])
         x_2 = mb.reshape(x=b_var, shape=[b_var.shape[0], b_var.shape[1] * b_var.shape[2]])
         x = mb.matmul(x=x_1, y=x_2, transpose_x=False, transpose_y=False)
@@ -89,9 +89,7 @@ def _swap(a, b):
             a_var, b_var = _swap(a_var, b_var)
         x_1 = mb.reshape(x=a_var, shape=[a_var.shape[0], a_var.shape[1], a_var.shape[2] * a_var.shape[3]])
         x_2 = mb.reshape(x=b_var, shape=[b_var.shape[0] * b_var.shape[1], b_var.shape[2]])
-        x = mb.matmul(
-                x=x_1, y=x_2, transpose_x=False, transpose_y=False, name=name
-        )
+        x = mb.matmul(x=x_1, y=x_2, transpose_x=False, transpose_y=False, name=name)
     elif parsed_vectors == ([0,1,2,3],[0,3,2,4],[0,1,2,4]) or parsed_vectors_rev == ([0,1,2,3],[0,3,2,4],[0,1,2,4]): # equation == "nchw,nwhu->nchu"
         if parsed_vectors == ([0,1,2,3],[0,3,2,4],[0,1,2,4]):
             x = mb.einsum(values=(a_var, b_var), equation=equation, name=name)
diff --git a/coremltools/converters/mil/frontend/tensorflow/convert_utils.py b/coremltools/converters/mil/frontend/tensorflow/convert_utils.py
index d832e8a79..6c13dcf53 100644
--- a/coremltools/converters/mil/frontend/tensorflow/convert_utils.py
+++ b/coremltools/converters/mil/frontend/tensorflow/convert_utils.py
@@ -3,14 +3,16 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+from collections import defaultdict
 import logging
+
+from tqdm import tqdm as _tqdm
+
 from .basic_graph_ops import topsort
-from coremltools.converters.mil.mil.types.symbolic import is_symbolic, any_variadic
-from coremltools.converters.mil.mil import types
 from .tf_op_registry import _TF_OPS_REGISTRY
+from coremltools.converters.mil.mil import types
+from coremltools.converters.mil.mil.types.symbolic import is_symbolic, any_variadic
 from coremltools.converters.mil.mil.var import ListVar
-from collections import defaultdict
-from tqdm import tqdm as _tqdm
 
 
 def compatible_shapes(tf_shape, inf_shape):
@@ -22,7 +24,7 @@ def compare_elem(dt, ds):
         elif is_symbolic(ds):
             if is_symbolic(dt) and dt != ds:
                 logging.warning("Symbolic dim {} and {}".format(ds, dt) +\
-                        " assumed to be equal")
+                                " assumed to be equal")
             return True
         else:
             return False
@@ -175,19 +177,16 @@ def convert_graph(context, graph, outputs=None):
             "[{}/{}] Converting {} op '{}'".format(i + 1, num_nodes, node.op, node.name)
         )
 
-        if node.op == "NoOp":
-            continue
-
-        if node.op == "Assert":
+        if node.op in ("NoOp", "Assert"):
             continue
 
-        _add_op = _TF_OPS_REGISTRY.get(node.op, None)
-        if _add_op is None:
+        add_op = _TF_OPS_REGISTRY.get(node.op, None)
+        if add_op is None:
             msg = "Conversion for TF op '{0}' not implemented.\n \n{1}".format(
                 node.op, node.original_node
             )
             raise NotImplementedError(msg)
-        _add_op(context, node)
+        add_op(context, node)
 
         if len(node.outputs) > 0:
             # set_global / get_global / NoOp has no direct consumer / outputs
diff --git a/coremltools/converters/mil/frontend/tensorflow/converter.py b/coremltools/converters/mil/frontend/tensorflow/converter.py
index 41bd494cf..f954c9e3e 100644
--- a/coremltools/converters/mil/frontend/tensorflow/converter.py
+++ b/coremltools/converters/mil/frontend/tensorflow/converter.py
@@ -4,28 +4,26 @@
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 import logging
+
+from .basic_graph_ops import simple_topsort
+from .convert_utils import convert_graph
+from .ssa_passes.tf_passes import tensorflow_passes
 from coremltools.converters.mil.input_types import (
+    _get_shaping_class,
     InputType,
-    TensorType,
     ImageType,
     RangeDim,
-    _get_shaping_class,
+    Shape as InputShape,
+    TensorType
 )
-from coremltools.converters.mil.input_types import Shape as InputShape
 from coremltools.converters.mil.mil.var import Var
-from coremltools.converters.mil.mil import get_new_symbol
 from coremltools.converters.mil.mil.types.symbolic import is_symbolic
-from coremltools.converters.mil.mil.types import is_tensor
-
-from coremltools.converters.mil.mil import types
-from .basic_graph_ops import topsort, simple_topsort
-
-from .convert_utils import convert_graph
-
-from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil import Program
-from coremltools.converters.mil.mil import Function
-from .ssa_passes.tf_passes import tensorflow_passes
+from coremltools.converters.mil.mil import (
+    Builder as mb,
+    Function,
+    get_new_symbol,
+    Program
+)
 from coremltools.converters._profile_utils import _profile
 
 
diff --git a/coremltools/converters/mil/frontend/tensorflow/dialect_ops.py b/coremltools/converters/mil/frontend/tensorflow/dialect_ops.py
index 7e9f34fb3..eb8302331 100644
--- a/coremltools/converters/mil/frontend/tensorflow/dialect_ops.py
+++ b/coremltools/converters/mil/frontend/tensorflow/dialect_ops.py
@@ -42,7 +42,7 @@ def default_inputs(self):
             init_length=1,
             dynamic_length=True,
             dtype="fp32",
-            )
+        )
 
     def __init__(self, **kwargs):
         super(tf_make_list, self).__init__(**kwargs)
@@ -89,7 +89,7 @@ def default_inputs(self):
         return DefaultInputs(
             forget_bias=1.,
             use_peephole=False,
-            )
+        )
 
     def _check_peephole_weights(self):
         # Check weight_peep_*
diff --git a/coremltools/converters/mil/frontend/tensorflow/dot_visitor.py b/coremltools/converters/mil/frontend/tensorflow/dot_visitor.py
index 8f451a7b3..f60b611cd 100644
--- a/coremltools/converters/mil/frontend/tensorflow/dot_visitor.py
+++ b/coremltools/converters/mil/frontend/tensorflow/dot_visitor.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
@@ -102,7 +100,6 @@ def visit(self, graph, node, nodename_prefix=""):
                 + node.name
                 + '"'
             )
-            innode = graph[input_name]
             self.result.append(edge)
 
         for i in node.control_inputs:
@@ -118,7 +115,6 @@ def visit(self, graph, node, nodename_prefix=""):
                 + node.name
                 + '"'
             )
-            innode = graph[input_name]
             edge = edge + " [style=dotted]"
             self.result.append(edge)
 
diff --git a/coremltools/converters/mil/frontend/tensorflow/load.py b/coremltools/converters/mil/frontend/tensorflow/load.py
index 18077f5dc..91be00e70 100644
--- a/coremltools/converters/mil/frontend/tensorflow/load.py
+++ b/coremltools/converters/mil/frontend/tensorflow/load.py
@@ -16,6 +16,7 @@
 from .tf_graph_pass import (
     cond_to_where,
     constant_propagation,
+    delete_unnecessary_constant_nodes,
     delete_asserts,
     delete_disconnected_nodes,
     functionalize_loops,
@@ -197,6 +198,7 @@ def _program_from_tf_ssa(self):
             delete_asserts,
             functionalize_loops,
             constant_propagation,
+            delete_unnecessary_constant_nodes, # must come after constant_propagation
             quantization_pass,
             cond_to_where,
             remove_variable_nodes,
diff --git a/coremltools/converters/mil/frontend/tensorflow/naming_utils.py b/coremltools/converters/mil/frontend/tensorflow/naming_utils.py
index fb3a662a3..ebb94bc3c 100644
--- a/coremltools/converters/mil/frontend/tensorflow/naming_utils.py
+++ b/coremltools/converters/mil/frontend/tensorflow/naming_utils.py
@@ -1,12 +1,9 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 
-
 _varname_charset = set(
     [chr(i) for i in range(ord("A"), ord("Z") + 1)]
     + [chr(i) for i in range(ord("a"), ord("z") + 1)]
diff --git a/coremltools/converters/mil/frontend/tensorflow/ops.py b/coremltools/converters/mil/frontend/tensorflow/ops.py
index 48ed72668..dc0e97f0b 100644
--- a/coremltools/converters/mil/frontend/tensorflow/ops.py
+++ b/coremltools/converters/mil/frontend/tensorflow/ops.py
@@ -75,6 +75,7 @@ def _value_at(x, idx):
 def _freq_to_mel(freq):
     return 1127.0 * _np.log(1 + freq / 700.0)
 
+
 def _get_MFCC_constants(spectrogram_N,
                         sample_rate,
                         upper_frequency_limit,
@@ -131,7 +132,7 @@ def _get_MFCC_constants(spectrogram_N,
         else:
             if channel >= 0:
                 weights[i] = (center_frequencies[channel + 1] - _freq_to_mel(i * hz_per_sbin)) / (
-                            center_frequencies[channel + 1] - center_frequencies[channel])
+                    center_frequencies[channel + 1] - center_frequencies[channel])
             else:
                 weights[i] = (center_frequencies[0] - _freq_to_mel(i * hz_per_sbin)) / (center_frequencies[0] - mel_low)
 
@@ -176,7 +177,7 @@ def AddN(context, node):
         if var == values[-1]:
             x = mb.add(x=prev_var, y=var, name=node.name)
         else:
-            prev_var = mb.add(x=prev_var, y=var, name=node.name+"_tmpAddN_"+str(idx))
+            prev_var = mb.add(x=prev_var, y=var, name=node.name + "_tmpAddN_" + str(idx))
     context.add(node.name, x)
 
 
@@ -495,6 +496,7 @@ def Einsum(context, node):
     x = build_einsum_mil(a, b, equation, node.name)
     context.add(node.name, x)
 
+
 @register_tf_op
 def Equal(context, node):
     x = context[node.inputs[0]]
@@ -640,6 +642,11 @@ def Log(context, node):
     x = mb.log(x=x, name=node.name)
     context.add(node.name, x)
 
+@register_tf_op
+def Log1p(context, node):
+    # Lower tf Log1p onto mb.log with epsilon=1. (presumably log(x + epsilon) -- TODO confirm)
+    log1p_var = mb.log(x=context[node.inputs[0]], epsilon=1., name=node.name)
+    context.add(node.name, log1p_var)
 
 @register_tf_op
 def LogicalAnd(context, node):
@@ -994,7 +1001,7 @@ def Conv3DBackpropInputV2(context, node):
         x = _transpose_NDHWC_to_NCDHW(x)
         if output_shape is not None:
             output_shape = [output_shape[0], output_shape[4],
-                output_shape[1], output_shape[2], output_shape[3]]
+                            output_shape[1], output_shape[2], output_shape[3]]
 
     # Only the last op should have the same name as node.name
     conv_name = node.name + "_x" if data_format == "NDHWC" else node.name
@@ -1036,6 +1043,7 @@ def EuclideanNorm(context, node):
     x = mb.reduce_l2_norm(x=x, axes=axes, keep_dims=keep_dims, name=node.name)
     context.add(node.name, x)
 
+
 @register_tf_op
 def ExpandDims(context, node):
     x = context[node.inputs[0]]
@@ -1135,7 +1143,7 @@ def ImageProjectiveTransformV2(context, node):
     context.add(node.name, x)
 
 
-@register_tf_op
+@register_tf_op(tf_alias=["DivNoNan"])
 def RealDiv(context, node):
     x = context[node.inputs[0]]
     y = context[node.inputs[1]]
@@ -1418,6 +1426,23 @@ def Square(context, node):
     x = mb.mul(x=x, y=x, name=node.name)
     context.add(node.name, x)
 
+@register_tf_op
+def SparseSoftmaxCrossEntropyWithLogits(context, node):
+    """Build the loss as -sum(one_hot(labels) * log_softmax(feats)) over the last axis."""
+    logits = context[node.inputs[0]]
+    sparse_labels = context[node.inputs[1]]
+
+    # logits.shape[1] is used as the one-hot vector size, i.e. the number of classes
+    dense_labels = mb.one_hot(indices=sparse_labels, one_hot_vector_size=logits.shape[1])
+
+    # compute the log softmax: x - reduce_log_sum_exp(x) along the last axis
+    log_partition = mb.reduce_log_sum_exp(x=logits, axes=[-1], keep_dims=True)
+    log_softmax = mb.sub(x=logits, y=log_partition)
+
+    # negate the label-weighted log softmax; only the final op carries node.name
+    weighted = mb.mul(x=dense_labels, y=log_softmax)
+    negated = mb.mul(x=-1, y=weighted)
+    context.add(node.name, mb.reduce_sum(x=negated, axes=[-1], name=node.name))
 
 @register_tf_op
 def StridedSlice(context, node):
@@ -1471,7 +1496,7 @@ def _pad_mask(
         stride = [] if stride is None else stride.val.tolist()
 
         # pad masks function
-        new_dims = sum(i == True for i in new_axis_mask)
+        new_dims = sum(i is True for i in new_axis_mask)
         if new_dims > 0:
             x_rank = x.rank + new_dims
         else:
@@ -1572,8 +1597,8 @@ def pad_array(arr, max_rank, idx, default_value):
         new_axis_mask,
     )
 
-    if sum(i == True for i in new_axis_mask) > 0:
-        axes = [i for i, val in enumerate(new_axis_mask) if val == True]
+    if sum(i is True for i in new_axis_mask) > 0:
+        axes = [i for i, val in enumerate(new_axis_mask) if val is True]
         x = mb.expand_dims(x=x, axes=axes, name=node.name + "_new_axes")
 
     x = mb.slice_by_index(
@@ -1595,8 +1620,12 @@ def Sum(context, node):
     x = context[node.inputs[0]]
     axes = _check_axes_type(context[node.inputs[1]])
     keep_dims = node.attr.get("keep_dims", False)
-    x = mb.reduce_sum(x=x, axes=axes, keep_dims=keep_dims, name=node.name)
-    context.add(node.name, x)
+    input_type = x.sym_type
+    if _is_scalar(input_type):
+        context.add(node.name, x, is_new_var=False)
+    else:
+        x = mb.reduce_sum(x=x, axes=axes, keep_dims=keep_dims, name=node.name)
+        context.add(node.name, x)
 
 
 @register_tf_op
@@ -1636,7 +1665,7 @@ def MatrixDiag(context, node):
         raise NotImplementedError('Only support MatrixDiag op with input rank = 1.')
     length = mb.shape(x=x)
     x = mb.expand_dims(x=x, axes=[0])
-    reps = mb.concat(values=[length,[1]], axis=0)
+    reps = mb.concat(values=[length, [1]], axis=0)
     x = mb.tile(x=x, reps=reps)
     x = mb.band_part(x=x, lower=0, upper=0, name=node.name)
     context.add(node.name, x)
@@ -1874,11 +1903,11 @@ def Select(context, node):
     if rank_cond == 1 and rank_a > 1:
         axes = [-i - 1 for i in range(rank_a - rank_cond)]
         cond = mb.expand_dims(x=cond, axes=axes)
-        
+
     if not types.is_bool(cond.dtype):
         # cond must be bool type
         cond = mb.cast(x=cond, dtype="bool")
-        
+
     x = mb.select(cond=cond, a=a, b=b, name=node.name)
     context.add(node.name, x)
 
@@ -2083,8 +2112,8 @@ def Tile(context, node):
 @register_tf_op
 def Where(context, node):
     if len(node.inputs) > 1:
-        raise NotImplementedError('tf.where with x,y will be supported by '+\
-                'MIL::select in the future')
+        raise NotImplementedError('tf.where with x,y will be supported by '
+                                  'MIL::select in the future')
     x = context[node.inputs[0]]
     # rdar://78409794 (Remove cast in tf Where op lowering after rdar://77514629
     # goes into MIL build)
@@ -2132,7 +2161,7 @@ def Conv2DBackpropInput(context, node):
         x = _transpose_NHWC_to_NCHW(x)
         if output_shape is not None:
             output_shape = [output_shape[0], output_shape[3],
-                output_shape[1], output_shape[2]]
+                            output_shape[1], output_shape[2]]
 
     # Only the last op should have the same name as node.name
     conv_name = node.name + "x" if data_format == "NHWC" else node.name
@@ -2303,8 +2332,8 @@ def ResizeNearestNeighbor(context, node):
     else:
         raise NotImplementedError(
             "ResizeNearestNeighbor op with align_corners={}and half_pixel_centers={} not supported".format(
-                    align_corners, half_pixel_centers
-                )
+                align_corners, half_pixel_centers
+            )
         )
 
     # transpose again
@@ -3007,7 +3036,7 @@ def AudioSpectrogram(context, node):
     fout = fout.astype(_np.int)
 
     # construct constant for hann window tensor, of shape (window_size,)
-    h = _np.arange(window_size) * ((2*_np.pi) / window_size)
+    h = _np.arange(window_size) * ((2 * _np.pi) / window_size)
     h = 0.5 - 0.5 * _np.cos(h)
 
     # construct the constant DFT matrices
diff --git a/coremltools/converters/mil/frontend/tensorflow/parsed_tf_node.py b/coremltools/converters/mil/frontend/tensorflow/parsed_tf_node.py
index 34ad0985d..337c66dd4 100644
--- a/coremltools/converters/mil/frontend/tensorflow/parsed_tf_node.py
+++ b/coremltools/converters/mil/frontend/tensorflow/parsed_tf_node.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py
index 22b10682b..60a4715bc 100644
--- a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py
+++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py
@@ -51,8 +51,7 @@ def _backfill_make_list_elem_type_block(block):
         with block:
             # elem_shape can be runtime-detemrined, which cannot be inferred here at this point,
             # so we add an internal _const_symbolic node to cover both static and dynamic cases.
-            elem_shape = [dim.name if is_symbolic(dim) else dim for dim in
-                elem_type.get_shape()]
+            elem_shape = [dim.name if is_symbolic(dim) else dim for dim in elem_type.get_shape()]
             new_list = mb.make_list(
                 init_length=op.init_length,
                 dynamic_length=op.dynamic_length,
diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py
index 4c253d58c..60edda24a 100644
--- a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py
+++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py
@@ -1,17 +1,16 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import logging
 
-from coremltools.converters.mil.mil.passes.pass_registry import register_pass
-from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
-from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil import types
 import numpy as np
-import logging
+
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+
 
 @register_pass(namespace="tensorflow")
 class expand_tf_lstm(AbstractGraphPass):
diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py
index e4ed522cf..ea3717614 100644
--- a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py
+++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py
@@ -1,17 +1,15 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import logging
 
-from coremltools.converters.mil.mil.passes.pass_registry import register_pass
-from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
-from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil import types
 import numpy as np
-import logging
+
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 
 
 @register_pass(namespace="tensorflow")
@@ -31,8 +29,6 @@ class tf_lstm_to_core_lstm(AbstractGraphPass):
     - If tf_lstm_block_cell: only cs, h output (outputs[1], outputs[6])
       are consumed. Similar to above.
 
-    - batch size == 1
-
     Inputs:
 
         prog: Program
@@ -71,12 +67,6 @@ def _check_unsupported_outputs(unsupported_outputs):
                 return False
         return True
 
-
-    if op.op_type == "tf_lstm_block_cell":
-        batch = op.x.shape[0]
-    else:  # tf_lstm_block
-        batch = op.x.shape[1]
-
     # Check for unsupported configuration : When peephole is present
     if op.use_peephole.val:
         return False
diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_custom_ops.py b/coremltools/converters/mil/frontend/tensorflow/test/test_custom_ops.py
index ec0ac6778..22b3af499 100644
--- a/coremltools/converters/mil/frontend/tensorflow/test/test_custom_ops.py
+++ b/coremltools/converters/mil/frontend/tensorflow/test/test_custom_ops.py
@@ -236,7 +236,6 @@ def CustomTopK(context, node):
 
         _TF_OPS_REGISTRY["TopKV2"] = default_tf_topk
 
-
     @pytest.mark.skipif(not testing_reqs._HAS_TF_1, reason=MSG_TF1_NOT_FOUND)
     @pytest.mark.parametrize(
         "use_cpu_only, backend, rank, k",
@@ -267,5 +266,5 @@ def test_tf(self, use_cpu_only, backend, rank, k):
                 k == layers[-1].custom.parameters["k"].intValue
             ), "Incorrect parameter value k"
             assert (
-                True == layers[-1].custom.parameters["sorted"].boolValue
+                layers[-1].custom.parameters["sorted"].boolValue is True
             ), "Incorrect parameter value for Sorted"
diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_load.py b/coremltools/converters/mil/frontend/tensorflow/test/test_load.py
index 35f12061c..6bd87c5a1 100644
--- a/coremltools/converters/mil/frontend/tensorflow/test/test_load.py
+++ b/coremltools/converters/mil/frontend/tensorflow/test/test_load.py
@@ -3,11 +3,14 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-import numpy as np
+
 import os
-import pytest
-import shutil
 import tempfile
+import shutil
+
+import numpy as np
+import pytest
+
 import coremltools as ct
 import coremltools.converters as converter
 from coremltools._deps import _IS_MACOS
@@ -195,7 +198,7 @@ def build_model(x):
             e.match(r"Unable to determine the shape of input .*")
 
         mlmodel = converter.convert(model, source=frontend,
-                inputs=[ct.TensorType(shape=(1,))])
+                                    inputs=[ct.TensorType(shape=(1,))])
         assert mlmodel is not None
 
     @pytest.mark.xfail(reason="Rank-0 input is not supported", run=True)
@@ -225,10 +228,12 @@ def build_flexible_model(x):
 
         # static-Flexible shape
         mlmodel = converter.convert(
-            model, inputs=[
+            model,
+            inputs=[
                 # Use TF's input shapes (None, 4, 5)
-                TensorType(name=input_name)],
-                outputs=[output_name]
+                TensorType(name=input_name)
+            ],
+            outputs=[output_name]
         )
         assert mlmodel is not None
         input_values = [random_gen((3, 4, 5), -10.0, 10.0)]
@@ -278,7 +283,7 @@ def build_flexible_model(x):
             np.allclose(ret[output_name], np.maximum(input_values[0], 0.0))
 
         if _IS_MACOS:
-            with pytest.raises(RuntimeError) as e:
+            with pytest.raises(RuntimeError):
                 input_values = [random_gen((2, 4, 5), -10.0, 10.0)]
                 input_dict = {input_name: input_values[0]}
                 ret = mlmodel.predict(input_dict)
@@ -422,7 +427,6 @@ def test_invalid_converter_minimum_deployment_flag(self):
     def test_invalid_converter_target(self):
         with tf.Graph().as_default() as graph:
             x = tf.placeholder(tf.float32, shape=(3, 4, 5))
-            out = tf.nn.relu(x)
         with pytest.raises(NotImplementedError) as e:
             converter.convert(graph, convert_to="invalid", source="tensorflow")
         e.match(r"Backend converter .* not implemented")
diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py
index ccf8a3c46..f2e5cb1bb 100644
--- a/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py
+++ b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py
@@ -212,6 +212,7 @@ def test(self, use_cpu_only, backend, rank):
             pytest.xfail('Rank 0 not supported by CoreML runtime')
 
         input_shape = np.random.randint(low=1, high=4, size=rank)
+
         @make_tf_graph([input_shape])
         def build_model(x):
             return x
@@ -556,6 +557,32 @@ def build_model(x):
         )
 
 
+class Testlog1p(TensorFlowBaseTest):
+    """Compare the converted tf.math.log1p graph against TensorFlow on random inputs."""
+
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, rank",
+        itertools.product([True, False], backends, [1, 3, 5]),
+    )
+    def test(self, use_cpu_only, backend, rank):
+        input_shape = np.random.randint(low=1, high=4, size=rank)
+
+        @make_tf_graph([input_shape])
+        def build_model(x):
+            return tf.math.log1p(x)
+
+        model, inputs, outputs = build_model
+
+        # one random tensor per graph input, generated with bounds 0.0 and 2.0
+        input_values = [random_gen(input_shape, 0.0, 2.0)]
+        TensorFlowBaseTest.run_compare_tf(
+            model,
+            dict(zip(inputs, input_values)),
+            outputs,
+            use_cpu_only=use_cpu_only, frontend_only=False, backend=backend,
+        )
+
+
 class TestSelect(TensorFlowBaseTest):
     @pytest.mark.parametrize(
         "use_cpu_for_conversion, backend, rank, broadcast, dynamic",
@@ -1768,14 +1795,15 @@ def build_model(x):
                 pass
 
             return tf.nn.conv3d_transpose(
-                    x,
-                    weight,
-                    output_shape=output_shape,
-                    strides=strides,
-                    padding=padding,
-                    dilations=dilations,
-                    data_format=data_format,
-                )
+                x,
+                weight,
+                output_shape=output_shape,
+                strides=strides,
+                padding=padding,
+                dilations=dilations,
+                data_format=data_format,
+            )
+
         model, inputs, outputs = build_model
 
         input_values = [(np.random.rand(*input_shape).astype(np.float32))]
@@ -1890,7 +1918,7 @@ def build_model(x, y):
         ),
     )
     def test_binary_compare(self, use_cpu_for_conversion, backend, rank, tf_op,
-            broadcast_case):
+                            broadcast_case):
         if rank == 0 or broadcast_case == 0:
             pytest.xfail("Rank-0 input is not supported")
 
@@ -1949,7 +1977,7 @@ def build_model(x, y):
         ),
     )
     def test_binary_logical(self, use_cpu_for_conversion, backend, rank, tf_op,
-            broadcast_case):
+                            broadcast_case):
         if rank == 0 or broadcast_case == 0:
             pytest.xfail("Rank-0 input is not supported")
 
@@ -2290,11 +2318,11 @@ def test_resize_bilinear(
         @make_tf_graph([input_shape])
         def build_model(x):
             return tf.raw_ops.ResizeBilinear(
-                    images=x,
-                    size=target_shape,
-                    half_pixel_centers=half_pixel_centers,
-                    align_corners=align_corners,
-                )
+                images=x,
+                size=target_shape,
+                half_pixel_centers=half_pixel_centers,
+                align_corners=align_corners,
+            )
 
         model, inputs, outputs = build_model
         input_values = [random_gen(input_shape, -100, 100)]
@@ -2332,7 +2360,7 @@ def test_upsampling_2d(
         def build_model(x):
             return tf.keras.layers.UpSampling2D(
                     size=upsample_factor, data_format=data_format, interpolation="nearest"
-                )(x)
+            )(x)
 
         model, inputs, outputs = build_model
         input_values = [random_gen(input_shape, -100, 100)]
@@ -3362,7 +3390,7 @@ def build_model(x, indices):
 
         axis = 0 if mode == "Gather" else axis
         input_dict = {inputs[0]: np.random.rand(*x_shape).astype(np.float32),
-                       inputs[1]: np.random.randint(0, x_shape[axis], size=indices_shape, dtype=np.int32)}
+                      inputs[1]: np.random.randint(0, x_shape[axis], size=indices_shape, dtype=np.int32)}
 
         TensorFlowBaseTest.run_compare_tf(
             model,
@@ -4138,7 +4166,6 @@ def build_model(x):
                            frontend_only=False,
                            backend=backend)
 
-
         def test_tf_dynamic():
             shape = np.random.randint(low=1, high=3, size=rank)
             @make_tf_graph([(len(shape), tf.int32)])
@@ -4191,6 +4218,8 @@ def test_non_max_suppression(
     ):
         if backend == ("mlprogram", "fp16") and not use_cpu_only:
             pytest.xfail("rdar://80661262 ([GPU failures ] NonMaximumSuppression FP16 coremltools unit tests)")
+        if backend == ("mlprogram", "fp16") and use_cpu_only:
+            pytest.xfail("rdar://86581713 ([MIL / FP16 / CPU only] NonMaximumSuppression appears to be swapping output values")
 
         boxes_val = random_gen(shape=(num_boxes, 4), rand_min=0, rand_max=32)
         scores_val = random_gen(shape=(num_boxes,), rand_min=-100, rand_max=100)
@@ -4269,6 +4298,32 @@ def build_model(x, depth_input):
                use_cpu_only=use_cpu_only,
                frontend_only=False, backend=backend)
 
+class TestSparseSoftmaxCrossEntropyWithLogits(TensorFlowBaseTest):
+    """Compare the converted SparseSoftmaxCrossEntropyWithLogits op against TensorFlow."""
+
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, class_num",
+        itertools.product([True, False], backends, [1, 3]),
+    )
+    def test(self, use_cpu_only, backend, class_num):
+        batch_size = 2
+
+        # presumably a trailing dtype in a shape list sets that input's dtype (int32 labels here)
+        @make_tf_graph([[batch_size, class_num], [batch_size, tf.int32]])
+        def build_model(feat, label):
+            # only element 0 of the raw op's outputs is compared (presumably the loss -- confirm)
+            return tf.raw_ops.SparseSoftmaxCrossEntropyWithLogits(features=feat, labels=label)[0]
+
+        model, inputs, outputs = build_model
+
+        # features are drawn first, then integer labels in [0, class_num)
+        features = np.random.rand(batch_size, class_num)
+        labels = np.random.randint(low=0, high=class_num, size=(batch_size,), dtype=np.int32)
+        input_dict = dict(zip(inputs, [features, labels]))
+        TensorFlowBaseTest.run_compare_tf(
+            model, input_dict, outputs,
+            use_cpu_only=use_cpu_only, frontend_only=False, backend=backend,
+        )
 
 class TestPad(TensorFlowBaseTest):
     @pytest.mark.parametrize("use_cpu_only, backend, rank, mode, dynamic, trial",
@@ -4576,6 +4631,9 @@ class TestSplit(TensorFlowBaseTest):
         itertools.product([True, False], backends, [1, 2, 3, 4], [True, False]),
     )
     def test_split(self, use_cpu_for_conversion, backend, rank, dynamic):
+        if dynamic:
+            pytest.xfail("rdar://85318486 (Python unit tests on Split layer failing for both NNv1 and MIL backends)")
+
         if backend[0] == "mlprogram" and not use_cpu_for_conversion:
             pytest.xfail("rdar://80397986")
 
@@ -4712,7 +4770,6 @@ def build_model(x):
     @pytest.mark.parametrize(
         "use_cpu_only, backend, shape", itertools.product([True, False], backends, [[3, 1], [4, 3]])
     )
-
     def test_unstack_and_stack(self, use_cpu_only, backend, shape):
         @make_tf_graph([shape])
         def build_model(x):
@@ -5017,18 +5074,22 @@ def test(self, use_cpu_only, backend, length, dynamic):
             a, b = np.prod(input_shape[:2]), np.prod(input_shape[2:])
             size = np.array([a,b]).astype(np.int32)
             reshape_shape = [2]
+
             @make_tf_graph([input_shape, reshape_shape+[tf.int32]])
             def build_model(x, reshape):
                 x = tf.reshape(x, reshape)
                 x = tf.reshape(x, [-1])
                 return tf.raw_ops.MatrixDiag(diagonal=x)
+
             model, inputs, outputs = build_model
             input_values = [random_gen(input_shape, -1, 1), size]
         else:
             input_shape = [length]
+
             @make_tf_graph([input_shape])
             def build_model(x):
                 return tf.raw_ops.MatrixDiag(diagonal=x)
+
             model, inputs, outputs = build_model
             input_values = [random_gen(input_shape, -1, 1)]
 
@@ -5696,17 +5757,15 @@ def test_tf_no_variable(
                 backend=backend,
             )
 
-    @pytest.mark.xfail(
-        reason="Revert the assumption of invoking set_global before get_global: <rdar://problem/63326545>",
-        run=False,
-    )
     @pytest.mark.parametrize(
         "use_cpu_only, backend, batch",
-        itertools.product([True, False], backends, [1, 2],),
+        itertools.product([True], backends, [1, 2],),
     )
     def test_tf_lstm_block_cell(self, use_cpu_only, backend, batch):
+        # tf.contrib.rnn.LSTMBlockCell runs a single step of an LSTM. Handling inputs with a
+        # sequence length greater than 1 would require wrapping it in a for loop; for that case,
+        # use tf.contrib.rnn.LSTMBlockFusedCell instead.
         input_dim, hidden_dim = 2, 3
-        # [timelen, batch_size, num_inputs]
         x_shape = (batch, input_dim)
         init_h = np.random.rand(batch, hidden_dim).astype(np.float32)
         init_c = np.random.rand(batch, hidden_dim).astype(np.float32)
@@ -5717,7 +5776,7 @@ def test_tf_lstm_block_cell(self, use_cpu_only, backend, batch):
             )
             res = rnn_cell(x, (init_h, init_c))
             cs_new, h_new = res[1][0], res[1][1]
-            res = [h_new, cs_new]
+            res = [h_new, cs_new] # shape of h_new, cs_new: (batch_dim, hidden_dim)
 
             TensorFlowBaseTest.run_compare_tf(
                 graph,
@@ -5730,6 +5789,97 @@ def test_tf_lstm_block_cell(self, use_cpu_only, backend, batch):
                 freeze_graph=True,
             )
 
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, batch_size",
+        itertools.product([True], backends, [1, 2],),
+    )
+    def test_tf_lstm_block_fused_cell(self, use_cpu_only, backend, batch_size):
+        # LSTMBlockFusedCell runs an LSTM over a full input sequence; it should stay one fused op
+        input_dim, hidden_dim = 4, 3
+        seq_length = 5
+        init_h = np.zeros((batch_size, hidden_dim)).astype(np.float32)
+        init_c = np.zeros((batch_size, hidden_dim)).astype(np.float32)
+        x_shape = (seq_length, batch_size, input_dim)
+        with tf.Graph().as_default() as graph:
+            lstm_cell = tf.contrib.rnn.LSTMBlockFusedCell(
+                num_units=hidden_dim,
+                forget_bias=2.0,
+                cell_clip=None,
+                use_peephole=False,
+            )
+
+            x = tf.placeholder(tf.float32, shape=x_shape)
+            # output has shape (seq_length, batch_size, hidden_dim)
+            # output_state is a tuple of two (batch_size, hidden_dim) tensors; it is not used below
+            output, output_state = lstm_cell(
+                inputs=x,
+                initial_state=(init_c, init_h),
+            )
+            output = tf.nn.relu(output)
+
+            res = TensorFlowBaseTest.run_compare_tf(
+                graph,
+                {x: np.random.rand(*x_shape).astype(np.float32),},
+                output,
+                use_cpu_only=use_cpu_only,
+                frontend_only=False,
+                backend=backend,
+                # variables need to be frozen, hence freeze_graph=True
+                freeze_graph=True,
+            )
+
+            # res[1] is treated as the converted model; its MIL program must hold exactly one lstm op
+            coreml_model = res[1]
+            mil_prog = coreml_model._get_mil_internal()
+            assert len(mil_prog.find_ops(op_type="lstm")) == 1
+
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend",
+        itertools.product([True, False], backends,),
+    )
+    def test_tf_multiple_lstm_block_fused_cell(self, use_cpu_only, backend):
+        '''
+        Define a network with a stack of fused LSTM ops:
+
+        %input (shape: (Seq, Batch, idim) == (5, 2, 4))
+        %x1 = LSTM(h=10) (%input) # shape = (5, 2, 10)
+        %x2 = LSTM(h=20) (%x1) # shape = (5, 2, 20)
+        %x3 = slice()(%x2) # shape = (1, 2, 20), to get the final seq value
+        %x4 = reshape((1, -1)) (%x3) # shape = (1, 40)
+        %x5 = Dense(h=3)(%x4) # shape = (1, 3)
+        '''
+        input_dim = 4
+        seq_length = 5
+        batch_size = 2
+        x_shape = (seq_length, batch_size, input_dim)
+
+        with tf.Graph().as_default() as graph:
+            x = tf.placeholder(tf.float32, shape=x_shape)  # shape = (5, 2, 4)
+            lstm_cell_1 = tf.contrib.rnn.LSTMBlockFusedCell(num_units=10)
+            x1, _ = lstm_cell_1(x, dtype=tf.float32)  # shape = (5, 2, 10)
+            lstm_cell_2 = tf.contrib.rnn.LSTMBlockFusedCell(num_units=20)
+            x2, _ = lstm_cell_2(x1, dtype=tf.float32)  # shape = (5, 2, 20)
+            x3 = tf.slice(x2, begin=[4, 0, 0], size=[1, 2, 20])  # shape = (1, 2, 20)
+            x4 = tf.reshape(x3, shape=(1, -1))  # shape = (1, 40)
+            # arange(1, 40 * 3 + 1) yields exactly the 120 values that the [40, 3] weight needs
+            x5 = tf.linalg.matmul(x4, tf.constant(np.arange(1, 40 * 3 + 1, dtype=np.float32), shape=[40, 3]))  # shape = (1, 3)
+
+            res = TensorFlowBaseTest.run_compare_tf(
+                graph,
+                {x: np.random.rand(*x_shape).astype(np.float32)},
+                x5,
+                use_cpu_only=use_cpu_only,
+                frontend_only=False,
+                backend=backend,
+                # variable needs to be frozen
+                freeze_graph=True,
+            )
+
+            # check that the resulting program has the LSTM block ops as fused ops
+            coreml_model = res[1]
+            mil_prog = coreml_model._get_mil_internal()
+            assert len(mil_prog.find_ops(op_type="lstm")) == 2
+
 
 class TestVariable(TensorFlowBaseTest):
     @pytest.mark.xfail(reason="Investigate get_global <rdar://79621723>", run=False)
@@ -5876,6 +6026,7 @@ def test(self, use_cpu_only, backend):
             pytest.xfail("operation is ill-conditioned on FP16")
         input_shape = (5, 20)
         input_value = random_gen(input_shape, rand_min=-10, rand_max=10)
+
         @make_tf_graph([input_shape])
         def build_model(x):
             return tf.math.log_softmax(x)
@@ -5903,6 +6054,7 @@ def test(self, use_cpu_only, backend, rank, min_and_max):
         input_shape = np.random.randint(low=2, high=4, size=rank)
         min_val, max_val = min_and_max
         input_value = random_gen(input_shape, rand_min=min_val-1, rand_max=max_val+1)
+
         @make_tf_graph([input_shape])
         def build_model(x):
             return tf.raw_ops.ClipByValue(t=x, clip_value_min=min_val, clip_value_max=max_val)
@@ -6016,7 +6168,6 @@ def test_mfcc(self, use_cpu_only, backend, params):
         filterbank_channel_count = params[5]
         dct_coefficient_count = params[6]
 
-
         @make_tf_graph([input_shape])
         def build_model(x):
             y = tf.raw_ops.AudioSpectrogram(input=x,
diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_parse.py b/coremltools/converters/mil/frontend/tensorflow/test/test_parse.py
index 41adf5095..892ffa680 100644
--- a/coremltools/converters/mil/frontend/tensorflow/test/test_parse.py
+++ b/coremltools/converters/mil/frontend/tensorflow/test/test_parse.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_parsed_tf_node.py b/coremltools/converters/mil/frontend/tensorflow/test/test_parsed_tf_node.py
index 05b467a55..355c48bb5 100644
--- a/coremltools/converters/mil/frontend/tensorflow/test/test_parsed_tf_node.py
+++ b/coremltools/converters/mil/frontend/tensorflow/test/test_parsed_tf_node.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
diff --git a/coremltools/converters/mil/frontend/tensorflow/test/testing_utils.py b/coremltools/converters/mil/frontend/tensorflow/test/testing_utils.py
index 223898ba7..f56a6f5bb 100644
--- a/coremltools/converters/mil/frontend/tensorflow/test/testing_utils.py
+++ b/coremltools/converters/mil/frontend/tensorflow/test/testing_utils.py
@@ -224,6 +224,10 @@ def run_compare_tf(
         The relative tolerance parameter.
     validate_shapes_only: bool
         If true, skip element-wise value comparision.
+    freeze_graph: bool
+        If True, use the "tensorflow.python.tools.freeze_graph" function
+        to freeze the TF graph prior to conversion. This will ensure that
+        all the variables in the graph have been converted to constants.
     tf_outputs: float or list[float]
         If present, use it as TensorFlow predictions
 
@@ -234,6 +238,39 @@ def run_compare_tf(
         # use_cpu_for_conversion = True && use_cpu_only = False
         raise ValueError("use_cpu_for_conversion = True && use_cpu_only = False is an invalid test case")
 
+    if not isinstance(output_nodes, (tuple, list)):
+        output_nodes = [output_nodes]
+
+    if freeze_graph:
+        with tempfile.TemporaryDirectory() as model_dir:
+            graph_def_file = os.path.join(model_dir, "tf_graph.pb")
+            checkpoint_file = os.path.join(model_dir, "tf_model.ckpt")
+            static_model_file = os.path.join(model_dir, "tf_static.pb")
+
+            with tf.Session(graph=graph) as sess:
+                sess.run(tf.global_variables_initializer())
+                if tf_outputs is None:
+                    tf_outputs = sess.run(output_nodes, feed_dict=feed_dict)
+                tf.train.write_graph(sess.graph, model_dir, graph_def_file, as_text=False)
+                saver = tf.train.Saver()
+                saver.save(sess, checkpoint_file)
+                output_node_names = get_tf_node_names(output_nodes, mode="outputs")
+                output_node_names = [name.split(":")[0] for name in output_node_names]
+                output_op_names = ",".join(output_node_names)
+                freeze_g(
+                    input_graph=graph_def_file,
+                    input_saver="",
+                    input_binary=True,
+                    input_checkpoint=checkpoint_file,
+                    output_node_names=output_op_names,
+                    restore_op_name="save/restore_all",
+                    filename_tensor_name="save/Const:0",
+                    output_graph=static_model_file,
+                    clear_devices=True,
+                    initializer_nodes="",
+                )
+            graph = load_tf_pb(static_model_file)
+
     mlmodel, input_key_values, output_names, output_nodes = tf_graph_to_mlmodel(
         graph, feed_dict, output_nodes, frontend, backend, use_cpu_for_conversion=use_cpu_for_conversion,
     )
@@ -242,46 +279,11 @@ def run_compare_tf(
        or (mlmodel.is_package and coremltoolsutils._macos_version() < (12, 0)):
         return mlmodel._spec, mlmodel, input_key_values, None
 
-    if not isinstance(output_nodes, (tuple, list)):
-        output_nodes = [output_nodes]
-
-    if freeze_graph:
-        model_dir = tempfile.mkdtemp()
-        graph_def_file = os.path.join(model_dir, "tf_graph.pb")
-        checkpoint_file = os.path.join(model_dir, "tf_model.ckpt")
-        static_model_file = os.path.join(model_dir, "tf_static.pb")
-        coreml_model_file = os.path.join(model_dir, "coreml_model.mlmodel")
-
+    if tf_outputs is None:
         with tf.Session(graph=graph) as sess:
             sess.run(tf.global_variables_initializer())
             tf_outputs = sess.run(output_nodes, feed_dict=feed_dict)
 
-            tf.train.write_graph(sess.graph, model_dir, graph_def_file, as_text=False)
-            saver = tf.train.Saver()
-            saver.save(sess, checkpoint_file)
-            freeze_g(
-                input_graph=graph_def_file,
-                input_saver="",
-                input_binary=True,
-                input_checkpoint=checkpoint_file,
-                output_node_names=",".join([n.op.name for n in output_nodes]),
-                restore_op_name="save/restore_all",
-                filename_tensor_name="save/Const:0",
-                output_graph=static_model_file,
-                clear_devices=True,
-                initializer_nodes="",
-            )
-        graph = load_tf_pb(static_model_file)
-
-        # Need to convert again using frozen graph
-        mlmodel, input_key_values, output_names, output_nodes = tf_graph_to_mlmodel(
-            graph, feed_dict, output_nodes, frontend, backend
-        )
-    else:
-        if not tf_outputs:
-            with tf.Session(graph=graph) as sess:
-                sess.run(tf.global_variables_initializer())
-                tf_outputs = sess.run(output_nodes, feed_dict=feed_dict)
     expected_outputs = {name: val for name, val in zip(output_names, tf_outputs)}
 
     for k,v in input_key_values.items():
@@ -345,13 +347,13 @@ def run_compare_tf(graph, feed_dict, output_nodes, use_cpu_only=False,
                        validate_shapes_only=False, freeze_graph=False,
                        tf_outputs=None):
         res = run_compare_tf(graph, feed_dict, output_nodes,
-                        use_cpu_only=use_cpu_only,
-                        use_cpu_for_conversion=use_cpu_for_conversion,
-                       frontend_only=frontend_only, frontend=frontend,
-                       backend=backend, atol=atol,
-                       rtol=rtol,
-                       validate_shapes_only=validate_shapes_only,
-                       freeze_graph=freeze_graph, tf_outputs=tf_outputs)
+                             use_cpu_only=use_cpu_only,
+                             use_cpu_for_conversion=use_cpu_for_conversion,
+                             frontend_only=frontend_only, frontend=frontend,
+                             backend=backend, atol=atol,
+                             rtol=rtol,
+                             validate_shapes_only=validate_shapes_only,
+                             freeze_graph=freeze_graph, tf_outputs=tf_outputs)
         alist = []
         if res is not None:
             alist = list(res)
diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/__init__.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/__init__.py
index d12314e6d..346172a3e 100644
--- a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/__init__.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/__init__.py
@@ -19,3 +19,4 @@
 from .functionalize_loops import functionalize_loops
 from .cond_to_where import cond_to_where
 from .fuse_dilation_conv import fuse_dilation_conv
+from .delete_constant import delete_unnecessary_constant_nodes
diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/constant_propagation.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/constant_propagation.py
index b506f388a..238ddc43e 100644
--- a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/constant_propagation.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/constant_propagation.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
@@ -11,9 +9,7 @@
 import tensorflow as tf
 
 from ..basic_graph_ops import const_determined_nodes
-from .delete_constant import delete_unnecessary_constant_nodes
 from coremltools._deps import _get_version
-from coremltools.converters._profile_utils import _profile
 from coremltools.converters.mil.mil import types
 from coremltools.converters.mil.mil.types.type_mapping import numpy_val_to_builtin_val
 
@@ -65,7 +61,6 @@ def _get_const_nodes(fn):
     return new_graph, list(constant_nodes), constant_node_num_outputs
 
 
-@_profile
 def _constant_propagation(fn, new_graph, constant_nodes, constant_node_num_outputs):
     try:
         if len(constant_nodes) > 0:
@@ -156,7 +151,6 @@ def _constant_propagation(fn, new_graph, constant_nodes, constant_node_num_outpu
         logging.exception("Constant Propagation pass failed: {}".format(e))
 
 
-@_profile
 def constant_propagation(tfssa):
     # we are going to rely on the TensorFlow graph to perform constant
     # propagation. For each graph, we construct a new graph comprising
@@ -165,4 +159,3 @@ def constant_propagation(tfssa):
     for f in tfssa.functions.values():
         const_nodes_info = _get_const_nodes(f)
         _constant_propagation(f, *const_nodes_info)
-    delete_unnecessary_constant_nodes(tfssa)
diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_disconnected_nodes.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_disconnected_nodes.py
index f700dd2b3..9a83956a7 100644
--- a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_disconnected_nodes.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_disconnected_nodes.py
@@ -1,12 +1,9 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 
-
 def delete_disconnected_nodes(gd):
     # delete all nodes with no inputs and outputs
     empty_nodes = []
diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/fuse_dilation_conv.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/fuse_dilation_conv.py
index 3d00ba3cd..1f5eabc13 100644
--- a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/fuse_dilation_conv.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/fuse_dilation_conv.py
@@ -1,9 +1,10 @@
-# -*- coding: utf-8 -*-
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 import numpy as np
 
-from coremltools.converters.mil.mil import types
-from ..parsed_tf_node import ParsedTFNode
 from ..basic_graph_ops import replace_source, delete_node
 
 
diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/insert_get_tuple.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/insert_get_tuple.py
index 48127092d..32d026a2b 100644
--- a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/insert_get_tuple.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/insert_get_tuple.py
@@ -64,7 +64,7 @@ def make_op(input_node, index, new_node_name, gto_make_op_cache):
         "TensorArrayV3",
         "Const",
     ]
-    inclusions = ["Split", "SplitV", "LSTMBlockCell", "TopK", "TopKV2", "Unpack"]
+    inclusions = ["Split", "SplitV", "LSTMBlockCell", "TopK", "TopKV2", "Unpack", "BlockLSTM"]
     gto_make_op_cache = {}
     for name in list(gddict.keys()):
         new_node = ParsedTFNode()
diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/quantization_pass.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/quantization_pass.py
index af46af9f0..ca06494c8 100644
--- a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/quantization_pass.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/quantization_pass.py
@@ -1,13 +1,10 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 from ..basic_graph_ops import delete_node
-import logging
-import sys
+
 
 def delete_fakequant_node_and_repair_graph(g, node):
     inputs = node.inputs
@@ -63,4 +60,4 @@ def quantization_pass(tfssa):
         supporting weight quantization.
     """
     for v in tfssa.functions.values():
-        quantization_pass_impl(v)
\ No newline at end of file
+        quantization_pass_impl(v)
diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/tensor_array_transform.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/tensor_array_transform.py
index 6cfe21805..27be50b64 100644
--- a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/tensor_array_transform.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/tensor_array_transform.py
@@ -1,12 +1,9 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 
-
 # A TensorArray is essentially a runtime vector<Tensor> with
 #
 #  - an optional requirement "infer_shape" (True by default) that all Tensors
diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/variable_node_transform.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/variable_node_transform.py
index 602075d57..ed577324b 100644
--- a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/variable_node_transform.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/variable_node_transform.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
diff --git a/coremltools/converters/mil/frontend/tensorflow/tfssa.py b/coremltools/converters/mil/frontend/tensorflow/tfssa.py
index 004f42c02..aabe7ca4e 100644
--- a/coremltools/converters/mil/frontend/tensorflow/tfssa.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tfssa.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
diff --git a/coremltools/converters/mil/frontend/tensorflow2/load.py b/coremltools/converters/mil/frontend/tensorflow2/load.py
index dd32a2eba..84fe27722 100644
--- a/coremltools/converters/mil/frontend/tensorflow2/load.py
+++ b/coremltools/converters/mil/frontend/tensorflow2/load.py
@@ -30,6 +30,7 @@
 from coremltools.converters.mil.frontend.tensorflow.parsed_tf_node import ParsedTFNode
 from coremltools.converters.mil.frontend.tensorflow.tf_graph_pass import (
     constant_propagation,
+    delete_unnecessary_constant_nodes,
     delete_disconnected_nodes,
     fuse_dilation_conv,
     insert_get_tuple,
@@ -47,6 +48,17 @@
 
 
 class TF2Loader(TFLoader):
+    """
+    These are the steps by which the TF2Loader loads and converts a TF2 model:
+    1. Get the concrete functions from the Keras model (only 1 concrete function is supported now)
+    2. Get the TensorFlow graphdef from the concrete function by
+       (a) calling TensorFlow's convert_variables_to_constants_v2 API to freeze variables into constants
+       (b) running grappler optimizations on the graphdef ("constfold", "dependency", "debug_stripper")
+    3. Extract the subgraph based on "outputs"
+    4. Construct the tfssa IR from the graphdef
+    5. Run the tfssa graph passes
+    6. Convert the tfssa to a program with TF2Converter
+    """
     def __init__(self, model, debug=False, **kwargs):
         """
         TensorFlow 2.x model loader.
@@ -67,9 +79,24 @@ def __init__(self, model, debug=False, **kwargs):
             Dictionary of additional arguments.
         """
         TFLoader.__init__(self, model, debug, **kwargs)
+        
+        """
+        tf_ssa graph passes
+        Notes:
+        - "flatten_sub_graph_namespaces" must run after "constant_propagation":
+          it renames nodes, and the constant propagation pass relies on the original
+          names to perform session.run(); TF cannot resolve the renamed nodes.
+        """
+        self.tfssa_passes = [
+            constant_propagation,
+            delete_unnecessary_constant_nodes, # delete_unnecessary_constant_nodes must come right after constant_propagation
+            rewrite_control_flow_functions,
+            flatten_sub_graph_namespaces,
+            remove_variable_nodes,
+            fuse_dilation_conv,
+        ]
 
-    def _graph_def_from_model(self, outputs=None):
-        """Overwrites TFLoader._graph_def_from_model()"""
+    def _get_concrete_functions_and_graph_def(self):
         msg = (
             "Expected model format: [SavedModel | [concrete_function] | "
             "tf.keras.Model | .h5], got {}"
@@ -98,12 +125,18 @@ def _graph_def_from_model(self, outputs=None):
                     cfs = sv if isinstance(sv, list) else list(sv)
                 else:
                     raise NotImplementedError(msg.format(self.model))
-
-            graph_def = self._graph_def_from_concrete_fn(cfs)
-            return self.extract_sub_graph(graph_def, outputs)
         else:
             raise NotImplementedError(msg.format(self.model))
 
+        graph_def = self._graph_def_from_concrete_fn(cfs)
+        
+        return cfs, graph_def
+
+    def _graph_def_from_model(self, outputs=None):
+        """Overwrites TFLoader._graph_def_from_model()"""
+        _, graph_def = self._get_concrete_functions_and_graph_def()
+        return self.extract_sub_graph(graph_def, outputs)
+
     def _tf_ssa_from_graph_def(self, fn_name="main"):
         """Overwrites TFLoader._tf_ssa_from_graph_def()"""
         with _tf.Graph().as_default() as tf_graph:
@@ -132,18 +165,8 @@ def _tf_ssa_from_graph_def(self, fn_name="main"):
 
         return tf_ssa
 
-    def _program_from_tf_ssa(self):
-        # Notes:
-        # - "flatten_while_loop_namespaces" should be after "constant_propagation"
-        #   as it changes node names which constant propagation pass is relying on
-        #   to perform session.run(), renamed nodes are not understandable for TF.
-        tf_passes = [
-            constant_propagation,
-            rewrite_control_flow_functions,
-            flatten_sub_graph_namespaces,
-            remove_variable_nodes,
-            fuse_dilation_conv,
-        ]
+    def _run_tf_ssa_passes(self):
+        tf_passes = self.tfssa_passes
 
         if self.debug:
             for tf_pass in _tqdm(
@@ -171,6 +194,8 @@ def _program_from_tf_ssa(self):
                 filename="/tmp/ssa_after_tf_passes", cleanup=True
             )
 
+    def _program_from_tf_ssa(self):
+        self._run_tf_ssa_passes()
         converter = TF2Converter(self._tf_ssa, **self.kwargs)
         return converter.convert()
 
@@ -277,21 +302,15 @@ def _dict_from_graph_def(graph, fn_name="main", sg_input_shapes=None):
 
     @staticmethod
     def _concrete_fn_from_tf_keras_or_h5(keras_model):
-        if isinstance(keras_model, _tf.keras.Model):
-            input_signature = _saving_utils.model_input_signature(
-                keras_model, keep_original_batch_size=True
-            )
-            fn = _saving_utils.trace_model_call(keras_model, input_signature)
-        else:
+        if not isinstance(keras_model, _tf.keras.Model):
             keras_model = _tf.keras.models.load_model(keras_model)
-            input_signature = _saving_utils.model_input_signature(
-                keras_model, keep_original_batch_size=True
-            )
-            fn = _saving_utils.trace_model_call(keras_model, input_signature)
+        input_signature = _saving_utils.model_input_signature(
+            keras_model, keep_original_batch_size=True
+        )
+        fn = _saving_utils.trace_model_call(keras_model, input_signature)
         return [fn.get_concrete_function()]
 
-    @staticmethod
-    def _graph_def_from_concrete_fn(cfs):
+    def _graph_def_from_concrete_fn(self, cfs):
         if len(cfs) != 1:
             raise NotImplementedError("Only a single concrete function is supported.")
 
@@ -303,11 +322,15 @@ def _graph_def_from_concrete_fn(cfs):
 
         # run a Grappler's constant folding pass.
         fn_inputs = [t for t in frozen_fn.inputs if t.dtype != _dtypes.resource]
+        grappler_optimizers_list = self._get_grappler_optimizers_list()
         graph_def = _run_graph_optimizations(
             graph_def,
             fn_inputs,
             frozen_fn.outputs,
-            config=_get_grappler_config(["constfold", "dependency", "debug_stripper"]),
+            config=_get_grappler_config(grappler_optimizers_list),
             graph=frozen_fn.graph,
         )
         return graph_def
+
+    def _get_grappler_optimizers_list(self):
+        return ["constfold", "dependency", "debug_stripper"]
diff --git a/coremltools/converters/mil/frontend/tensorflow2/ops.py b/coremltools/converters/mil/frontend/tensorflow2/ops.py
index 15d548dcf..36e4a40fe 100644
--- a/coremltools/converters/mil/frontend/tensorflow2/ops.py
+++ b/coremltools/converters/mil/frontend/tensorflow2/ops.py
@@ -10,6 +10,8 @@
 from coremltools.converters.mil.frontend.tensorflow.ops import (
     _transpose_NHWC_to_NCHW,
     _transpose_NCHW_to_NHWC,
+    _transpose_NDHWC_to_NCDHW,
+    _transpose_NCDHW_to_NDHWC
 )
 from coremltools.converters.mil.frontend.tensorflow.tf_op_registry import register_tf_op
 from coremltools.converters.mil.mil.types import builtin_to_string
@@ -69,11 +71,15 @@ def _add_batch_norm(x, mean, variance, scale, offset, epsilon, name):
 
     if data_format == "NHWC":
         x = _transpose_NHWC_to_NCHW(x)
+    elif data_format == "NDHWC":
+        x = _transpose_NDHWC_to_NCDHW(x)
 
     x = _add_batch_norm(x, mean, variance, scale, offset, epsilon, batch_norm_name)
 
     if data_format == "NHWC":
         x = _transpose_NCHW_to_NHWC(x, node.name)
+    elif data_format == "NDHWC":
+        x = _transpose_NCDHW_to_NDHWC(x, node.name)
 
     # Inference only batch norm does not have meaningful outputs for
     # batch_mean, batch_variance etc.
diff --git a/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/test_v2_passes.py b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/test_v2_passes.py
index 5cb4a2289..12e110428 100644
--- a/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/test_v2_passes.py
+++ b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/test_v2_passes.py
@@ -3,17 +3,17 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import copy
+
+import numpy as np
+
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
-    assert_op_count_match,
     assert_model_is_valid,
     assert_same_output_names,
 )
 from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY
 from coremltools.converters.mil.mil import types
-import copy
-
-import numpy as np
 
 np.random.seed(1984)
 validate_model = True
diff --git a/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/tf_passes.py b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/tf_passes.py
index b1c241aa4..c83d5a6fd 100644
--- a/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/tf_passes.py
+++ b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/tf_passes.py
@@ -7,7 +7,6 @@
 
 from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY
 
-
 def tensorflow_passes(prog):
     passes = [
         "common::dead_code_elimination",
@@ -19,6 +18,10 @@ def tensorflow_passes(prog):
         # DCE to reduce tf_lstm_block outputs and allow lstm_rewrite to
         # ssa lstm
         "common::dead_code_elimination",
+        # tensorflow::tf_lstm_to_core_lstm must come before
+        # tensorflow::expand_tf_lstm
+        "tensorflow::tf_lstm_to_core_lstm",
+        "tensorflow::expand_tf_lstm",
     ]
 
     prog.validate()
diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_load.py b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_load.py
index 787782647..ea440a276 100644
--- a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_load.py
+++ b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_load.py
@@ -8,6 +8,7 @@
 import tempfile
 
 import pytest
+
 import coremltools.converters as converter
 from coremltools.converters.mil.input_types import TensorType
 from coremltools.converters.mil.frontend.tensorflow.test.testing_utils import (
diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py
index 1b76c0984..4cd7d8996 100644
--- a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py
+++ b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py
@@ -5,10 +5,10 @@
 
 from distutils.version import StrictVersion as _StrictVersion
 import itertools
-import numpy as np
-import pytest
 import random
 
+import numpy as np
+import pytest
 
 import coremltools as ct
 from coremltools._deps import _get_version
@@ -28,6 +28,9 @@
 
 tf = pytest.importorskip("tensorflow", minversion="2.1.0")
 import tensorflow as _tf  # should be after pytest.importorskip checks
+from tensorflow.keras import Input
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Conv2D, GlobalMaxPooling2D
 
 
 class TestActivation(TensorFlowBaseTest):
@@ -419,7 +422,7 @@ def test_depth_wise_conv(
         batch_size,
     ):
         s1, s2, k1, k2 = spatial_dim_and_ks
-        c_in, c_out = 2, 6
+        c_in = 2
 
         if len(strides) != np.sum(strides) and len(dilations) != np.sum(dilations):
             # TF produces incorrect output for non-one strides + dilations
@@ -466,9 +469,9 @@ def test_conv2d_padding_dynamic_input(
         backend,
         padding,
     ):
-        from tensorflow.keras import Input
-        from tensorflow.keras.models import Model
-        from tensorflow.keras.layers import Conv2D, GlobalMaxPooling2D
+
+        if backend[0] == "mlprogram" and ct.utils._macos_version() > (12, 0):
+            pytest.xfail("rdar://88857567")
 
         # Test same padding
         input_layer = Input(batch_size=1, shape=(None, None, 1))
@@ -482,7 +485,7 @@ def test_conv2d_padding_dynamic_input(
         model = Model(inputs=[input_layer], outputs=[output_layer])
         TensorFlowBaseTest.run_compare_tf_keras(
             model,
-            [random_gen((1, 80, 40 ,1), rand_min=-10, rand_max=10)],
+            [random_gen((1, 80, 40, 1), rand_min=-10, rand_max=10)],
             use_cpu_only=use_cpu_only,
             backend=backend,
         )
@@ -855,6 +858,9 @@ def test_batch_normalization(
                 )
             ]
         )
+        random_weights = np.random.rand(4, shape[axis])
+        model.layers[0].set_weights(random_weights)
+
         TensorFlowBaseTest.run_compare_tf_keras(
             model,
             [random_gen(shape, rand_min=-10, rand_max=10)],
@@ -883,6 +889,9 @@ def test_fused_batch_norm_v3(
                 )
             ]
         )
+        random_weights = np.random.rand(4, shape[axis])
+        model.layers[0].set_weights(random_weights)
+
         TensorFlowBaseTest.run_compare_tf_keras(
             model,
             [random_gen(shape, rand_min=-10, rand_max=10)],
@@ -1247,9 +1256,9 @@ def test_lstm_dynamic_batch(self, use_cpu_only, backend):
         h0 = tf.keras.layers.Input(shape=(512,))
         c0 = tf.keras.layers.Input(shape=(512,))
         out, hn, cn = tf.keras.layers.LSTM(512,
-                                        return_sequences=True,
-                                        return_state=True,
-                                        recurrent_activation='sigmoid')(inp)
+                                           return_sequences=True,
+                                           return_state=True,
+                                           recurrent_activation='sigmoid')(inp)
         model = tf.keras.models.Model(inputs=[inp, h0, c0], outputs=[out, hn, cn])
         batch_size = 2
         TensorFlowBaseTest.run_compare_tf_keras(
@@ -1281,19 +1290,16 @@ def _get_keras_simple_lstm_model(input_shape):
         def _test_for_symbolic_shapes(keras_input_shape, input_shape_for_conversion, are_symbols_expected):
             keras_model = _get_keras_simple_lstm_model(keras_input_shape)
             res = TensorFlowBaseTest.run_compare_tf_keras(
-                        keras_model,
-                        [
-                            random_gen((1, 32, 10), -1, 1),
-                        ],
-                        inputs_for_conversion=[ct.TensorType(shape=input_shape_for_conversion)],
-                        use_cpu_only=use_cpu_only,
-                        backend=backend,
-                    )
+                keras_model,
+                [random_gen((1, 32, 10), -1, 1)],
+                inputs_for_conversion=[ct.TensorType(shape=input_shape_for_conversion)],
+                use_cpu_only=use_cpu_only,
+                backend=backend,
+            )
             coreml_model = res[1]
             mil_prog = coreml_model._get_mil_internal()
             assert is_symbolic_dim_in_prog(mil_prog) == are_symbols_expected
 
-
         _test_for_symbolic_shapes(keras_input_shape=(1, 32, 10),
                                   input_shape_for_conversion=(1, 32, 10),
                                   are_symbols_expected=False)
@@ -1316,6 +1322,79 @@ def _test_for_symbolic_shapes(keras_input_shape, input_shape_for_conversion, are
                                       input_shape_for_conversion=(ct.RangeDim(1, 10), ct.RangeDim(16, 64), 10),
                                       are_symbols_expected=True)
 
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend",
+        itertools.product([True, False], backends,),
+    )
+    def test_lstm_block_fused_op(self, use_cpu_only, backend):
+        '''
+        Define a model with custom LSTM ops that uses tf.raw_ops.BlockLSTM and
+        verify that it converts to a fused lstm op.
+
+        %x (shape: (Seq, Batch, idim) == (5, 2, 4))
+        %x1 = LSTM(h=10) (%x) # shape = (5, 2, 10)
+        %x2 = LSTM(h=20) (%x1) # shape = (5, 2, 20)
+        %x3 = slice()(%x2) # shape = (1, 2, 20), to get the final seq value
+        %x4 = reshape((1, -1)) (%x3) # shape = (1, 40)
+        %x5 = Dense(h=3)(%x4) # shape = (1, 3)
+        '''
+        class CustomLSTM(tf.keras.layers.Layer):
+            def __init__(self, num_units, max_seq_length, batch_size):
+                super(CustomLSTM, self).__init__()
+                self.hidden_dim = num_units
+                self.seq_length = max_seq_length
+                self.batch_size = batch_size
+
+            def build(self, input_shape):
+                input_dim = input_shape[-1]
+                self.w = self.add_weight(
+                    shape=(input_dim + self.hidden_dim, 4 * self.hidden_dim),
+                    initializer="random_normal",
+                    trainable=True,
+                )
+                self.b = self.add_weight(shape=(4 * self.hidden_dim,), initializer="random_normal", trainable=True)
+                self.init_h = tf.constant(np.zeros((self.batch_size, self.hidden_dim)).astype(np.float32))
+                self.init_c = tf.constant(np.zeros((self.batch_size, self.hidden_dim)).astype(np.float32))
+
+            def call(self, inputs):
+                _, output_state, _, _, _, _, output = tf.raw_ops.BlockLSTM(
+                    seq_len_max=self.seq_length,
+                    x=inputs,
+                    cs_prev=self.init_c,
+                    h_prev=self.init_h,
+                    w=self.w,
+                    wci=tf.constant(np.zeros((self.hidden_dim)).astype(np.float32)),
+                    wcf=tf.constant(np.zeros((self.hidden_dim)).astype(np.float32)),
+                    wco=tf.constant(np.zeros((self.hidden_dim)).astype(np.float32)),
+                    b=self.b,
+                )
+                return output
+
+        input_dim = 4
+        seq_length = 5
+        batch_size = 2
+        x_shape = (seq_length, batch_size, input_dim)
+        hidden_dim_1 = 10
+        hidden_dim_2 = 20
+
+        x = tf.keras.Input(batch_input_shape=x_shape)  # (5, 2, 4)
+        x1 = CustomLSTM(num_units=hidden_dim_1, max_seq_length=seq_length, batch_size=batch_size)(x)  # (5, 2, 10)
+        x2 = CustomLSTM(num_units=hidden_dim_2, max_seq_length=seq_length, batch_size=batch_size)(x1)  # (5, 2, 20)
+        x3 = tf.slice(x2, begin=[4, 0, 0], size=[1, 2, 20])  # (1, 2, 20)
+        x4 = tf.reshape(x3, shape=(1, -1))  # (1, 40)
+        x5 = tf.keras.layers.Dense(3)(x4)  # (1, 3)
+        keras_model = tf.keras.Model(inputs=x, outputs=x5)
+
+        res = TensorFlowBaseTest.run_compare_tf_keras(
+            keras_model,
+            [random_gen(x_shape, -1, 1)],
+            use_cpu_only=use_cpu_only,
+            backend=backend,
+        )
+        coreml_model = res[1]
+        mil_prog = coreml_model._get_mil_internal()
+        # assert that both custom LSTM layers appear as "lstm" ops in the mil program
+        assert len(mil_prog.find_ops(op_type="lstm")) == 2
 
 
 class TestRepeatVector(TensorFlowBaseTest):
diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/testing_utils.py b/coremltools/converters/mil/frontend/tensorflow2/test/testing_utils.py
index 41506fe1d..e916bdc66 100644
--- a/coremltools/converters/mil/frontend/tensorflow2/test/testing_utils.py
+++ b/coremltools/converters/mil/frontend/tensorflow2/test/testing_utils.py
@@ -127,7 +127,7 @@ def run_compare_tf2(
         shape = [RangeDim() if s is None or s == -1 else s \
                 for s in list(t.get_shape())]
         inputs.append(TensorType(name=name, shape=shape,
-            dtype=t.dtype.as_numpy_dtype))
+                                 dtype=t.dtype.as_numpy_dtype))
     outputs = []
     for t in output_names:
         name = get_tf_node_names(t)[0]
@@ -254,16 +254,16 @@ class TensorFlow2BaseTest(TensorFlowBaseTest):
 
     @staticmethod
     def run_compare_tf2(model,
-            input_dict,
-            output_names,
-            use_cpu_only=False,
-            use_cpu_for_conversion=False,
-            frontend_only=False,
-            frontend="tensorflow",
-            backend=("neuralnetwork", "fp32"),
-            debug=False,
-            atol=1e-04,
-            rtol=1e-05):
+                        input_dict,
+                        output_names,
+                        use_cpu_only=False,
+                        use_cpu_for_conversion=False,
+                        frontend_only=False,
+                        frontend="tensorflow",
+                        backend=("neuralnetwork", "fp32"),
+                        debug=False,
+                        atol=1e-04,
+                        rtol=1e-05):
         res = run_compare_tf2(model,
                               input_dict,
                               output_names,
@@ -282,8 +282,8 @@ def run_compare_tf2(model,
 
     @staticmethod
     def run_compare_tf_keras(model, input_values, inputs_for_conversion=None, use_cpu_only=False,
-            frontend_only=False, frontend="tensorflow",
-            backend=("neuralnetwork", "fp32"), atol=1e-04, rtol=1e-05):
+                             frontend_only=False, frontend="tensorflow",
+                             backend=("neuralnetwork", "fp32"), atol=1e-04, rtol=1e-05):
         res = run_compare_tf_keras(model, input_values,
                                    inputs_for_conversion=inputs_for_conversion,
                                    use_cpu_only=use_cpu_only,
diff --git a/coremltools/converters/mil/frontend/torch/converter.py b/coremltools/converters/mil/frontend/torch/converter.py
index 6a4f54904..e2e530564 100644
--- a/coremltools/converters/mil/frontend/torch/converter.py
+++ b/coremltools/converters/mil/frontend/torch/converter.py
@@ -41,10 +41,12 @@
 
 
 class TranscriptionContext:
-    """Maintains a map from torch operations to their MIL values
-        while building the graph. Can be used to process subgraphs recursively
-        by pushing new context when stepping into a subgraph and popping that
-        context when stepping out."""
+    """
+    Maintains a map from torch operations to their MIL values
+    while building the graph. Can be used to process subgraphs recursively
+    by pushing new context when stepping into a subgraph and popping that
+    context when stepping out.
+    """
 
     def __init__(self, name=None):
         self.name = name if name else ""
@@ -65,9 +67,10 @@ def add(self, ssa_var, torch_name=None):
         self._current_graph[-1][torch_name] = ssa_var
 
     def __getitem__(self, torch_name):
-        """ Lookup a name in the context. Note that since nested blocks must be
-            able to access anything that was defined before them, we have to
-            search all contexts for a name, starting with the most local scope.
+        """
+        Lookup a name in the context. Note that since nested blocks must be
+        able to access anything that was defined before them, we have to
+        search all contexts for a name, starting with the most local scope.
         """
         for idx in reversed(range(len(self._current_graph))):
             current_graph = self._current_graph[idx]
@@ -120,25 +123,28 @@ def __repr__(self):
 
 
 class TorchConverter:
-    """Class that handles conversion of pytorch models represented in TorchScript
+    """
+    Class that handles conversion of pytorch models represented in TorchScript
     format to the MIL format.
 
     Models passed to the @TorchConverter go from:
     TorchScript -> Expanded/Optimized Torch IR -> Internal Graph -> CoreML SSA
     The internal graph representation was added to make testing easier.
-
-    Arguments:
-        torchscript: torch.jit.ScriptModule object representing the model to convert.
-        inputs: Input values and optional names. See kwarg in load.py for full description.
-        outputs: Names of the graph's outputs. See kwarg in load.py for full description.
-        cut_at_symbols: A list of internal symbol name strings. Graph conversion will
-            terminate once these symbols have been generated. For debugging use
-            only. See kwarg in load.py.
     """
 
     def __init__(
         self, torchscript, inputs, outputs=None, cut_at_symbols=None,
     ):
+        """
+        Arguments:
+            torchscript: torch.jit.ScriptModule object representing the model to convert.
+            inputs: Input values and optional names. See kwarg in load.py for full description.
+            outputs: Names of the graph's outputs. See kwarg in load.py for full description.
+            cut_at_symbols: A list of internal symbol name strings. Graph conversion will
+                terminate once these symbols have been generated. For debugging use
+                only. See kwarg in load.py.
+        """
+
         assert isinstance(torchscript, _torch.jit.ScriptModule)
         self.inputs = inputs
         for idx, inp in enumerate(self.inputs):
@@ -163,12 +169,15 @@ def __init__(
             p(self.graph)
         self.inputs = [v for v in self.graph.inputs.values()]
         self.torch_passes = torch_passes
+        self._prog = Program()
 
     @staticmethod
     def _check_ops(graph):
-        """ Returns the set of ops in @graph that are implemented, and the set
-            for which no conversion function is registered. @graph can be
-            either InternalTorchIRGraph or InternalTorchIRBlock."""
+        """
+        Returns the set of ops in @graph that are implemented, and the set
+        for which no conversion function is registered. @graph can be
+        either InternalTorchIRGraph or InternalTorchIRBlock.
+        """
         implemented_ops = set()
         missing_ops = set()
         for node in graph.nodes:
@@ -185,7 +194,8 @@ def _check_ops(graph):
 
     @staticmethod
     def _create_placeholder(_input):
-        """Converts an InputType into a Placeholder.
+        """
+        Converts an InputType into a Placeholder.
 
         _input: TensorType
         """
@@ -194,8 +204,10 @@ def _create_placeholder(_input):
         return mb.placeholder(shape, dtype=dtype)
 
     def check_ops(self):
-        """ Returns the set of ops in @self.graph that are implemented, and
-            the set for which no conversion function is registered."""
+        """
+        Returns the set of ops in @self.graph that are implemented, and
+        the set for which no conversion function is registered.
+        """
         return TorchConverter._check_ops(self.graph)
 
     def convert_const(self):
@@ -204,11 +216,10 @@ def convert_const(self):
             self.context.add(const)
 
     def convert(self):
-
         _logging.info("Converting graph.")
 
         # This will hold the converted model.
-        prog = Program()
+        prog = self._prog
 
         # Construct placeholder for input to ssa function
         # This is where input renaming occurs
@@ -393,7 +404,8 @@ def _lower_graph_block(graph):
 
     @staticmethod
     def _expand_and_optimize_ir(torchscript):
-        """Given a torch.jit.ScriptModule, convert it to a optimized
+        """
+        Given a torch.jit.ScriptModule, convert it to an optimized
         torch._C.Graph and dict of model parameter's names to tensors.
         """
         graph = torchscript.forward.graph
@@ -410,15 +422,6 @@ def _expand_and_optimize_ir(torchscript):
         _torch._C._jit_pass_dce(graph)
         # From PyTorch code: checks well-formedness and invariants of graph.
         _torch._C._jit_pass_lint(graph)
-        # From PyTorch code: remove all in-place ops and replace them with
-        # out-of-place equivalents.
-        # e.g.
-        #   %foo = aten::add_(%foo, %n)
-        # becomes
-        #   %foo.2 = aten::add(%foo, %n)
-        _torch._C._jit_pass_remove_inplace_ops(graph)
-        _torch._C._jit_pass_dce(graph)
-        _torch._C._jit_pass_lint(graph)
         # Replaces a couple specific ops patterns (add, sub, mul, div, chunk).
         if version_lt(_torch, '1.6.0'):
             _torch._C._jit_pass_canonicalize_ops(graph)
diff --git a/coremltools/converters/mil/frontend/torch/dialect_ops.py b/coremltools/converters/mil/frontend/torch/dialect_ops.py
index e097f6956..bdafa7e84 100644
--- a/coremltools/converters/mil/frontend/torch/dialect_ops.py
+++ b/coremltools/converters/mil/frontend/torch/dialect_ops.py
@@ -122,7 +122,7 @@ class torch_upsample_bilinear(Operation):
     def default_inputs(self):
         return DefaultInputs(
             align_corners=True,
-            )
+        )
 
     def __init__(self, **kwargs):
         super(torch_upsample_bilinear, self).__init__(**kwargs)
@@ -196,7 +196,7 @@ def default_inputs(self):
             begin_mask=None,
             end_mask=None,
             squeeze_mask=None,
-            )
+        )
 
     def __init__(self, **kwargs):
         super(torch_tensor_assign, self).__init__(**kwargs)
diff --git a/coremltools/converters/mil/frontend/torch/internal_graph.py b/coremltools/converters/mil/frontend/torch/internal_graph.py
index d6155c8a9..6d5e81843 100644
--- a/coremltools/converters/mil/frontend/torch/internal_graph.py
+++ b/coremltools/converters/mil/frontend/torch/internal_graph.py
@@ -6,29 +6,32 @@
 from collections import OrderedDict
 from itertools import islice
 
-import torch
-
 
 def _make_ssa_name(name):
-    """Converts a symbol name (string) into an SSA name, by prepending '%'.
+    """
+    Converts a symbol name (string) into an SSA name, by prepending '%'.
     Only used for pretty printing the graph.
     """
+    if name is None:
+        return "None"
     return "%" + name
 
 
 def _ssa_name_list(names):
-    """Take a list of symbol names (strings) and return them as SSA names. Only
+    """
+    Take a list of symbol names (strings) and return them as SSA names. Only
     used for pretty printing the graph.
     """
     return [_make_ssa_name(x) for x in names]
 
 
 def _find_new_name(old_name, node_names):
-    """Disambiguate a node's name from a list of existing node names by adding
+    """
+    Disambiguate a node's name from a list of existing node names by adding
     successively larger integers.
     """
     count = 0
-    new_name = old_name + "." + str(count)
+    new_name = old_name + "." + str(count) if count != 0 else old_name
     while new_name in node_names:
         count += 1
         new_name = old_name + "." + str(count)
@@ -46,17 +49,20 @@ def _replace_in_list(ls, old_val, new_val):
 
 
 class InternalTorchIRBlock:
-    """CoreML internal representation of a torch IR block.
+    """
+    CoreML internal representation of a torch IR block.
+    """
 
+    def __init__(self, raw_block=None, parent=None, nodes=None, inputs=None, outputs=None):
+        """
         Arguments:
             raw_block: The torch._C.Block to convert, or None.
             parent: The InternalTorchIRNode this block belongs to.
             nodes: If @raw_block is None, the list of InternalTorchIRNodes in the block
             inputs: If @raw_block is None, the list of input symbols.
             outputs: If @raw_block is None, the list of output symbols.
-    """
+        """
 
-    def __init__(self, raw_block=None, parent=None, nodes=None, inputs=None, outputs=None):
         self.nodes = []
         node_names = set()
         self.inputs = []
@@ -112,12 +118,18 @@ def replace_name(self, old_name, new_name):
 
 
 class InternalTorchIRNode:
-    """CoreML internal representation of a torch IR node.
+    """
+    CoreML internal representation of a torch IR node.
     Can construct itself from a provided torchIR node or manually constructed with
     args for testing.
 
     See InternalTorchIRGraph for the motivation behind this structure.
+    """
 
+    def __init__(
+        self, node=None, parent=None, attr=None, inputs=None, outputs=None, kind=None, blocks=None,
+    ):
+        """
         Arguments:
             node: The torch._C.Node to convert, or None.
             parent: The InternalTorchIRGraph/Block this node belongs to.
@@ -126,11 +138,8 @@ class InternalTorchIRNode:
             outputs: If @node is not specified, the list of output symbols.
             kind: If @node is not specified, the kind (op) of the node.
             blocks: If @node is not specified, the list of InternalTorchIRBlock.
-    """
+        """
 
-    def __init__(
-        self, node=None, parent=None, attr=None, inputs=None, outputs=None, kind=None, blocks=None,
-    ):
         self.parent = parent
         if node is not None:
             self.inputs = [_input.debugName() for _input in node.inputs()]
@@ -189,7 +198,8 @@ def replace_name(self, old_name, new_name):
 
 
 class InternalTorchIRGraph:
-    """CoreML internal representation of a torch IR graph. A torch._C.Graph
+    """
+    CoreML internal representation of a torch IR graph. A torch._C.Graph
     object is not an ideal structure to use in converting to CoreML. Conversion
     to an InternalTorchIRGraph is inserted between the original graph and the
     final CoreML model to address several issues:
@@ -207,7 +217,13 @@ class InternalTorchIRGraph:
           they have to come from actually converting a PyTorch graph. With an
           internal structure, we can directly build the test cases we need for
           unit testing.
+    """
 
+    def __init__(
+            self, raw_graph=None, params_dict=None, input_values=None, cut_at_symbols=None,
+            nodes=None, params=None, inputs=None, outputs=None,
+    ):
+        """
         Arguments:
             raw_graph: raw_graph: The torch._C.Graph to convert, or None.
             params_dict: A dictionary mapping graph parameter names to tensors.
@@ -224,11 +240,8 @@ class InternalTorchIRGraph:
             inputs: If @raw_graph is None, the OrderedDict mapping input names
                 to their example values.
             outputs: If @raw_graph is None, the list of outputs from the graph.
-    """
+        """
 
-    def __init__(
-        self, raw_graph=None, params_dict=None, input_values=None, cut_at_symbols=None, nodes=None, params=None, inputs=None, outputs=None,
-    ):
         self.nodes = []
         node_names = set()
         self.params = {}
diff --git a/coremltools/converters/mil/frontend/torch/load.py b/coremltools/converters/mil/frontend/torch/load.py
index d85ccf8b7..3d6ad4a7a 100644
--- a/coremltools/converters/mil/frontend/torch/load.py
+++ b/coremltools/converters/mil/frontend/torch/load.py
@@ -3,14 +3,11 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-
-import logging as _logging
 import os.path as _os_path
 import torch as _torch
 
 from .converter import TorchConverter, torch_to_mil_types
 from coremltools.converters.mil.input_types import InputType, TensorType
-from coremltools.converters.mil.mil import Program, types
 
 
 def load(model_spec, debug=False, **kwargs):
@@ -95,4 +92,3 @@ def _perform_torch_convert(converter, debug):
         raise e
 
     return prog
-
diff --git a/coremltools/converters/mil/frontend/torch/ops.py b/coremltools/converters/mil/frontend/torch/ops.py
index 637742e23..b87105977 100644
--- a/coremltools/converters/mil/frontend/torch/ops.py
+++ b/coremltools/converters/mil/frontend/torch/ops.py
@@ -12,16 +12,18 @@
 import torch
 from tqdm import tqdm as _tqdm
 
+from .._utils import build_einsum_mil
+from .torch_op_registry import _TORCH_OPS_REGISTRY, register_torch_op
 from coremltools.converters.mil.mil import (
     Builder as mb,
     types,
     Symbol
 )
-from coremltools.converters.mil.mil.var import Var, ListVar
-from .torch_op_registry import _TORCH_OPS_REGISTRY, register_torch_op
-from coremltools.converters.mil.mil.types.symbolic import any_symbolic, is_symbolic, is_compatible_symbolic_vector
+from coremltools.converters.mil.mil.ops.defs.tensor_transformation import _solve_slice_by_index_shape
 from coremltools.converters.mil.mil.types import is_bool
-from .._utils import build_einsum_mil
+from coremltools.converters.mil.mil.types.symbolic import any_symbolic, is_symbolic, is_compatible_symbolic_vector
+from coremltools.converters.mil.mil.var import Var, ListVar
+from coremltools.converters.mil.mil.ops.defs._utils import MAX_SIZE_CONSTANT_FOLDING
 
 # The pytorch args for many of the below ops were sourced from
 # https://github.com/pytorch/pytorch/blob/d971007c291c0ead1003d12cd553d18ddb582207/torch/csrc/jit/mobile/register_mobile_ops.cpp#L216
@@ -33,8 +35,10 @@
 
 
 def _all_outputs_present(context, graph):
-    """ Returns true if all the symbols in the graph's output list are
-        present in context."""
+    """
+    Returns true if all the symbols in the graph's output list are
+    present in context.
+    """
     for outp in graph.outputs:
         try:
             context[outp]
@@ -42,13 +46,22 @@ def _all_outputs_present(context, graph):
             return False
     return True
 
-def _value_at(x, idx):
+def _value_at(x, idx, name=None):
     """
     input x: 1D tensor (vector).
     return value at index idx. x[idx].
+    Could specify the name of the returned MIL scalar tensor as well.
     """
     assert x.rank == 1
-    return mb.slice_by_index(x=x, begin=[idx], end=[0], squeeze_mask=[True])
+    args = {
+        "x": x,
+        "begin": [idx],
+        "end": [0],
+        "squeeze_mask": [True],
+    }
+    if name is not None:
+        args["name"] = name
+    return mb.slice_by_index(**args)
 
 
 def convert_nodes(context, graph):
@@ -165,7 +178,7 @@ def convert_block(context, block, inputs):
     types.int32: "int32",
 }
 
-def _get_inputs(context, node, expected=None):
+def _get_inputs(context, node, expected=None, min_expected=None):
     """
     Look up a node's inputs in @context and return them as a list. If
     @expected is not None, also verifies the number of inputs matches the
@@ -181,14 +194,22 @@ def _get_inputs(context, node, expected=None):
                     node.name, node.kind, len(inputs), expected
                 )
             )
+    if min_expected is not None:
+        if len(inputs) < min_expected:
+            raise ValueError(
+                "node {} ({}) got {} input(s), expected minimum {} inputs".format(
+                    node.name, node.kind, len(inputs), min_expected
+                )
+            )
 
     return inputs
 
 
 def _list_select(shape_var, index):
-    """ Sometimes we need to select a specific item from a list. If that item
-        is known at compile time, extract it as a const. Otherwise, if it's
-        symbolic, use gather.
+    """
+    Sometimes we need to select a specific item from a list. If that item
+    is known at compile time, extract it as a const. Otherwise, if it's
+    symbolic, use gather.
     """
     # TODO: gather doesn't work when the shape is known size.
     if shape_var.val is not None:
@@ -339,6 +360,17 @@ def constant(context, node):
     const = _construct_constant(val, name)
     context.add(const, torch_name=name)
 
+
+@register_torch_op
+def selu(context, node):
+    ALPHA = 1.6732632423543772
+    SCALE = 1.0507009873554805
+
+    x = _get_inputs(context, node, expected=1)[0]
+    x = mb.elu(x=x, alpha=ALPHA)
+    x = mb.mul(x=x, y=SCALE, name=node.name)
+    context.add(x)
+
 @register_torch_op
 def dot(context, node):
     inputs = _get_inputs(context, node, expected=2)
@@ -386,6 +418,17 @@ def norm(context, node):
         temp = mb.pow(x=temp, y=1./num.val, name=node.name)
     context.add(temp)
 
+@register_torch_op
+def hardswish(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+    x = inputs[0]
+
+    w = mb.thresholded_relu(x=x, alpha=-3.0)
+    y = mb.sigmoid_hard(x=w, alpha=1.0/6, beta=0.5) # ``y = min(max(alpha * x + beta, -1), 1)``
+    result = mb.mul(x=w, y=y, name=node.name)
+
+    context.add(result)
+
 def _array_construct(context, node, array_type):
     assert len(node.outputs) == 1
     inputs = _get_inputs(context, node)
@@ -771,9 +814,55 @@ def relu(context, node):
 def prelu(context, node):
     inputs = _get_inputs(context, node, expected=2)
     x = inputs[0]
-    alpha = inputs[1].val
-    alpha_vec = _np.ones((x.shape[1],))*alpha
-    res = mb.prelu(x=x, alpha=alpha_vec, name=node.name)
+    alpha = inputs[1]
+    # The MIL backend assumes that the input of prelu has at least rank 3,
+    # i.e. [batch, channel, spatial_dims*].
+    if x.rank < 3:
+        x = mb.expand_dims(x=x, axes=[1])
+        x = mb.prelu(x=x, alpha=alpha)
+        res = mb.squeeze(x=x, axes=[1], name=node.name)
+    else:
+        alpha = alpha.val
+        alpha_vec = _np.ones((x.shape[1],))*alpha
+        res = mb.prelu(x=x, alpha=alpha_vec, name=node.name)
+    context.add(res)
+
+@register_torch_op
+def linspace(context, node):
+    inputs = _get_inputs(context, node, min_expected=3)
+
+    start = inputs[0]
+    end = inputs[1]
+    nums = inputs[2]
+    start = mb.cast(x=start, dtype="fp32")
+    end = mb.cast(x=end, dtype="fp32")
+
+    if start.val is not None and end.val is not None and nums.val is not None:
+        start_val = start.val
+        end_val = end.val
+        nums_val = nums.val
+        if nums_val < MAX_SIZE_CONSTANT_FOLDING:
+            res = mb.const(val=_np.linspace(start_val, end_val, nums_val), name=node.name)
+            context.add(res)
+            return
+
+    if nums.val is None:
+        msg = "Dynamic steps input for torch.linspace is not supported. Please use torch.arange instead"
+        raise NotImplementedError(msg)
+    else:
+        if nums.val == 1:
+            res = mb.expand_dims(x=start, axes=[0], name=node.name)
+        else:
+            # step = (end - start) / (nums - 1)
+            x = mb.sub(x=end, y=start)
+            y = mb.sub(x=nums, y=1)
+            step = mb.real_div(x=x, y=y)
+
+            # Note that the range_1d op excludes the end point,
+            # so we have to add the end back to the resulting array.
+            arange = mb.range_1d(end=end, start=start, step=step)
+            new_end = mb.expand_dims(x=end, axes=[0])
+            res = mb.concat(values=[arange, new_end], axis=0, name=node.name)
     context.add(res)
 
 @register_torch_op
@@ -791,6 +880,16 @@ def einsum(context, node):
     x = build_einsum_mil(a, b, equation, node.name)
     context.add(x)
 
+@register_torch_op
+def eye(context, node):
+    inputs = _get_inputs(context, node, expected=[5, 6])
+    if len(inputs) == 5:
+        eye = _np.eye(inputs[0].val)
+    if len(inputs) == 6:
+        eye = _np.eye(inputs[0].val, inputs[1].val)
+    eye = mb.const(val=eye, name=node.name)
+    context.add(eye)
+
 @register_torch_op
 def elu(context, node):
     ## Torch port to ATen adds scale and input_scale which is set to 1
@@ -806,6 +905,18 @@ def leaky_relu(context, node):
     res = mb.leaky_relu(x=inputs[0], alpha=inputs[1], name=node.name)
     context.add(res)
 
+@register_torch_op()
+def rrelu(context, node):
+    inputs = _get_inputs(context, node, expected=5)
+
+    # Alpha in evaluation mode is just the average between upper and lower.
+    lower_alpha = inputs[1]
+    upper_alpha = inputs[2]
+    alpha = (lower_alpha.val + upper_alpha.val) / 2
+
+    res = mb.leaky_relu(x=inputs[0], alpha=alpha, name=node.name)
+    context.add(res)
+
 @register_torch_op
 def softplus(context, node):
     inputs = _get_inputs(context, node, expected=3)
@@ -874,7 +985,7 @@ def _max_pool(context, node, inputs):
     x = inputs[0]
     kernel_sizes = inputs[1]
     strides = inputs[2]
-    if strides.op.op_type == "const"  and (not list(strides.val)):
+    if strides.op.op_type == "const" and (not list(strides.val)):
         strides = mb.const(val=kernel_sizes.val, name=strides.name)
 
     pad_type = "custom"
@@ -1238,7 +1349,6 @@ def adaptive_max_pool2d(context, node):
     context.add(max_pool)
 
 
-
 @register_torch_op
 def batch_norm(context, node):
     inputs = _get_inputs(context, node, expected=9)
@@ -1303,107 +1413,39 @@ def _add_batch_norm_dynamic():
     def _add_batch_norm_1d():
         # first expand the 3d tensor to 4d, and call the standard mb.batch_norm
         x = mb.expand_dims(x=_input, axes=[-1], name=node.name + "_rank2_expansion")
-        name = node.name + "_batch_norm_1d"
-        batch_norm = mb.batch_norm(
+        bn = mb.batch_norm(
             x=x,
             mean=running_mean,
             variance=running_var,
             gamma=weight,
             beta=bias,
             epsilon=eps,
-            name=name,
+            name=node.name + "_batch_norm_1d",
         )
-        batch_norm = mb.squeeze(x=batch_norm, name=node.name, axes=[-1])
-        context.add(batch_norm)
+        bn = mb.squeeze(x=bn, name=node.name, axes=[-1])
+        context.add(bn)
 
-    def _add_batch_norm_2d():
-        batch_norm = mb.batch_norm(
+    def _add_batch_norm():
+        bn = mb.batch_norm(
             x=_input,
             mean=running_mean,
             variance=running_var,
             gamma=weight,
             beta=bias,
             epsilon=eps,
-            name=name,
+            name=node.name,
         )
-        context.add(batch_norm)
-
-    def _add_batch_norm_3d():
-        # # if the input shape is symbolic, bacth norm is computed by breaking it into elementwise ops
-        # if the input shape is compile time determined, we reshape the tensor
-        # to a 4d tensor, and call the standard mb.batch_norm
-        batch_size, channel, height, width, depth = _input.shape
-        assert not is_symbolic(channel), "Channel dimension must be known for batchnorm layer."
-
-        symbolic_num = sum([is_symbolic(x) for x in _input.shape])
-
-        if symbolic_num > 1:
-            weight_expand = mb.expand_dims(x=weight, axes=[0,2,3,4], name=name + "_expand_weight_3d")
-            bias_exapnd = mb.expand_dims(x=bias, axes=[0,2,3,4], name=name + "_expand_bias_3d")
-            running_mean_expand = mb.expand_dims(x=running_mean, axes=[0,2,3,4], name=name + "_expand_mean_3d")
-            running_var_expand = mb.expand_dims(x=running_var, axes=[0,2,3,4], name=name + "_expand_var_3d")
-
-            # compute batch norm 3d by decomposing it into elementwise operations
-            numerator = mb.sub(x=_input, y=running_mean_expand)
-            denominator = mb.add(x=running_var_expand, y=eps)
-            denominator = mb.sqrt(x=denominator)
-            x = mb.real_div(x=numerator, y=denominator)
-            x = mb.mul(x=x, y=weight_expand)
-            batch_norm = mb.add(x=x, y=bias_exapnd, name=name)
-
-        else:
-            batch_size, channel, height, width, depth = _input.shape
-            is_batch_symbloic = is_symbolic(batch_size)
-            is_height_symbolic = is_symbolic(height)
-            is_width_symbolic = is_symbolic(width)
-            is_depth_symbolic = is_symbolic(depth)
-
-            if is_batch_symbloic:
-                shape1 = [-1, channel, height*width, depth]
-                shape2 = [-1, channel, height, width, depth]
-
-            elif is_height_symbolic:
-                shape1 = [batch_size, channel, -1, width*depth]
-                shape2 = [batch_size, channel, -1, width, depth]
+        context.add(bn)
 
-            elif is_width_symbolic:
-                shape1 = [batch_size, channel, -1, height*depth]
-                shape2 = [batch_size, channel, height, -1, depth]
-
-            elif is_depth_symbolic:
-                shape1 = [batch_size, channel, height*width, -1]
-                shape2 = [batch_size, channel, height, width, -1]
-
-            else:
-                shape1 = [batch_size, channel, height*width, depth]
-                shape2 = [batch_size, channel, height, width, depth]
-
-            reshape_4d = mb.reshape(x=_input, shape=shape1, name=name + "_reshape_4d")
-            batch_norm = mb.batch_norm(
-                x=reshape_4d,
-                mean=running_mean,
-                variance=running_var,
-                gamma=weight,
-                beta=bias,
-                epsilon=eps,
-                name=name + "_batch_norm_4d",
-            )
-            batch_norm = mb.reshape(x=batch_norm, shape=shape2, name=name)
-
-        context.add(batch_norm)
-
-    is_batch_norm_1d = input_rank == 2
-    is_batch_norm_2d = (input_rank == 3 or input_rank == 4)
-    is_batch_norm_3d = input_rank == 5
+    is_batch_norm_1d_rank_2 = input_rank == 2
 
     if training or running_mean.val is None or running_var.val is None or weight is None or bias is None:
         _add_batch_norm_dynamic()
-    elif is_batch_norm_1d:
+    elif is_batch_norm_1d_rank_2:
         _add_batch_norm_1d()
-    elif is_batch_norm_2d:
-        _add_batch_norm_2d()
-    elif is_batch_norm_3d:
-        _add_batch_norm_3d()
+    else:
+        _add_batch_norm()
+
 
 @register_torch_op
 def instance_norm(context, node):
@@ -1421,6 +1463,7 @@ def instance_norm(context, node):
     )
     context.add(x)
 
+
 @register_torch_op
 def group_norm(context, node):
     inputs = _get_inputs(context, node, expected=6)
@@ -2676,47 +2719,48 @@ def nonzero(context, node):
     nonzero = mb.non_zero(x=x, name=node.name)
     context.add(nonzero)
 
-@register_torch_op
-def _internal_tensor_value_assign(context, node):
 
-    def _get_slice_params(context, data, inputs):
-        rank = data.rank
-        begin = [0] * rank
-        end = [0] * rank
-        stride = [1] * rank
-        begin_mask = [False] * rank
-        end_mask = [False] * rank
-        squeeze_mask = [False] * rank
+def _get_slice_params(context, data, inputs):
+    rank = data.rank
+    begin = [0] * rank
+    end = [0] * rank
+    stride = [1] * rank
+    begin_mask = [False] * rank
+    end_mask = [False] * rank
+    squeeze_mask = [False] * rank
 
-        num_of_slice_set = len(inputs) // 2
+    num_of_slice_set = len(inputs) // 2
 
-        for i in range(num_of_slice_set):
-            if inputs[2*i + 1] is None:
-                # This is pure index select
-                idx = context[inputs[2*i]].val
-                begin[i] = idx
-                squeeze_mask[i] = True
+    for i in range(num_of_slice_set):
+        if inputs[2*i + 1] is None:
+            # This is pure index select
+            idx = context[inputs[2*i]].val
+            begin[i] = idx
+            squeeze_mask[i] = True
+        else:
+            # This is a slice
+            begin_var = context[inputs[2*i]]
+            end_var = context[inputs[2*i+1]]
+
+            if begin_var is None:
+                begin_mask[i] = True
             else:
-                # This is a slice
-                begin_var = context[inputs[2*i]]
-                end_var = context[inputs[2*i+1]]
+                begin[i] = begin_var.val
 
-                if begin_var is None:
-                    begin_mask[i] = True
-                else:
-                    begin[i] = begin_var.val
+            if end_var is None:
+                end_mask[i] = True
+            else:
+                end[i] = end_var.val
 
-                if end_var is None:
-                    end_mask[i] = True
-                else:
-                    end[i] = end_var.val
+    for i in range(num_of_slice_set, rank):
+        begin_mask[i] = True
+        end_mask[i] = True
 
-        for i in range(num_of_slice_set, rank):
-            begin_mask[i] = True
-            end_mask[i] = True
+    return begin, end, stride, begin_mask, end_mask, squeeze_mask
 
-        return begin, end, stride, begin_mask, end_mask, squeeze_mask
 
+@register_torch_op
+def _internal_op_tensor_inplace_copy(context, node):
     data = context[node.inputs[0]]
     updates = context[node.inputs[1]]
     begin, end, stride, begin_mask, end_mask, squeeze_mask = _get_slice_params(context, data, node.inputs[2:])
@@ -2732,33 +2776,67 @@ def _get_slice_params(context, data, inputs):
         squeeze_mask=squeeze_mask,
         name=node.name,
     )
+    context.add(updated_x)
+
 
+@register_torch_op
+def _internal_op_tensor_inplace_fill(context, node):
+    """Fill a slice of a tensor with a constant scalar via ``mb.torch_tensor_assign``."""
+    target, scalar = context[node.inputs[0]], context[node.inputs[1]]
+    slice_spec = _get_slice_params(context, target, node.inputs[2:])
+    begin, end, stride, begin_mask, end_mask, squeeze_mask = slice_spec
+    # Constant block shaped like the sliced region, filled with the scalar value.
+    region_shape = _solve_slice_by_index_shape(target.shape, *slice_spec)
+    filler = _np.full(region_shape, scalar.val)
+    updated_x = mb.torch_tensor_assign(
+        data=target,
+        updates=filler,
+        begin=begin,
+        end=end,
+        stride=stride,
+        begin_mask=begin_mask,
+        end_mask=end_mask,
+        squeeze_mask=squeeze_mask,
+        name=node.name,
+    )
     context.add(updated_x)
 
+
 @register_torch_op
 def index_put(context, node):
     inputs = _get_inputs(context, node, expected=4)
     x = inputs[0]
     indices = inputs[1]
     values = inputs[2]
-    accumulate = inputs[3]
+    accumulate = inputs[3].val
     rank = x.rank
+    mode = "add" if accumulate else "update"
+
+    indices_type = indices[0].sym_type.get_primitive()
 
-    if len(indices) != 1 or indices[0] is None or indices[0].sym_type.get_primitive() != types.bool:
-        raise NotImplementedError("Unsupported index_put_ usage.")
+    if indices_type == types.bool:
+        assert len(indices) == 1, "Unsupported index_put_ usage."
+        indices = indices[0]
+        assert indices.shape == x.shape, "indices shape must equal to input shape for index put operation."
+        indices = mb.cast(x=indices, dtype="int32")
+        indices = mb.non_zero(x=indices)
+
+    if indices_type == types.int:
+        if len(indices) > 1:
+            indices = mb.stack(values=indices, axis=rank-1)
+        else:
+            indices = mb.expand_dims(x=indices[0], axes=[-1])
 
-    indices = indices[0]
-    assert indices.shape == x.shape, "indices shape must equal to input shape for index put operation."
-    indices = mb.cast(x=indices, dtype="int32")
-    non_zeros_indices = mb.non_zero(x=indices)
     if len(values.shape) == 0:
         values = mb.expand_dims(x=values, axes=[0])
+
     if values.rank == 1 and values.shape[0] == 1:
-        reps = _value_at(mb.shape(x=non_zeros_indices), 0)
+        reps = _value_at(mb.shape(x=indices), 0)
         reps = mb.expand_dims(x=reps, axes=[0])
         values = mb.tile(x=values, reps=reps)
-    scatter_x = mb.scatter_nd(data=x, indices=non_zeros_indices, updates=values, mode="update", name=node.name)
-    context.add(scatter_x)
+
+    result = mb.scatter_nd(data=x, indices=indices, updates=values, mode=mode, name=node.name)
+    context.add(result)
 
 @register_torch_op
 def index(context, node):
@@ -2785,7 +2863,8 @@ def index(context, node):
     if (len(indices) == 1 and
         indices[0] is not None and
         indices[0].sym_type.get_primitive() == types.bool and
-        indices[0].shape == x.shape):
+        indices[0].shape == x.shape
+    ):
 
         indices = indices[0]
         x_reshape = mb.reshape(x=x, shape=[-1])
@@ -2931,17 +3010,46 @@ def ones_like(context, node):
     fill = mb.fill(shape=size, value=1.0, name=node.name)
     context.add(fill)
 
-@register_torch_op
+def _make_fill_op(size, val, name):
+    """Return an ``mb.fill`` op of value ``val``; a list ``size`` is concatenated first."""
+    assert val is not None
+    # A list of dims is joined along axis 0 into a single shape before filling.
+    shape = mb.concat(values=size, axis=0) if isinstance(size, list) else size
+    return mb.fill(shape=shape, value=val, name=name)
+
+@register_torch_op()
 def full(context, node):
     inputs = _get_inputs(context, node)
     size = inputs[0]
     val = inputs[1].val
-    assert val is not None
-    if isinstance(size, list):
-        size = mb.concat(values=size, axis=0)
-    full = mb.fill(shape=size, value=val, name=node.name)
-    context.add(full)
+    result = _make_fill_op(size, val, node.name)
+    context.add(result)
 
+@register_torch_op()
+def new_full(context, node):
+    """Lower ``tensor.new_full(size, fill_value)`` to a fill op.
+
+    ``new_full`` is called on an existing tensor, whereas ``torch.full`` is
+    called from the torch API; apart from that they do the same thing. Here
+    the size is read from ``inputs[1]`` and the fill value from ``inputs[2]``.
+    """
+    inputs = _get_inputs(context, node)
+    filled = _make_fill_op(inputs[1], inputs[2].val, node.name)
+    context.add(filled)
+
+@register_torch_op()
+def bitwise_not(context, node):
+    """Lower ``bitwise_not``; raises ValueError unless the input is int or bool."""
+    x = _get_inputs(context, node)[0]
+    dtype = x.dtype
+    if types.is_int(dtype):
+        # ~x == -(x + 1); integer constants avoid promoting the int input to float.
+        x = mb.mul(x=mb.add(x=x, y=1), y=-1, name=node.name)
+    elif types.is_bool(dtype):
+        x = mb.logical_not(x=x, name=node.name)
+    else:
+        raise ValueError("Not supported type {} found for 'bitwise_not' op".format(dtype))
+    context.add(x)
 
 def _avg_pool(context, node, inputs):
     x = inputs[0]
@@ -3199,7 +3307,6 @@ def to(context, node):
         dtype = inputs[1].val
     elif len(inputs) == 6:
         _input = inputs[0]
-        device = inputs[1]
         dtype = inputs[2].val
         # non_blocking = inputs[3]
         # copy = inputs[4]
@@ -3345,7 +3452,15 @@ def arange(context, node):
         raise ValueError(
             "arange must have exactly 5, 6, or 7 inputs, got {}".format(len(inputs))
         )
-
+    # If start, end, and step don't have the same dtype, we cast them to fp32
+    int_start = isinstance(start, int) or types.is_int(start.dtype)
+    int_end = isinstance(end, int) or types.is_int(end.dtype)
+    int_step = isinstance(step, int) or types.is_int(step.dtype)
+
+    if int_start != int_end or int_start != int_step:
+        start = mb.cast(x=start, dtype="fp32")
+        end = mb.cast(x=end, dtype="fp32")
+        step = mb.cast(x=step, dtype="fp32")
     res = mb.range_1d(start=start, end=end, step=step, name=node.name)
     context.add(res)
 
@@ -3499,6 +3614,23 @@ def zeros(context, node):
     context.add(zeros)
 
 
+@register_torch_op
+def new_zeros(context, node):
+    """Lower ``new_zeros`` to an ``mb.fill`` of 0. with the shape from ``inputs[1]``."""
+    size = _get_inputs(context, node)[1]
+    if isinstance(size, list):
+        # A dynamic size arrives as a list of scalars; concat them into one shape.
+        size = mb.concat(values=size, axis=0)
+    zeros = mb.fill(shape=size, value=0., name=node.name)
+    context.add(zeros)
+
+@register_torch_op
+def dim(context, node):
+    """Emit the input's rank: element 0 of the shape of its shape tensor."""
+    tensor = _get_inputs(context, node)[0]
+    shape_of_shape = mb.shape(x=mb.shape(x=tensor))
+    context.add(_value_at(shape_of_shape, 0, node.name))
+
 @register_torch_op
 def min(context, node):
     inputs = _get_inputs(context, node, expected=3)
@@ -3765,8 +3897,13 @@ def where(context, node):
     if not types.is_bool(cond.dtype):
         # cond must be bool type
         cond = mb.cast(x=cond, dtype="bool")
-
-    context.add(mb.select(cond=cond, a=inputs[1], b=inputs[2], name=node.name))
+    if not any([any_symbolic(x.shape) for x in inputs[:3]]):
+        # broadcast all tensors to the same shape
+        broadcast_inputs = _broadcast_tensors([cond, inputs[1], inputs[2]])
+        result = mb.select(cond=broadcast_inputs[0], a=broadcast_inputs[1], b=broadcast_inputs[2], name=node.name)
+    else:
+        result = mb.select(cond=cond, a=inputs[1], b=inputs[2], name=node.name)
+    context.add(result)
 
 @register_torch_op
 def neg(context, node):
@@ -3872,7 +4009,7 @@ def std(context, node):
     context.add(y,node.name)
 
 @register_torch_op
-def copy_(context, node):
+def copy(context, node):
     inputs = _get_inputs(context, node, expected=[2, 3])
     context.add(mb.identity(x=inputs[0], name=node.name))
 
@@ -3884,8 +4021,32 @@ def dtype(context, node):
 
 @register_torch_op
 def tensor(context, node):
+    def _make_tensor(list_of_tensor, name, rank):
+        if rank == 6:
+            raise NotImplementedError("CoreML only supports tensor rank <= 5.")
+        if not isinstance(list_of_tensor, list):
+            return list_of_tensor
+        values = [_make_tensor(x, name + "_r_" + str(i), rank + 1) for i, x in enumerate(list_of_tensor)]
+        if len(values) == 1:
+            return mb.expand_dims(x=values[0], axes=[0], name=name)
+        return mb.stack(values=values, axis=0, name=name)
+
     inputs = _get_inputs(context, node, expected=4)
-    val = inputs[0].val # element val to fill
+
+    # Case 1: Using torch.tensor to create a const tensor
+    # For example:
+    # torch.tensor([[[0, 0], [0, 10], [5, 10], [5, 0]]], dtype=torch.float32)
+    val = inputs[0]
+    if isinstance(val, list):
+        context.add(_make_tensor(val, node.name, 1))
+        return
+     
+    if val.shape != ():
+        context.add(mb.identity(x=val, name=node.name))
+        return
+
+    # Case 2: Create a tensor filled with a single value
+    val = val.val # element val to fill
     msg_prefix = 'torch::tensor {} '.format(node.name)
     if val is None:
         raise ValueError(msg_prefix + 'val is None')
@@ -4075,11 +4236,12 @@ def _broadcast_tensors(tensors):
 
     def _solve_broadcast_shape(shapes):
         rank = _np.max([len(shape) for shape in shapes])
-        shapes = [[1]*(rank - len(shape)) + shape for shape in shapes]
+        shapes = [[1] * (rank - len(shape)) + shape for shape in shapes]
         result_shape = []
         for i in range(rank):
             dims = [shapes[j][i] for j in range(len(tensors))]
             if any_symbolic(dims):
+                # rdar://85559497 (Handle dynamic shapes inputs broadcast for pytorch)
                 raise NotImplementedError("Only static shaped inputs are supported for torch.broadcast_tensors conversion.")
             result_shape.append(_np.max(dims))
         return result_shape
diff --git a/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py b/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py
index 7b42cc694..e1d5ec570 100644
--- a/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py
+++ b/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py
@@ -50,7 +50,6 @@ def _torch_tensor_assign_to_core_block(block):
 
 def _transform_tensor_assign(op, block):
     begin = op.begin.val
-    end = op.end.val
     strides = op.stride.val
 
     begin_mask = op.begin_mask.val
diff --git a/coremltools/converters/mil/frontend/torch/test/test_api.py b/coremltools/converters/mil/frontend/torch/test/test_api.py
index 31b629b45..c33f343c5 100644
--- a/coremltools/converters/mil/frontend/torch/test/test_api.py
+++ b/coremltools/converters/mil/frontend/torch/test/test_api.py
@@ -1,12 +1,19 @@
-import pytest
-import coremltools as ct
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 import os
 
+import pytest
+
+import coremltools as ct
 from coremltools._deps import (
     _HAS_TORCH,
     MSG_TORCH_NOT_FOUND,
 )
 
+
 @pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND)
 class TestPyTorchConverter:
     @staticmethod
@@ -22,7 +29,7 @@ def test_no_inputs():
         traced_model = torch.jit.trace(model, example_input)
 
         with pytest.raises(ValueError) as e:
-            mlmodel = ct.convert(traced_model)
+            ct.convert(traced_model)
         e.match(r'Expected argument for pytorch "inputs" not provided')
 
     @staticmethod
@@ -54,5 +61,3 @@ def forward(self, x):
                 )
             ],
         )
-
-
diff --git a/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py b/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py
index 24b59aed5..36f70e15c 100644
--- a/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py
+++ b/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py
@@ -9,7 +9,6 @@
 
 from .testing_utils import convert_to_mlmodel, TorchBaseTest
 
-
 # Custom layer imports
 
 from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op
diff --git a/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py b/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py
index 8b0356fe9..497d581d7 100644
--- a/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py
+++ b/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py
@@ -19,7 +19,7 @@
 from coremltools.converters.mil.testing_utils import random_gen
 
 from .. import ops
-from ..converter import TorchConverter, TranscriptionContext
+from ..converter import TranscriptionContext
 from ..internal_graph import InternalTorchIRNode
 
 
@@ -168,7 +168,7 @@ def test_add_no_scale_factor(self, context):
 
     @pytest.mark.parametrize(
         "test_input_1, test_input_2",
-        [(np.random.rand(3, 2), np.random.rand(3, 2)), (np.random.rand(3, 2), 5),],
+        [(np.random.rand(3, 2), np.random.rand(3, 2)), (np.random.rand(3, 2), 5), ],
     )
     def test_sub(self, context, test_input_1, test_input_2):
         scale_factor = 1
@@ -183,7 +183,7 @@ def test_sub(self, context, test_input_1, test_input_2):
 
     @pytest.mark.parametrize(
         "test_input_1, test_input_2",
-        [(np.random.rand(3, 2), np.random.rand(3, 2)), (np.random.rand(3, 2), 5),],
+        [(np.random.rand(3, 2), np.random.rand(3, 2)), (np.random.rand(3, 2), 5), ],
     )
     def test_rsub(self, context, test_input_1, test_input_2):
         scale_factor = 1
@@ -612,7 +612,7 @@ def test_convolution2d(
             dilation=dilation,
         )
         expected_shape = tuple(torch_conv(test_input).shape)
-        assert ssa.val == None
+        assert ssa.val is None
         assert expected_shape == ssa.shape
 
     @pytest.mark.parametrize(
@@ -756,7 +756,7 @@ def test_convolution_transpose2d(
             dilation=dilation,
         )
         expected_shape = tuple(torch_conv(test_input).shape)
-        assert ssa.val == None
+        assert ssa.val is None
         assert expected_shape == ssa.shape
 
     @pytest.mark.parametrize(
@@ -912,7 +912,7 @@ def test_adaptive_avg_pool2d_exception(self, context):
             outputs=[output_name],
         )
         with pytest.raises(ValueError):
-            ssa = self._construct_test_graph(
+            self._construct_test_graph(
                 context,
                 ops.adaptive_avg_pool2d,
                 adaptive_avg_pool2d_node,
@@ -946,7 +946,7 @@ def test_batch_norm(self, context, input_shape):
         ssa = self._construct_test_graph(
             context, ops.batch_norm, batch_norm_node, output_name, constants=constants
         )
-        assert ssa.val == None
+        assert ssa.val is None
         assert ssa.shape == tuple(test_input.shape)
 
     @pytest.mark.parametrize("input_shape", [(1, 3, 15, 15), (1, 1, 1, 1)])
@@ -974,7 +974,7 @@ def test_instance_norm(self, context, input_shape):
         ssa = self._construct_test_graph(
             context, ops.instance_norm, instant_norm_node, output_name, constants=constants
         )
-        assert ssa.val == None
+        assert ssa.val is None
         assert ssa.shape == tuple(test_input.shape)
 
     @pytest.mark.parametrize("axis", [1, 2, 3])
@@ -1077,7 +1077,7 @@ def test_item_exception(self, context):
             kind="item", inputs=input_list, outputs=[output_name]
         )
         with pytest.raises(ValueError):
-            ssa = self._construct_test_graph(
+            self._construct_test_graph(
                 context, ops.item, item_node, output_name, constants=constants,
             )
 
@@ -1114,7 +1114,7 @@ def test_layer_norm(self, context, input_shape):
             graph_inputs=graph_inputs,
             constants=constants,
         )
-        assert ssa.val == None
+        assert ssa.val is None
         assert ssa.shape == input_shape
 
     @pytest.mark.parametrize("shape", [(1, 2), (2, 3, 4, 5), (3, 4, 5),])
@@ -1741,7 +1741,7 @@ def test_sort(self, context, input_size, dim, descending):
         node = InternalTorchIRNode(
             kind="sort", inputs=input_list, outputs=["out1", "out2"],
         )
-        ssa = self._construct_test_graph(context, ops.sort, node, constants=constants)
+        self._construct_test_graph(context, ops.sort, node, constants=constants)
         expected_sort, expected_index = torch.sort(
             test_input, dim=dim, descending=descending
         )
@@ -1810,7 +1810,7 @@ def test_topk(self, context, input_shape, k, dim, largest):
         topk_node = InternalTorchIRNode(
             kind="topk", inputs=input_list, outputs=["out1", "out2"]
         )
-        ssa = self._construct_test_graph(
+        self._construct_test_graph(
             context, ops.topk, topk_node, constants=constants
         )
         topk_result = context["out1"].val
diff --git a/coremltools/converters/mil/frontend/torch/test/test_passes.py b/coremltools/converters/mil/frontend/torch/test/test_passes.py
index b669137b4..2c6d3a76e 100644
--- a/coremltools/converters/mil/frontend/torch/test/test_passes.py
+++ b/coremltools/converters/mil/frontend/torch/test/test_passes.py
@@ -4,6 +4,7 @@
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 from collections import OrderedDict
+
 import numpy as np
 import pytest
 import torch
@@ -171,7 +172,6 @@ def test_transform_inplace_ops_graph(self):
         np.testing.assert_equal(len(graph.outputs), 1)
         np.testing.assert_equal(graph.outputs[0], graph.nodes[-1].outputs[0])
 
-
     def test_transform_inplace_ops_loop(self):
         # The test graph is:
         #    graph(
@@ -265,7 +265,6 @@ def test_transform_inplace_ops_loop(self):
         # That graph output should now be the output of the graph.
         np.testing.assert_equal(loop_node.outputs[0], graph.outputs[0])
 
-
     @pytest.mark.xfail(reason="rdar://64235006")
     def test_transform_inplace_ops_if(self):
         # The test graph is:
diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py b/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py
index a3853c679..6d58b6d51 100644
--- a/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py
+++ b/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py
@@ -23,11 +23,6 @@
 from coremltools import TensorType
 from coremltools._deps import version_lt
 
-pytestmark = pytest.mark.skipif(
-    sys.version_info >= (3, 8), reason="Segfault with Python 3.8+"
-)  # rdar://problem/65730375
-
-
 
 backends = testing_reqs.backends
 torch = pytest.importorskip("torch")
@@ -39,6 +34,153 @@
 COMMON_SHAPES = [(1, 10), (1, 5, 6), (1, 3, 5, 6), (1, 3, 4, 5, 6)]
 COMMON_SHAPES_ALL = [(1, )] + COMMON_SHAPES
 
+class TestScriptedModels(TorchBaseTest):
+    """Conversion tests that pass ``use_scripting=True`` to ``run_compare_torch``."""
+    @pytest.mark.parametrize(
+        "use_cpu_for_conversion, backend", itertools.product([True, False], backends)
+    )
+    def test_cond(self, use_cpu_for_conversion, backend):
+        if backend[0] == "mlprogram":
+            pytest.skip("rdar://81169758 (Cond tests hang on mlprogram backend)")
+        if backend[0] == "mlprogram" and not use_cpu_for_conversion:
+            pytest.xfail("rdar://78343191 ((MIL GPU) Core ML Tools Unit Test failures [failure to load or Seg fault])")
+        # NOTE(review): the xfail above is unreachable while the mlprogram skip precedes it.
+        class TestNet(nn.Module):
+            def forward(self, x):
+                if torch.squeeze(x) < 10.:
+                    return x*10.
+                else:
+                    return x*2.
+
+        torch_model = TestNet().eval()
+
+        self.run_compare_torch(torch.tensor([1.]), torch_model,
+            input_as_shape=False, backend=backend,
+            use_cpu_for_conversion=use_cpu_for_conversion, use_scripting=True)
+        self.run_compare_torch(torch.tensor([11.]), torch_model,
+            input_as_shape=False, backend=backend,
+            use_cpu_for_conversion=use_cpu_for_conversion, use_scripting=True)
+
+    @pytest.mark.parametrize("backend", backends)
+    def test_for_loop(self, backend):
+        class TestLayer(nn.Module):
+            def __init__(self):
+                super(TestLayer, self).__init__()
+
+            def forward(self, x):
+                x = 2.0 * x
+                return x
+
+        class TestNet(nn.Module):
+            input_size = (64,)
+
+            def __init__(self):
+                super(TestNet, self).__init__()
+                layer = TestLayer()
+                self.layer = torch.jit.trace(layer, torch.rand(self.input_size))
+
+            def forward(self, x):
+                for _ in range(7):
+                    x = self.layer(x)
+                return x
+
+        model = TestNet().eval()
+
+        self.run_compare_torch(model.input_size, model, backend=backend, use_scripting=True)
+
+    @pytest.mark.parametrize("backend", backends)
+    def test_while_loop(self, backend):
+        class TestLayer(nn.Module):
+            def __init__(self):
+                super(TestLayer, self).__init__()
+
+            def forward(self, x):
+                x = 0.5 * x
+                return x
+
+        class TestNet(nn.Module):
+            input_size = (1,)
+
+            def __init__(self):
+                super(TestNet, self).__init__()
+                layer = TestLayer()
+                self.layer = torch.jit.trace(layer, torch.rand(self.input_size))
+
+            def forward(self, x):
+                while x > 0.01:
+                    x = self.layer(x)
+                return x
+
+        model = TestNet().eval()
+
+        self.run_compare_torch(model.input_size, model, backend=backend, use_scripting=True)
+
+    @pytest.mark.parametrize("backend", backends)
+    def test_if(self, backend):
+        if backend[0] == 'mlprogram':
+            pytest.xfail("Not supported on ML Program backend")
+
+        class TestLayer(nn.Module):
+            def __init__(self):
+                super(TestLayer, self).__init__()
+
+            def forward(self, x):
+                x = torch.mean(x)
+                return x
+
+        class TestNet(nn.Module):
+            input_size = (64,)
+
+            def __init__(self):
+                super(TestNet, self).__init__()
+                layer = TestLayer()
+                self.layer = torch.jit.trace(layer, torch.rand(self.input_size))
+
+            def forward(self, x):
+                m = self.layer(x)
+                if m < 0:
+                    scale = -2.0
+                else:
+                    scale = 2.0
+                x = scale * x
+                return x
+
+        model = TestNet().eval()
+
+        self.run_compare_torch(model.input_size, model, backend=backend, use_scripting=True)
+
+    @pytest.mark.parametrize("backend", backends)
+    def test_linear(self, backend):
+        class Model(torch.nn.Module):
+            def __init__(self):
+                super(Model, self).__init__()
+                self.linear = torch.nn.Linear(2, 2)
+
+            def forward(self, x):
+                return self.linear(x)
+
+        model = Model().eval()
+
+        self.run_compare_torch(
+            torch.tensor([[1.,2.]]),
+            model,
+            input_as_shape=False,
+            backend=backend,
+            use_scripting=True,
+        )
+
+    @pytest.mark.parametrize("backend", backends)
+    def test_conv(self, backend):
+        pytest.xfail("rdar://88194776 ([Converter] coremltools is not working with scripted torch convolution model)")
+        model = torch.nn.Conv2d(in_channels=2, out_channels=3, kernel_size=1,
+                                padding="same", stride=1, dilation=1, groups=1, bias=False)
+        self.run_compare_torch(
+            (1, 2, 4, 5),
+            model,
+            backend=backend,
+            use_scripting=True,
+        )
+
 
 class TestAffineGrid(TorchBaseTest):
     @pytest.mark.parametrize(
@@ -200,6 +342,20 @@ def test_sort(self, shape, axis, descending, backend):
         TorchBaseTest.run_compare_torch(shape, model, backend=backend)
 
 
+class TestSelu(TorchBaseTest):
+    """Conversion check for ``torch.nn.SELU`` on a fixed 1-D input."""
+
+    @pytest.mark.parametrize(
+        "inplace, backend",
+        itertools.product([True, False], backends),
+    )
+    def test_selu(self, inplace, backend):
+        # Fixed values spanning negative, zero and positive inputs.
+        sample = torch.tensor([-6., -4., -2., 0., 2., 4., 6.])
+        selu = torch.nn.SELU(inplace=inplace)
+        TorchBaseTest.run_compare_torch(sample, selu, input_as_shape=False, backend=backend)
+
+
 class TestMv(TorchBaseTest):
     @pytest.mark.parametrize("matrix_shape, backend",
                              itertools.product([(2, 3), (10, 12), (10, 1), (1, 5)], backends)
@@ -261,6 +417,38 @@ def test_number_norm(self, shape, backend, p, keepdim):
             TorchBaseTest.run_compare_torch(shape, model, backend=backend, places=2)
 
 
+class TestHardswish(TorchBaseTest):
+    """Conversion checks for ``nn.Hardswish`` (in-place and out-of-place)."""
+
+    class HardswishModel(nn.Module):
+        def __init__(self, inplace=False):
+            super(TestHardswish.HardswishModel, self).__init__()
+            self.activation = nn.Hardswish(inplace=inplace)
+
+        def forward(self, x):
+            return self.activation(x)
+
+    def test_longer_range_input_element_values(self):
+        # Fixed 1-D input covering negative, zero and positive values.
+        x = torch.tensor([-6., -4., -2., 0., 2., 4., 6.])
+        model = TestHardswish.HardswishModel()
+        TorchBaseTest.run_compare_torch(x, model, input_as_shape=False)
+
+        model = TestHardswish.HardswishModel(inplace=True)
+        TorchBaseTest.run_compare_torch(x, model, input_as_shape=False)
+
+    @pytest.mark.parametrize(
+        "shape, backend",
+        itertools.product(
+            COMMON_SHAPES,
+            backends,
+        )
+    )
+    def test_additional_shapes_and_backends(self, shape, backend):
+        model = TestHardswish.HardswishModel()
+        TorchBaseTest.run_compare_torch(shape, model, backend=backend)
+
+
 class TestBatchNorm(TorchBaseTest):
     @pytest.mark.parametrize(
         "num_features, eps, affine, backend",
@@ -812,100 +1000,18 @@ def test_convolution_transpose3d(
         self.run_compare_torch((1, in_channels, depth, height, width), model,
                            backend=backend)
 
-
-class TestCond(TorchBaseTest):
-    @pytest.mark.parametrize(
-        "use_cpu_for_conversion, backend", itertools.product([True, False], backends)
-    )
-    def test_cond(self, use_cpu_for_conversion, backend):
-        if backend[0] == "mlprogram":
-            pytest.skip("rdar://81169758 (Cond tests hang on mlprogram backend)")
-        if backend[0] == "mlprogram" and not use_cpu_for_conversion:
-            pytest.xfail("rdar://78343191 ((MIL GPU) Core ML Tools Unit Test failures [failure to load or Seg fault])")
-
-        class TestNet(nn.Module):
-            def forward(self, x):
-                if torch.squeeze(x) < 10.:
-                    return x*10.
-                else:
-                    return x*2.
-
-        model = TestNet().eval()
-        torch_model = torch.jit.script(model)
-
-        self.run_compare_torch(torch.tensor([1.]), torch_model,
-            input_as_shape=False, backend=backend,
-            use_cpu_for_conversion=use_cpu_for_conversion)
-        self.run_compare_torch(torch.tensor([11.]), torch_model,
-            input_as_shape=False, backend=backend,
-            use_cpu_for_conversion=use_cpu_for_conversion)
-
-class TestLoop(TorchBaseTest):
-    @pytest.mark.parametrize("backend", backends)
-    def test_for_loop(self, backend):
-        class TestLayer(nn.Module):
-            def __init__(self):
-                super(TestLayer, self).__init__()
-
-            def forward(self, x):
-                x = 2.0 * x
-                return x
-
-        class TestNet(nn.Module):
-            input_size = (64,)
-
-            def __init__(self):
-                super(TestNet, self).__init__()
-                layer = TestLayer()
-                self.layer = torch.jit.trace(layer, torch.rand(self.input_size))
-
-            def forward(self, x):
-                for _ in range(7):
-                    x = self.layer(x)
-                return x
-
-        model = TestNet().eval()
-        torch_model = torch.jit.script(model)
-
-        self.run_compare_torch(model.input_size, torch_model, backend=backend)
-
-    @pytest.mark.parametrize("backend", backends)
-    def test_while_loop(self, backend):
-        class TestLayer(nn.Module):
-            def __init__(self):
-                super(TestLayer, self).__init__()
-
-            def forward(self, x):
-                x = 0.5 * x
-                return x
-
-        class TestNet(nn.Module):
-            input_size = (1,)
-
-            def __init__(self):
-                super(TestNet, self).__init__()
-                layer = TestLayer()
-                self.layer = torch.jit.trace(layer, torch.rand(self.input_size))
-
-            def forward(self, x):
-                while x > 0.01:
-                    x = self.layer(x)
-                return x
-
-        model = TestNet().eval()
-        torch_model = torch.jit.script(model)
-
-        self.run_compare_torch(model.input_size, torch_model, backend=backend)
-
-
 class TestUpsample(TorchBaseTest):
     @pytest.mark.parametrize(
         "output_size, align_corners, backend",
-         itertools.product(
-            [(10, 10), (1, 1), (2, 3), (190, 170)],
+        itertools.product(
+            [(10, 10),
+             # PyTorch has a bug for the following parameter:
+             # (1, 1),
+             # See: https://github.com/pytorch/pytorch/issues/71188
+             (2, 3), (190, 170)],
             [True, False],
             backends,
-         )
+        )
     )
     def test_upsample_bilinear2d_with_output_size(
         self, output_size, align_corners, backend
@@ -919,9 +1025,9 @@ def test_upsample_bilinear2d_with_output_size(
 
     @pytest.mark.parametrize(
         "scales_h, scales_w, align_corners, recompute_scale_factor, backend",
-         itertools.product(
+        itertools.product(
             [2, 0.5, 4.1], [3, 0.5, 5.3], [True, False], [True, False], backends
-         )
+        )
     )
     def test_upsample_bilinear2d_with_scales(
         self, scales_h, scales_w, align_corners, recompute_scale_factor, backend
@@ -953,9 +1059,9 @@ def _is_float_value(x, threshold=0.001):
 
     @pytest.mark.parametrize(
         "output_size, backend",
-         itertools.product(
-           [(10, 10), (190, 170)], backends
-         )
+        itertools.product(
+            [(10, 10), (190, 170)], backends
+        )
     )
     def test_upsample_nearest2d_with_output_size(self, output_size, backend):
         input_shape = (1, 3, 10, 10)
@@ -1041,42 +1147,6 @@ def _is_float_value(x, threshold=0.001):
                 if layer.WhichOneof('layer') == "upsample":
                     assert len(layer.upsample.fractionalScalingFactor) == 0
 
-class TestBranch(TorchBaseTest):
-    @pytest.mark.parametrize("backend", backends)
-    def test_if(self, backend):
-        if backend[0] == 'mlprogram':
-            pytest.xfail("Not supported on ML Program backend")
-
-        class TestLayer(nn.Module):
-            def __init__(self):
-                super(TestLayer, self).__init__()
-
-            def forward(self, x):
-                x = torch.mean(x)
-                return x
-
-        class TestNet(nn.Module):
-            input_size = (64,)
-
-            def __init__(self):
-                super(TestNet, self).__init__()
-                layer = TestLayer()
-                self.layer = torch.jit.trace(layer, torch.rand(self.input_size))
-
-            def forward(self, x):
-                m = self.layer(x)
-                if m < 0:
-                    scale = -2.0
-                else:
-                    scale = 2.0
-                x = scale * x
-                return x
-
-        model = TestNet().eval()
-        torch_model = torch.jit.script(model)
-
-        self.run_compare_torch(model.input_size, torch_model, backend=backend)
-
 
 class TestAvgPool(TorchBaseTest):
 
@@ -1664,11 +1734,34 @@ def __init__(self):
                 super(TestNet, self).__init__()
 
             def forward(self, x):
-                x = torch.cat((x,), axis=1)
-                return x
+                x = torch.cat((x,), axis=1)
+                return x
+
+        model = TestNet()
+        self.run_compare_torch((1, 3, 16, 16), model, backend=backend)
+
+class TestBitwiseNot(TorchBaseTest):
+    @pytest.mark.parametrize(
+        "backend, input_type",
+        itertools.product(
+            backends,
+            ["int", "bool"],
+        ),
+    )
+    def test_bitwise_not(self, backend, input_type):
+        class TestNet(nn.Module):
+            def __init__(self):
+                super(TestNet, self).__init__()
+
+            def forward(self, x):
+                return torch.bitwise_not(x)
 
         model = TestNet()
-        self.run_compare_torch((1, 3, 16, 16), model, backend=backend)
+        if input_type == "int":
+            torch_in = torch.tensor([1, 2, 3, -5, 0], dtype=torch.int32)
+        elif input_type == "bool":
+            torch_in = torch.tensor([True, False, True, False])
+        self.run_compare_torch(torch_in, model, backend=backend, input_as_shape=False)
 
 class TestFull(TorchBaseTest):
     @pytest.mark.parametrize(
@@ -1720,6 +1813,154 @@ def forward(self, x):
 
         self.run_compare_torch(shape, FullStaticModel().eval(), backend=backend)
 
+class TestDim(TorchBaseTest):
+    @pytest.mark.parametrize("shape, backend",
+        itertools.product(
+            [
+                (1,),
+                (2, 3),
+                (1, 1, 2, 5, 1),
+            ],
+            backends,
+            )
+        )
+    def test_dim(self, shape, backend):
+        class DimModel(nn.Module):
+            def __init__(self):
+                super(DimModel, self).__init__()
+
+            def forward(self, x):
+                return torch.tensor([x.dim()])
+
+        self.run_compare_torch(shape, DimModel().eval(), backend=backend)
+
+
+class TestNewZeros(TorchBaseTest):
+    @pytest.mark.parametrize(
+        "backend, rank",
+        itertools.product(
+            backends,
+            [1, 3],
+        ),
+    )
+    def test_new_zeros_dynamic(self, backend, rank):
+        class ZerosDynamicModel(nn.Module):
+            def __init__(self):
+                super(ZerosDynamicModel, self).__init__()
+
+            def forward(self, x):
+                if rank == 1:
+                    h = x[0]
+                    x = torch.zeros(h)
+                elif rank == 3:
+                    h, w, d = x[0], x[1], x[2]
+                    x = torch.zeros(h, w, d)
+                return x.new_zeros(x.shape)
+
+        input_shape = np.random.randint(low=2, high=6, size=rank)
+        torch_in = torch.tensor(input_shape)
+        model = ZerosDynamicModel().eval()
+        torch_out = model(torch_in)
+        self.run_compare_torch(torch_in, model, expected_results=torch_out,
+                           input_as_shape=False, backend=backend)
+
+    @pytest.mark.parametrize("shape, backend",
+        itertools.product(
+            [
+                (1,),
+                (2, 3),
+                (1, 1, 2, 5, 1),
+            ],
+            backends,
+            )
+        )
+    def test_new_zeros_static(self, shape, backend):
+        class ZerosStaticModel(nn.Module):
+            def __init__(self):
+                super(ZerosStaticModel, self).__init__()
+
+            def forward(self, x):
+                return x.new_zeros(x.shape)
+
+        self.run_compare_torch(shape, ZerosStaticModel().eval(), backend=backend)
+
+
+class TestNewFull(TorchBaseTest):
+    @pytest.mark.parametrize(
+        "backend, rank",
+        itertools.product(
+            backends,
+            [1, 3],
+        ),
+    )
+    def test_new_full_dynamic(self, backend, rank):
+        class FullDynamicModel(nn.Module):
+            def __init__(self):
+                super(FullDynamicModel, self).__init__()
+
+            def forward(self, x):
+                if rank == 1:
+                    h = x[0]
+                    x = torch.zeros(h)
+                elif rank == 3:
+                    h, w, d = x[0], x[1], x[2]
+                    x = torch.zeros(h, w, d)
+                return x.new_full(x.shape, fill_value=3.14)
+
+        input_shape = np.random.randint(low=2, high=6, size=rank)
+        torch_in = torch.tensor(input_shape)
+        model = FullDynamicModel().eval()
+        torch_out = model(torch_in)
+        self.run_compare_torch(torch_in, model, expected_results=torch_out,
+                           input_as_shape=False, backend=backend)
+
+    @pytest.mark.parametrize("shape_val, backend",
+        itertools.product(
+            [
+                [(1,), 0.],
+                [(2, 3), 3.1415],
+                [(1, 1, 2, 5, 1), -2.],
+            ],
+            backends,
+            )
+        )
+    def test_new_full_static(self, shape_val, backend):
+        shape, val = shape_val
+        class FullStaticModel(nn.Module):
+            def __init__(self):
+                super(FullStaticModel, self).__init__()
+
+            def forward(self, x):
+                return x.new_full(x.shape, fill_value=val)
+
+        self.run_compare_torch(shape, FullStaticModel().eval(), backend=backend)
+
+class TestEye(TorchBaseTest):
+    @pytest.mark.parametrize(
+        "backend, eye_type",
+        itertools.product(
+            backends,
+            ["single", "double"],
+        ),
+    )
+    def test(self, backend, eye_type):
+        class Model(nn.Module):
+            def __init__(self):
+                super(Model, self).__init__()
+
+            def forward(self, x):
+                if eye_type == "single":
+                    eye = torch.eye(3)
+                    return x + eye
+                elif eye_type == "double":
+                    eye = torch.eye(2,3)
+                    return x + eye
+
+        input_shape = (3,3) if eye_type == "single" else (2,3)
+        model = Model().eval()
+        self.run_compare_torch(input_shape, model, backend=backend)
+
+
 class TestOnes(TorchBaseTest):
     @pytest.mark.parametrize(
         "backend, rank",
@@ -1901,6 +2142,121 @@ def test_unsqueeze(self, backend, rank_and_axis):
         model = ModuleWrapper(function=torch.unsqueeze, kwargs={"dim": axis})
         self.run_compare_torch(input_shape, model, backend=backend)
 
+class TestLinspace(TorchBaseTest):
+    @pytest.mark.parametrize(
+        "backend, start_end, steps",
+        itertools.product(
+            backends,
+            [(-0.1, -0.7), (1, 10)],
+            [1, 3],
+        ),
+    )
+    def test_linspace_static(self, backend, start_end, steps):
+        input_shape = tuple([steps])
+        start, end = start_end
+        class Model(nn.Module):
+            def __init__(self):
+                super(Model, self).__init__()
+
+            def forward(self, x):
+                return torch.linspace(start, end, steps)
+                
+        model = Model()
+        self.run_compare_torch(input_shape, model, backend=backend)
+
+    @pytest.mark.parametrize(
+        "backend",
+        backends,
+    )
+    def test_linspace_static_large(self, backend):
+        input_shape = tuple([1])
+        class Model(nn.Module):
+            def __init__(self):
+                super(Model, self).__init__()
+
+            def forward(self, x):
+                return torch.linspace(1, 2_000_000, 2_000_000)
+                
+        model = Model()
+        self.run_compare_torch(input_shape, model, backend=backend)
+
+    @pytest.mark.parametrize(
+        "backend, start_end, steps",
+        itertools.product(
+            backends,
+            [(-0.1, -0.7), (1, 10)],
+            [1, 2, 100],
+        ),
+    )
+    def test_linspace_dynamic(self, backend, start_end, steps):
+        input_shape = tuple([steps])
+        start, end = start_end
+        class Model(nn.Module):
+            def __init__(self):
+                super(Model, self).__init__()
+
+            def forward(self, x):
+                return torch.linspace(x[0], x[1], steps)
+                
+        model = Model()
+        inputs = [torch.Tensor([start, end])]
+        self.run_compare_torch(inputs, model, backend=backend, input_as_shape=False)
+
+class TestArange(TorchBaseTest):
+    @pytest.mark.parametrize(
+        "backend, start_end_step",
+        itertools.product(
+            backends,
+            [
+                (-0.1, -0.7, -0.07), 
+                (3, 10, 0.3), 
+                (1, 10, 100), 
+                (1, 300000, 1),
+                (1, 10, 1e-6),
+            ],
+        ),
+    )
+    def test_arange_static(self, backend, start_end_step):
+        if start_end_step == (1, 10, 1e-6):
+            pytest.xfail("rdar://88998831 (range_1d has numerical issue when the step is small)")
+        input_shape = tuple([1,])
+        start, end, step = start_end_step
+        class Model(nn.Module):
+            def __init__(self):
+                super(Model, self).__init__()
+
+            def forward(self, x):
+                return torch.arange(start, end, step)
+                
+        model = Model()
+        self.run_compare_torch(input_shape, model, backend=backend)
+
+    @pytest.mark.parametrize(
+        "backend, start_end_step",
+        itertools.product(
+            backends,
+            [
+                (-0.1, -0.7, -0.07), 
+                (3, 10, 0.3), 
+                (1, 10, 100), 
+                (1, 300000, 1),
+            ],
+        ),
+    )
+    def test_arange_dynamic(self, backend, start_end_step):
+        input_shape = tuple([1,])
+        start, end, step = start_end_step
+        class Model(nn.Module):
+            def __init__(self):
+                super(Model, self).__init__()
+
+            def forward(self, x):
+                return torch.arange(x[0], x[1], x[2])
+                
+        model = Model()
+        inputs = [torch.tensor([start, end, step])]
+        self.run_compare_torch(inputs, model, backend=backend, input_as_shape=False)
+
 class TestEinsum(TorchBaseTest):
     @pytest.mark.parametrize(
         "backend, equation, reverse_input_order",
@@ -1933,7 +2289,7 @@ def forward(self, x, y):
             input_shapes = [[1,2,3,4], [1,2,4,4]]
         elif equation == "abc,cd->abd":
             input_shapes = [[2,3,4], [4,5]]
-        elif equation ==  "abc,cde->abde":
+        elif equation == "abc,cde->abde":
             input_shapes = [[2,3,4], [4,5,6]]
         elif equation == "btnh,bfnh->bnft":
             input_shapes = [[1,2,3,4], [1,5,3,4]]
@@ -2059,11 +2415,16 @@ def test_relu6(self, backend, shape):
         )
 
     @pytest.mark.parametrize(
-        "backend, alpha", itertools.product(backends, [0.1, 0.25, 2.0]),
+        "backend, alpha, shape", 
+        itertools.product(
+            backends, 
+            [0.25, 2.0],
+            [(1, 5, 6, 7), (1, 128)],
+        ),
     )
-    def test_prelu(self, backend, alpha):
-        input_shape = (1, 5, 6, 7)
-        C = input_shape[1]
+    def test_prelu(self, backend, alpha, shape):
+        input_shape = shape
+        C = input_shape[1] if len(input_shape) >= 3 else 1
         model = nn.PReLU(C, alpha).eval()
         self.run_compare_torch(
             input_shape, model, backend=backend,
@@ -2087,6 +2448,18 @@ def test_leaky_relu(self, backend, shape, alpha):
             shape, model, backend=backend,
         )
 
+    @pytest.mark.parametrize(
+        "backend, shape",
+        itertools.product(backends,
+            COMMON_SHAPES_ALL,
+        )
+    )
+    def test_randomized_leaky_relu(self, backend, shape):
+        model = nn.RReLU(lower=0.01, upper=0.9).eval()
+        self.run_compare_torch(
+            shape, model, backend=backend,
+        )
+
     @pytest.mark.parametrize(
         "backend, shape",
         itertools.product(backends, COMMON_SHAPES_ALL),
@@ -2457,7 +2830,7 @@ def forward(self, tokens, context, context_length):
                    TensorType(name="context_length", shape=(1,), dtype=np.int32),
                    ]
         self.run_compare_torch(inputs, model, rand_range=(0, 8),
-                               backend=backend, use_scripting=False)
+                               backend=backend)
 
 
 class TestRepeat(TorchBaseTest):
@@ -2745,6 +3118,38 @@ def forward(self, cond, x, y):
             input_as_shape=False,
         )
 
+    @pytest.mark.parametrize(
+        "backend, shapes",
+        itertools.product(
+            backends,
+            [
+                [(1, 2), (1, 2), (1, 1)],
+                [(1, 2, 3), (1, 1, 1), (1, 1, 3)],
+            ]
+        ),
+    )
+    def test_where_test3(self, backend, shapes):
+
+        class WhereModel(nn.Module):
+            def __init__(self):
+                super(WhereModel, self).__init__()
+
+            def forward(self, cond, x, y):
+                return torch.where(cond, x, y)
+        cond_shape, x_shape, y_shape = shapes
+        cond = torch.rand(*cond_shape) > 0.5
+        inputs = [cond, torch.rand(*x_shape), torch.rand(*y_shape)]
+        model = WhereModel()
+        expected_results = model(*inputs)
+        self.run_compare_torch(
+            inputs,
+            model,
+            backend=backend,
+            expected_results=expected_results,
+            input_as_shape=False,
+        )
+
+
 class TestSelect(TorchBaseTest):
     @pytest.mark.parametrize(
         "backend, dim_index",
@@ -2802,6 +3207,66 @@ def test_non_zero(self, backend, rank):
         self.run_compare_torch(input, model,
             input_as_shape=False, backend=backend)
 
+class TestTorchTensor(TorchBaseTest):
+    @pytest.mark.parametrize(
+        "backend, rank",
+        itertools.product(
+            backends,
+            [1, 2, 3, 4, 5],
+        ),
+    )   
+    def test_torch_tensor(self, backend, rank):
+        
+        class Model(nn.Module):
+            def __init__(self, rank):
+                super(Model, self).__init__()
+                self.rank = rank
+
+            def forward(self, x):
+                with torch.no_grad():
+                    if self.rank == 1:
+                        return self.generate_tensor_rank_1(x)
+                    if self.rank == 2:
+                        return self.generate_tensor_rank_2(x)
+                    if self.rank == 3:
+                        return self.generate_tensor_rank_3(x)
+                    if self.rank == 4:
+                        return self.generate_tensor_rank_4(x)
+                    if self.rank == 5:
+                        return self.generate_tensor_rank_5(x)   
+
+            @torch.jit.script
+            def generate_tensor_rank_1(x):
+                _, _, h, w = x.shape
+                return torch.tensor([h, w, 0, 1], dtype=torch.int32)
+
+            @torch.jit.script
+            def generate_tensor_rank_2(x):
+                _, _, h, w = x.shape
+                return torch.tensor([[0, h], [h, w], [w, w]], dtype=torch.float32)
+
+            @torch.jit.script
+            def generate_tensor_rank_3(x):
+                _, _, h, w = x.shape
+                return torch.tensor([[[h, 1]],[[3, w]]], dtype=torch.int32)
+
+            @torch.jit.script
+            def generate_tensor_rank_4(x):
+                _, _, h, w = x.shape
+                return torch.tensor([[[[h, h], [h, w]],[[w, w], [w, 1]]],[[[0, 0], [1, 1]],[[0, h], [h, w]]]], dtype=torch.float32)
+
+            @torch.jit.script
+            def generate_tensor_rank_5(x):
+                _, _, h, w = x.shape
+                return torch.tensor([[[[[h, w], [w, w]],[[1, 1],[0, h]]]]], dtype=torch.float32)
+
+        shape = (1, 1, 3, 4)
+        model = Model(rank)
+        self.run_compare_torch(
+            shape, model, backend=backend,
+        )
+        
+
 class TestTensorAssign(TorchBaseTest):
 
     @pytest.mark.parametrize(
@@ -2820,6 +3285,7 @@ def forward(self, x):
                 y = x + 1
                 x[1] = 2 * y[1]
                 return x, y
+
         shape = (5,)
         model = TensorAssignModel()
         self.run_compare_torch(
@@ -2845,6 +3311,7 @@ def forward(self, x, y):
                 y[0] = x[0] * 10
                 z = x + y
                 return z, x, y
+
         shape = (5,)
         model = TensorAssignModel()
         self.run_compare_torch(
@@ -2889,10 +3356,11 @@ def __init__(self):
                 super(TensorAssignModel, self).__init__()
 
             def forward(self, x, y):
-                x[0] = torch.tensor([1.,2.,3.,4.])
+                x[0] = torch.tensor([1., 2., 3., 4.])
                 x[3] = 1
                 y[0] = x[0]
                 return x, y
+
         shape = (5,4)
         model = TensorAssignModel()
         self.run_compare_torch(
@@ -2913,6 +3381,7 @@ def __init__(self):
             def forward(self, x):
                 x[:,1] = torch.tensor([1., 2.])
                 return x
+
         shape = (2,10)
         model = TensorAssignModel()
         self.run_compare_torch(
@@ -2932,6 +3401,7 @@ def __init__(self):
             def forward(self, x):
                 x[:,1,:] = torch.tensor([1., 2., 3., 4., 5., 6.]).view(2,3)
                 return x
+
         shape = (2,10,3)
         model = TensorAssignModel()
         self.run_compare_torch(
@@ -2953,6 +3423,7 @@ def forward(self, x, y):
                 mask = torch.tensor([True, False, False, False, True, True]).view(3,2)
                 x[mask] = y[mask]
                 return x
+
         shape = (3,2)
         model = IndexPutModel()
         self.run_compare_torch(
@@ -2978,6 +3449,7 @@ def forward(self, x):
                 if rank == 1:
                     x[mask] = torch.tensor([1.])
                 return x
+
         shape = (3,2)
         model = IndexPutModel()
         self.run_compare_torch(
@@ -3008,6 +3480,42 @@ def forward(self, x, y):
             inputs, model, backend=backend, input_as_shape=False,
         )
 
+    @pytest.mark.parametrize(
+        "backend, rank, accumulate",
+        itertools.product(
+            backends,
+            [1, 2],
+            [True, False]
+        ),
+    )
+    def test_index_put_case_4(self, backend, rank, accumulate):
+        class IndexPutModel(torch.nn.Module):
+            def __init__(self):
+                super(IndexPutModel, self).__init__()
+
+            def forward(self, x, indices, values):
+                x.index_put_(tuple(indices.t()), values, accumulate=accumulate)
+                return x
+
+        if rank == 1:
+            inputs = [
+                torch.Tensor([1., 2., 3., 4., 5., 6]),
+                torch.LongTensor([[0], [4]]),
+                torch.Tensor([3., 7.])
+            ]
+        elif rank == 2:
+            inputs = [
+                torch.ones([3, 4]),
+                torch.LongTensor([[0, 1], [1, 2], [2, 2]]),
+                torch.Tensor([1., 5., 8.]),
+            ]
+
+        model = IndexPutModel()
+        self.run_compare_torch(
+            inputs, model, backend=backend, input_as_shape=False,
+        )
+
+
 class TestIndex(TorchBaseTest):
     @pytest.mark.parametrize(
         "backend, shape",
@@ -3370,9 +3878,9 @@ def test_pad_constant(self, backend, rank: int):
             raise NotImplementedError("Only supports < 6D constant padding")
         val = float(np.random.random(1))
         input_shape = tuple(np.random.randint(low=1, high=10, size=rank))
-        pad_dims = np.random.randint(low=1, high=rank+1)
+        pad_dims = np.random.randint(low=1, high=rank + 1)
         pad = list(np.random.randint(low=0, high=10,
-                                     size=pad_dims*2))
+                                     size=pad_dims * 2))
         model = ModuleWrapper(function=torch.nn.functional.pad,
                               kwargs={"pad": pad, "mode": "constant", "value": val})
         self.run_compare_torch(
@@ -3457,7 +3965,6 @@ def __init__(self):
                     self.index = torch.randint(0, shapes[dim], size=shapes)
 
                 def forward(self, x):
-                    index = torch.tensor(self.index)
                     return x.scatter_add_(dim, self.index, self.source)
 
             self.run_compare_torch(shapes, TestModel().eval(), backend=backend)
diff --git a/coremltools/converters/mil/frontend/torch/test/testing_utils.py b/coremltools/converters/mil/frontend/torch/test/testing_utils.py
index 218411e9d..0636c9569 100644
--- a/coremltools/converters/mil/frontend/torch/test/testing_utils.py
+++ b/coremltools/converters/mil/frontend/torch/test/testing_utils.py
@@ -8,13 +8,11 @@
 import torch
 import torch.nn as nn
 
+from ..converter import torch_to_mil_types
 from coremltools import TensorType, RangeDim
+from coremltools._deps import _IS_MACOS
 import coremltools.models.utils as coremltoolsutils
-from ..converter import torch_to_mil_types
-from coremltools.models import MLModel
-from coremltools._deps import _IS_MACOS, _HAS_TORCH
 from coremltools.converters.mil.mil.types.type_mapping import nptype_from_builtin
-from coremltools.converters.mil.testing_reqs import ct
 from coremltools.converters.mil.testing_utils import ct_convert
 
 
@@ -44,6 +42,7 @@ def _flatten(object):
             flattened_list.append(item)
     return flattened_list
 
+
 def _copy_input_data(input_data):
     if isinstance(input_data, (list, tuple)):
         return [_copy_input_data(x) for x in input_data]
@@ -55,7 +54,8 @@ def contains_op(torch, op_string):
 
 
 def convert_to_coreml_inputs(input_description, inputs):
-    """Convenience function to combine a CoreML model's input description and
+    """
+    Convenience function to combine a CoreML model's input description and
     set of raw inputs into the format expected by the model's predict function.
     """
     flattened_inputs = _flatten(inputs)
@@ -70,8 +70,8 @@ def convert_to_coreml_inputs(input_description, inputs):
     return coreml_inputs
 
 
-def convert_to_mlmodel(model_spec, tensor_inputs, backend=("neuralnetwork", "fp32"), converter_input_type=None,
-                       use_cpu_for_conversion=False):
+def convert_to_mlmodel(model_spec, tensor_inputs, backend=("neuralnetwork", "fp32"),
+                       converter_input_type=None,use_cpu_for_conversion=False):
     def _convert_to_inputtype(inputs):
         if isinstance(inputs, list):
             return [_convert_to_inputtype(x) for x in inputs]
@@ -134,10 +134,10 @@ def flatten_and_detach_torch_results(torch_results):
 
 
 def convert_and_compare(input_data, model_spec,
-        expected_results=None, atol=1e-4,
-        backend=("neuralnetwork", "fp32"),
-        converter_input_type=None,
-        use_cpu_for_conversion=False):
+                        expected_results=None, atol=1e-4,
+                        backend=("neuralnetwork", "fp32"),
+                        converter_input_type=None,
+                        use_cpu_for_conversion=False):
     """
     If expected results is not set, it will by default
     be set to the flattened output of the torch model.
@@ -162,15 +162,16 @@ def convert_and_compare(input_data, model_spec,
         torch_input = _copy_input_data(input_data)
         expected_results = torch_model(*torch_input)
     expected_results = flatten_and_detach_torch_results(expected_results)
-    mlmodel = convert_to_mlmodel(model_spec, input_data, backend=backend, converter_input_type=converter_input_type,
+    mlmodel = convert_to_mlmodel(model_spec, input_data, backend=backend,
+                                 converter_input_type=converter_input_type,
                                  use_cpu_for_conversion=use_cpu_for_conversion)
-    coreml_inputs = convert_to_coreml_inputs(mlmodel.input_description,
-                                             input_data)
+
+    coreml_inputs = convert_to_coreml_inputs(mlmodel.input_description, input_data)
 
     if not _IS_MACOS or (mlmodel.is_package and coremltoolsutils._macos_version() < (12, 0)):
         return model_spec, mlmodel, coreml_inputs, None
 
-    _ , dtype = backend
+    _, dtype = backend
     if dtype == "fp16":
         atol = max(atol * 100.0, 5e-1)
 
@@ -180,18 +181,19 @@ def convert_and_compare(input_data, model_spec,
             coreml_results[key] for key in sorted(coreml_results.keys())
         ]
         for torch_result, coreml_result in zip(expected_results,
-                                                   sorted_coreml_results):
+                                               sorted_coreml_results):
             if torch_result.shape == ():
                 torch_result = np.array([torch_result])
             np.testing.assert_equal(coreml_result.shape, torch_result.shape)
             np.testing.assert_allclose(coreml_result, torch_result,
-                                           atol=atol)
+                                       atol=atol)
     return model_spec, mlmodel, coreml_inputs, coreml_results
 
 
 class TorchBaseTest(object):
-    testclassname=''
-    testmodelname=''
+    testclassname = ''
+    testmodelname = ''
+
     @pytest.fixture(autouse=True)
     def store_testname_with_args(self, request):
         TorchBaseTest.testclassname = type(self).__name__
@@ -213,7 +215,8 @@ def run_compare_torch(
         Args:
             input_as_shape <bool>: If true generates random input data with shape.
             expected_results <iterable, optional>: Expected result from running pytorch model.
-            converter_input_type: If not None, then pass it to the "inputs" argument to the ct.convert() call.
+            converter_input_type: If not None, then pass it to the "inputs" argument to the
+                ct.convert() call.
         """
         model.eval()
         if input_as_shape:
@@ -226,11 +229,11 @@ def run_compare_torch(
 
         model_spec, mlmodel, coreml_inputs, coreml_results = \
             convert_and_compare(
-            input_data, model_spec, expected_results=expected_results,
-            atol=10.0 ** -places, backend=backend,
-            converter_input_type=converter_input_type,
-            use_cpu_for_conversion=use_cpu_for_conversion,
-        )
+                input_data, model_spec, expected_results=expected_results,
+                atol=10.0 ** -places, backend=backend,
+                converter_input_type=converter_input_type,
+                use_cpu_for_conversion=use_cpu_for_conversion,
+            )
 
         return model_spec, mlmodel, coreml_inputs, coreml_results, \
-               TorchBaseTest.testclassname, TorchBaseTest.testmodelname
+            TorchBaseTest.testclassname, TorchBaseTest.testmodelname
diff --git a/coremltools/converters/mil/frontend/torch/torch_op_registry.py b/coremltools/converters/mil/frontend/torch/torch_op_registry.py
index 64407d3d0..128fdd5ae 100644
--- a/coremltools/converters/mil/frontend/torch/torch_op_registry.py
+++ b/coremltools/converters/mil/frontend/torch/torch_op_registry.py
@@ -31,15 +31,25 @@ def register_torch_op(_func=None, torch_alias=None, override=False):
 
     def func_wrapper(func):
         f_name = func.__name__
+
+        if f_name.endswith("_"):
+            raise Exception(
+                "Attempting to register \"{}\" op. Do not register inplace ops. (inplace torch ops"
+                " end in a \"_\"). Instead register the normal op version: \"{}\". The inplace"
+                " version will be supported automatically.".format(f_name, f_name[:-1])
+            )
         if not override and f_name in _TORCH_OPS_REGISTRY:
             raise ValueError("Torch op {} already registered.".format(f_name))
+
         _TORCH_OPS_REGISTRY[f_name] = func
+
         if torch_alias is not None:
             for name in torch_alias:
                 if not override and name in _TORCH_OPS_REGISTRY:
                     msg = "Torch op alias {} already registered."
                     raise ValueError(msg.format(name))
                 _TORCH_OPS_REGISTRY[name] = func
+
         return func
 
     if _func is None:
diff --git a/coremltools/converters/mil/frontend/torch/torchir_passes.py b/coremltools/converters/mil/frontend/torch/torchir_passes.py
index 0b9e23713..4f3bcb1e3 100644
--- a/coremltools/converters/mil/frontend/torch/torchir_passes.py
+++ b/coremltools/converters/mil/frontend/torch/torchir_passes.py
@@ -3,17 +3,20 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-from collections import OrderedDict
+from collections import defaultdict, OrderedDict
 import logging as _logging
 
 from .internal_graph import InternalTorchIRNode, InternalTorchIRGraph
 
 def generate_tensor_assignment_ops(graph):
     """
-    This graph pass handles the tensor value assignement,
+    This graph pass handles inplace tensor assignments, specifically it handles:
+    `torch.Tensor.copy_` and `torch.Tensor.fill_`. There are many other inplace tensor
+    assignments which are currently not handled.
+
     for instance:
 
-        def forward(self, x): # x a tensor with shape [4,10]
+        def forward(self, x):    # x a tensor with shape [4,10]
             x[:2, 4] = [[1],[3]]
             return x
 
@@ -22,21 +25,21 @@ def forward(self, x): # x a tensor with shape [4,10]
         input -> %x
         %1 = slice(%x, dim=0, begin=0, end=2) # the slice for dimension 0
         %2 = select(%1, dim=1, index=4) # the select for dimension 1
-        %3 = copy(%2, value=[[1], [3]])
+        %3 = copy_(%2, value=[[1], [3]])
         output -> %x
 
-    This graph pass fuses the sequences into a single InternalTorchIRNode of a new kind, which is defined as _internal_tensor_value_assign.
+    This graph pass fuses the sequences into a single InternalTorchIRNode of a new kind, which is defined as `_internal_op_tensor_inplace_copy`.
 
         input -> %x
         %nodes_to_fuse = [slice(%x, begin=0, end=2), select(%1, dim=1, index=4)]
-        %x_internal_tensor_assign_1 = _internal_tensor_value_assign(%x, value=[[1],[3]], nodes_to_fuse=nodes_to_fuse)
+        %x_internal_tensor_assign_1 = _internal_op_tensor_inplace_copy(%x, value=[[1],[3]], nodes_to_fuse=nodes_to_fuse)
         output -> x_internal_tensor_assign_1
 
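-    The _internal_tensor_value_assign op takes an additional internal data member nodes_to_fuse,
+    The `_internal_op_tensor_inplace_copy` op takes an additional internal data member nodes_to_fuse,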
     which is a list of select / slice InternalTorchIRNodes that need to be fused.
     Here is a more complicated example:
 
-        def forward(self, x): # x a tensor with shape [4,10]
+        def forward(self, x):    # x a tensor with shape [4,10]
             x[0, 0] = 1
             x[1:2, 1:2] = [[0]]
             return x
@@ -45,19 +48,27 @@ def forward(self, x): # x a tensor with shape [4,10]
         input -> %x
         %1 = select(%x, dim=0, index=0)
         %2 = select(%1, dim=0, index=0)
-        %3 = copy(%2, value=1)
+        %3 = copy_(%2, value=1)
         %4 = slice(%x, dim=0, begin=1, end=2)
         %5 = slice(%4, dim=1, begin=1, end=2)
-        %6 = copy(%5, value=[[0]])
+        %6 = copy_(%5, value=[[0]])
         output -> %x
 
     Output graph:
         input -> %x
         %nodes_to_fuse_1 = [select(%x, dim=0, index=0), select(%1, dim=0, index=0)]
-        %x_internal_tensor_assign_1 = _internal_tensor_value_assign(%x, value=1, nodes_to_fuse=nodes_to_fuse_1)
+        %x_internal_tensor_assign_1 = _internal_op_tensor_inplace_copy(%x, value=1, nodes_to_fuse=nodes_to_fuse_1)
         %nodes_to_fuse_2 = [slice(%x, dim=0, begin=1, end=2), slice(%4, dim=1, begin=1, end=2)]
-        %x_internal_tensor_assign_2 = _internal_tensor_value_assign(%x_internal_tensor_assign_1, value=[[0]], nodes_to_fuse=nodes_to_fuse_2)
+        %x_internal_tensor_assign_2 = _internal_op_tensor_inplace_copy(%x_internal_tensor_assign_1, value=[[0]], nodes_to_fuse=nodes_to_fuse_2)
         output -> x_internal_tensor_assign_2
+
+    `torch.Tensor.fill_` works in a similar way, except that the InternalTorchIRNode kind is `_internal_op_tensor_inplace_fill`.
+
+    A fill_ operator is generated from the following forward pass:
+
+        def forward(self, x):    # x a tensor with shape [5, 4]
+            x[2] = 9
+            return x
     """
 
     TENSOR_ASSIGMENT_PREFIX = "_internal_tensor_assign_"
@@ -77,7 +88,7 @@ def _construct_nodes_to_fuse_inputs(nodes_to_fuse):
         return inputs
 
     tensor_to_node_sequence_mapping = {}
-    updated_tensor_count = {}
+    updated_tensor_count = defaultdict(lambda : 0)
 
     for i in range(len(graph.nodes)):
         node = graph.nodes[i]
@@ -86,7 +97,7 @@ def _construct_nodes_to_fuse_inputs(nodes_to_fuse):
             input_name = node.inputs[idx]
             node.inputs[idx] = _get_updated_name(input_name, updated_tensor_count)
 
-        if node.kind in ["select", "slice"]:
+        if node.kind in ("select", "slice"):
             node_input = node.inputs[0]
             node_output = node.outputs[0]
             node_sequence = tensor_to_node_sequence_mapping.get(node_input, [])
@@ -95,30 +106,34 @@ def _construct_nodes_to_fuse_inputs(nodes_to_fuse):
             node_sequence.append(node)
             tensor_to_node_sequence_mapping[node_output] = node_sequence
 
-        if node.kind == "copy_":
+        if node.kind in ("copy_", "fill_"):
             node_input = node.inputs[0]
-            if node_input in tensor_to_node_sequence_mapping:
-                nodes_to_fuse = tensor_to_node_sequence_mapping[node_input]
-                source_tensor = nodes_to_fuse[0].inputs[0]
-                origin_name = source_tensor.split(TENSOR_ASSIGMENT_PREFIX)[0]
-
-                if origin_name not in updated_tensor_count:
-                    updated_tensor_count[origin_name] = 1
-                else:
-                    updated_tensor_count[origin_name] += 1
-
-                outputs = [_get_updated_name(origin_name, updated_tensor_count)]
-
-                update_value = node.inputs[1]
-                nodes_to_fuse_inputs = _construct_nodes_to_fuse_inputs(nodes_to_fuse)
-                tensor_assign_node = InternalTorchIRNode(
-                                    node=None,
-                                    inputs=[source_tensor, update_value] + nodes_to_fuse_inputs,
-                                    outputs=outputs,
-                                    kind="_internal_tensor_value_assign",
-                                    blocks=[],
-                                )
-                graph.nodes[i] = tensor_assign_node
+            if node_input not in tensor_to_node_sequence_mapping:
+                raise ValueError("No matching select or slice.")
+
+            if node.kind == "copy_":
+                kind = "_internal_op_tensor_inplace_copy"
+            else:
+                kind = "_internal_op_tensor_inplace_fill"
+
+            nodes_to_fuse = tensor_to_node_sequence_mapping[node_input]
+            source_tensor = nodes_to_fuse[0].inputs[0]
+            origin_name = source_tensor.split(TENSOR_ASSIGMENT_PREFIX)[0]
+
+            updated_tensor_count[origin_name] += 1
+
+            outputs = [_get_updated_name(origin_name, updated_tensor_count)]
+
+            update_value = node.inputs[1]
+            nodes_to_fuse_inputs = _construct_nodes_to_fuse_inputs(nodes_to_fuse)
+            tensor_assign_node = InternalTorchIRNode(
+                node=None,
+                inputs=[source_tensor, update_value] + nodes_to_fuse_inputs,
+                outputs=outputs,
+                kind=kind,
+                blocks=[],
+            )
+            graph.nodes[i] = tensor_assign_node
 
     # modify the graph outputs if it is effected by this graph pass
     for idx in range(len(graph.outputs)):
@@ -128,7 +143,8 @@ def _construct_nodes_to_fuse_inputs(nodes_to_fuse):
 
 
 def remove_getattr_nodes(graph):
-    """Remove the getattr nodes in the graph
+    """
+    Remove the getattr nodes in the graph
     """
 
     getattr_nodes = []
@@ -260,8 +276,9 @@ def flatten_graph_input_values(graph):
 
 
 def flatten_graph_output_values(graph):
-    """ CoreML can't handle nested iterables of tensors, so we flatten the
-        outputs of any graph that produces them.
+    """
+    CoreML can't handle nested iterables of tensors, so we flatten the
+    outputs of any graph that produces them.
     """
     node_names = [node.name for node in graph.nodes]
     new_graph_outputs = graph.outputs
diff --git a/coremltools/converters/mil/input_types.py b/coremltools/converters/mil/input_types.py
index 1e575564d..b0d9138bd 100644
--- a/coremltools/converters/mil/input_types.py
+++ b/coremltools/converters/mil/input_types.py
@@ -3,8 +3,8 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-import logging
 import numpy as np
+
 from coremltools.converters.mil.mil.types.symbolic import is_symbolic
 from coremltools.converters.mil.mil import types
 from coremltools.converters.mil.mil.types.type_mapping import (
@@ -28,19 +28,19 @@ def __init__(
         class_labels: str / list of int / list of str
             If a ``list`` is given, the ``list`` maps the index of the output of a
             neural network to labels in a classifier.
-            
+
             If a ``str`` is given, the ``str`` points to a file which maps the index
             to labels in a classifier.
-        
+
         predicted_feature_name: str
             Name of the output feature for the class labels exposed in the
             Core ML neural network classifier. Default: ``'classLabel'``.
-        
+
         predicted_probabilities_output: str
             If provided, then this is the name of the neural network blob which
             generates the probabilities for each class label (typically the output
-            of a softmax layer). 
-            
+            of a softmax layer).
+
             If not provided, then the last output layer is assumed.
         """
         self.class_labels = class_labels
@@ -57,10 +57,10 @@ def __init__(self, name=None, shape=None, dtype=types.fp32):
         ----------
         name: (str)
             The name of the input.
-        
+
         shape: list, tuple, Shape object, EnumeratedShapes object, or None
             The shape(s) that are valid for this input.
-            
+
             If set to ``None``, the shape will be infered from the model itself.
         """
 
@@ -89,26 +89,26 @@ def __init__(
         ----------
         scale: (float)
             The scaling factor for all values in the image channels.
-        
+
         bias: float or list of float
             If ``color_layout`` is ``'G'``, bias would be a ``float``.
-            
+
             If `color_layout` is ``'RGB'`` or ``'BGR'``, bias would be a list of ``float``.
-        
+
         color_layout: string
             Color layout of the image.
-            
+
             Valid values:
                 * ``'G'``: Grayscale
                 * ``'RGB'``: [Red, Green, Blue]
                 * ``'BGR'``: [Blue, Green, Red]
-            
+
         channel_first: (bool) or None
             Set to ``True`` if input format is channel first.
-            
+
             Default format:
-            	For TensorFlow: channel last (``channel_first=False``).
-            	
+                For TensorFlow: channel last (``channel_first=False``).
+
                 For PyTorch: channel first (``channel_first=True``).
         """
         super(ImageType, self).__init__(name, shape)
@@ -148,27 +148,27 @@ def __init__(self, name=None, shape=None, dtype=None,
         name: str
             Input name. Must match an input name in the model (usually the
             Placeholder name for TensorFlow or the input name for PyTorch).
-            
+
             The ``name`` is required except for a TensorFlow model in which there is
             exactly one input Placeholder.
-        
+
         shape: (1) list of positive int or RangeDim, or (2) EnumeratedShapes
             The shape of the input.
-            
+
             For TensorFlow:
               * The ``shape`` is optional. If omitted, the shape is inferred from
                 TensorFlow graph's Placeholder shape.
-            
+
             For PyTorch:
               * The ``shape`` is required.
-        
+
         dtype: np.generic or mil.type type
             Numpy ``dtype`` (for example, ``np.int32``). Default is ``np.float32``.
-        
+
         default_value: np.ndarray
             If provided, the input is considered optional. At runtime, if the
             input is not provided, ``default_value`` is used.
-            
+
             Limitations:
               *  If ``default_value`` is ``np.ndarray``, all
                  elements are required to have the same value.
@@ -180,9 +180,9 @@ def __init__(self, name=None, shape=None, dtype=None,
         --------
         * ``ct.TensorType(name="input", shape=(1, 2, 3))` implies `dtype ==
           np.float32``
-        
+
         * ``ct.TensorType(name="input", shape=(1, 2, 3), dtype=np.int32)``
-    	
+
         * ``ct.TensorType(name="input", shape=(1, 2, 3),
           dtype=ct.converters.mil.types.fp32)``
         """
@@ -243,17 +243,17 @@ def __init__(self, lower_bound=1, upper_bound=-1, default=None,
         ----------
         lower_bound: (int)
             The minimum valid value for the shape.
-        
+
         upper_bound: (int)
             The maximum valid value for the shape.
-            
+
             Set to ``-1`` if there's no upper limit.
-        
+
         default: (int) or None
             The default value that is used for initiating the model, and set in
 
             input shape field of the model file.
-            
+
             If set to ``None``, ``lower_bound`` would be used as default.
 
         symbol: (str)
@@ -366,9 +366,9 @@ def has_symbolic(self):
         return any(is_symbolic(s) for s in self.symbolic_shape)
 
     def to_list(self, allow_symbolic=False):
-      if not allow_symbolic and self.has_symbolic:
-          return None
-      return self.symbolic_shape
+        if not allow_symbolic and self.has_symbolic:
+            return None
+        return self.symbolic_shape
 
 
 class EnumeratedShapes(object):
@@ -380,14 +380,14 @@ def __init__(self, shapes, default=None):
         ----------
         shapes: list of Shape objects, or Shape-compatible lists.
             The valid shapes of the inputs.
-            
+
             If input provided is not Shape object, but can be converted to Shape,
             the Shape object would be stored in ``shapes`` instead.
-        
+
         default: tuple of int or None
             The default shape that is used for initiating the model, and set in
             the metadata of the model file.
-            
+
             If None, then the first element in ``shapes`` is used.
         """
         from coremltools.converters.mil.mil import get_new_symbol
diff --git a/coremltools/converters/mil/mil/block.py b/coremltools/converters/mil/mil/block.py
index 60e4602d8..a71798c32 100644
--- a/coremltools/converters/mil/mil/block.py
+++ b/coremltools/converters/mil/mil/block.py
@@ -6,17 +6,16 @@
 from collections import Counter, OrderedDict
 import copy
 import logging
-import numpy as _np
+
 from . import SPACES, types
-from .var import Var, InternalVar, ListVar
-from .visitors.dot_visitor import DotVisitor
 from .types.symbolic import (
     k_used_symbols,
     k_num_internal_syms,
-    any_symbolic,
     is_symbolic,
 )
-from .input_type import TupleInputType
+from .var import Var, InternalVar
+from .visitors.dot_visitor import DotVisitor
+
 
 # BLOCK_STACK[-1] is the current block
 BLOCK_STACK = []
@@ -103,7 +102,6 @@ def __init__(self, block_inputs=None, outer_op=None, name=None):
         # Must be set before self.validate()
         self.outer_op = outer_op
 
-
         self._block_inputs = block_inputs
         if self._block_inputs is None:
             self._block_inputs = tuple()
diff --git a/coremltools/converters/mil/mil/builder.py b/coremltools/converters/mil/mil/builder.py
index 5954f9b4f..47647338d 100644
--- a/coremltools/converters/mil/mil/builder.py
+++ b/coremltools/converters/mil/mil/builder.py
@@ -51,8 +51,8 @@ class Builder:
     >>> from coremltools.converters.mil.mil import Program, Function
 
     >>> prog = Program()
-    >>> func_inputs = {"x": mb.placeholder(_shape=[2,3]),
-    >>>                "y": mb.placeholder(_shape=[2,3])}
+    >>> func_inputs = {"x": mb.placeholder(shape=[2,3]),
+    >>>                "y": mb.placeholder(shape=[2,3])}
     >>> with Function(func_inputs) as ssa_fun:
-    >>>   x, y = ssa_fun.inputs['x'], ssa_fun.inputs['x']
+    >>>   x, y = ssa_fun.inputs['x'], ssa_fun.inputs['y']
     >>>   res_var = mb.add(x=x, y=y) # created within ssa_fun block
@@ -194,8 +194,8 @@ def _add_op(cls, op_cls, **kwargs):
         return new_op.outputs
 
     @staticmethod
-    def placeholder(shape, dtype=None):
-        return Placeholder(shape, dtype)
+    def placeholder(shape, dtype=None, allow_rank0_input=False):
+        return Placeholder(shape, dtype, allow_rank0_input=allow_rank0_input)
 
     @staticmethod
     def TensorSpec(shape, dtype=None):
diff --git a/coremltools/converters/mil/mil/input_type.py b/coremltools/converters/mil/mil/input_type.py
index 69e701378..5565a0c17 100644
--- a/coremltools/converters/mil/mil/input_type.py
+++ b/coremltools/converters/mil/mil/input_type.py
@@ -26,7 +26,8 @@
                         types.fp64,
                     ]
 
-class DefaultInputs(object):
+
+class DefaultInputs:
     def __init__(self, **kwargs):
         # Since python 3.6, kwargs preserves the input order. See
         # https://docs.python.org/3/whatsnew/3.6.html#whatsnew36-pep468
@@ -44,7 +45,8 @@ def __add__(self, default_inputs):
             self._ordered_dict[k] = v
         return self
 
-class InputSpec(object):
+
+class InputSpec:
     def __init__(self, **kwargs):
         # Since python 3.6, kwargs preserves the input order. See
         # https://docs.python.org/3/whatsnew/3.6.html#whatsnew36-pep468
@@ -124,7 +126,7 @@ def validate_inputs(self, op_name, op_type, candidate_kvs):
 
 
 
-class _InputType(object):
+class _InputType:
     """
     (Untyped) input containing fundamental properties of all inputs to an
     Operation:
@@ -179,6 +181,7 @@ def type_str(self):
         """Descriptive string describing expected mil types"""
         return self.__str__(self)
 
+
 class ListInputType(_InputType):
     def __init__(self, **kwargs):
         super(ListInputType, self).__init__(**kwargs)
@@ -190,6 +193,7 @@ def _is_compatible(self, v):
     def type_str(self):
         return 'list'
 
+
 class ScalarOrTensorInputType(_InputType):
     def __init__(self, **kwargs):
         super(ScalarOrTensorInputType, self).__init__(**kwargs)
@@ -413,6 +417,7 @@ def _is_compatible(self, v):
     def type_str(self):
         return 'tuple'
 
+
 class InternalInputType(_InputType):
     """
     InternalInputType specifies input types outside of Program's type system.
diff --git a/coremltools/converters/mil/mil/operation.py b/coremltools/converters/mil/mil/operation.py
index 9bcb7545c..75d3cfaea 100644
--- a/coremltools/converters/mil/mil/operation.py
+++ b/coremltools/converters/mil/mil/operation.py
@@ -413,8 +413,7 @@ def _ensure_required_inputs(self):
                 raise ValueError(msg_prefix + \
                     "Required input {} is missing".format(name))
 
-    def _validate_and_set_inputs(self, input_kvs,
-        no_check_var_types=False):
+    def _validate_and_set_inputs(self, input_kvs, no_check_var_types=False):
         """
         For each k, v in `input_kvs`, perform the followings:
 
@@ -456,7 +455,7 @@ def check_and_detach(v_new, v_old, op, no_check_var_types):
 
         for name, var in input_kvs.items():
             # TODO: remove InternalVar check
-            #if not isinstance(var, InternalVar):
+            # if not isinstance(var, InternalVar):
 
             # Remove this operation itself from existing input
             # Var's child_ops
diff --git a/coremltools/converters/mil/mil/ops/defs/_utils.py b/coremltools/converters/mil/mil/ops/defs/_utils.py
index 5cca93fc7..4aa960a8d 100644
--- a/coremltools/converters/mil/mil/ops/defs/_utils.py
+++ b/coremltools/converters/mil/mil/ops/defs/_utils.py
@@ -8,6 +8,7 @@
 from coremltools.converters.mil.mil import get_new_symbol, types
 from coremltools.converters.mil.mil.types.symbolic import is_symbolic
 
+MAX_SIZE_CONSTANT_FOLDING = 1024 * 1024 / 4 # When a fp32 const takes over 1MB, we won't create a const op for that
 
 def broadcast_shapes(shape_x, shape_y):
     """
diff --git a/coremltools/converters/mil/mil/ops/defs/control_flow.py b/coremltools/converters/mil/mil/ops/defs/control_flow.py
index b95175a4c..cf2933f35 100644
--- a/coremltools/converters/mil/mil/ops/defs/control_flow.py
+++ b/coremltools/converters/mil/mil/ops/defs/control_flow.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
@@ -20,7 +19,6 @@
     DefaultInputs,
     InputSpec,
     InternalScalarOrTensorInputType,
-    InternalStringInputType,
     IntTensorInputType,
     IntInputType,
     ListInputType,
@@ -44,6 +42,7 @@
 )
 from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op
 
+
 @register_op(doc_str="")
 class cond(Operation):
     """
@@ -310,7 +309,7 @@ class while_loop(Operation):
 
     _body: function  (Required)
         * A Python function that takes ``loop_vars`` as positional arguments.
-        * The function must return the same number of output vars as ``loop_var``
+        * The function must return the same number of output vars as ``loop_vars``
           with the same types.
 
     loop_vars: tuple (Required)
@@ -357,7 +356,7 @@ def _build_block(self, block_inputs):
         # Cond block:
         block_name = self.name + '_cond_block'
         with Block(block_inputs=block_inputs, outer_op=self,
-                name=block_name) as cond_block:
+                   name=block_name) as cond_block:
 
             cond_func = self._cond.val
             cond_var = cond_func(*cond_block.inputs)
@@ -367,13 +366,12 @@ def _build_block(self, block_inputs):
         # Body block
         block_name = self.name + '_body_block'
         with Block(block_inputs=block_inputs, outer_op=self,
-                name=block_name) as body_block:
+                   name=block_name) as body_block:
             body_func = self._body.val
             exit_vars = body_func(*body_block.inputs)
             exit_vars = list(exit_vars) if isinstance(exit_vars, (list, tuple)) \
                 else [exit_vars]
             body_block.set_outputs(exit_vars)
-            #self.blocks.append(body_block)
 
         return cond_block, body_block, exit_vars
 
@@ -719,7 +717,6 @@ def type_inference(self):
         return list_elem_type
 
 
-
 @register_op(doc_str="")
 class list_gather(Operation):
     """
diff --git a/coremltools/converters/mil/mil/ops/defs/linear.py b/coremltools/converters/mil/mil/ops/defs/linear.py
index 8d37c21dc..a50587222 100644
--- a/coremltools/converters/mil/mil/ops/defs/linear.py
+++ b/coremltools/converters/mil/mil/ops/defs/linear.py
@@ -64,7 +64,20 @@ def type_inference(self):
         x_shape = self.x.shape
         weight_shape = self.weight.shape
         assert len(weight_shape) == 2
-
+        if not (
+                x_shape[-1] == weight_shape[-1]
+                or is_symbolic(x_shape[-1])
+                or is_symbolic(weight_shape[-1])
+        ):
+            msg = "Op '{}' (linear op): Size of the last dimension of x, which is {}, " \
+                  "does not match the last dimension of weights, which is {}"
+            raise ValueError(msg.format(self.name, x_shape[-1], weight_shape[-1]))
+        if self.bias is not None:
+            assert len(self.bias.shape) == 1
+            if len(self.bias.val) != weight_shape[-2]:
+                msg = "Op '{}' (linear op): Size of the bias, which is {}, " \
+                      "does not match the first dimension of weights, which is {}"
+                raise ValueError(msg.format(self.name, len(self.bias.val), weight_shape[-2]))
         shape = list(x_shape)
         shape[-1] = weight_shape[0]
         return types.tensor(x_type, tuple(shape))
diff --git a/coremltools/converters/mil/mil/ops/defs/normalization.py b/coremltools/converters/mil/mil/ops/defs/normalization.py
index 9f065a664..f1fa74825 100644
--- a/coremltools/converters/mil/mil/ops/defs/normalization.py
+++ b/coremltools/converters/mil/mil/ops/defs/normalization.py
@@ -26,19 +26,19 @@ class batch_norm(Operation):
     """
     Normalize input tensor ``x`` by ``mean`` and ``variance``, and optionally apply a
     scale ``gamma`` and an offset ``beta``:
-    
+
     .. math::
        y_i = \\gamma_i \\dfrac{ (x_i - mean_i)}{\\sqrt{variance_i + epsilon}} + beta_i \\;,\\;i=1,....,C
-    
+
     The ``mean``, ``variance``, ``gamma``, and ``beta``
     must be 1-D tensors whose lengths are equal to the second axis (the "depth"
     or "channel" dimension) of ``x``.
-    
+
     Parameters
     ----------
     x: tensor<[n,C,*D], T> (Required)
-        * ``3 <= rank <= 4``.
-        * ``*D`` refers to the spatial dimensions, ``1 <= rank(*D) <= 2``.
+        * ``3 <= rank <= 5``.
+        * ``*D`` refers to the spatial dimensions, ``1 <= rank(*D) <= 3``.
         * ``n`` is the batch dimension.
     mean: const tensor<[C], T> (Required)
     variance: const tensor<[C], T> (Required)
@@ -50,7 +50,7 @@ class batch_norm(Operation):
         * Default is all zeros.
     epsilon: const fp32 (Optional)
         * Default is ``1e-5``.
-    
+
     Returns
     -------
     tensor<[n,C,*D], T>
@@ -60,7 +60,7 @@ class batch_norm(Operation):
     ----------
     T: fp32
     """
-    
+
     input_spec = InputSpec(
         x=TensorInputType(),
         mean=TensorInputType(const=True),
@@ -75,7 +75,7 @@ def default_inputs(self):
             gamma=None,
             beta=None,
             epsilon=1e-5,
-            )
+        )
 
     def __init__(self, **kwargs):
         super(batch_norm, self).__init__(**kwargs)
@@ -89,7 +89,7 @@ def type_inference(self):
 class instance_norm(Operation):
     """
     Apply instance normalization to the n-dimensional input tensor.
-    
+
     Parameters
     ----------
     x: tensor<[n,C,*D], T>  (Required)
@@ -104,13 +104,13 @@ class instance_norm(Operation):
         * Default to all zeros.
     epsilon: const f32 (Optional)
         * Default to ``1e-5``.
-    
+
     Returns
     -------
     tensor<[n,C,*D], T>
         * Output tensor has the same shape and type as the input ``x``.
     """
-    
+
     input_spec = InputSpec(
         x=TensorInputType(),
         gamma=TensorInputType(const=True, optional=True),
@@ -123,7 +123,7 @@ def default_inputs(self):
             gamma=None,
             beta=None,
             epsilon=1e-5,
-            )
+        )
 
     def __init__(self, **kwargs):
         super(instance_norm, self).__init__(**kwargs)
@@ -138,11 +138,11 @@ class l2_norm(Operation):
     """
     Apply L2 normalization to the n-dimensional input tensor. That is, divide the input
     tensor by the square root of the sum of squares of all elements of the input.
-    
+
     .. math::
        x_i \\leftarrow \\dfrac{x_i}{\\sqrt{\\sum{x_i^2} + \\epsilon}}
-    
-    
+
+
     Parameters
     ----------
     x: tensor<[*D,C,H,W], T> (Required)
@@ -159,12 +159,12 @@ class l2_norm(Operation):
     -------
     tensor<[\*D,C,H,W], T>
         * Same type and shape as the input tensor ``x``.
-    
+
     Attributes
     ----------
     T: fp32
     """
-    
+
     input_spec = InputSpec(
         x=TensorInputType(),
         epsilon=FloatInputType(const=True, optional=True),
@@ -225,7 +225,7 @@ class layer_norm(Operation):
     ----------
     T: fp32
     """
-    
+
     input_spec = InputSpec(
         x=TensorInputType(),
         axes=IntTensorInputType(const=True, optional=True),
@@ -249,8 +249,8 @@ def __init__(self, **kwargs):
     def _is_compatible_shape(shapea, shapeb):
         if not len(shapea) == len(shapeb):
             return False
-        for a,b in zip(shapea, shapeb):
-            if any_symbolic([a,b]):
+        for a, b in zip(shapea, shapeb):
+            if any_symbolic([a, b]):
                 continue
             if a != b:
                 return False
@@ -302,11 +302,11 @@ def np_layer_norm(x, axes, gamma, beta, epsilon=1e-5):
 class local_response_norm(Operation):
     """
     Apply local response normalization to the n-dimensional input tensor:
-    
+
     .. math::
        x_i \\leftarrow \\dfrac{x_i}{\\left ( k + \\dfrac{\\alpha}{\\text{size}} \\sum_j x_j^2 \\right )^\\beta}
-    
-    
+
+
     Parameters
     ----------
     x: tensor<[n,C,*D], T> (Required)
@@ -324,17 +324,17 @@ class local_response_norm(Operation):
     k: const fp32 (Optional)
         * Additive factor.
         * Default is ``1.0``.
-    
+
     Returns
     -------
     tensor<[n,C,*D], T>
         * Same type and shape as the input tensor ``x``.
-    
+
     Attributes
     ----------
     T: fp32
     """
-    
+
     input_spec = InputSpec(
         x=TensorInputType(),
         size=IntInputType(const=True),
diff --git a/coremltools/converters/mil/mil/ops/defs/random.py b/coremltools/converters/mil/mil/ops/defs/random.py
index 36518b8fe..f76f216c0 100644
--- a/coremltools/converters/mil/mil/ops/defs/random.py
+++ b/coremltools/converters/mil/mil/ops/defs/random.py
@@ -3,6 +3,7 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+from ._op_reqs import register_op
 from coremltools.converters.mil.mil import get_new_symbol, get_new_variadic_symbol, types
 from coremltools.converters.mil.mil.input_type import (
     DefaultInputs,
@@ -13,11 +14,8 @@
     TensorInputType,
     StringInputType
 )
-
-from coremltools.converters.mil.mil.types.symbolic import any_symbolic
 from coremltools.converters.mil.mil.operation import Operation
-
-from ._op_reqs import register_op
+from coremltools.converters.mil.mil.types.symbolic import any_symbolic
 
 
 class RandomDistribution(Operation):
@@ -95,7 +93,7 @@ def default_inputs(self):
             DefaultInputs(
                 seed=-1,
                 prob=0.5,
-                )
+            )
 
     def __init__(self, **kwargs):
         super(random_bernoulli, self).__init__(**kwargs)
@@ -148,7 +146,7 @@ def default_inputs(self):
             mode="logits",
             size=1,
             seed=-1,
-            )
+        )
 
     def __init__(self, **kwargs):
         super(random_categorical, self).__init__(**kwargs)
@@ -204,7 +202,7 @@ def default_inputs(self):
                 mean=0.,
                 stddev=1.,
                 seed=-1,
-                )
+            )
 
     def __init__(self, **kwargs):
         super(random_normal, self).__init__(**kwargs)
@@ -270,7 +268,7 @@ def default_inputs(self):
                 low=0.,
                 high=1.,
                 seed=-1,
-                )
+            )
 
     def __init__(self, **kwargs):
         super(random_uniform, self).__init__(**kwargs)
diff --git a/coremltools/converters/mil/mil/ops/defs/recurrent.py b/coremltools/converters/mil/mil/ops/defs/recurrent.py
index 2f3c2af4c..b60b93046 100644
--- a/coremltools/converters/mil/mil/ops/defs/recurrent.py
+++ b/coremltools/converters/mil/mil/ops/defs/recurrent.py
@@ -114,7 +114,7 @@ def default_inputs(self):
             output_sequence=False,
             recurrent_activation="sigmoid",
             activation="tanh",
-            )
+        )
 
     def __init__(self, **kwargs):
         super(gru, self).__init__(**kwargs)
@@ -159,7 +159,7 @@ def type_inference(self):
                 "Incorrect weight matrix: hidden dim size mismatch. \
                 Provided weight_ih {}, weight_hh {}. Expecting <b, 3*H>").format(
                     self.weight_ih.shape, self.weight_hh.shape
-                )
+            )
 
         out_seq_len = sequence_length if self.output_sequence.val else 1
         output_shape = [out_seq_len, batch_size, hidden_size]
@@ -364,13 +364,12 @@ def weight_shape_check(wt_ih, wt_hh):
                     )
 
             hidden_size = wt_hh.shape[1]
-            input_size = wt_ih.shape[1]
             if wt_hh.shape[0] // hidden_size != 4 or wt_ih.shape[0] // hidden_size != 4:
                 raise ValueError(
                     "Incorrect weight matrix: hidden dim size mismatch. \
                                 Provided weight_ih {}, weight_hh {}. Expecting <4*H, H>").format(
                                     wt_ih.shape, wt_hh.shape
-                                )
+                )
 
         direction = self.direction.val
         valid_directions = {"forward", "reverse", "bidirectional"}
@@ -378,7 +377,7 @@ def weight_shape_check(wt_ih, wt_hh):
             raise ValueError(
                 "Direction {} not supported. Supported directions: {}").format(
                     direction, valid_directions
-                )
+            )
 
         weight_shape_check(self.weight_ih, self.weight_hh)
         if direction == "bidirectional":
diff --git a/coremltools/converters/mil/mil/ops/defs/scatter_gather.py b/coremltools/converters/mil/mil/ops/defs/scatter_gather.py
index f7337218d..03b4e49fa 100644
--- a/coremltools/converters/mil/mil/ops/defs/scatter_gather.py
+++ b/coremltools/converters/mil/mil/ops/defs/scatter_gather.py
@@ -2,6 +2,7 @@
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 import numpy as np
 import numbers
 
@@ -17,7 +18,6 @@
 from coremltools.converters.mil.mil.operation import precondition
 from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op
 from coremltools.converters.mil.mil.types.symbolic import is_compatible_symbolic_vector, is_symbolic
-
 from coremltools.converters.mil.mil.operation import (
     SYMBOL,
     VALUE
@@ -55,7 +55,7 @@ class gather(Operation):
     .. math::
        x[p_0, ..., p_{axis-1}, ~~~~~~~ indices[i_0, ..., i_{M-1}], ~~~~~~~ p_{axis+1}, ..., p_{rank(x)-1}]
 
-    Where ``M = rank(x)``.
+    Where ``M = rank(indices)``.
 
     Parameters
     ----------
@@ -149,7 +149,7 @@ class scatter(Operation):
     .. math::
        updates[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_D]
 
-    * For ``j! = i``:
+    * For ``j != i``:
 
     .. math::
        output[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] =
@@ -167,7 +167,7 @@ class scatter(Operation):
     .. math::
        x[p_0, ..., p_{axis-1}, indice[i], p_{axis+1}, ..., p_D]
 
-    * For ``j! = i``:
+    * For ``j != i``:
 
     .. math::
        output[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] =
diff --git a/coremltools/converters/mil/mil/ops/defs/tensor_operation.py b/coremltools/converters/mil/mil/ops/defs/tensor_operation.py
index f1ffe22f0..52a72e0f8 100644
--- a/coremltools/converters/mil/mil/ops/defs/tensor_operation.py
+++ b/coremltools/converters/mil/mil/ops/defs/tensor_operation.py
@@ -40,7 +40,7 @@
 )
 
 from ._op_reqs import register_op
-from ._utils import promoted_primitive_type
+from ._utils import promoted_primitive_type, MAX_SIZE_CONSTANT_FOLDING
 
 
 @register_op(doc_str="")
@@ -561,7 +561,12 @@ def value_inference(self):
         start = self.start.val
         end = self.end.val
         step = self.step.val
-        return np.arange(start, end, step).astype(np.int32)
+        shape = (end - start) / step
+        # To prevent creating a constant greater than 1MB,
+        # an upper bound on the size of the resulting array is set.
+        if shape > MAX_SIZE_CONSTANT_FOLDING:
+            return None
+        return np.arange(start, end, step)
 
     def type_inference(self):
         start = self.start.sym_val
diff --git a/coremltools/converters/mil/mil/ops/defs/tensor_transformation.py b/coremltools/converters/mil/mil/ops/defs/tensor_transformation.py
index 2344420d4..929073c0e 100644
--- a/coremltools/converters/mil/mil/ops/defs/tensor_transformation.py
+++ b/coremltools/converters/mil/mil/ops/defs/tensor_transformation.py
@@ -486,7 +486,7 @@ class slice_by_index(Operation):
     With a tensor ``x``, this method achieves the following:
     
     ``result = x[begin[0]: end[0]: stride[0], begin[1]: end[1]: stride[1], ...]``
-    
+
     Note: This method does not support pure indexing. You would need to do a 
     squeeze if indexing is intended.
 
@@ -571,7 +571,6 @@ def type_inference(self):
     def value_inference(self):
         if self.x.sym_val is None or self.begin.val is None or self.end.val is None:
             return None
-        x_shape = self.x.shape
         begin = [int(i) for i in list(self.begin.val[:])]
         end = [int(i) for i in list(self.end.val[:])]
         stride = [1] * self.x.rank if self.stride is None else self.stride.val
@@ -816,8 +815,7 @@ def type_inference(self):
             for i in sorted(axes)[::-1]:  # descending order
                 if len(squeezed_shape) <= i:
                     raise ValueError(
-                        "Cannot squeeze dim {} for shape"
-                        + " {}".format(i, squeezed_shape)
+                        "Cannot squeeze dim {} for shape {}".format(i, squeezed_shape)
                     )
                 squeezed_shape.pop(i)
 
@@ -828,7 +826,7 @@ def value_inference(self):
         if self.x.val is None:
             return None
         if self.axes is None:
-            val =  np.squeeze(self.x.val)
+            val = np.squeeze(self.x.val)
         else:
             val = np.squeeze(self.x.val, axis=tuple(self.axes.val))
         return val if val.shape != () else self.x.val[0]
diff --git a/coremltools/converters/mil/mil/ops/tests/test_elementwise_unary.py b/coremltools/converters/mil/mil/ops/tests/test_elementwise_unary.py
index 9ebc0abd1..2215383d6 100644
--- a/coremltools/converters/mil/mil/ops/tests/test_elementwise_unary.py
+++ b/coremltools/converters/mil/mil/ops/tests/test_elementwise_unary.py
@@ -544,7 +544,6 @@ def build(x):
             shape = mb.shape(x=x)
             return mb.cast(x=shape, dtype="int32")
 
-        prog = Program()
         with Function(input_placeholders) as ssa_func:
             output_vars = build(**ssa_func.inputs)
             assert is_compatible_symbolic_vector(output_vars.sym_val, [get_new_symbol(), 1])
@@ -663,16 +662,6 @@ def test_builder_to_backend_stress_cast(
             pytest.xfail("rdar://78343191 ((MIL GPU) Core ML Tools Unit Test failures [failure to load or Seg fault])")
 
         src_dtype, dst_dtype = src_dst
-
-        type_map = {
-            "int32": np.int32,
-            "int64": np.int64,
-            "fp16": np.float16,
-            "fp32": np.float32,
-            "fp64": np.float64,
-            "bool": np.bool,
-        }
-
         x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
         numpy_pred = x.astype(dtype=np.float16)
 
@@ -707,7 +696,7 @@ def test_erf_value_inference(self):
 
         @mb.program(input_specs=[])
         def prog():
-            return  mb.erf(x=x)
+            return mb.erf(x=x)
 
         ops = list(prog.functions.values())[0].operations
         assert len(ops) == 2
diff --git a/coremltools/converters/mil/mil/ops/tests/test_image_resizing.py b/coremltools/converters/mil/mil/ops/tests/test_image_resizing.py
index 96826de50..92c14fed7 100644
--- a/coremltools/converters/mil/mil/ops/tests/test_image_resizing.py
+++ b/coremltools/converters/mil/mil/ops/tests/test_image_resizing.py
@@ -29,6 +29,10 @@ class TestAffine:
     def test_builder_to_backend_smoke(self, use_cpu_only, backend):
         if backend[0] == "neuralnetwork":
             pytest.xfail("nn backend not supported")
+        if backend[0] == "mlprogram" and backend[1] == "fp16":
+            pytest.xfail("rdar://86653285 ([ MIL ] TestAffine::test_builder_to_backend_smoke[[use_cpu_only=False]-mlprogram-fp16] CI Failure)")
+        if backend[0] == "mlprogram" and backend[1] == "fp32":
+            pytest.xfail("rdar://88039548 (test_image_resizing.py::TestAffine::test_builder_to_backend_smoke is failing)")
 
         x_val = np.array([11.0, 22.0, 33.0, 44.0], dtype=np.float32).reshape(
             [1, 1, 2, 2]
@@ -299,6 +303,9 @@ def test_builder_to_backend_smoke(self, use_cpu_only, backend):
         if backend[0] == "mlprogram":
             pytest.xfail("Seg fault: rdar://78343191 ((MIL GPU) Core ML Tools Unit Test failures [failure to load or Seg fault])")
 
+        if backend[0] == "neuralnetwork":
+            pytest.xfail("rdar://85318710 (Coremltools Smoke test on ResizeBilinear failing on NNv1 backend.)")
+
         x = np.array([0, 1], dtype=np.float32).reshape(1, 1, 2)
         input_placeholder_dict = {"x": mb.placeholder(shape=x.shape)}
         input_value_dict = {"x": x}
diff --git a/coremltools/converters/mil/mil/ops/tests/test_normalization.py b/coremltools/converters/mil/mil/ops/tests/test_normalization.py
index 4f598e804..4981ea56a 100644
--- a/coremltools/converters/mil/mil/ops/tests/test_normalization.py
+++ b/coremltools/converters/mil/mil/ops/tests/test_normalization.py
@@ -304,7 +304,7 @@ def build(x):
             output = x_val/norm
         else:
             batch_dim_prod = np.prod(shape[:batch_dims])
-            reshape_x_val = np.reshape(x_val,(batch_dim_prod,-1))
+            reshape_x_val = np.reshape(x_val, (batch_dim_prod, -1))
             norm = la.norm(reshape_x_val, axis=1, keepdims=True)
             output = reshape_x_val/norm
             output = np.reshape(output, shape)
@@ -353,6 +353,9 @@ def _np_layer_norm(x, axes, gamma=None, beta=None, epsilon=1e-5):
         "use_cpu_only, backend", itertools.product([True, False], backends,)
     )
     def test_builder_to_backend_smoke(self, use_cpu_only, backend):
+        if backend[0] == "mlprogram" and backend[1] == "fp32":
+            pytest.xfail("rdar://88039548 (test_image_resizing.py::TestAffine::test_builder_to_backend_smoke is failing)")
+
         x_val = np.array([[[1.0, -7.0], [5.0, -6.0], [-3.0, -5.0]]], dtype=np.float32)
         input_placeholders = {"x": mb.placeholder(shape=x_val.shape)}
         input_values = {"x": x_val}
@@ -417,6 +420,9 @@ def build(x):
         "use_cpu_only, backend", itertools.product([True, False], backends,)
     )
     def test_builder_to_backend_smoke_rank_2(self, use_cpu_only, backend):
+        if backend[0] == "mlprogram" and backend[1] == "fp32":
+            pytest.xfail("rdar://88039548 (test_image_resizing.py::TestAffine::test_builder_to_backend_smoke is failing)")
+
         x_val = np.array([[1.0, -7.0], [5.0, -6.0], [-3.0, -5.0]], dtype=np.float32)
         gamma_val = np.array([1.0, 1.0], dtype=np.float32)
         beta_val = np.array([1.0, 0.0], dtype=np.float32)
diff --git a/coremltools/converters/mil/mil/ops/tests/test_tensor_operation.py b/coremltools/converters/mil/mil/ops/tests/test_tensor_operation.py
index 3ffef6a45..7e3dd02c8 100644
--- a/coremltools/converters/mil/mil/ops/tests/test_tensor_operation.py
+++ b/coremltools/converters/mil/mil/ops/tests/test_tensor_operation.py
@@ -12,7 +12,7 @@
     get_new_symbol,
     types
 )
-from coremltools.converters.mil.testing_utils import random_gen, ssa_fn
+from coremltools.converters.mil.testing_utils import random_gen, ssa_fn, get_op_types_in_program
 from .testing_utils import UNK_SYM, UNK_VARIADIC, run_compare_builder
 
 if testing_reqs._HAS_TF_1 or testing_reqs._HAS_TF_2:
@@ -980,6 +980,42 @@ def build(x, y, z):
             backend=backend,
         )
 
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend", itertools.product([True, False], backends,)
+    )
+    def test_large_array(self, use_cpu_only, backend):
+        
+        input_placeholders = {
+            "x": mb.placeholder(shape=(1,)), # dummy input
+        }
+        input_values = {"x": 0.5}
+
+        def build(x):
+            return [mb.range_1d(start=0.0, end=2000000.0, step=1.0)]
+
+        expected_output_types = [
+            (2000000, types.fp32)
+        ]
+
+        expected_outputs = [
+            np.arange(0.0, 2000000.0, 1.0),
+        ]
+
+        mlmodel = run_compare_builder(
+            build,
+            input_placeholders,
+            input_values,
+            expected_output_types,
+            expected_outputs,
+            use_cpu_only=use_cpu_only,
+            backend=backend,
+        )
+
+        # verify that the range_1d op is not const folded
+        prog = mlmodel._mil_program
+        ops = get_op_types_in_program(prog)
+        assert ops == ["range_1d", "identity"]
+
     @ssa_fn
     def test_builder_eval(self):
         v = mb.range_1d(start=5, end=15, step=2)
diff --git a/coremltools/converters/mil/mil/ops/tests/testing_utils.py b/coremltools/converters/mil/mil/ops/tests/testing_utils.py
index a8b4eb45c..96f0240ae 100644
--- a/coremltools/converters/mil/mil/ops/tests/testing_utils.py
+++ b/coremltools/converters/mil/mil/ops/tests/testing_utils.py
@@ -57,6 +57,9 @@ def run_compare_builder(
             Argument which is passed as is to the unified converter API.
             That is, "ct.convert(...., useCPUOnly=use_cpu_for_conversion)"
             It forces the model to be loaded on the CPU context, post conversion.
+
+    Returns:
+        The converted mlmodel
     """
     if not isinstance(expected_output_types, list):
         expected_output_types = [expected_output_types]
@@ -119,7 +122,7 @@ def run_compare_builder(
                          useCPUOnly=use_cpu_for_conversion)
 
     if frontend_only:
-        return
+        return mlmodel
 
     if expected_outputs:
         assert len(output_vars) == len(expected_outputs), (
@@ -142,3 +145,5 @@ def run_compare_builder(
         also_compare_shapes=also_compare_shapes,
         dtype=backend[1]
     )
+    
+    return mlmodel
diff --git a/coremltools/converters/mil/mil/passes/__init__.py b/coremltools/converters/mil/mil/passes/__init__.py
index 901dc952e..7cba6372f 100644
--- a/coremltools/converters/mil/mil/passes/__init__.py
+++ b/coremltools/converters/mil/mil/passes/__init__.py
@@ -44,7 +44,6 @@
 )
 
 from coremltools.converters.mil.experimental.passes import (
-    generic_gelu_tanh_approximation_fusion,
     generic_layernorm_instancenorm_pattern_fusion,
     generic_linear_bias_fusion,
     generic_conv_batchnorm_fusion,
diff --git a/coremltools/converters/mil/mil/passes/add_conv_transpose_output_shape.py b/coremltools/converters/mil/mil/passes/add_conv_transpose_output_shape.py
index 32170a290..5bb03418f 100644
--- a/coremltools/converters/mil/mil/passes/add_conv_transpose_output_shape.py
+++ b/coremltools/converters/mil/mil/passes/add_conv_transpose_output_shape.py
@@ -29,9 +29,9 @@ def apply(self, prog):
             _handle_block(f)
 
 def _match_pattern(op):
-  return op.op_type == "conv_transpose" \
-      and op.output_shape is None \
-      and not any_symbolic(op.outputs[0].shape)
+    return op.op_type == "conv_transpose" \
+        and op.output_shape is None \
+        and not any_symbolic(op.outputs[0].shape)
 
 def _handle_block(block):
     for op in list(block.operations):
diff --git a/coremltools/converters/mil/mil/passes/const_elimination.py b/coremltools/converters/mil/mil/passes/const_elimination.py
index 7aa53c436..f81b566d7 100644
--- a/coremltools/converters/mil/mil/passes/const_elimination.py
+++ b/coremltools/converters/mil/mil/passes/const_elimination.py
@@ -24,16 +24,21 @@ class const_elimination(AbstractGraphPass):
     #   %4 = other_op(%2_const, %3)
     #
     """
+    def __init__(self):
+        self.ops_to_skip = set()
 
-    def _const_elimination_block(self, block, ops_to_ignore):
+    def set_ops_to_skip(self, prog):
+        pass
+
+    def _const_elimination_block(self, block):
         # shallow copy hides changes on f.operations during the loop
         for op in list(block.operations):
 
-            if op in ops_to_ignore:
+            if op.op_type == "const" or op in self.ops_to_skip:
                 continue
 
             for b in op.blocks:
-                self._const_elimination_block(b, ops_to_ignore)
+                self._const_elimination_block(b)
 
             all_outputs_are_const = True
             for i, o in enumerate(op.outputs):
@@ -57,28 +62,8 @@ def _const_elimination_block(self, block, ops_to_ignore):
             if all_outputs_are_const:
                 op.remove_from_block()
 
-    def _get_ops_to_ignore(self, prog):
-        """
-        utility function to get the ops which cannot be removed in the const elimination pass, which is all the const ops.
-        """
-        ops_to_ignore = set()
-
-        def _get_ops_to_ignore_block(block):
-
-            for op in list(block.operations):
-
-                for b in op.blocks:
-                    _get_ops_to_ignore_block(b)
-
-                if op.op_type == "const":
-                    ops_to_ignore.add(op)
-
-        for f in prog.functions.values():
-            _get_ops_to_ignore_block(f)
-
-        return ops_to_ignore
 
     def apply(self, prog):
-        ops_to_ignore = self._get_ops_to_ignore(prog)
+        self.set_ops_to_skip(prog)
         for f in prog.functions.values():
-            self._const_elimination_block(f, ops_to_ignore)
+            self._const_elimination_block(f)
diff --git a/coremltools/converters/mil/mil/passes/conv_batchnorm_fusion.py b/coremltools/converters/mil/mil/passes/conv_batchnorm_fusion.py
index 02986e6c3..29bbfcbe1 100644
--- a/coremltools/converters/mil/mil/passes/conv_batchnorm_fusion.py
+++ b/coremltools/converters/mil/mil/passes/conv_batchnorm_fusion.py
@@ -113,42 +113,6 @@ def _try_to_transform(conv_op, bn_op, block):
     block.remove_ops([conv_op, bn_op])
     return True
 
-
-def _fuse_conv_batchnorm_block(block):
-
-    def _match_pattern(op):
-        if op.op_type == "conv" or op.op_type == "conv_transpose":
-            # abort fusion if op output is also a block output
-            if op.outputs[0] in op.enclosing_block.outputs:
-                return None
-            # find batch_norm op
-            child_ops = op.outputs[0].child_ops
-            if len(child_ops) == 1:
-                bn_op_candidate = list(child_ops)[0]
-                if bn_op_candidate.op_type == "batch_norm":
-                    return bn_op_candidate
-        return None
-
-    fusion_occurred = False
-    for op in list(block.operations):
-        for b in op.blocks:
-            block_changed = True
-            while block_changed:
-                block_changed = _fuse_conv_batchnorm_block(b)
-        if len(op.blocks) > 0:
-            # This op can't be conv or conv_transpose
-            continue
-
-        bn_op = _match_pattern(op)
-        if bn_op is not None:
-            with block:
-                fusion_occurred = _try_to_transform(op, bn_op, block)
-            # has to break as the downstream iterator is affected.
-            if fusion_occurred:
-                return fusion_occurred
-    return fusion_occurred
-
-
 @register_pass(namespace="common")
 class fuse_conv_batchnorm(AbstractGraphPass):
     """
@@ -165,8 +129,50 @@ class fuse_conv_batchnorm(AbstractGraphPass):
         ...
 
     """
+    def __init__(self):
+        self.ops_to_skip = set()
+
+    def set_ops_to_skip(self, prog):
+        pass
+
+    def _fuse_conv_batchnorm_block(self, block):
+
+        def _match_pattern(op):
+            if op.op_type == "conv" or op.op_type == "conv_transpose":
+                # abort fusion if op output is also a block output
+                if op.outputs[0] in op.enclosing_block.outputs:
+                    return None
+                # find batch_norm op
+                child_ops = op.outputs[0].child_ops
+                if len(child_ops) == 1:
+                    bn_op_candidate = list(child_ops)[0]
+                    if bn_op_candidate.op_type == "batch_norm":
+                        return bn_op_candidate
+            return None
+
+        fusion_occurred = False
+        for op in list(block.operations):
+            for b in op.blocks:
+                block_changed = True
+                while block_changed:
+                    block_changed = self._fuse_conv_batchnorm_block(b)
+            if len(op.blocks) > 0:
+                # This op can't be conv or conv_transpose
+                continue
+            if op in self.ops_to_skip:
+                continue
+            bn_op = _match_pattern(op)
+            if bn_op is not None:
+                with block:
+                    fusion_occurred = _try_to_transform(op, bn_op, block)
+                # has to break as the downstream iterator is affected.
+                if fusion_occurred:
+                    return fusion_occurred
+        return fusion_occurred
+
     def apply(self, prog):
+        self.set_ops_to_skip(prog)
         for f in prog.functions.values():
             block_changed = True
             while block_changed:
-                block_changed = _fuse_conv_batchnorm_block(f)
+                block_changed = self._fuse_conv_batchnorm_block(f)
diff --git a/coremltools/converters/mil/mil/passes/conv_bias_fusion.py b/coremltools/converters/mil/mil/passes/conv_bias_fusion.py
index 0a88ddec1..fe6443097 100644
--- a/coremltools/converters/mil/mil/passes/conv_bias_fusion.py
+++ b/coremltools/converters/mil/mil/passes/conv_bias_fusion.py
@@ -29,249 +29,6 @@ def _match_pattern(op):
                 return add_op_candidate
     return None
 
-def _try_to_transform_transpose_pattern(conv_op, block):
-    ops_to_remove = []
-
-    # conv layer
-    if conv_op.op_type != "conv" and conv_op.op_type != "conv_transpose":
-        return False
-    is_deconv = conv_op.op_type == "conv_transpose"
-    ops_to_remove.append(conv_op)
-
-    # transpose layer
-    if not _check_child_op_type(conv_op, "transpose"):
-        return False
-    transpose_op = list(conv_op.outputs[0].child_ops)[0]
-    ops_to_remove.append(transpose_op)
-
-    # add/sub layer
-    if not _check_child_op_type(transpose_op, "add") and not _check_child_op_type(transpose_op, "sub"):
-        return False
-    add_or_sub_op = list(transpose_op.outputs[0].child_ops)[0]
-    ops_to_remove.append(add_or_sub_op)
-
-    # get the bias
-    if add_or_sub_op.x.val is None and add_or_sub_op.y.val is None:
-        return False
-    bias = add_or_sub_op.x.val if add_or_sub_op.x.val is not None else add_or_sub_op.y.val
-    is_first_input = add_or_sub_op.y.val is not None
-    is_sub = add_or_sub_op.op_type == "sub"
-
-
-    # get the conv bias/weight
-    conv_shape = conv_op.outputs[0].shape
-    Cout = conv_shape[1]
-    conv_weight = conv_op.weight.val
-    conv_weight_type = conv_weight.dtype
-    conv_bias = np.zeros(Cout).astype(conv_weight_type) if conv_op.bias is None else conv_op.bias.val
-
-    # check if the bias is compatible for fusion
-    is_bias_scalar = True
-    if isinstance(bias, np.ndarray):
-        if bias.shape == ():
-            bias = bias.tolist()
-        elif np.prod(bias.shape) == 1:
-            bias = np.squeeze(bias).tolist()
-        else:
-            is_bias_scalar = False
-
-    if not is_bias_scalar:
-        if np.prod(bias.shape) != Cout:
-            return False
-        rank = transpose_op.outputs[0].rank
-        cout_dim = transpose_op.perm.val.tolist().index(1) - rank
-        if bias.shape[cout_dim] != Cout:
-            return False
-        bias = np.reshape(bias, (Cout))
-
-    # compute the new bias
-    if is_sub:
-        if is_first_input:
-            bias = -bias
-        else:
-            conv_bias = -conv_bias
-
-    new_bias = conv_bias + bias
-
-    # compute the new weight
-    if is_sub and not is_first_input:
-        new_weight = -conv_weight
-    else:
-        new_weight = conv_weight
-
-    # check that none of the op in this pattern is connected to the output
-    # (except the last op)
-    for op in ops_to_remove[:-1]:
-        for out in op.outputs:
-            if out in block.outputs:
-                return False
-
-    # create a new conv op with the new weight, bias value, copying rest of the attributes
-    conv_kargs = {"weight": new_weight, "bias": new_bias, "before_op": conv_op}
-
-    for k, v in conv_op.inputs.items():
-        if k in ["weight", "bias"]:
-            continue
-        conv_kargs[k] = v
-
-    if is_deconv:
-        x = mb.conv_transpose(**conv_kargs)
-    else:
-        x = mb.conv(**conv_kargs)
-
-    # create a new transpose op
-    out_name = add_or_sub_op.outputs[0].name
-    tranpose_kargs = {"x": x, "name": out_name, "before_op": transpose_op}
-    for k, v in transpose_op.inputs.items():
-        if k == "x":
-            continue
-        tranpose_kargs[k] = v
-    x = mb.transpose(**tranpose_kargs)
-
-    add_or_sub_op.enclosing_block.replace_uses_of_var_after_op(
-        anchor_op=add_or_sub_op, old_var=add_or_sub_op.outputs[0], new_var=x
-    )
-
-    # Remove all the ops at once
-    block.remove_ops(ops_to_remove)
-    return True
-
-
-def _try_to_transform(conv_op, add_op, block):
-    if add_op.op_type == "sub":
-        bias_var = add_op.y
-    else:
-        bias_var = add_op.x if add_op.x.val is not None else add_op.y
-    bias_value = bias_var.val
-
-    is_conv_op = (conv_op.op_type == "conv")
-
-    # check that the bias value is a constant array or a scalar constant
-    if not isinstance(bias_value, (np.ndarray, np.generic)):
-        return False
-
-    is_bias_scalar = False
-    if not isinstance(bias_value, np.ndarray):
-        is_bias_scalar = True
-
-    # find rank of the conv input
-    rank = conv_op.x.rank
-    if rank is None:
-        return False
-    if not (rank == 3 or rank == 4 or rank == 5):
-        return False
-
-    # check compatibility of bias value with the rank of the conv op
-    # either bias value should be a scalar or:
-    # rank=3 ==> (B,C,D), which means bias must be (1,C,1) or (C,1)
-    # rank=4 ==> (B,C,D1,D2), which means bias must be (1,C,1,1) or (C,1,1)
-    # rank=5 ==> (B,C,D1,D2,D3), which means bias must be (1,C,1,1,1) or (C,1,1,1)
-
-    if is_bias_scalar:
-        bias_value = np.array([bias_value])
-    else:
-        # check that there is at most one dimension in the shape that is not 1
-        if len(np.squeeze(bias_value).shape) > 1:
-            return False
-        # check that addition is not happening on the batch dimension
-        if len(bias_value) == rank:
-            if bias_value.shape[0] != 1:
-                return False
-        # check that last rank-2 entries in the shape vector are all 1s
-        if np.prod(bias_value.shape[-(rank - 2) :]) != 1:
-            return False
-        bias_value = np.squeeze(bias_value)
-
-    if add_op.op_type == "sub":
-        bias_value *= -1
-
-    # everything looks good, now find the new updated bias
-    old_bias = conv_op.inputs.get("bias", None)
-    old_bias_value = None
-    if old_bias is not None and old_bias.val is not None:
-        old_bias_value = old_bias.val
-    if old_bias is None:
-        # need to create a fresh numpy array for bias
-        if np.prod(bias_value.shape) == 1:
-            # its a scalar bias
-            # need to find the value of Cout to form a new bias
-            if conv_op.weight.val is None:
-                return False
-            # conv_transpose has weight format [K, C_out, spatial dims]
-            # conv has weight format [C_out, K, spatial dims]
-            Cout = conv_op.weight.val.shape[0 if is_conv_op else 1]
-            new_bias_value = np.broadcast_to(bias_value, (Cout,))
-        else:
-            new_bias_value = bias_value
-    else:
-        # just need to update the existing bias array
-        try:
-            new_bias_value = old_bias_value + bias_value
-        except:
-            return False
-
-    # create a new conv op with the new bias value, copying rest of the attributes
-    out_name = add_op.outputs[0].name
-    if new_bias_value.dtype != np.float32 and new_bias_value.dtype != np.float16:
-        # cast the bias to match the weight type
-        weight_np_type = types.nptype_from_builtin(conv_op.inputs["weight"].sym_type.get_primitive())
-        logging.warning("conv_bias_fusion pass: casting bias "
-                        "from {} to {} to match the dtype of the weight of the conv layer".format(
-                        new_bias_value.dtype, weight_np_type
-                        )
-        )
-        new_bias_value = new_bias_value.astype(weight_np_type)
-    new_bias_var = mb.const(val=new_bias_value, before_op=conv_op)
-
-    conv_kargs = {"bias": new_bias_var, "name": out_name, "before_op": conv_op}
-
-    for k, v in conv_op.inputs.items():
-        if k == "bias":
-            continue
-        conv_kargs[k] = v
-
-    if is_conv_op:
-        x = mb.conv(**conv_kargs)
-    else:
-        x = mb.conv_transpose(**conv_kargs)
-
-    add_op.enclosing_block.replace_uses_of_var_after_op(
-        anchor_op=add_op, old_var=add_op.outputs[0], new_var=x
-    )
-    # Remove all the ops at once
-    block.remove_ops([conv_op, add_op])
-    return True
-
-
-def _fuse_conv_bias_block(block):
-    fusion_status = False
-    for op in list(block.operations):
-        for b in op.blocks:
-            block_changed = True
-            while block_changed:
-                block_changed = _fuse_conv_bias_block(b)
-        if len(op.blocks) > 0:
-            # This op can't be conv or conv_transpose
-            continue
-
-        # pattern 1 : conv + add/sub
-        add_op = _match_pattern(op)
-        if add_op is not None:
-            with block:
-                fusion_status = _try_to_transform(op, add_op, block)
-            # has to break as the downstream iterator is affected.
-            if fusion_status:
-                return fusion_status
-
-        # pattern 2 : conv + transpose + add/sub
-        with block:
-            fusion_status = _try_to_transform_transpose_pattern(op, block)
-            if fusion_status:
-                return fusion_status
-
-    return fusion_status
-
-
 @register_pass(namespace="common")
 class fuse_conv_bias(AbstractGraphPass):
     """
@@ -306,8 +63,266 @@ class fuse_conv_bias(AbstractGraphPass):
         ...
 
     """
+    def __init__(self):
+        self.ops_to_skip = set()
+
+    def set_ops_to_skip(self, prog):
+        pass
+
     def apply(self, prog):
+        self.set_ops_to_skip(prog)
         for f in prog.functions.values():
             block_changed = True
             while block_changed:
-                block_changed = _fuse_conv_bias_block(f)
+                block_changed = self._fuse_conv_bias_block(f)
+
+    def _try_to_transform_transpose_pattern(self, conv_op, block):
+        """
+        Fuse conv/conv_transpose -> transpose -> add/sub(const) into conv -> transpose.
+        Returns True if the graph was rewritten, False if the pattern was rejected.
+        """
+        if conv_op in self.ops_to_skip:
+            return False
+
+        ops_to_remove = []
+        # conv layer
+        if conv_op.op_type != "conv" and conv_op.op_type != "conv_transpose":
+            return False
+        is_deconv = conv_op.op_type == "conv_transpose"
+        ops_to_remove.append(conv_op)
+
+        # transpose layer
+        if not _check_child_op_type(conv_op, "transpose"):
+            return False
+        transpose_op = list(conv_op.outputs[0].child_ops)[0]
+        ops_to_remove.append(transpose_op)
+
+        # add/sub layer
+        if not _check_child_op_type(transpose_op, "add") and not _check_child_op_type(transpose_op, "sub"):
+            return False
+        add_or_sub_op = list(transpose_op.outputs[0].child_ops)[0]
+        if add_or_sub_op in self.ops_to_skip:
+            return False
+        ops_to_remove.append(add_or_sub_op)
+
+        # get the bias
+        if add_or_sub_op.x.val is None and add_or_sub_op.y.val is None:
+            return False
+        bias = add_or_sub_op.x.val if add_or_sub_op.x.val is not None else add_or_sub_op.y.val
+        is_first_input = add_or_sub_op.y.val is not None
+        is_sub = add_or_sub_op.op_type == "sub"
+
+        # get the conv bias/weight; both must be constants for their values to be updated
+        conv_shape = conv_op.outputs[0].shape
+        Cout = conv_shape[1]
+        conv_weight = conv_op.weight.val
+        if conv_weight is None:
+            return False
+        conv_weight_type = conv_weight.dtype
+        conv_bias = np.zeros(Cout).astype(conv_weight_type) if conv_op.bias is None else conv_op.bias.val
+        if conv_bias is None:
+            return False
+
+        # check if the bias is compatible for fusion
+        is_bias_scalar = True
+        if isinstance(bias, np.ndarray):
+            if bias.shape == ():
+                bias = bias.tolist()
+            elif np.prod(bias.shape) == 1:
+                bias = np.squeeze(bias).tolist()
+            else:
+                is_bias_scalar = False
+
+        if not is_bias_scalar:
+            if np.prod(bias.shape) != Cout:
+                return False
+            rank = transpose_op.outputs[0].rank
+            # channel axis of the transposed output, as a negative index; a bias with too
+            # few dims cannot span that axis (and indexing its shape would raise IndexError)
+            cout_dim = transpose_op.perm.val.tolist().index(1) - rank
+            if -cout_dim > len(bias.shape) or bias.shape[cout_dim] != Cout:
+                return False
+            bias = np.reshape(bias, (Cout))
+
+        # compute the new bias
+        if is_sub:
+            if is_first_input:
+                bias = -bias
+            else:
+                conv_bias = -conv_bias
+        new_bias = conv_bias + bias
+
+        # compute the new weight
+        new_weight = -conv_weight if is_sub and not is_first_input else conv_weight
+
+        # check that none of the ops in this pattern is connected to the block output
+        # (except the last op)
+        for op in ops_to_remove[:-1]:
+            for out in op.outputs:
+                if out in block.outputs:
+                    return False
+
+        # create a new conv op with the new weight, bias value, copying rest of the attributes
+        conv_kargs = {"weight": new_weight, "bias": new_bias, "before_op": conv_op}
+        for k, v in conv_op.inputs.items():
+            if k in ["weight", "bias"]:
+                continue
+            conv_kargs[k] = v
+
+        if is_deconv:
+            x = mb.conv_transpose(**conv_kargs)
+        else:
+            x = mb.conv(**conv_kargs)
+
+        # create a new transpose op
+        out_name = add_or_sub_op.outputs[0].name
+        tranpose_kargs = {"x": x, "name": out_name, "before_op": transpose_op}
+        for k, v in transpose_op.inputs.items():
+            if k == "x":
+                continue
+            tranpose_kargs[k] = v
+        x = mb.transpose(**tranpose_kargs)
+
+        add_or_sub_op.enclosing_block.replace_uses_of_var_after_op(
+            anchor_op=add_or_sub_op, old_var=add_or_sub_op.outputs[0], new_var=x
+        )
+        # Remove all the ops at once
+        block.remove_ops(ops_to_remove)
+        return True
+
+
+    def _try_to_transform(self, conv_op, add_op, block):
+        """
+        Fold the constant add/sub operand that follows ``conv_op`` into the conv bias.
+        Returns True if the graph was rewritten, False if the pattern was rejected.
+        """
+        if conv_op in self.ops_to_skip or add_op in self.ops_to_skip:
+            return False
+
+        if add_op.op_type == "sub":
+            bias_var = add_op.y
+        else:
+            bias_var = add_op.x if add_op.x.val is not None else add_op.y
+        bias_value = bias_var.val
+
+        is_conv_op = (conv_op.op_type == "conv")
+
+        # check that the bias value is a constant array or a scalar constant
+        if not isinstance(bias_value, (np.ndarray, np.generic)):
+            return False
+        is_bias_scalar = not isinstance(bias_value, np.ndarray)
+
+        # find rank of the conv input; only rank 3, 4 and 5 are handled
+        rank = conv_op.x.rank
+        if rank is None or rank not in (3, 4, 5):
+            return False
+
+        # check compatibility of bias value with the rank of the conv op
+        # either bias value should be a scalar or:
+        # rank=3 ==> (B,C,D), which means bias must be (1,C,1) or (C,1)
+        # rank=4 ==> (B,C,D1,D2), which means bias must be (1,C,1,1) or (C,1,1)
+        # rank=5 ==> (B,C,D1,D2,D3), which means bias must be (1,C,1,1,1) or (C,1,1,1)
+
+        if is_bias_scalar:
+            bias_value = np.array([bias_value])
+        else:
+            # check that there is at most one dimension in the shape that is not 1
+            if len(np.squeeze(bias_value).shape) > 1:
+                return False
+            # check that addition is not happening on the batch dimension
+            # (compare the number of dimensions, not the size of the first axis)
+            if len(bias_value.shape) == rank and bias_value.shape[0] != 1:
+                return False
+            # check that last rank-2 entries in the shape vector are all 1s
+            if np.prod(bias_value.shape[-(rank - 2) :]) != 1:
+                return False
+            bias_value = np.squeeze(bias_value)
+
+        if add_op.op_type == "sub":
+            # negate out of place: np.squeeze gave a view, so `*=` would modify the const itself
+            bias_value = -bias_value
+
+        # everything looks good, now find the new updated bias
+        old_bias = conv_op.inputs.get("bias", None)
+        if old_bias is None:
+            # need to create a fresh numpy array for bias
+            if np.prod(bias_value.shape) == 1:
+                # it's a scalar bias
+                # need to find the value of Cout to form a new bias
+                if conv_op.weight.val is None:
+                    return False
+                # conv_transpose has weight format [K, C_out, spatial dims]
+                # conv has weight format [C_out, K, spatial dims]
+                Cout = conv_op.weight.val.shape[0 if is_conv_op else 1]
+                new_bias_value = np.broadcast_to(bias_value, (Cout,))
+            else:
+                new_bias_value = bias_value
+        else:
+            # just need to update the existing bias array; give up if it is not a
+            # constant or if it cannot be combined with the new bias value
+            if old_bias.val is None:
+                return False
+            try:
+                new_bias_value = old_bias.val + bias_value
+            except (TypeError, ValueError):
+                return False
+
+        # create a new conv op with the new bias value, copying rest of the attributes
+        out_name = add_op.outputs[0].name
+        if new_bias_value.dtype != np.float32 and new_bias_value.dtype != np.float16:
+            # cast the bias to match the weight type
+            weight_np_type = types.nptype_from_builtin(conv_op.inputs["weight"].sym_type.get_primitive())
+            logging.warning("conv_bias_fusion pass: casting bias "
+                            "from {} to {} to match the dtype of the weight of the conv layer".format(
+                            new_bias_value.dtype, weight_np_type
+                            )
+            )
+            new_bias_value = new_bias_value.astype(weight_np_type)
+        new_bias_var = mb.const(val=new_bias_value, before_op=conv_op)
+
+        conv_kargs = {"bias": new_bias_var, "name": out_name, "before_op": conv_op}
+
+        for k, v in conv_op.inputs.items():
+            if k == "bias":
+                continue
+            conv_kargs[k] = v
+
+        if is_conv_op:
+            x = mb.conv(**conv_kargs)
+        else:
+            x = mb.conv_transpose(**conv_kargs)
+
+        add_op.enclosing_block.replace_uses_of_var_after_op(
+            anchor_op=add_op, old_var=add_op.outputs[0], new_var=x
+        )
+        # Remove all the ops at once
+        block.remove_ops([conv_op, add_op])
+        return True
+
+    def _fuse_conv_bias_block(self, block):
+        fusion_status = False
+        for op in list(block.operations):
+            for b in op.blocks:
+                block_changed = True
+                while block_changed:
+                    block_changed = self._fuse_conv_bias_block(b)
+            if len(op.blocks) > 0:
+                # an op that owns nested blocks can't be conv or conv_transpose
+                continue
+
+            # pattern 1 : conv + add/sub
+            add_op = _match_pattern(op)
+            if add_op is not None:
+                with block:
+                    fusion_status = self._try_to_transform(op, add_op, block)
+                # return on the first fusion: ops were removed, so the op list snapshot is stale.
+                if fusion_status:
+                    return fusion_status
+
+            # pattern 2 : conv + transpose + add/sub
+            with block:
+                fusion_status = self._try_to_transform_transpose_pattern(op, block)
+                if fusion_status:
+                    return fusion_status
+
+        return fusion_status
\ No newline at end of file
diff --git a/coremltools/converters/mil/mil/passes/conv_scale_fusion.py b/coremltools/converters/mil/mil/passes/conv_scale_fusion.py
index 1c9e5e476..f71a61f85 100644
--- a/coremltools/converters/mil/mil/passes/conv_scale_fusion.py
+++ b/coremltools/converters/mil/mil/passes/conv_scale_fusion.py
@@ -1,15 +1,14 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2021, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import numpy as np
 
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 from coremltools.converters.mil.mil import Builder as mb
-import numpy as np
+
 
 def _try_to_transform(conv_op, scale_op, block):
 
@@ -42,7 +41,7 @@ def _try_to_transform(conv_op, scale_op, block):
 
     # get type of the conv layer
     is_deconv = conv_op.op_type == 'conv_transpose'
-    is_conv_1d  = len(conv_weight.shape) == 3
+    is_conv_1d = len(conv_weight.shape) == 3
 
     # D_in denotes the spatial dimensions for conv kernel weight
     # for conv_transpose, conv_weight has shape [Cin, Cout / groups, *D_in]
@@ -71,7 +70,7 @@ def _try_to_transform(conv_op, scale_op, block):
     if scale_op.op_type == "real_div":
         scale = 1./scale
 
-     # get the type of the conv weight
+    # get the type of the conv weight
     conv_weight_type = conv_weight.dtype
 
     # create bias for conv if not exist
@@ -132,41 +131,6 @@ def _try_to_transform(conv_op, scale_op, block):
     block.remove_ops([conv_op, scale_op])
     return True
 
-
-def _fuse_conv_scale_block(block):
-
-    def _match_pattern(op):
-        if op.op_type == "conv" or op.op_type == "conv_transpose":
-            # abort fusion if op output is also a block output
-            if op.outputs[0] in op.enclosing_block.outputs:
-                return None
-            # find batch_norm op
-            child_ops = op.outputs[0].child_ops
-            if len(child_ops) == 1:
-                scale_op_candidate = list(child_ops)[0]
-                if scale_op_candidate.op_type in ["mul", "real_div"]:
-                    return scale_op_candidate
-        return None
-
-    fusion_occurred = False
-    for op in list(block.operations):
-        for b in op.blocks:
-            block_changed = True
-            while block_changed:
-                block_changed = _fuse_conv_scale_block(b)
-        if len(op.blocks) > 0:
-            # This op can't be conv or conv_transpose
-            continue
-
-        scale_op = _match_pattern(op)
-        if scale_op is not None:
-            with block:
-                fusion_occurred = _try_to_transform(op, scale_op, block)
-            # has to break as the downstream iterator is affected.
-            if fusion_occurred:
-                return fusion_occurred
-    return fusion_occurred
-
 @register_pass(namespace="common")
 class fuse_conv_scale(AbstractGraphPass):
     """
@@ -187,8 +151,53 @@ class fuse_conv_scale(AbstractGraphPass):
         ...
 
     """
+    def __init__(self):
+        self.ops_to_skip = set()
+
+    def set_ops_to_skip(self, prog):
+        pass
+
+    def _fuse_conv_scale_block(self, block):
+
+        def _match_pattern(op):
+            if op.op_type == "conv" or op.op_type == "conv_transpose":
+                # abort fusion if op output is also a block output
+                if op.outputs[0] in op.enclosing_block.outputs:
+                    return None
+                # find the scale op (mul / real_div): it must be the only consumer of the conv output
+                child_ops = op.outputs[0].child_ops
+                if len(child_ops) == 1:
+                    scale_op_candidate = list(child_ops)[0]
+                    if scale_op_candidate.op_type in ["mul", "real_div"]:
+                        return scale_op_candidate
+            return None
+
+        fusion_occurred = False
+        for op in list(block.operations):
+            for b in op.blocks:
+                block_changed = True
+                while block_changed:
+                    block_changed = self._fuse_conv_scale_block(b)
+            if len(op.blocks) > 0:
+                # an op that owns nested blocks can't be conv or conv_transpose
+                continue
+
+            scale_op = _match_pattern(op)
+
+            if op in self.ops_to_skip or scale_op in self.ops_to_skip:
+                continue
+
+            if scale_op is not None:
+                with block:
+                    fusion_occurred = _try_to_transform(op, scale_op, block)
+                # return on the first fusion: ops were removed, so the op list snapshot is stale.
+                if fusion_occurred:
+                    return fusion_occurred
+        return fusion_occurred
+
     def apply(self, prog):
+        self.set_ops_to_skip(prog)
         for f in prog.functions.values():
             block_changed = True
             while block_changed:
-                block_changed = _fuse_conv_scale_block(f)
+                block_changed = self._fuse_conv_scale_block(f)
diff --git a/coremltools/converters/mil/mil/passes/dedup_op_and_var_names.py b/coremltools/converters/mil/mil/passes/dedup_op_and_var_names.py
index c1fdfb10d..d0c52d2a6 100644
--- a/coremltools/converters/mil/mil/passes/dedup_op_and_var_names.py
+++ b/coremltools/converters/mil/mil/passes/dedup_op_and_var_names.py
@@ -3,12 +3,13 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import collections
+import itertools
+
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 from coremltools.converters.mil.mil import Function
-import re
-import collections
-import itertools
+
 
 def _gen_new_name(seen_names, curr_name):
     if curr_name not in seen_names:
@@ -55,11 +56,12 @@ def _ensure_unique_var_names(v_set):
     """
     names = [v.name for v in v_set]
     dup_names = [name for name, count in \
-            collections.Counter(names).items() if count > 1]
+                 collections.Counter(names).items() if count > 1]
     if len(dup_names) > 0:
         raise ValueError('Var names {} is used both as '.format(dup_names) +\
                 'function\'s input and output')
 
+
 @register_pass(namespace="common")
 class dedup_op_and_var_names(AbstractGraphPass):
     """
@@ -69,7 +71,7 @@ class dedup_op_and_var_names(AbstractGraphPass):
     variable names are tracked separately, so an op may have the same name as
     a variable.
 
-	The pass preserves input and output name. Raises ValueError if we cannot
+    The pass preserves input and output name. Raises ValueError if we cannot
     dedup without changing the input/output var names.
 
     func main(%input):
diff --git a/coremltools/converters/mil/mil/passes/detect_concat_interleave.py b/coremltools/converters/mil/mil/passes/detect_concat_interleave.py
index 47ea9fd16..58993a454 100644
--- a/coremltools/converters/mil/mil/passes/detect_concat_interleave.py
+++ b/coremltools/converters/mil/mil/passes/detect_concat_interleave.py
@@ -1,16 +1,15 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import numpy as np
 
+from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
-from coremltools.converters.mil.mil import Builder as mb
-import numpy as np
-from coremltools.converters.mil.mil.types.symbolic import is_symbolic, any_symbolic
+from coremltools.converters.mil.mil.types.symbolic import any_symbolic
+
 
 def _match_pattern(op):
     if op.outputs[0] in op.enclosing_block.outputs:
diff --git a/coremltools/converters/mil/mil/passes/divide_to_multiply.py b/coremltools/converters/mil/mil/passes/divide_to_multiply.py
index ed5a1d22b..70a991b98 100644
--- a/coremltools/converters/mil/mil/passes/divide_to_multiply.py
+++ b/coremltools/converters/mil/mil/passes/divide_to_multiply.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
@@ -12,6 +10,7 @@
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.mil import types as _types
 
+
 def _divide_to_multiply_block(block):
     for op in list(block.operations):
         for b in op.blocks:
diff --git a/coremltools/converters/mil/mil/passes/gelu_exact_fusion.py b/coremltools/converters/mil/mil/passes/gelu_exact_fusion.py
index 27ea7bbc7..15d78788d 100644
--- a/coremltools/converters/mil/mil/passes/gelu_exact_fusion.py
+++ b/coremltools/converters/mil/mil/passes/gelu_exact_fusion.py
@@ -3,12 +3,10 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 from coremltools.converters.mil.mil import Builder as mb
 from .helper import _check_child_op_type, _check_var_scalar_value
-import numpy as np
 
 def _try_to_transform(op, block):
     ops_to_remove = []
diff --git a/coremltools/converters/mil/mil/passes/gelu_tanh_approximation_fusion.py b/coremltools/converters/mil/mil/passes/gelu_tanh_approximation_fusion.py
index 4ef421b77..21f94e217 100644
--- a/coremltools/converters/mil/mil/passes/gelu_tanh_approximation_fusion.py
+++ b/coremltools/converters/mil/mil/passes/gelu_tanh_approximation_fusion.py
@@ -1,168 +1,124 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-
-from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+from coremltools.converters.mil import Builder as mb
+from coremltools.converters.mil.experimental.passes.generic_pass_infrastructure import fuse_all_blocks
+from coremltools.converters.mil.mil import get_new_symbol
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
-from coremltools.converters.mil.mil import Builder as mb
-from .helper import _check_child_op_type, _check_var_scalar_value
-import numpy as np
-
-def _try_to_transform(pow_op, block):
-    all_ops = [pow_op]
-    root_var = pow_op.x
-
-    # check that root_var feeds into exactly 3 ops
-    if len(list(root_var.child_ops)) != 3:
-        return False
-
-    # check for 1st mul op
-    if not _check_child_op_type(pow_op, "mul"):
-        return False
-    mul_op1 = list(pow_op.outputs[0].child_ops)[0]
-    if not (
-        (
-            mul_op1.x == pow_op.outputs[0]
-            and _check_var_scalar_value(mul_op1.y, 0.044715)
-        )
-        or (
-            mul_op1.y == pow_op.outputs[0]
-            and _check_var_scalar_value(mul_op1.x, 0.044715)
-        )
-    ):
-        return False
-    all_ops.append(mul_op1)
-
-    # check for 1st add op
-    if not _check_child_op_type(mul_op1, "add"):
-        return False
-    add_op1 = list(mul_op1.outputs[0].child_ops)[0]
-    if not (
-        (add_op1.x == mul_op1.outputs[0] and add_op1.y == root_var)
-        or (add_op1.y == mul_op1.outputs[0] and add_op1.x == root_var)
-    ):
-        return False
-    all_ops.append(add_op1)
-
-    # check for 2nd mul op
-    if not _check_child_op_type(add_op1, "mul"):
-        return False
-    mul_op2 = list(add_op1.outputs[0].child_ops)[0]
-    if not (
-        (
-            mul_op2.x == add_op1.outputs[0]
-            and _check_var_scalar_value(mul_op2.y, 0.79788)
-        )
-        or (
-            mul_op2.y == add_op1.outputs[0]
-            and _check_var_scalar_value(mul_op2.x, 0.79788)
-        )
-    ):
-        return False
-    all_ops.append(mul_op2)
-
-    # check for tanh op
-    if not _check_child_op_type(mul_op2, "tanh"):
-        return False
-    tanh_op = list(mul_op2.outputs[0].child_ops)[0]
-    all_ops.append(tanh_op)
-
-    # check for 2nd add op
-    if not _check_child_op_type(tanh_op, "add"):
-        return False
-    add_op2 = list(tanh_op.outputs[0].child_ops)[0]
-    if not (
-        (add_op2.x == tanh_op.outputs[0] and _check_var_scalar_value(add_op2.y, 1))
-        or (add_op2.y == tanh_op.outputs[0] and _check_var_scalar_value(add_op2.x, 1))
-    ):
-        return False
-    all_ops.append(add_op2)
-
-    # check for 3rd mul op
-    if not _check_child_op_type(add_op2, "mul"):
-        return False
-    mul_op3 = list(add_op2.outputs[0].child_ops)[0]
-    if not (
-        (mul_op3.x == add_op2.outputs[0] and _check_var_scalar_value(mul_op3.y, 0.5))
-        or (mul_op3.y == add_op2.outputs[0] and _check_var_scalar_value(mul_op3.x, 0.5))
-    ):
-        return False
-    all_ops.append(mul_op3)
-
-    # check for 4th mul op
-    if not _check_child_op_type(mul_op3, "mul"):
-        return False
-    mul_op4 = list(mul_op3.outputs[0].child_ops)[0]
-    if not (
-        (mul_op4.x == mul_op3.outputs[0] and mul_op4.y == root_var)
-        or (mul_op4.y == mul_op3.outputs[0] and mul_op4.x == root_var)
-    ):
-        return False
-    all_ops.append(mul_op4)
-
-    # check that none of the op in this pattern is connected to the output
-    # (except the last mul op)
-    for i, op in enumerate(all_ops):
-        if i == len(all_ops) - 1:
-            continue
-        for out in op.outputs:
-            if out in block.outputs:
-                return False
+from coremltools.converters.mil.mil.passes.helper import _check_var_scalar_value
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+
+
+def is_var_constraint_satisifed(pattern):
+
+    passed = _check_var_scalar_value(pattern.mul.y, 0.5) or _check_var_scalar_value(pattern.mul.x, 0.5)
+    passed = passed and _check_var_scalar_value(pattern.pow.y, 3.0)
+
+    passed = passed and (
+                        _check_var_scalar_value(pattern.mul_1.y, 0.044715) or
+                        _check_var_scalar_value(pattern.mul_1.x,  0.044715)
+                        )
 
+    passed = passed and (
+                        _check_var_scalar_value(pattern.mul_2.y, 0.79788) or
+                        _check_var_scalar_value(pattern.mul_2.x, 0.79788)
+                        )
+
+    passed = passed and (
+                        _check_var_scalar_value(pattern.add_1.y, 1) or
+                        _check_var_scalar_value(pattern.add_1.x, 1)
+                        )
+
+    return passed
+
+
+def transform_pattern(pattern):
     # remove all the ops, and replace with a gelu op
-    out_name = mul_op4.outputs[0].name
-    x = mb.gelu(x=root_var, mode="TANH_APPROXIMATION", name=out_name, before_op=pow_op)
+    out_name = pattern.mul_3.outputs[0].name
+    x = mb.gelu(x=pattern.root_var, mode="TANH_APPROXIMATION", name=out_name, before_op=pattern.mul)
 
-    mul_op4.enclosing_block.replace_uses_of_var_after_op(
-        anchor_op=mul_op4, old_var=mul_op4.outputs[0], new_var=x
+    pattern.mul_3.enclosing_block.replace_uses_of_var_after_op(
+        anchor_op=pattern.mul_3, old_var=pattern.mul_3.outputs[0], new_var=x
     )
+
     # Remove all the ops at once
-    block.remove_ops(all_ops)
-    return True
-
-
-def _fuse_gelu_tanh_block(block):
-    fusion_status = False
-    for op in list(block.operations):
-        for b in op.blocks:
-            block_changed = True
-            while block_changed:
-                block_changed = _fuse_gelu_tanh_block(b)
-        if len(op.blocks) > 0:
-            # This op can't be pow
-            continue
-
-        # start pattern match if pow op with power 3 is encountered
-        if op.op_type == "pow":
-            if _check_var_scalar_value(op.y, 3):
-                with block:
-                    fusion_status = _try_to_transform(op, block)
-                # has to break as the downstream iterator is affected.
-                if fusion_status:
-                    return fusion_status
-    return fusion_status
+    pattern.block.remove_ops(pattern.op_list())
 
-@register_pass(namespace="common")
-class fuse_gelu_tanh_approximation(AbstractGraphPass):
+def get_gelu_pattern1():
     """
-    Identify the pattern that corresponds to the tanh approximate version of gelu, and replace it with a single
-    gelu layer with mode=TANH_APPROXIMATION
+    y = x * (0.5 * (tanh(((.0447)x^3 + x ) * sqrt(2/pi)) + 1))
 
-    y = ( tanh((.0447)x^3 + x ) * (sqrt(2/pi)) + 1 ) * 0.5 * x
 
     [...] -----> pow (3) ----> mul (.044715) ---> add -----> mul (sqrt(2/pi)) ---> tanh ----> add (1) ----> mul (0.5) -----> mul ---> [...]
       |                                            ^                                                                          ^
       |                                            |                                                                          |
       |------------------------------------------------------------------------------------------------------------------------
 
+    """
+    @mb.program(input_specs=[mb.TensorSpec(shape=([get_new_symbol(), get_new_symbol(), get_new_symbol()])), ])
+    def gelu_to_detect_1(x):
+        # MIL operation takes named inputs (instead of positional inputs).
+        # Here `name` argument is MANDATORY.
+        pow = mb.pow(x=x, y=3.0, name="pow")
+        mul_1 = mb.mul(x=0.044714998453855515, y=pow, name="mul_1")
+        add = mb.add(x=x, y=mul_1, name="add")
+        mul_2 = mb.mul(x=0.7978845834732056, y=add, name="mul_2")
+        tanh = mb.tanh(x=mul_2, name="tanh")
+        add_1 = mb.add(x=1.0, y=tanh, name="add_1")
+        mul = mb.mul(x=0.5, y=add_1, name="mul")
+        mul_3 = mb.mul(x=mul, y=x, name="mul_3")
+        return mul_3
+
+    return gelu_to_detect_1
+
+
+def get_gelu_pattern2():
+    """
+    y = (0.5 * x) * (tanh(((.0447)x^3 + x ) * sqrt(2/pi)) + 1)
+
+                    ---------------------------------------------------------------------------------------------------------
+                    ^                                                                                                       |
+                    |                                                                                                       V
+     [...] -----> mul(0.5)    pow (3) ----> mul (.044715) ---> add -----> mul (sqrt(2/pi)) ---> tanh ----> add (1) -----> mul ---> [...]
+      |                         ^                               ^
+      |                         |                               |
+      |------------------------------------------------------------
+    """
+    @mb.program(input_specs=[mb.TensorSpec(shape=([get_new_symbol(), get_new_symbol(), get_new_symbol()])), ])
+    def gelu_to_detect_2(x):
+        pow = mb.pow(x=x, y=3.0, name="pow")
+        mul_1 = mb.mul(x=0.044714998453855515, y=pow, name="mul_1")
+        add = mb.add(x=x, y=mul_1, name="add")
+        mul_2 = mb.mul(x=0.7978845834732056, y=add, name="mul_2")
+        tanh = mb.tanh(x=mul_2, name="tanh")
+        add_1 = mb.add(x=1.0, y=tanh, name="add_1")
+        mul = mb.mul(x=0.5, y=x, name="mul")
+        mul_3 = mb.mul(x=mul, y=add_1, name="mul_3")
+        return mul_3
+
+    return gelu_to_detect_2
+
 
+@register_pass(namespace="common")
+class fuse_gelu_tanh_approximation(AbstractGraphPass):
     """
+    Identify the pattern that corresponds to the tanh approximate version of gelu, and replace it with a single
+    gelu layer with mode=TANH_APPROXIMATION
+
+    The implementation of this pass uses the generic graph pattern matching and transform algorithm implemented in
+    coremltools.converters.mil.experimental.passes.generic_pass_infrastructure and documented in
+    coremltools/converters/mil/experimental/passes/readme.md
+    """
+
     def apply(self, prog):
-        for f in prog.functions.values():
-            block_changed = True
-            while block_changed:
-                block_changed = _fuse_gelu_tanh_block(f)
+        fuse_all_blocks(ops_arrangement=get_gelu_pattern1(),
+                        var_constraints=is_var_constraint_satisifed,
+                        transform_pattern=transform_pattern,
+                        prog=prog)
+
+        fuse_all_blocks(ops_arrangement=get_gelu_pattern2(),
+                        var_constraints=is_var_constraint_satisifed,
+                        transform_pattern=transform_pattern,
+                        prog=prog)
diff --git a/coremltools/converters/mil/mil/passes/graph_pass.py b/coremltools/converters/mil/mil/passes/graph_pass.py
index c272a74bf..b5f8727dd 100644
--- a/coremltools/converters/mil/mil/passes/graph_pass.py
+++ b/coremltools/converters/mil/mil/passes/graph_pass.py
@@ -5,11 +5,13 @@
 
 class AbstractGraphPass():
 
     def __call__(self, prog):
         self.apply(prog)
 
-    def apply(self, prog):
-        raise NotImplementedError(
-            'Graph pass transformation not implemented for "{}".'.format(self)
-        )
+    def __str__(self):
+        return type(self).__name__
 
+    def apply(self, prog):
+        raise NotImplementedError(
+            'Graph pass transformation not implemented for "{}".'.format(self)
+        )
\ No newline at end of file
diff --git a/coremltools/converters/mil/mil/passes/layernorm_instancenorm_pattern_fusion.py b/coremltools/converters/mil/mil/passes/layernorm_instancenorm_pattern_fusion.py
index fce0b8002..7508f3047 100644
--- a/coremltools/converters/mil/mil/passes/layernorm_instancenorm_pattern_fusion.py
+++ b/coremltools/converters/mil/mil/passes/layernorm_instancenorm_pattern_fusion.py
@@ -1,19 +1,22 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 import logging
+import numpy as np
 from typing import List, Optional
 
-import numpy as np
-from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil import Operation, Block, Var, Program
+from coremltools.converters.mil.mil import (
+    Block,
+    Builder as mb,
+    Operation,
+    Var
+)
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 
+
 DEBUG = False  # set to true to plot the block before and after the transformation
 
 def _check_no_output_connection(block: Block, ops: List[Operation]) -> bool:
@@ -237,8 +240,6 @@ def _try_match_and_transform_pattern_1(reduce_op, block) -> bool:
     if root_var.shape is None:
         return False
 
-    rank = len(root_var.shape)
-
     # check that root_var feeds into exactly 3 ops
     if len(list(root_var.child_ops)) != 3:
         return False
@@ -371,8 +372,6 @@ def _try_match_and_transform_pattern_2(reduce_op, block) -> bool:
     if root_var.shape is None:
         return False
 
-    rank = len(root_var.shape)
-
     # check that root_var feeds into exactly 3 ops
     if len(root_var.child_ops) != 3:
         return False
@@ -499,8 +498,6 @@ def _try_match_and_transform_pattern_3(reduce_op, block) -> bool:
     if root_var.shape is None:
         return False
 
-    rank = len(root_var.shape)
-
     # check that root_var feeds into exactly 3 ops
     if len(root_var.child_ops) != 3:
         return False
@@ -651,8 +648,6 @@ def _try_match_and_transform_pattern_4(reduce_op: Operation, block: Block) -> bo
     if root_var.shape is None:
         return False
 
-    rank = len(root_var.shape)
-
     # check that root_var feeds into exactly 4 ops
     if len(root_var.child_ops) != 4:
         return False
diff --git a/coremltools/converters/mil/mil/passes/linear_bias_fusion.py b/coremltools/converters/mil/mil/passes/linear_bias_fusion.py
index d356b025d..5d049b3b7 100644
--- a/coremltools/converters/mil/mil/passes/linear_bias_fusion.py
+++ b/coremltools/converters/mil/mil/passes/linear_bias_fusion.py
@@ -62,41 +62,6 @@ def _try_to_transform(linear_op, add_or_sub_op, block):
     block.remove_ops([linear_op, add_or_sub_op])
     return True
 
-
-def _fuse_linear_bias_block(block):
-
-    def _match_pattern(op):
-        if op.op_type == "linear":
-            # abort fusion if op output is also a block output
-            if op.outputs[0] in op.enclosing_block.outputs:
-                return None
-            # find add/sub op
-            child_ops = op.outputs[0].child_ops
-            if len(child_ops) == 1:
-                op_candidate = list(child_ops)[0]
-                if op_candidate.op_type in ["add", "sub"]:
-                    return op_candidate
-        return None
-
-    fusion_occurred = False
-    for op in list(block.operations):
-        for b in op.blocks:
-            block_changed = True
-            while block_changed:
-                block_changed = _fuse_linear_bias_block(b)
-        if len(op.blocks) > 0:
-            # This op can't be conv or conv_transpose
-            continue
-
-        add_or_sub_op = _match_pattern(op)
-        if add_or_sub_op is not None:
-            with block:
-                fusion_occurred = _try_to_transform(op, add_or_sub_op, block)
-            # has to break as the downstream iterator is affected.
-            if fusion_occurred:
-                return fusion_occurred
-    return fusion_occurred
-
 @register_pass(namespace="common")
 class fuse_linear_bias(AbstractGraphPass):
     """
@@ -128,8 +93,51 @@ class fuse_linear_bias(AbstractGraphPass):
 
         prog: Program
     """
+    def __init__(self):
+        self.ops_to_skip = set()
+
+    def set_ops_to_skip(self, prog):
+        pass
+
     def apply(self, prog):
+        self.set_ops_to_skip(prog)
         for f in prog.functions.values():
             block_changed = True
             while block_changed:
-                block_changed = _fuse_linear_bias_block(f)
+                block_changed = self._fuse_linear_bias_block(f)
+
+    def _fuse_linear_bias_block(self, block):
+
+        def _find_candicate_op(op):
+            if op.op_type != "linear":
+                return None
+            # abort fusion if op output is also a block output
+            if op.outputs[0] in op.enclosing_block.outputs:
+                return None
+            # find add/sub op
+            child_ops = op.outputs[0].child_ops
+            if len(child_ops) == 1:
+                op_candidate = list(child_ops)[0]
+                if op_candidate.op_type in ["add", "sub"]:
+                    return op_candidate
+
+        fusion_occurred = False
+        for op in list(block.operations):
+            for b in op.blocks:
+                block_changed = True
+                while block_changed:
+                    block_changed = self._fuse_linear_bias_block(b)
+            if len(op.blocks) > 0:
+                # This op can't be linear
+                continue
+            if op in self.ops_to_skip:
+                continue
+
+            add_or_sub_op = _find_candicate_op(op)
+            if add_or_sub_op is not None and add_or_sub_op not in self.ops_to_skip:
+                with block:
+                    fusion_occurred = _try_to_transform(op, add_or_sub_op, block)
+                # has to break as the downstream iterator is affected.
+                if fusion_occurred:
+                    return fusion_occurred
+        return fusion_occurred
diff --git a/coremltools/converters/mil/mil/passes/loop_invariant_elimination.py b/coremltools/converters/mil/mil/passes/loop_invariant_elimination.py
index 36c2c8eb1..dcbe5235c 100644
--- a/coremltools/converters/mil/mil/passes/loop_invariant_elimination.py
+++ b/coremltools/converters/mil/mil/passes/loop_invariant_elimination.py
@@ -70,8 +70,6 @@ def _loop_invariant_elimination_block(block):
             continue
         loop_invariant_ids = _detect_loop_invariants(op)
 
-        loop_variant_vars = []
-
         # replace uses of loop_invariants with its source from outside of the
         # while_loop op.
         for i in loop_invariant_ids:
diff --git a/coremltools/converters/mil/mil/passes/matmul_weight_bias_fusion.py b/coremltools/converters/mil/mil/passes/matmul_weight_bias_fusion.py
index 21ffaf0de..6e4fbed72 100644
--- a/coremltools/converters/mil/mil/passes/matmul_weight_bias_fusion.py
+++ b/coremltools/converters/mil/mil/passes/matmul_weight_bias_fusion.py
@@ -13,25 +13,31 @@
 
 child_op_types = ["add", "sub"]
 
-def _match_pattern(op):
-
-    if op.op_type == "matmul":
-        # find add
-        child_ops = op.outputs[0].child_ops
-        if len(child_ops) == 1:
-            add_op_candidate = list(child_ops)[0]
-            if add_op_candidate.op_type in child_op_types:
-                return add_op_candidate
-    return None
-
-def _transpose(v, before_op):
+def _find_candidate_op(op):
+
+    if op.op_type != "matmul":
+        return None
+    # find add
+    child_ops = op.outputs[0].child_ops
+    if len(child_ops) == 1:
+        add_op_candidate = list(child_ops)[0]
+        if add_op_candidate.op_type in child_op_types:
+            return add_op_candidate
+
+def _transpose(v, before_op, name=None):
     """
     Transpose the last 2 dims.
-    v: Var (must be a tensor)
+    v: (Var, must be a tensor)
+    before_op: (Operation) the op before which the new transpose op is inserted
+    name: name for the transpose op if provided
     """
     perm = list(range(v.rank))
     perm[-2], perm[-1] = perm[-1], perm[-2]
-    return mb.transpose(x=v, perm=perm, before_op=before_op)
+
+    if name is None:
+        return mb.transpose(x=v, perm=perm, before_op=before_op)
+    else:
+        return mb.transpose(x=v, perm=perm, before_op=before_op, name=name)
 
 def _try_to_transform(matmul_op, add_op, block):
     if matmul_op.x.val is None and matmul_op.y.val is None:
@@ -116,27 +122,6 @@ def _try_to_transform(matmul_op, add_op, block):
     block.remove_ops([matmul_op, add_op])
     return True
 
-
-def _fuse_matmul_weight_bias_block(block):
-    fusion_status = False
-    for op in list(block.operations):
-        for b in op.blocks:
-            block_changed = True
-            while block_changed:
-                block_changed = _fuse_matmul_weight_bias_block(b)
-        if len(op.blocks) > 0:
-            # This op can't be matmul
-            continue
-
-        add_op = _match_pattern(op)
-        if add_op is not None:
-            with block:
-                fusion_status = _try_to_transform(op, add_op, block)
-            # has to break as the downstream iterator is affected.
-            if fusion_status:
-                return fusion_status
-    return fusion_status
-
 @register_pass(namespace="common")
 class fuse_matmul_weight_bias(AbstractGraphPass):
     """
@@ -156,8 +141,41 @@ class fuse_matmul_weight_bias(AbstractGraphPass):
 
         prog: Program
     """
+    def __init__(self):
+        self.ops_to_skip = set()
+
+    def set_ops_to_skip(self, prog):
+        pass
+
     def apply(self, prog):
+        self.set_ops_to_skip(prog)
         for f in prog.functions.values():
             block_changed = True
             while block_changed:
-                block_changed = _fuse_matmul_weight_bias_block(f)
+                block_changed = self._fuse_matmul_weight_bias_block(f)
+
+    def _fuse_matmul_weight_bias_block(self, block):
+        fusion_status = False
+        for op in list(block.operations):
+            for b in op.blocks:
+                block_changed = True
+                while block_changed:
+                    block_changed = self._fuse_matmul_weight_bias_block(b)
+            if len(op.blocks) > 0:
+                # This op can't be matmul
+                continue
+
+            if op in self.ops_to_skip:
+                continue
+
+            add_op = _find_candidate_op(op)
+            if add_op in self.ops_to_skip:
+                continue
+
+            if add_op is not None:
+                with block:
+                    fusion_status = _try_to_transform(op, add_op, block)
+                # has to break as the downstream iterator is affected.
+                if fusion_status:
+                    return fusion_status
+        return fusion_status
diff --git a/coremltools/converters/mil/mil/passes/merge_consecutive_paddings.py b/coremltools/converters/mil/mil/passes/merge_consecutive_paddings.py
index 2314ac7a2..52546487a 100644
--- a/coremltools/converters/mil/mil/passes/merge_consecutive_paddings.py
+++ b/coremltools/converters/mil/mil/passes/merge_consecutive_paddings.py
@@ -3,11 +3,13 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import numpy as np
+
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 from .helper import _check_child_op_type
-import numpy as np
+
 
 def _match_pattern(block, padding_op):
 
diff --git a/coremltools/converters/mil/mil/passes/noop_elimination.py b/coremltools/converters/mil/mil/passes/noop_elimination.py
index 7637a0af1..5029b9020 100644
--- a/coremltools/converters/mil/mil/passes/noop_elimination.py
+++ b/coremltools/converters/mil/mil/passes/noop_elimination.py
@@ -150,25 +150,6 @@ def _match_pattern(op):
 
     return None
 
-def _noop_elimination_block(block):
-    for op in list(block.operations):
-        for b in op.blocks:
-            block_changed = True
-            while block_changed:
-                block_changed = _noop_elimination_block(b)
-        if len(op.blocks) > 0:
-            continue
-
-        remove_fn = _match_pattern(op)
-        if remove_fn is not None:
-            with block:
-                status = remove_fn(op, block)
-            # has to break as the downstream iterator is affected.
-            if status:
-                return status
-    return False
-
-
 @register_pass(namespace="common")
 class noop_elimination(AbstractGraphPass):
     """
@@ -187,9 +168,33 @@ class noop_elimination(AbstractGraphPass):
         ...
 
     """
+    def __init__(self):
+        self.ops_to_skip = set()
+
+    def set_ops_to_skip(self, prog):
+        pass
 
     def apply(self, prog):
+        self.set_ops_to_skip(prog)
         for f in prog.functions.values():
             block_changed = True
             while block_changed:
-                block_changed = _noop_elimination_block(f)
+                block_changed = self._noop_elimination_block(f)
+
+    def _noop_elimination_block(self, block):
+        for op in list(block.operations):
+            for b in op.blocks:
+                block_changed = True
+                while block_changed:
+                    block_changed = self._noop_elimination_block(b)
+            if len(op.blocks) > 0:
+                continue
+
+            remove_fn = _match_pattern(op)
+            if remove_fn is not None and op not in self.ops_to_skip:
+                with block:
+                    status = remove_fn(op, block)
+                # has to break as the downstream iterator is affected.
+                if status:
+                    return status
+        return False
\ No newline at end of file
diff --git a/coremltools/converters/mil/mil/passes/quantization_passes.py b/coremltools/converters/mil/mil/passes/quantization_passes.py
index c38de18b9..69071c0f6 100644
--- a/coremltools/converters/mil/mil/passes/quantization_passes.py
+++ b/coremltools/converters/mil/mil/passes/quantization_passes.py
@@ -3,17 +3,16 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+from enum import Enum as _Enum
+
 import numpy as np
 
-from coremltools.converters.mil.mil.program import Program
-from coremltools.converters.mil.mil.operation import Operation
 from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil.types import builtin_to_string, is_tensor
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
+from coremltools.converters.mil.mil.program import Program
 
-from enum import Enum
 
-class ComputePrecision(Enum):
+class ComputePrecision(_Enum):
     FLOAT16 = "float16"
     FLOAT32 = "float32"
 
@@ -168,8 +167,8 @@ def _check_underflow_to_zero(self, new_var, var):
         # We check whether there are casted values that "becomes" 0 which is not ideal for eps purposes.
         # However we skip arrays with more than 400 in case we compare through a large sparse matrix.
         if new_var.val is not None and \
-             len(var.val.flatten()) < 400 and \
-             _close_to_zero(new_var.val, np.float16).any():
+           len(var.val.flatten()) < 400 and \
+           _close_to_zero(new_var.val, np.float16).any():
             value_modified = False
             original_val = var.val.flatten()
             new_val = new_var.val.flatten()
diff --git a/coremltools/converters/mil/mil/passes/rank0_expand_dims_swap.py b/coremltools/converters/mil/mil/passes/rank0_expand_dims_swap.py
index 76e65fef1..878d77d68 100644
--- a/coremltools/converters/mil/mil/passes/rank0_expand_dims_swap.py
+++ b/coremltools/converters/mil/mil/passes/rank0_expand_dims_swap.py
@@ -3,10 +3,10 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-from coremltools.converters.mil.mil.passes.pass_registry import register_pass
-from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil.passes.helper import _check_child_op_type
+from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+
 
 def _try_to_transform(op, block):
     op_type = op.op_type
diff --git a/coremltools/converters/mil/mil/passes/remove_redundant_ops.py b/coremltools/converters/mil/mil/passes/remove_redundant_ops.py
index f0b0eca6f..2bada8cf9 100644
--- a/coremltools/converters/mil/mil/passes/remove_redundant_ops.py
+++ b/coremltools/converters/mil/mil/passes/remove_redundant_ops.py
@@ -4,10 +4,8 @@
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 from collections import OrderedDict
-import numpy as np
 
 from .helper import _are_ops_identical
-from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 
@@ -188,5 +186,3 @@ def apply(self, prog):
             block_changed = True
             while block_changed:
                 block_changed = _remove_redundant_ops_in_block(f)
-
-
diff --git a/coremltools/converters/mil/mil/passes/remove_symbolic_reshape.py b/coremltools/converters/mil/mil/passes/remove_symbolic_reshape.py
index 16013648c..c41605d3d 100644
--- a/coremltools/converters/mil/mil/passes/remove_symbolic_reshape.py
+++ b/coremltools/converters/mil/mil/passes/remove_symbolic_reshape.py
@@ -1,12 +1,10 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import logging
 
-import numpy as np
 from coremltools.converters.mil.mil.types.symbolic import (
     is_symbolic,
     any_variadic,
@@ -15,7 +13,7 @@
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
-import logging
+
 
 def _remove_symbolic_reshape_block(block):
     num_changes = 0
diff --git a/coremltools/converters/mil/mil/passes/replace_stack_reshape.py b/coremltools/converters/mil/mil/passes/replace_stack_reshape.py
index 59722d8a7..3c78227ad 100644
--- a/coremltools/converters/mil/mil/passes/replace_stack_reshape.py
+++ b/coremltools/converters/mil/mil/passes/replace_stack_reshape.py
@@ -7,6 +7,7 @@
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
 
+
 def _match_operation(stack_op):
 
     # Identify if this is an op we can transform
diff --git a/coremltools/converters/mil/mil/passes/sanitize_input_output_names.py b/coremltools/converters/mil/mil/passes/sanitize_input_output_names.py
index 0c9b343f5..4fa56af14 100644
--- a/coremltools/converters/mil/mil/passes/sanitize_input_output_names.py
+++ b/coremltools/converters/mil/mil/passes/sanitize_input_output_names.py
@@ -1,13 +1,12 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+from .name_sanitization_utils import NameSanitizer, sanitize_block
 from coremltools.converters.mil.mil.passes.pass_registry import register_pass
 from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass
-from .name_sanitization_utils import NameSanitizer, sanitize_block
+
 
 @register_pass(namespace="common")
 class sanitize_input_output_names(AbstractGraphPass):
@@ -17,12 +16,12 @@ class sanitize_input_output_names(AbstractGraphPass):
     of the format [a-zA-Z_][a-zA-Z0-9_]*
     """
     def apply(self, prog):
-      sanitizer_vars = NameSanitizer(prefix="var_")
-      sanitizer_ops = NameSanitizer(prefix="op_")
+        sanitizer_vars = NameSanitizer(prefix="var_")
+        sanitizer_ops = NameSanitizer(prefix="op_")
 
-      # sanitize the input/output of the main block
-      sanitize_block(prog.functions["main"],
-                     sanitizer_vars,
-                     sanitizer_ops,
-                     prog.main_input_types,
-                     sanitize_model_inputs_outputs_only=True)
+        # sanitize the input/output of the main block
+        sanitize_block(prog.functions["main"],
+                       sanitizer_vars,
+                       sanitizer_ops,
+                       prog.main_input_types,
+                       sanitize_model_inputs_outputs_only=True)
diff --git a/coremltools/converters/mil/mil/passes/test_cast_optimization.py b/coremltools/converters/mil/mil/passes/test_cast_optimization.py
index 439827fe1..5f08c2bd9 100644
--- a/coremltools/converters/mil/mil/passes/test_cast_optimization.py
+++ b/coremltools/converters/mil/mil/passes/test_cast_optimization.py
@@ -8,7 +8,6 @@
 
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
-    assert_op_count_match,
     assert_model_is_valid,
     get_op_types_in_program,
     apply_pass_and_basic_check,
@@ -40,7 +39,7 @@ def prog(x):
         self.assertEqual(get_op_types_in_program(prog), ['cast', 'square', 'cast'])
 
         apply_pass_and_basic_check(prog, "common::cast_optimization")
-        _,_,block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
+        _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
 
         self.assertEqual(get_op_types_in_program(prog), ["square"])
 
@@ -69,7 +68,7 @@ def prog(x):
         self.assertEqual(get_op_types_in_program(prog), ['cast', 'cast', 'square'])
 
         apply_pass_and_basic_check(prog, "common::cast_optimization")
-        _,_,block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
+        _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
 
         self.assertEqual(get_op_types_in_program(prog), ["square"])
 
@@ -98,7 +97,7 @@ def prog(x):
         self.assertEqual(get_op_types_in_program(prog), ['cast', 'cast', 'square'])
 
         apply_pass_and_basic_check(prog, "common::cast_optimization")
-        _,_,block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
+        _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
 
         self.assertEqual(get_op_types_in_program(prog), ["cast", "square"])
         self.assertEqual(block.find_ops(op_type="cast")[0].dtype.val, "fp16")
@@ -132,7 +131,7 @@ def prog(x):
         self.assertEqual(get_op_types_in_program(prog), ['cast', 'cast', 'cast', 'cast', 'cast', 'cast', 'square'])
 
         apply_pass_and_basic_check(prog, "common::cast_optimization")
-        _,_,block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
+        _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
 
         self.assertEqual(get_op_types_in_program(prog), ["cast", "square"])
         self.assertEqual(block.find_ops(op_type="cast")[0].dtype.val, "fp16")
@@ -152,7 +151,7 @@ def prog(x):
                                |---->cast(dtype="fp32")---->log--->out_3
 
     Output graph:
-    
+
          |---->square--->out_1
          |
     input---->relu--->out_2
@@ -175,7 +174,7 @@ def prog(x):
         self.assertEqual(get_op_types_in_program(prog), ['cast', 'cast', 'cast', 'cast', 'square', 'relu', 'log'])
 
         apply_pass_and_basic_check(prog, "common::cast_optimization")
-        _,_,block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
+        _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination")
 
         self.assertEqual(get_op_types_in_program(prog), ["square", "relu", "log"])
 
diff --git a/coremltools/converters/mil/mil/passes/test_concat_to_pixel_shuffle.py b/coremltools/converters/mil/mil/passes/test_concat_to_pixel_shuffle.py
index 66020c1c0..cbed7c208 100644
--- a/coremltools/converters/mil/mil/passes/test_concat_to_pixel_shuffle.py
+++ b/coremltools/converters/mil/mil/passes/test_concat_to_pixel_shuffle.py
@@ -273,7 +273,7 @@ def prog(x1, x2, x3, x4):
         )
         self.assertEqual(get_op_types_in_program(prog), ["concat", "concat", "concat"])
 
-    def test_failure_5(self):
+    def test_failure_6(self):
         """
         The inputs are the wrong rank, so the pattern won't match.
         """
@@ -294,7 +294,7 @@ def prog(x1, x2, x3, x4):
         )
         self.assertEqual(get_op_types_in_program(prog), ["concat", "concat", "concat"])
 
-    def test_failure_6(self):
+    def test_failure_7(self):
         """ 
         Extra input to the w_concats means the pattern won't match.
         """
diff --git a/coremltools/converters/mil/mil/passes/test_conv_batchnorm_fusion_pass.py b/coremltools/converters/mil/mil/passes/test_conv_batchnorm_fusion_pass.py
index 8e18163fe..515faf0ad 100644
--- a/coremltools/converters/mil/mil/passes/test_conv_batchnorm_fusion_pass.py
+++ b/coremltools/converters/mil/mil/passes/test_conv_batchnorm_fusion_pass.py
@@ -3,6 +3,10 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import itertools
+import numpy as np
+import pytest
+
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
     assert_model_is_valid,
@@ -11,9 +15,6 @@
 )
 from coremltools.converters.mil import testing_reqs
 
-import pytest
-import numpy as np
-import itertools
 
 np.random.seed(1984)
 
@@ -23,34 +24,34 @@ def _apply_weight_transform(inputs, is_deconv):
     """
     Utility funtion to test the weight transform function in conv batch_norm fusion pass.
     """
-    Cin, Cout, groups = 10, 20, 10
+    Cin, _, groups = 10, 20, 10
     input_shape = (1, Cin, 2, 2)
     @mb.program(input_specs=[mb.TensorSpec(shape=input_shape)])
     def prog(x):
 
         if is_deconv:
             x = mb.conv_transpose(
-                    x=x,
-                    weight=inputs["conv_weight"],
-                    bias=inputs["conv_bias"],
-                    groups=groups,
-                )
+                x=x,
+                weight=inputs["conv_weight"],
+                bias=inputs["conv_bias"],
+                groups=groups,
+            )
         else:
             x = mb.conv(
-                    x=x,
-                    weight=inputs["conv_weight"],
-                    bias=inputs["conv_bias"],
-                    groups=groups,
-                )
-
-        x = mb.batch_norm(
                 x=x,
-                mean=inputs["mean"],
-                variance=inputs["variance"],
-                gamma=inputs["gamma"],
-                beta=inputs["beta"],
-                epsilon=inputs["epsilon"],
+                weight=inputs["conv_weight"],
+                bias=inputs["conv_bias"],
+                groups=groups,
             )
+
+        x = mb.batch_norm(
+            x=x,
+            mean=inputs["mean"],
+            variance=inputs["variance"],
+            gamma=inputs["gamma"],
+            beta=inputs["beta"],
+            epsilon=inputs["epsilon"],
+        )
         return x
 
     apply_pass_and_basic_check(
@@ -197,9 +198,6 @@ def prog(x):
         assert get_op_types_in_program(prog) == ["conv"]
 
         # validate graph pass
-        input_dict = {
-            "x": np.random.rand(*input_shape),
-        }
         output_shape = (2, Cout, 19) if rank == 3 else (2, Cout, 19, 22)
         assert_model_is_valid(
             prog,
@@ -230,11 +228,11 @@ def prog(x):
             conv_weight = np.random.rand(Cin, Cout // groups, 2) if rank == 3 else np.random.rand(Cin, Cout // groups, 2, 3)
             conv_bias = np.random.rand(Cout) if has_bias else None
             x = mb.conv_transpose(
-                    x=x,
-                    weight=conv_weight,
-                    bias=conv_bias,
-                    groups=groups,
-                )
+                x=x,
+                weight=conv_weight,
+                bias=conv_bias,
+                groups=groups,
+            )
 
             # batch_norm layer
             gamma = np.random.rand(Cout)
diff --git a/coremltools/converters/mil/mil/passes/test_conv_scale_fusion.py b/coremltools/converters/mil/mil/passes/test_conv_scale_fusion.py
index 4e278dd58..7ca1df6ec 100644
--- a/coremltools/converters/mil/mil/passes/test_conv_scale_fusion.py
+++ b/coremltools/converters/mil/mil/passes/test_conv_scale_fusion.py
@@ -3,6 +3,10 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import pytest
+import numpy as np
+import itertools
+
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
     assert_model_is_valid,
@@ -11,10 +15,6 @@
 )
 from coremltools.converters.mil import testing_reqs
 
-import pytest
-import numpy as np
-import itertools
-
 np.random.seed(1984)
 
 backends = testing_reqs.backends
@@ -23,7 +23,7 @@ def _apply_weight_transform(inputs, is_deconv, is_real_div, is_conv_first_input,
     """
     Utility funtion to test the weight transform function in conv scale fusion pass.
     """
-    Cin, Cout, groups = 10, 20, 10
+    Cin, _, groups = 10, 20, 10
     input_shape = (1, Cin, 2, 2)
     @mb.program(input_specs=[mb.TensorSpec(shape=input_shape)])
     def prog(x):
@@ -272,4 +272,4 @@ def prog(x):
             {"x": input_shape},
             expected_output_shapes={block.outputs[0].name: output_shape},
             backend=backend,
-        )
\ No newline at end of file
+        )
diff --git a/coremltools/converters/mil/mil/passes/test_dedup_op_and_var_names.py b/coremltools/converters/mil/mil/passes/test_dedup_op_and_var_names.py
index e66b49dc0..a6c13be5e 100644
--- a/coremltools/converters/mil/mil/passes/test_dedup_op_and_var_names.py
+++ b/coremltools/converters/mil/mil/passes/test_dedup_op_and_var_names.py
@@ -3,6 +3,8 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import unittest
+
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
     assert_model_is_valid,
@@ -10,8 +12,7 @@
     get_op_names_in_program,
     apply_pass_and_basic_check,
 )
-from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY
-import unittest
+
 
 class OpNameDeduplicationPass(unittest.TestCase):
 
@@ -22,7 +23,7 @@ def prog(x):
             return x
 
         prev_prog, _, block = apply_pass_and_basic_check(prog,
-                "common::dedup_op_and_var_names")
+                                                         "common::dedup_op_and_var_names")
 
         self.assertEqual(get_op_types_in_program(prev_prog), ['reshape'])
         self.assertEqual(get_op_names_in_program(prev_prog), ['reshape'])
@@ -45,17 +46,17 @@ def prog(x):
             return x
 
         prev_prog, _, block = apply_pass_and_basic_check(prog,
-                "common::dedup_op_and_var_names")
+                                                         "common::dedup_op_and_var_names")
 
         self.assertEqual(get_op_types_in_program(prev_prog),
-                ['cast', 'cast', 'square'])
+                         ['cast', 'cast', 'square'])
         self.assertEqual(get_op_names_in_program(prev_prog),
-                ['castop', 'castop', 'square_last'])
+                         ['castop', 'castop', 'square_last'])
 
         self.assertEqual(get_op_types_in_program(prog),
-                ['cast', 'cast', 'square'])
+                         ['cast', 'cast', 'square'])
         self.assertEqual(get_op_names_in_program(prog),
-                ['castop', 'castop_1', 'square_last'])
+                         ['castop', 'castop_1', 'square_last'])
 
         assert_model_is_valid(
             prog,
@@ -75,18 +76,18 @@ def prog(x):
             return x
 
         prev_prog, _, block = apply_pass_and_basic_check(prog,
-                "common::dedup_op_and_var_names")
+                                                         "common::dedup_op_and_var_names")
 
         self.assertEqual(get_op_types_in_program(prev_prog),
-                ['cast', 'cast', 'cast', 'cast', 'cast', 'square'])
+                         ['cast', 'cast', 'cast', 'cast', 'cast', 'square'])
         self.assertEqual(get_op_names_in_program(prev_prog),
-                ['castop', 'castop', 'castop_2', 'castop', 'castop_2', 'square'])
+                         ['castop', 'castop', 'castop_2', 'castop', 'castop_2', 'square'])
 
         self.assertEqual(get_op_types_in_program(prog),
-                ['cast', 'cast', 'cast', 'cast', 'cast', 'square'])
+                         ['cast', 'cast', 'cast', 'cast', 'cast', 'square'])
         self.assertEqual(get_op_names_in_program(prog),
-                ['castop', 'castop_1', 'castop_2', 'castop_3',
-                    'castop_2_1', 'square'])
+                         ['castop', 'castop_1', 'castop_2', 'castop_3',
+                          'castop_2_1', 'square'])
 
         assert_model_is_valid(
             prog,
@@ -94,7 +95,6 @@ def prog(x):
             expected_output_shapes={block.outputs[0].name: (10, 20)},
         )
 
-
     def test_input_name_shadow(self):
         @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))])
         def prog(x):
@@ -105,16 +105,16 @@ def prog(x):
             return x
 
         prev_prog, _, block = apply_pass_and_basic_check(prog,
-                "common::dedup_op_and_var_names")
+                                                         "common::dedup_op_and_var_names")
         self.assertEqual(get_op_types_in_program(prev_prog),
-                ['transpose', 'relu'])
+                         ['transpose', 'relu'])
         self.assertEqual(get_op_names_in_program(prev_prog),
-                ['x', 'relu'])
+                         ['x', 'relu'])
 
         self.assertEqual(get_op_types_in_program(prog),
-                ['transpose', 'relu'])
+                         ['transpose', 'relu'])
         self.assertEqual(get_op_names_in_program(prog),
-                ['x', 'relu'])
+                         ['x', 'relu'])
 
         op = prog['main'].find_ops(op_type='transpose')[0]
         self.assertEqual("x_1", op.outputs[0].name)
@@ -143,7 +143,7 @@ def false_fn():
         assert cond_op.blocks[0].outputs[0].name == 'x'
         assert cond_op.blocks[1].outputs[0].name == 'x'
         prev_prog, _, block = apply_pass_and_basic_check(prog,
-                "common::dedup_op_and_var_names")
+                                                         "common::dedup_op_and_var_names")
         cond_op = prog.functions['main'].operations[-1]
         assert cond_op.blocks[0].outputs[0].name == 'x_1'
         assert cond_op.blocks[1].outputs[0].name == 'x_2'
diff --git a/coremltools/converters/mil/mil/passes/test_image_preprocessing.py b/coremltools/converters/mil/mil/passes/test_image_preprocessing.py
index 08ddd9fcb..cec60f309 100644
--- a/coremltools/converters/mil/mil/passes/test_image_preprocessing.py
+++ b/coremltools/converters/mil/mil/passes/test_image_preprocessing.py
@@ -2,17 +2,14 @@
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
-import numpy as np
 import unittest
 
 from coremltools import ImageType
+from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
-    assert_op_count_match,
-    assert_model_is_valid,
     get_op_types_in_program,
     apply_pass_and_basic_check,
 )
-from coremltools.converters.mil.mil import Builder as mb
 
 
 class ImagePreprocessingPass(unittest.TestCase):
@@ -66,8 +63,8 @@ def prog(x):
             return mb.relu(x=x4)
 
         mlmodel = convert(prog,
-            inputs=[ImageType(name="x", shape=(10, 20, 30, 3),
-              channel_first=False)],
-            source="milinternal", convert_to="neuralnetwork")
+                          inputs=[ImageType(name="x", shape=(10, 20, 30, 3),
+                                            channel_first=False)],
+                          source="milinternal", convert_to="neuralnetwork")
         assert mlmodel is not None
         assert len(mlmodel.get_spec().neuralNetwork.layers) == 3
diff --git a/coremltools/converters/mil/mil/passes/test_linear_bias_fusion.py b/coremltools/converters/mil/mil/passes/test_linear_bias_fusion.py
index 9e0344b87..e930a28a2 100644
--- a/coremltools/converters/mil/mil/passes/test_linear_bias_fusion.py
+++ b/coremltools/converters/mil/mil/passes/test_linear_bias_fusion.py
@@ -3,17 +3,18 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import itertools
+import numpy as np
+import pytest
+
+from coremltools.converters.mil import testing_reqs
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
     assert_model_is_valid,
     get_op_types_in_program,
     apply_pass_and_basic_check,
 )
-from coremltools.converters.mil import testing_reqs
 
-import pytest
-import numpy as np
-import itertools
 
 np.random.seed(1984)
 
@@ -28,15 +29,15 @@ def prog(x):
 
         if has_bias:
             linear = mb.linear(
-                    x=x,
-                    weight=inputs["linear_weight"],
-                    bias=inputs["linear_bias"],
-                )
+                x=x,
+                weight=inputs["linear_weight"],
+                bias=inputs["linear_bias"],
+            )
         else:
             linear = mb.linear(
-                    x=x,
-                    weight=inputs["linear_weight"],
-                )
+                x=x,
+                weight=inputs["linear_weight"],
+            )
 
         if is_first_input:
             kwargs = {
@@ -121,7 +122,6 @@ def test_transform_linear(self, op_type, is_first_input, has_bias, broadcast):
         np.testing.assert_almost_equal(new_weight, expected_weight)
         np.testing.assert_almost_equal(new_bias, expected_bias)
 
-
     @pytest.mark.parametrize(
         "rank, op_type, is_first_input, broadcast, backend",
         itertools.product(
@@ -156,10 +156,10 @@ def prog(x):
                     bias = np.reshape(bias, (1, 2))
 
             x = mb.linear(
-                    x=x,
-                    weight=linear_weight,
-                    bias=linear_bias,
-                )
+                x=x,
+                weight=linear_weight,
+                bias=linear_bias,
+            )
 
             func = mb.add if op_type == "add" else mb.sub
             if is_first_input:
@@ -183,9 +183,6 @@ def prog(x):
         assert get_op_types_in_program(prog) == ["linear"]
 
         # validate graph pass
-        input_dict = {
-            "x": np.random.rand(*input_shape),
-        }
         output_shape = [1, 2, 2]
         output_shape = tuple(output_shape[-rank:])
         assert_model_is_valid(
diff --git a/coremltools/converters/mil/mil/passes/test_merge_consecutive_paddings.py b/coremltools/converters/mil/mil/passes/test_merge_consecutive_paddings.py
index a7564d6c2..b059712e4 100644
--- a/coremltools/converters/mil/mil/passes/test_merge_consecutive_paddings.py
+++ b/coremltools/converters/mil/mil/passes/test_merge_consecutive_paddings.py
@@ -3,18 +3,16 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-from coremltools._deps import _IS_MACOS
+import numpy as np
+import pytest
+
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
     assert_model_is_valid,
     get_op_types_in_program,
     apply_pass_and_basic_check,
 )
-from coremltools.converters.mil.testing_reqs import ct
 
-import pytest
-
-import numpy as np
 
 np.random.seed(1984)
 
diff --git a/coremltools/converters/mil/mil/passes/test_noop_elimination.py b/coremltools/converters/mil/mil/passes/test_noop_elimination.py
index 5ef4fd5cb..5b694eb62 100644
--- a/coremltools/converters/mil/mil/passes/test_noop_elimination.py
+++ b/coremltools/converters/mil/mil/passes/test_noop_elimination.py
@@ -8,7 +8,6 @@
 import pytest
 
 from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY
 from coremltools.converters.mil.testing_utils import (
     assert_model_is_valid,
     get_op_types_in_program,
@@ -57,7 +56,7 @@ def prog(x):
     elif op_type in {'sub'}:
         if pos == 'y' and (val == 0 or val == [0, 0, 0, 0]):
             new_program = ["relu"]
-            
+
     assert get_op_types_in_program(prev_prog) == original_program
     assert get_op_types_in_program(prog) == new_program
     assert_model_is_valid(
diff --git a/coremltools/converters/mil/mil/passes/test_pad_conv_pass.py b/coremltools/converters/mil/mil/passes/test_pad_conv_pass.py
index 9f4a952aa..ddbb4ebae 100644
--- a/coremltools/converters/mil/mil/passes/test_pad_conv_pass.py
+++ b/coremltools/converters/mil/mil/passes/test_pad_conv_pass.py
@@ -8,7 +8,6 @@
 
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
-    assert_op_count_match,
     assert_model_is_valid,
     get_op_types_in_program,
     apply_pass_and_basic_check,
@@ -30,7 +29,7 @@ class PadConvOptimizationPass(unittest.TestCase):
     def test_simple_direct_output(self):
         @mb.program(input_specs=[mb.TensorSpec(shape=(1, 16, 20, 24))])
         def prog(x):
-            x = mb.pad(x=x, pad=[0,0,1,1,1,1,0,0])
+            x = mb.pad(x=x, pad=[0, 0, 1, 1, 1, 1, 0, 0])
             x = mb.transpose(x=x, perm=[0, 3, 1, 2])
             x = mb.conv(x=x, weight=np.random.random([24,24,3,3]), pad_type="valid")
             x = mb.transpose(x=x, perm=[0, 2, 3, 1])
@@ -67,12 +66,12 @@ def prog(x):
     def test_pad_transposed_forked_conv(self):
         @mb.program(input_specs=[mb.TensorSpec(shape=(1, 16, 20, 24))])
         def prog(x):
-            pad = mb.pad(x=x, pad=[0,0,1,1,1,1,0,0])
+            pad = mb.pad(x=x, pad=[0, 0, 1, 1, 1, 1, 0, 0])
             x = mb.transpose(x=pad, perm=[0, 3, 1, 2])
-            x = mb.conv(x=x, weight=np.random.random([24,24,3,3]), pad_type="valid")
+            x = mb.conv(x=x, weight=np.random.random([24, 24, 3, 3]), pad_type="valid")
             x = mb.transpose(x=x, perm=[0, 2, 3, 1])
             y = mb.transpose(x=pad, perm=[0, 3, 1, 2])
-            y = mb.conv(x=y, weight=np.random.random([24,24,3,3]), pad_type="valid")
+            y = mb.conv(x=y, weight=np.random.random([24, 24, 3, 3]), pad_type="valid")
             y = mb.transpose(x=y, perm=[0, 2, 3, 1])
             return x, y
 
diff --git a/coremltools/converters/mil/mil/passes/test_passes.py b/coremltools/converters/mil/mil/passes/test_passes.py
index e59359621..937ff4eb9 100644
--- a/coremltools/converters/mil/mil/passes/test_passes.py
+++ b/coremltools/converters/mil/mil/passes/test_passes.py
@@ -113,17 +113,18 @@ def program0(x, y):
 
     @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))])
     def program1(x):
-        weights_val = np.random.rand(2, 4).T.astype(np.float32)
+        weights_val = np.random.rand(4, 2).T.astype(np.float32)
         weights = mb.const(val=weights_val)
-        bias_val = np.random.rand(4).astype(np.float32)
+        bias_val = np.random.rand(2).astype(np.float32)
         bias = mb.const(val=bias_val)
 
         # unused op and its inputs should be eliminated
-        mb.matmul(x=x, y=weights)
+        weights_for_matmul = mb.transpose(x=weights, perm=[1,0])
+        mb.matmul(x=x, y=weights_for_matmul)
 
         return mb.linear(x=x, weight=weights, bias=bias)
 
-    assert_op_count_match(program1, expect=6)
+    assert_op_count_match(program1, expect=8)
     prev_prog = copy.deepcopy(program1)
     PASS_REGISTRY["common::dead_code_elimination"](program1)
     assert_same_output_names(prev_prog, program1)
@@ -253,96 +254,6 @@ def cond(a, bx):
     if validate_model:
         assert_model_is_valid(prog, {"a": (1, 2), "b": (1, 2)})
 
-
-def test_gelu_tanh_approximation():
-    """
-    Detect gelu tanh approx pattern, found in the TF bert model.
-    y = ( tanh((.0447)x^3 + x ) * (sqrt(2/pi)) + 1 ) * 0.5 * x
-    """
-
-    @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))])
-    def prog(x):
-        x1 = mb.pow(x=x, y=3)
-        x1 = mb.mul(x=0.044715, y=x1)
-        x1 = mb.add(x=x1, y=x)
-        x1 = mb.mul(x=x1, y=np.sqrt(2 / np.pi))
-        x1 = mb.tanh(x=x1)
-        x1 = mb.add(x=1, y=x1)
-        x1 = mb.mul(x=0.5, y=x1)
-        x1 = mb.mul(x=x, y=x1)
-        return x1
-
-    prev_prog, prev_block, block = apply_pass_and_basic_check(
-        prog, "common::fuse_gelu_tanh_approximation"
-    )
-    assert get_op_types_in_program(prev_prog) == [
-        "pow",
-        "mul",
-        "add",
-        "mul",
-        "tanh",
-        "add",
-        "mul",
-        "mul",
-    ]
-    assert get_op_types_in_program(prog) == ["gelu"]
-    assert_model_is_valid(
-        prog,
-        {"x": (3, 5, 6)},
-        expected_output_shapes={block.outputs[0].name: (3, 5, 6)},
-    )
-
-@pytest.mark.parametrize(
-    "first_op_1, first_op_2, first_op_3, first_op_4, first_op_5, first_op_6",
-     itertools.product(
-         [True, False],
-         [True, False],
-         [True, False],
-         [True, False],
-         [True, False],
-         [True, False]
-     )
-)
-def test_gelu_tanh_approximation2(first_op_1, first_op_2, first_op_3, first_op_4, first_op_5, first_op_6):
-    """
-    Detect gelu tanh approx pattern, found in the TF Sanitized GPT2 model.
-    y = ( tanh((.0447)x^3 + x ) * (sqrt(2/pi)) + 1 ) * 0.5 * x
-    """
-
-    @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))])
-    def prog(x):
-        firstmul = mb.mul(x=x, y=0.5) if first_op_1 else mb.mul(x=0.5, y=x)
-        x1 = mb.pow(x=x, y=3)
-        x1 = mb.mul(x=0.044715, y=x1) if first_op_2 else mb.mul(x=x1, y=0.044715)
-        x1 = mb.add(x=x1, y=x) if first_op_3 else mb.add(x=x, y=x1)
-        x1 = mb.mul(x=x1, y=np.sqrt(2 / np.pi)) if first_op_4 else mb.mul(x=np.sqrt(2 / np.pi), y=x1)
-        x1 = mb.tanh(x=x1)
-        x1 = mb.add(x=1, y=x1) if first_op_5 else mb.add(x=x1, y=1)
-        x1 = mb.mul(x=firstmul, y=x1) if first_op_6 else mb.mul(x=x1, y=firstmul)
-        return x1
-
-    prev_prog, prev_block, block = apply_pass_and_basic_check(
-        prog, "common::fuse_gelu_tanh_approximation"
-    )
-    assert get_op_types_in_program(prev_prog) == [
-        "mul",
-        "pow",
-        "mul",
-        "add",
-        "mul",
-        "tanh",
-        "add",
-        "mul",
-    ]
-
-    if os.getenv('ENABLE_EXPERIMENTAL_PASSES') == '1':
-        assert get_op_types_in_program(prog) == ["gelu"]
-        assert_model_is_valid(
-            prog,
-            {"x": (3, 5, 6)},
-            expected_output_shapes={block.outputs[0].name: (3, 5, 6)},
-        )
-
 def test_generic_child_ordering():
     """
     Checks that the new generic pattern matching infrastructure works
@@ -374,7 +285,7 @@ def var_constraints(pattern):
         constraints_passed &= _check_var_scalar_value(pattern.thepowerop.y, 3)
         constraints_passed &= _check_var_scalar_value(pattern.sub_0.y, 5)
         constraints_passed &= _check_var_scalar_value(pattern.add_0.x, 5) or _check_var_scalar_value(pattern.add_0.y, 5)
-        constraints_passed &=  _check_var_scalar_value(pattern.mul_0.x, 5) or _check_var_scalar_value(pattern.mul_0.y, 5)
+        constraints_passed &= _check_var_scalar_value(pattern.mul_0.x, 5) or _check_var_scalar_value(pattern.mul_0.y, 5)
         return constraints_passed
 
     def transform_pattern(pattern):
@@ -499,69 +410,159 @@ def prog(x):
     assert get_op_types_in_program(prev_prog) == ["conv_transpose"]
     assert get_op_types_in_program(prog) == ["conv_transpose"]
     prev_conv_transpose_op = prev_prog.find_ops(op_type="conv_transpose",
-        exactly_one=True)[0]
+                                                exactly_one=True)[0]
     conv_transpose_op = prog.find_ops(op_type="conv_transpose",
-        exactly_one=True)[0]
+                                      exactly_one=True)[0]
     assert np.all(conv_transpose_op.output_shape.val ==
-        prev_conv_transpose_op.outputs[0].shape)
-
-@pytest.mark.parametrize(
-    "op_type, is_first_op1, is_first_op2, is_first_op3, is_first_op4, const_mul_first",
-    itertools.product(
-        ["real_div", "mul"],
-        [True, False],
-        [True, False],
-        [True ,False],
-        [True, False],
-        [True, False],
+                  prev_conv_transpose_op.outputs[0].shape)
+
+
+class TestGeluFusionPass:
+
+    def test_gelu_tanh_approximation1(self):
+        """
+        Detect gelu tanh approx pattern, found in the TF bert model.
+        y = ( tanh((.0447)x^3 + x ) * (sqrt(2/pi)) + 1 ) * 0.5 * x
+        """
+
+        @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))])
+        def prog(x):
+            x1 = mb.pow(x=x, y=3)
+            x1 = mb.mul(x=0.044715, y=x1)
+            x1 = mb.add(x=x1, y=x)
+            x1 = mb.mul(x=x1, y=np.sqrt(2 / np.pi))
+            x1 = mb.tanh(x=x1)
+            x1 = mb.add(x=1, y=x1)
+            x1 = mb.mul(x=0.5, y=x1)
+            x1 = mb.mul(x=x, y=x1)
+            return x1
+
+        prev_prog, prev_block, block = apply_pass_and_basic_check(
+            prog, "common::fuse_gelu_tanh_approximation"
+        )
+        assert get_op_types_in_program(prev_prog) == [
+            "pow",
+            "mul",
+            "add",
+            "mul",
+            "tanh",
+            "add",
+            "mul",
+            "mul",
+        ]
+        assert get_op_types_in_program(prog) == ["gelu"]
+        assert_model_is_valid(
+            prog,
+            {"x": (3, 5, 6)},
+            expected_output_shapes={block.outputs[0].name: (3, 5, 6)},
+        )
+
+    @pytest.mark.parametrize(
+        "first_op_1, first_op_2, first_op_3, first_op_4, first_op_5, first_op_6",
+        itertools.product(
+            [True, False],
+            [True, False],
+            [True, False],
+            [True, False],
+            [True, False],
+            [True, False]
         )
     )
-def test_gelu_exact_approximation(op_type, is_first_op1, is_first_op2, is_first_op3, is_first_op4, const_mul_first):
-    """
-    Detect gelu exact pattern.
-    y = 0.5 * x * ( 1 + erf ( x / srqt(2)))
-    """
+    def test_gelu_tanh_approximation2(self, first_op_1, first_op_2, first_op_3, first_op_4, first_op_5, first_op_6):
+        """
+        Detect gelu tanh approx pattern, found in the TF Sanitized GPT2 model.
+        y = ( tanh((.0447)x^3 + x ) * (sqrt(2/pi)) + 1 ) * 0.5 * x
+        """
 
-    @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))])
-    def prog(x):
-        if op_type == "real_div":
-            x1 = mb.real_div(x=x, y=2**0.5)
-        elif op_type == "mul":
-            x1 = mb.mul(x=x, y=2**-0.5) if is_first_op1 else mb.mul(x=2**-0.5, y=x)
+        @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))])
+        def prog(x):
+            firstmul = mb.mul(x=x, y=0.5) if first_op_1 else mb.mul(x=0.5, y=x)
+            x1 = mb.pow(x=x, y=3)
+            x1 = mb.mul(x=0.044715, y=x1) if first_op_2 else mb.mul(x=x1, y=0.044715)
+            x1 = mb.add(x=x1, y=x) if first_op_3 else mb.add(x=x, y=x1)
+            x1 = mb.mul(x=x1, y=np.sqrt(2 / np.pi)) if first_op_4 else mb.mul(x=np.sqrt(2 / np.pi), y=x1)
+            x1 = mb.tanh(x=x1)
+            x1 = mb.add(x=1, y=x1) if first_op_5 else mb.add(x=x1, y=1)
+            x1 = mb.mul(x=firstmul, y=x1) if first_op_6 else mb.mul(x=x1, y=firstmul)
+            return x1
+
+        prev_prog, prev_block, block = apply_pass_and_basic_check(
+            prog, "common::fuse_gelu_tanh_approximation"
+        )
+        assert get_op_types_in_program(prev_prog) == [
+            "mul",
+            "pow",
+            "mul",
+            "add",
+            "mul",
+            "tanh",
+            "add",
+            "mul",
+        ]
+
+        assert get_op_types_in_program(prog) == ["gelu"]
+        assert_model_is_valid(
+            prog,
+            {"x": (3, 5, 6)},
+            expected_output_shapes={block.outputs[0].name: (3, 5, 6)},
+        )
+
+    @pytest.mark.parametrize(
+        "op_type, is_first_op1, is_first_op2, is_first_op3, is_first_op4, const_mul_first",
+        itertools.product(
+            ["real_div", "mul"],
+            [True, False],
+            [True, False],
+            [True ,False],
+            [True, False],
+            [True, False],
+            )
+        )
+    def test_gelu_exact(self, op_type, is_first_op1, is_first_op2, is_first_op3, is_first_op4, const_mul_first):
+        """
+        Detect gelu exact pattern.
+        y = 0.5 * x * ( 1 + erf ( x / sqrt(2)))
+        """
 
-        x2 = mb.erf(x=x1)
-        x3 = mb.add(x=x2, y=1) if is_first_op2 else mb.add(x=1, y=x2)
+        @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))])
+        def prog(x):
+            if op_type == "real_div":
+                x1 = mb.real_div(x=x, y=2**0.5)
+            elif op_type == "mul":
+                x1 = mb.mul(x=x, y=2**-0.5) if is_first_op1 else mb.mul(x=2**-0.5, y=x)
 
-        if const_mul_first:
-            y1 = mb.const(val=0.5)
-            y2 = x
-        else:
-            y1 = x
-            y2 = mb.const(val=0.5)
+            x2 = mb.erf(x=x1)
+            x3 = mb.add(x=x2, y=1) if is_first_op2 else mb.add(x=1, y=x2)
 
-        x4 = mb.mul(x=x3, y=y1) if is_first_op3 else mb.mul(x=y1, y=x3)
-        x5 = mb.mul(x=x4, y=y2) if is_first_op4 else mb.mul(x=y2, y=x4)
+            if const_mul_first:
+                y1 = mb.const(val=0.5)
+                y2 = x
+            else:
+                y1 = x
+                y2 = mb.const(val=0.5)
 
-        return x5
+            x4 = mb.mul(x=x3, y=y1) if is_first_op3 else mb.mul(x=y1, y=x3)
+            x5 = mb.mul(x=x4, y=y2) if is_first_op4 else mb.mul(x=y2, y=x4)
 
-    prev_prog, prev_block, block = apply_pass_and_basic_check(
-        prog, "common::fuse_gelu_exact"
-    )
+            return x5
 
-    assert get_op_types_in_program(prev_prog) == [
-        op_type,
-        "erf",
-        "add",
-        "mul",
-        "mul",
-    ]
-    assert get_op_types_in_program(prog) == ["gelu"]
-    assert_model_is_valid(
-        prog,
-        {"x": (3, 5, 6)},
-        expected_output_shapes={block.outputs[0].name: (3, 5, 6)},
-    )
+        prev_prog, prev_block, block = apply_pass_and_basic_check(
+            prog, "common::fuse_gelu_exact"
+        )
 
+        assert get_op_types_in_program(prev_prog) == [
+            op_type,
+            "erf",
+            "add",
+            "mul",
+            "mul",
+        ]
+        assert get_op_types_in_program(prog) == ["gelu"]
+        assert_model_is_valid(
+            prog,
+            {"x": (3, 5, 6)},
+            expected_output_shapes={block.outputs[0].name: (3, 5, 6)},
+        )
 
 
 class TestLeakyReluFusionPass:
@@ -571,8 +572,8 @@ class TestLeakyReluFusionPass:
         itertools.product(
             [True, False],
             [True, False],
-            )
         )
+    )
     def test_valid_leaky_relu_pattern(self, swap_mul_input_order, swap_max_input_order):
         """
         Input graph:
diff --git a/coremltools/converters/mil/mil/passes/test_reduce_transposes_pass.py b/coremltools/converters/mil/mil/passes/test_reduce_transposes_pass.py
index 5be1db6a2..62b6abef6 100644
--- a/coremltools/converters/mil/mil/passes/test_reduce_transposes_pass.py
+++ b/coremltools/converters/mil/mil/passes/test_reduce_transposes_pass.py
@@ -17,8 +17,10 @@
 from .reduce_transposes import _find_transpose_compliment
 import unittest
 
+
 np.random.seed(1984)
 
+
 class TransposeOptimizationPass(unittest.TestCase):
     """"""
 
diff --git a/coremltools/converters/mil/mil/passes/test_replace_stack_reshape.py b/coremltools/converters/mil/mil/passes/test_replace_stack_reshape.py
index 5d7e35a8c..e5b11f7db 100644
--- a/coremltools/converters/mil/mil/passes/test_replace_stack_reshape.py
+++ b/coremltools/converters/mil/mil/passes/test_replace_stack_reshape.py
@@ -3,23 +3,22 @@
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+import numpy as np
+import unittest
+
 from coremltools._deps import _IS_MACOS
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
-    assert_op_count_match,
     assert_model_is_valid,
     get_op_types_in_program,
     apply_pass_and_basic_check,
 )
 from coremltools.converters.mil.testing_reqs import ct
 
-import unittest
-import pytest
-
-import numpy as np
 
 np.random.seed(1984)
 
+
 class ReplaceStackReshapePass(unittest.TestCase):
     
     def test_with_interleave(self):
@@ -130,7 +129,7 @@ def prog(x1, x2):
         np.testing.assert_allclose(old_prediction, prediction[output_name], atol=1e-04, rtol=1e-05)
 
     def test_multiple(self):
-        @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3, 4)), mb.TensorSpec(shape=(1, 2, 3, 4)), 
+        @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3, 4)), mb.TensorSpec(shape=(1, 2, 3, 4)),
                                  mb.TensorSpec(shape=(1, 2, 3, 4)), mb.TensorSpec(shape=(1, 2, 3, 4))])
         def prog(x1, x2, x3, x4):
             a = mb.stack(values=[x1, x2], axis=1)
@@ -139,7 +138,7 @@ def prog(x1, x2, x3, x4):
             b = mb.stack(values=[x3, x4], axis=1)
             b = mb.reshape(x=b, shape=[1, 4, 3, 4])
 
-            c = mb.stack(values=[a, b], axis=2) 
+            c = mb.stack(values=[a, b], axis=2)
             c = mb.reshape(x=c, shape=[1, 4, 6, 4])
 
             return c
@@ -259,7 +258,7 @@ def prog(x1, x2):
 
     def test_negative_4(self):
         """
-        More than two inputs to the stack op -- can't be transformed. 
+        More than two inputs to the stack op -- can't be transformed.
         """
         @mb.program(input_specs=[mb.TensorSpec(shape=(1, 5, 3, 4)), mb.TensorSpec(shape=(1, 5, 3, 4)), mb.TensorSpec(shape=(1, 5, 3, 4))])
         def prog(x1, x2, x3):
diff --git a/coremltools/converters/mil/mil/passes/test_use_reflection_padding.py b/coremltools/converters/mil/mil/passes/test_use_reflection_padding.py
index 2a951665f..854839f9f 100644
--- a/coremltools/converters/mil/mil/passes/test_use_reflection_padding.py
+++ b/coremltools/converters/mil/mil/passes/test_use_reflection_padding.py
@@ -4,16 +4,13 @@
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 import numpy as np
-import pytest
 
-from coremltools._deps import _IS_MACOS
 from coremltools.converters.mil.mil import Builder as mb
 from coremltools.converters.mil.testing_utils import (
     assert_model_is_valid,
     get_op_types_in_program,
     apply_pass_and_basic_check,
 )
-from coremltools.converters.mil.testing_reqs import ct
 
 
 np.random.seed(1984)
diff --git a/coremltools/converters/mil/mil/program.py b/coremltools/converters/mil/mil/program.py
index 889cb8fd6..7a9c3a083 100644
--- a/coremltools/converters/mil/mil/program.py
+++ b/coremltools/converters/mil/mil/program.py
@@ -14,7 +14,7 @@
 from coremltools.converters.mil.input_types import InputType
 
 
-class Program(object):
+class Program:
     def __init__(self):
         self.main_input_types = []
         self.functions = {}
@@ -57,7 +57,7 @@ def find_ops(self, prefix=None, op_type=None, exactly_one=False):
         return found_ops
 
     def validate(self):
-        for f_name, f in self.functions.items():
+        for f in self.functions.values():
             f.validate()
 
     def __getitem__(self, func_name):
@@ -76,20 +76,27 @@ def __str__(self):
         return s
 
 
-class Placeholder(object):
+class Placeholder:
     counter = 0
 
-    def __init__(self, sym_shape, dtype=None, name=None):
+    def __init__(self, sym_shape, dtype=None, name=None, allow_rank0_input=False):
         """
         sym_shape: () or [] for scalar. list, tuple, np.ndarray for tensor. May
         contain Symbol as symbolic shape (but not string).
 
         dtype: types.float or other scalar builtin types.
+        allow_rank0_input: A flag that allows the rank 0 placeholder.
         """
         if not isinstance(sym_shape, (list, tuple, _np.ndarray)):
             raise ValueError("Illegal shape for Placeholder: {}".format(sym_shape))
+
         if len(sym_shape) == 0:
-            raise ValueError('Rank-0 (input {}) is unsupported'.format(name))
+            if not allow_rank0_input:
+                raise ValueError('Rank-0 (input {}) is unsupported'.format(name))
+            else:
+                _logging.warning('Rank-0 (input {}) is unsupported in coreml. You might run into error while\
+                running this model'.format(name))
+
         for i, d in enumerate(sym_shape):
             if not isinstance(d, (_np.generic, int, Symbol)):
                 msg = 'Placeholder dim {} in {} is not integer or symbol'
@@ -154,10 +161,11 @@ def get_new_symbol(name=None):
 def get_existing_symbol(name):
     global k_used_symbols
     if name not in k_used_symbols:
-      msg = 'Symbol name {} does not exist'
-      raise ValueError(msg.format(name))
+        msg = 'Symbol name {} does not exist'
+        raise ValueError(msg.format(name))
     return k_used_symbols[name]
 
+
 class Symbol(_sm.Symbol):
     def __init__(self, sym_name):
         """
diff --git a/coremltools/converters/mil/mil/types/type_mapping.py b/coremltools/converters/mil/mil/types/type_mapping.py
index 78807524e..59be98ced 100644
--- a/coremltools/converters/mil/mil/types/type_mapping.py
+++ b/coremltools/converters/mil/mil/types/type_mapping.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
diff --git a/coremltools/converters/mil/mil/var.py b/coremltools/converters/mil/mil/var.py
index 6494867cf..4f5568035 100644
--- a/coremltools/converters/mil/mil/var.py
+++ b/coremltools/converters/mil/mil/var.py
@@ -5,10 +5,7 @@
 
 from coremltools.converters.mil.mil import types
 from coremltools.converters.mil.mil.types import builtin_to_string
-from coremltools.converters.mil.mil.types.symbolic import (
-    is_symbolic,
-    any_symbolic,
-)
+from coremltools.converters.mil.mil.types.symbolic import any_symbolic
 
 
 class Var(object):
@@ -18,9 +15,11 @@ class Var(object):
 
     Example Usage:
 
-    from coremltools.converters.mil.mil import Builder as mb
-    from coremltools.converters.mil.mil import Function
-    from coremltools.converters.mil.mil import types
+    from coremltools.converters.mil.mil import (
+        Builder as mb,
+        Function,
+        types
+    )
 
     func_inputs = {"a": mb.placeholder(shape=(1,2)),
                    "b": mb.placeholder(shape=(1,2)) }
diff --git a/coremltools/converters/mil/mil/visitors/dot_visitor.py b/coremltools/converters/mil/mil/visitors/dot_visitor.py
index 5c1cee11c..3c511c205 100644
--- a/coremltools/converters/mil/mil/visitors/dot_visitor.py
+++ b/coremltools/converters/mil/mil/visitors/dot_visitor.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 #  Copyright (c) 2020, Apple Inc. All rights reserved.
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
diff --git a/coremltools/converters/mil/test_flexible_shape_inputs.py b/coremltools/converters/mil/test_flexible_shape_inputs.py
index 1ba7646c4..f927ebd1a 100644
--- a/coremltools/converters/mil/test_flexible_shape_inputs.py
+++ b/coremltools/converters/mil/test_flexible_shape_inputs.py
@@ -3,12 +3,13 @@
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-import coremltools as ct
-from coremltools._deps import _HAS_TORCH, MSG_TORCH_NOT_FOUND
 import numpy as _np
 import PIL.Image
 import pytest
 
+import coremltools as ct
+from coremltools._deps import _HAS_TORCH, MSG_TORCH_NOT_FOUND
+
 if _HAS_TORCH:
     import torch
     torch.manual_seed(10)
@@ -18,6 +19,7 @@ def __init__(self, in_channels=3, out_channels=10, kernel_size=3):
             super(TestConvModule, self).__init__()
             self.conv = torch.nn.Conv2d(in_channels, out_channels,
                                         kernel_size)
+
         def forward(self, x):
             return self.conv(x)
 
@@ -141,8 +143,3 @@ def test_image_input_enumerated(self, convert_to):
         assert spec.description.input[0].type.imageType.enumeratedSizes.sizes[0].width == 25
         assert spec.description.input[0].type.imageType.enumeratedSizes.sizes[0].height == 25
         _assert_torch_coreml_output_shapes(model, spec, traced_model, example_input, is_image_input=True)
-
-
-
-
-
diff --git a/coremltools/converters/mil/testing_reqs.py b/coremltools/converters/mil/testing_reqs.py
index 6ab10a3ae..ecfb7373a 100644
--- a/coremltools/converters/mil/testing_reqs.py
+++ b/coremltools/converters/mil/testing_reqs.py
@@ -40,7 +40,7 @@
 else:
     backends = [('mlprogram', "fp16"), ('neuralnetwork', "fp32")]
     if os.getenv('INCLUDE_MIL_FP32_UNIT_TESTS') == '1':
-            backends.append(('mlprogram', 'fp32'))
+        backends.append(('mlprogram', 'fp32'))
 
 np.random.seed(1984)
 
diff --git a/coremltools/converters/mil/testing_utils.py b/coremltools/converters/mil/testing_utils.py
index f0613c7e5..40058acf8 100644
--- a/coremltools/converters/mil/testing_utils.py
+++ b/coremltools/converters/mil/testing_utils.py
@@ -2,14 +2,16 @@
 #
 #  Use of this source code is governed by a BSD-3-clause license that can be
 #  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 import copy
 import logging
-import numpy as np
 import os
 from pathlib import Path
-import PIL.Image
 import re
 
+import numpy as np
+import PIL.Image
+
 import coremltools as ct
 from coremltools._deps import _IS_MACOS
 from coremltools.converters.mil.mil import Program, Function
@@ -212,7 +214,7 @@ def compare_backend(
             raise ValueError("Unsupported dtype config")
 
         pred = run_core_ml_predict(mlmodel, input_key_values,
-            use_cpu_only=use_cpu_only)
+                                   use_cpu_only=use_cpu_only)
         if also_compare_shapes:
             compare_shapes(
                 mlmodel,
@@ -249,7 +251,7 @@ def compare_shapes(
     if _IS_MACOS:
         if not pred:
             pred = run_core_ml_predict(mlmodel, input_key_values,
-                use_cpu_only)
+                                       use_cpu_only)
         for o, expected in expected_outputs.items():
             coreml_out = _get_coreml_out_from_dict(pred, o)
             msg = "Output: {}. expected shape {} != actual shape {}".format(
diff --git a/coremltools/converters/onnx/_converter.py b/coremltools/converters/onnx/_converter.py
index b506c40a8..3708dd9e6 100644
--- a/coremltools/converters/onnx/_converter.py
+++ b/coremltools/converters/onnx/_converter.py
@@ -89,8 +89,8 @@ def is_nd_array_supported(minimum_ios_deployment_target):
         if not SupportedVersion.ios_support_check(minimum_ios_deployment_target):
             raise TypeError(
                 "{} not supported. Please provide one of target iOS: {}".format(
-                minimum_ios_deployment_target,
-                SupportedVersion.supported_ios_version
+                    minimum_ios_deployment_target,
+                    SupportedVersion.supported_ios_version
                 )
             )
 
diff --git a/coremltools/converters/onnx/_operators.py b/coremltools/converters/onnx/_operators.py
index 346501db6..4333709a4 100644
--- a/coremltools/converters/onnx/_operators.py
+++ b/coremltools/converters/onnx/_operators.py
@@ -1125,7 +1125,7 @@ def _add_concat(input_names, output_names, **kwargs):
                 builder,
                 node,
                 graph,
-                "Unsupported axis {} in input of shape".format(
+                "Unsupported axis {} in input of shape {}".format(
                     axis, str(first_input_shape)
                 ),
             )
diff --git a/coremltools/converters/sklearn/_dict_vectorizer.py b/coremltools/converters/sklearn/_dict_vectorizer.py
index 9358723b8..6e6cd9138 100644
--- a/coremltools/converters/sklearn/_dict_vectorizer.py
+++ b/coremltools/converters/sklearn/_dict_vectorizer.py
@@ -69,14 +69,14 @@ def convert(model, input_features, output_features):
     is_str = None
     for feature_name in model.feature_names_:
         if isinstance(feature_name, str):
-            if is_str == False:
+            if is_str is False:
                 raise ValueError("Mapping of DictVectorizer mixes int and str types.")
 
             tr_spec.stringToIndex.vector.append(feature_name)
             is_str == True
 
         if isinstance(feature_name, int):
-            if is_str == True:
+            if is_str is True:
                 raise ValueError("Mapping of DictVectorizer mixes int and str types.")
 
             tr_spec.int64ToIndex.vector.append(feature_name)
diff --git a/coremltools/models/_interface_management.py b/coremltools/models/_interface_management.py
index 897488ff6..51244794d 100644
--- a/coremltools/models/_interface_management.py
+++ b/coremltools/models/_interface_management.py
@@ -2,12 +2,8 @@
 #
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
-
-import numpy as _np
-
 from . import datatypes
-from ._feature_management import process_or_validate_features
-from ._feature_management import is_valid_feature_list
+from ._feature_management import process_or_validate_features, is_valid_feature_list
 from . import _feature_management as _fm
 from ..proto import Model_pb2
 
diff --git a/coremltools/models/model.py b/coremltools/models/model.py
index a00d90ff1..498279d1c 100644
--- a/coremltools/models/model.py
+++ b/coremltools/models/model.py
@@ -2,21 +2,39 @@
 #
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
-import json as _json
+
+from copy import deepcopy as _deepcopy
 import os as _os
 import shutil as _shutil
 import tempfile as _tempfile
 import warnings as _warnings
-from copy import deepcopy as _deepcopy
 
+
+from ..proto import (
+    Model_pb2 as _Model_pb2,
+    MIL_pb2 as _MIL_pb2
+)
+from .utils import (
+    _create_mlpackage,
+    _has_custom_layer,
+    _is_macos,
+    _macos_version,
+    _MLMODEL_EXTENSION,
+    _MLPACKAGE_AUTHOR_NAME,
+    _MLPACKAGE_EXTENSION,
+    _WEIGHTS_DIR_NAME,
+    load_spec as _load_spec,
+    save_spec as _save_spec
+)
 from coremltools import ComputeUnit as _ComputeUnit
-from .utils import _has_custom_layer as _has_custom_layer
-from .utils import load_spec as _load_spec
-from .utils import _macos_version, _is_macos
-from .utils import save_spec as _save_spec
-from ..proto import Model_pb2 as _Model_pb2
+from coremltools.converters.mil.mil.program import Program as _Program
+
+
+try:
+    from ..libmodelpackage import ModelPackage as _ModelPackage
+except:
+    _ModelPackage = None
 
-from .utils import _MLMODEL_EXTENSION, _MLPACKAGE_EXTENSION
 
 _MLMODEL_FULL_PRECISION = "float32"
 _MLMODEL_HALF_PRECISION = "float16"
@@ -59,9 +77,7 @@
 _METADATA_VERSION = "com.github.apple.coremltools.version"
 _METADATA_SOURCE = "com.github.apple.coremltools.source"
 
-_MODEL_FILE_NAME = 'model.mlmodel'
-_WEIGHTS_FILE_NAME = 'weight.bin'
-_WEIGHTS_DIR_NAME = 'weights'
+
 
 class _FeatureDescription(object):
     def __init__(self, fd_spec):
@@ -128,6 +144,23 @@ def _get_proxy_and_spec(filename, compute_units, skip_model_load=False):
 
     return (None, specification, None)
 
+def _try_get_weights_dir_path(mlpackage_path):
+    """
+    Try to find the weights in mlpackage and return the path to the weights directory if found.
+    Return None if not found.
+    :param mlpackage_path: str, path to the mlpackage directory
+    :return: path to the weights directory inside the mlpackage directory
+    """
+    weights_dir = None
+    try:
+        if _ModelPackage.isValid(mlpackage_path):
+            item_info = _ModelPackage(mlpackage_path).findItemByNameAuthor(_WEIGHTS_DIR_NAME, _MLPACKAGE_AUTHOR_NAME)
+            if item_info is not None:
+                weights_dir = item_info.path()
+    except:
+        pass
+    return weights_dir
+
 
 class MLModel(object):
     """
@@ -170,10 +203,17 @@ class MLModel(object):
         >>> predictions = model.predict({'bedroom': 1.0, 'bath': 1.0, 'size': 1240})
 
         # Get the spec of the model
-        >>> model.spec
+        >>> spec = model.get_spec()
 
         # Save the model
-        >>> model.save('HousePricer.mlmodel')
+        >>> model.save('HousePricer.mlpackage')
+
+        # Load the model from the spec object
+        >>> spec = model.get_spec()
+        >>> # modify spec (e.g. rename inputs/outputs etc.)
+        >>> model = MLModel(spec)
+        >>> # if model type is mlprogram, i.e. spec.WhichOneof('Type') == "mlProgram", then:
+        >>> model = MLModel(spec, weights_dir=model.weights_dir)
 
     See Also
     --------
@@ -185,7 +225,8 @@ def __init__(self, model,
                  is_temp_package=False,
                  mil_program=None,
                  skip_model_load=False,
-                 compute_units=_ComputeUnit.ALL):
+                 compute_units=_ComputeUnit.ALL,
+                 weights_dir=None):
         """
         Construct an MLModel from an ``.mlmodel``.
 
@@ -193,10 +234,16 @@ def __init__(self, model,
         ----------
         model: str or Model_pb2
 
-            For MIL, the model must be a path string to the directory containing bundle
+            For MLProgram, the model can be a path string (``.mlpackage``) or ``Model_pb2``.
+            If it is a path string, it must point to a directory containing bundle
             artifacts (such as ``weights.bin``).
+            If it is of type ``Model_pb2`` (spec), then ``weights_dir`` must also be provided, if the model
+            has weights, since to initialize and load the model, both the proto spec and the weights are
+            required. The proto spec for an MLProgram, unlike that of a NeuralNetwork, does not contain the
+            weights; they are stored separately. If the model does not have weights, an empty weights_dir can be provided.
 
-            For NeuralNetwork, the model can be a path string (``.mlmodel``) or ``Model_pb2``.
+            For non-mlprogram model types, the model can be a path string (``.mlmodel``) or of type ``Model_pb2``,
+            i.e. a spec object.
 
         useCPUOnly: bool
             This parameter is deprecated and will be removed in 6.0. Use the ``compute_units``
@@ -234,6 +281,10 @@ def __init__(self, model,
                 - ``coremltools.ComputeUnit.CPU_AND_GPU``: Use both the CPU and GPU,
                   but not the neural engine.
 
+        weights_dir: str
+            Path to the weights directory. Required when loading an MLModel of type mlprogram
+            from a spec object, i.e. when the argument ``model`` is of type ``Model_pb2``.
+
         Notes
         -----
         Internally this maintains the following:
@@ -242,12 +293,15 @@ def __init__(self, model,
           CoreML::Python::Model (see
           `coremltools/coremlpython/CoreMLPython.mm <https://github.com/apple/coremltools/blob/main/coremlpython/CoreMLPython.mm>`_)
 
-        - ``bundle_path`` (MIL only): Directory containing all artifacts (``.mlmodel``,
+        - ``package_path`` (mlprogram only): Directory containing all artifacts (``.mlmodel``,
           weights, and so on).
 
+        - ``weights_dir`` (mlprogram only): Directory containing weights inside the package_path.
+
         Examples
         --------
-        >>> loaded_model = MLModel('my_model_file.mlmodel')
+        >>> loaded_model = MLModel('my_model.mlmodel')
+        >>> loaded_model = MLModel("my_model.mlpackage")
         """
         if useCPUOnly:
             _warnings.warn('The "useCPUOnly" parameter is deprecated and will be removed in 6.0. '
@@ -255,26 +309,40 @@ def __init__(self, model,
             compute_units = _ComputeUnit.CPU_ONLY
         if not isinstance(compute_units, _ComputeUnit):
             raise TypeError('"compute_units" parameter must be of type: coremltools.ComputeUnit')
+        self.compute_unit = compute_units
 
         self.is_package = False
         self.package_path = None
-        self.compute_unit = compute_units
+        self._weights_dir = None
         if mil_program is not None:
-            from coremltools.converters.mil.mil.program import Program
-            if not isinstance(mil_program, Program):
+            if not isinstance(mil_program, _Program):
                 raise ValueError("mil_program must be of type 'coremltools.converters.mil.Program'")
         self._mil_program = mil_program
+
         if isinstance(model, str):
             if _os.path.isdir(model):
                 self.is_package = True
                 self.package_path = model
                 self.is_temp_package = is_temp_package
+                self._weights_dir = _try_get_weights_dir_path(model)
             self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec(
                 model, compute_units, skip_model_load=skip_model_load,
             )
         elif isinstance(model, _Model_pb2.Model):
-            filename = _tempfile.mktemp(suffix=_MLMODEL_EXTENSION)
-            _save_spec(model, filename)
+            if model.WhichOneof('Type') == "mlProgram":
+                if weights_dir is None:
+                    raise Exception('MLModel of type mlProgram cannot be loaded just from the model spec object. '
+                                    'It also needs the path to the weights file. Please provide that as well, '
+                                    'using the \'weights_dir\' argument.')
+                self.is_package = True
+                self.is_temp_package = True
+                filename = _create_mlpackage(model, weights_dir, copy_weights=True)
+                self.package_path = filename
+                self._weights_dir = _try_get_weights_dir_path(filename)
+            else:
+                filename = _tempfile.mktemp(suffix=_MLMODEL_EXTENSION)
+                _save_spec(model, filename)
+
             self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec(
                 filename, compute_units, skip_model_load=skip_model_load,
             )
@@ -290,6 +358,7 @@ def __init__(self, model,
         self._input_description = _FeatureDescription(self._spec.description.input)
         self._output_description = _FeatureDescription(self._spec.description.output)
 
+
     def __del__(self):
         # Cleanup temporary package upon destruction
         if hasattr(self, 'is_package') and self.is_package \
@@ -336,6 +405,10 @@ def license(self, license):
     def version(self):
         return self._spec.description.metadata.versionString
 
+    @property
+    def weights_dir(self):
+        return self._weights_dir
+
     @version.setter
     def version(self, version_string):
         self._spec.description.metadata.versionString = version_string
@@ -390,6 +463,7 @@ def get_spec(self):
         """
         return _deepcopy(self._spec)
 
+
     def predict(self, data, useCPUOnly=False):
         """
         Return predictions for the model.
@@ -445,16 +519,14 @@ def predict(self, data, useCPUOnly=False):
             try:
                 from ..libcoremlpython import _MLModelProxy
             except Exception as e:
-                print("exception loading model proxy: %s\n" % e)
+                print("Exception loading model proxy: %s\n" % e)
                 _MLModelProxy = None
             except:
-                print("exception while loading model proxy.\n")
+                print("Exception while loading model proxy.\n")
                 _MLModelProxy = None
 
             if not _MLModelProxy:
-                raise Exception(
-                    "Unable to load CoreML.framework. Cannot make predictions."
-                )
+                raise Exception("Unable to load CoreML.framework. Cannot make predictions.")
             elif (
                 _MLModelProxy.maximum_supported_specification_version()
                 < self._spec.specificationVersion
@@ -475,9 +547,35 @@ def predict(self, data, useCPUOnly=False):
                 if self._framework_error:
                     raise self._framework_error
                 else:
-                    raise Exception(
-                        "Unable to load CoreML.framework. Cannot make predictions."
-                    )
+                    raise Exception("Unable to load CoreML.framework. Cannot make predictions.")
+
+
+    def _set_build_info_mil_attributes(self, metadata):
+        if self._spec.WhichOneof('Type') != "mlProgram":
+            # No MIL attributes to set
+            return
+
+        ml_program_attributes = self._spec.mlProgram.attributes
+        build_info_proto = ml_program_attributes["buildInfo"]
+
+        # Set ValueType to dictionary of string to string
+        str_type = _MIL_pb2.ValueType()
+        str_type.tensorType.dataType = _MIL_pb2.DataType.STRING
+        dict_type_str_to_str = _MIL_pb2.ValueType()
+        dict_type_str_to_str.dictionaryType.keyType.CopyFrom(str_type)
+        dict_type_str_to_str.dictionaryType.valueType.CopyFrom(str_type)
+        build_info_proto.type.CopyFrom(dict_type_str_to_str)
+
+        # Copy the metadata
+        build_info_dict = build_info_proto.immediateValue.dictionary
+        for k, v in metadata.items():
+            key_pair = _MIL_pb2.DictionaryValue.KeyValuePair()
+            key_pair.key.immediateValue.tensor.strings.values.append(k)
+            key_pair.key.type.CopyFrom(str_type)
+            key_pair.value.immediateValue.tensor.strings.values.append(v)
+            key_pair.value.type.CopyFrom(str_type)
+            build_info_dict.values.append(key_pair)
+
 
     def _get_mil_internal(self):
         """
@@ -495,11 +593,12 @@ def _get_mil_internal(self):
         """
         return _deepcopy(self._mil_program)
 
+
     def _verify_input_name_exists(self, input_dict):
         model_input_names = [inp.name for inp in self._spec.description.input]
         model_input_names_set = set(model_input_names)
         for given_input in input_dict.keys():
             if given_input not in model_input_names_set:
                 err_msg = "Provided key \"{}\", in the input dict, " \
-                          "does not match to any of the model input name(s), which are: {}"
+                          "does not match any of the model input name(s), which are: {}"
                 raise KeyError(err_msg.format(given_input, ",".join(model_input_names)))
diff --git a/coremltools/models/neural_network/builder.py b/coremltools/models/neural_network/builder.py
index 8347fdb9c..ec9719033 100644
--- a/coremltools/models/neural_network/builder.py
+++ b/coremltools/models/neural_network/builder.py
@@ -2396,7 +2396,7 @@ def add_convolution(
 
 				quant_lut: numpy.array(dtype=numpy.float32)
 					the LUT (look up table) to be used with LUT quantization. 
-					Must be of length 2^n bits.
+                                        Must be of length 2^n bits.
 
         Depthwise convolution
         	Depthwise convolution is a special case of convolution, in which:
@@ -2482,7 +2482,7 @@ def add_convolution(
             return
 
         # Weight assignments
-        quantization = len(kwargs) > 0 and ('quantization_type' in kwargs and kwargs.get('quantization_type') != None)
+        quantization = len(kwargs) > 0 and ('quantization_type' in kwargs and kwargs.get('quantization_type') is not None)
         if quantization:
             _verify_quantization_arguments(
                 weight=W, output_channels=output_channels, **kwargs
diff --git a/coremltools/models/neural_network/quantization_utils.py b/coremltools/models/neural_network/quantization_utils.py
index 6b327b48a..85e514f8c 100644
--- a/coremltools/models/neural_network/quantization_utils.py
+++ b/coremltools/models/neural_network/quantization_utils.py
@@ -1282,9 +1282,8 @@ def _characterize_qmodel_perf_with_data_dir(fpmodel, qspec, data_dir):
 
     if not test_image_paths:
         raise Exception(
-            """Path contains no supported image files.
-        Supported file types include jpg, bmp, png and jpeg.
-        """.format(
+            "{} contains no supported image files. "
+            "Supported file types include jpg, bmp, png and jpeg.".format(
                 data_dir
             )
         )
diff --git a/coremltools/models/neural_network/spec_inspection_utils.py b/coremltools/models/neural_network/spec_inspection_utils.py
index 91700b682..52d481dde 100644
--- a/coremltools/models/neural_network/spec_inspection_utils.py
+++ b/coremltools/models/neural_network/spec_inspection_utils.py
@@ -1,3 +1,8 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 from ...proto import NeuralNetwork_pb2 as _NeuralNetwork_pb2
 
 
diff --git a/coremltools/models/neural_network/utils.py b/coremltools/models/neural_network/utils.py
index c5e31bc0f..9f6784b77 100644
--- a/coremltools/models/neural_network/utils.py
+++ b/coremltools/models/neural_network/utils.py
@@ -1,6 +1,12 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import copy as _copy
+
 from .builder import NeuralNetworkBuilder
 from coremltools.models.utils import _get_model
-import copy as _copy
 
 
 def make_image_input(
diff --git a/coremltools/models/utils.py b/coremltools/models/utils.py
index 1ba75aea0..7ace1135e 100644
--- a/coremltools/models/utils.py
+++ b/coremltools/models/utils.py
@@ -6,23 +6,30 @@
 """
 Utilities for the entire package.
 """
+from functools import lru_cache as _lru_cache
 import math as _math
 import numpy as _np
 import os as _os
 import pathlib as _pathlib
+import shutil as _shutil
+import stat as _stat
+import subprocess as _subprocess
 import sys as _sys
 import tempfile as _tempfile
 import warnings as _warnings
 
+from .._deps import _HAS_SCIPY
+
 from coremltools import ComputeUnit as _ComputeUnit
 from coremltools.converters.mil.mil.passes.name_sanitization_utils import NameSanitizer as _NameSanitizer
 from coremltools.proto import Model_pb2 as _Model_pb2
-from .._deps import _HAS_SCIPY
-
 
 _MLMODEL_EXTENSION = ".mlmodel"
 _MLPACKAGE_EXTENSION = ".mlpackage"
-
+_MODEL_FILE_NAME = 'model.mlmodel'
+_WEIGHTS_FILE_NAME = 'weight.bin'
+_WEIGHTS_DIR_NAME = 'weights'
+_MLPACKAGE_AUTHOR_NAME = "com.apple.CoreML"
 
 try:
     from ..libmodelpackage import ModelPackage as _ModelPackage
@@ -48,7 +55,58 @@ def _remove_invalid_keys(input_dict, model):
         if k not in model_input_names:
             del input_dict[k]
 
-def save_spec(spec, filename, auto_set_specification_version=False):
+def _create_mlpackage(proto_spec, weights_dir=None, package_path=None, copy_weights=False):
+    """
+    Parameters
+    ----------
+    proto_spec: Model_pb2
+    weights_dir: str
+        Copy or move the weights from this path into the mlpackage.
+    package_path: str
+        If provided, place the created mlpackage at this path.
+    copy_weights: bool
+        If False, delete the directory given in ``weights_dir`` once it has been added to the mlpackage.
+
+    :return: str
+        path to the mlpackage
+    """
+    # Save proto to disk
+    proto_spec_str = proto_spec.SerializeToString()
+    spec_file = _tempfile.NamedTemporaryFile(suffix=_MLMODEL_EXTENSION)
+    spec_file.write(proto_spec_str)
+    spec_file.flush()
+
+    # To make sure everyone can read this file
+    _os.chmod(spec_file.name, _stat.S_IRUSR | _stat.S_IWUSR | _stat.S_IRGRP | _stat.S_IROTH)
+
+    # If package directory is already provided, use that
+    if package_path is None:
+        package_path = _tempfile.mkdtemp(suffix=_MLPACKAGE_EXTENSION)
+    else:
+        name, ext = _os.path.splitext(package_path)
+        if ext != _MLPACKAGE_EXTENSION:
+            raise Exception("For an ML Package, extension must be {} (not {})".format(_MLPACKAGE_EXTENSION, ext))
+
+    if _os.path.exists(package_path):
+        _shutil.rmtree(package_path)
+
+    package = _ModelPackage(package_path)
+
+    # Root model file is copied into the model package.
+    package.setRootModel(spec_file.name, _MODEL_FILE_NAME, _MLPACKAGE_AUTHOR_NAME,
+                         "CoreML Model Specification")
+    spec_file.close()  # clean up spec file now that it is part of the model package
+
+    # Weights bundle is copied into the model package. Changes to the in-memory JSON are committed to disk when the package goes out of scope.
+    if weights_dir is not None:
+        package.addItem(weights_dir, _WEIGHTS_DIR_NAME, _MLPACKAGE_AUTHOR_NAME, "CoreML Model Weights")
+        if not copy_weights:
+            _shutil.rmtree(weights_dir)  # clean up weights now that it is part of the model package
+
+    return package_path
+
+
+def save_spec(spec, filename, auto_set_specification_version=False, weights_dir=None):
     """
     Save a protobuf model specification to file.
 
@@ -63,11 +121,18 @@ def save_spec(spec, filename, auto_set_specification_version=False):
     auto_set_specification_version: bool
         If true, will always try to set specification version automatically.
 
+    weights_dir: str
+        Path to the directory containing the ``weight.bin`` file. This is required
+        when the spec is of model type mlprogram. If the mlprogram does not contain
+        any weights, this path can be an empty directory.
+
     Examples
     --------
     .. sourcecode:: python
 
         >>> coremltools.utils.save_spec(spec, 'HousePricer.mlmodel')
+        >>> coremltools.utils.save_spec(spec, 'HousePricer.mlpackage')
+        >>> coremltools.utils.save_spec(spec, 'mlprogram_model.mlpackage', weights_dir="/path/to/weights/directory")
 
     See Also
     --------
@@ -86,7 +151,6 @@ def save_spec(spec, filename, auto_set_specification_version=False):
     else:
         raise Exception("Extension must be {} or {} (not {})".format(_MLMODEL_EXTENSION, _MLPACKAGE_EXTENSION, ext))
 
-    spec = spec.SerializeToString()
     if auto_set_specification_version:
         try:
             # always try to downgrade the specification version to the
@@ -101,25 +165,20 @@ def save_spec(spec, filename, auto_set_specification_version=False):
                 RuntimeWarning,
             )
 
-    specfile = filename
-    if is_package:
-        tempfile = _tempfile.NamedTemporaryFile(suffix=_MLMODEL_EXTENSION)
-        specfile = tempfile.name
-
-    with open(specfile, "wb") as f:
-        f.write(spec)
-
     if is_package:
         if _ModelPackage is None:
             raise Exception(
                 "Unable to load libmodelpackage. Cannot save spec"
             )
-
-        package = _ModelPackage(filename)
-        model_name = _pathlib.Path(filename).with_suffix('.mlmodel').name
-        
-        # Root file is copied into the model package. Changes to in-memory JSON is commited to disk when package goes out of scope.
-        package.replaceRootModel(specfile, model_name, "com.apple.CoreML", "CoreML Model Specification");
+        if spec.WhichOneof('Type') == "mlProgram" and weights_dir is None:
+                raise Exception('spec of type mlProgram cannot be saved without the'
+                                ' weights file. Please provide the path to the weights file as well, '
+                                'using the \'weights_dir\' argument.')
+        _create_mlpackage(spec, weights_dir=weights_dir, package_path=filename, copy_weights=True)
+    else:
+        spec_str = spec.SerializeToString()
+        with open(filename, "wb") as f:
+            f.write(spec_str)
 
 def load_spec(filename):
     """
@@ -141,6 +200,7 @@ def load_spec(filename):
     .. sourcecode:: python
 
         >>> spec = coremltools.utils.load_spec('HousePricer.mlmodel')
+        >>> spec = coremltools.utils.load_spec('HousePricer.mlpackage')
 
     See Also
     --------
@@ -509,7 +569,22 @@ def rename_feature(
     .. sourcecode:: python
 
         # In-place rename of spec
+        >>> model = MLModel("model.mlmodel")
+        >>> spec = model.get_spec()
+        >>> coremltools.utils.rename_feature(spec, 'old_feature', 'new_feature_name')
+        >>> # re-initialize model
+        >>> model = MLModel(spec)
+        >>> model.save("model.mlmodel")
+
+        # Rename a spec when the model is an mlprogram; in that case, the weights are stored outside of the spec
+        >>> model = coremltools.convert(torch_model, convert_to="mlprogram")
+        >>> spec = model.get_spec()
+        >>> # print info about inputs and outputs
+        >>> print(spec.description)
         >>> coremltools.utils.rename_feature(spec, 'old_feature', 'new_feature_name')
+        >>> # re-initialize model
+        >>> model = MLModel(spec, weights_dir=model.weights_dir)
+        >>> model.save("model.mlpackage")
     """
     from coremltools.models import MLModel
 
@@ -835,6 +910,7 @@ def _is_macos():
     return _sys.platform == "darwin"
 
 
+@_lru_cache()
 def _macos_version():
     """
     Returns macOS version as a tuple of integers, making it easy to do proper
@@ -842,8 +918,7 @@ def _macos_version():
     """
     if _is_macos():
         try:
-            import subprocess
-            ver_str = subprocess.run(["sw_vers", "-productVersion"], stdout=subprocess.PIPE).stdout.decode('utf-8').strip('\n')
+            ver_str = _subprocess.run(["sw_vers", "-productVersion"], stdout=_subprocess.PIPE).stdout.decode('utf-8').strip('\n')
             return tuple([int(v) for v in ver_str.split(".")])
         except:
             raise Exception("Unable to detemine the macOS version")
diff --git a/coremltools/test/api/test_api_examples.py b/coremltools/test/api/test_api_examples.py
index f31387bea..d37a26413 100644
--- a/coremltools/test/api/test_api_examples.py
+++ b/coremltools/test/api/test_api_examples.py
@@ -723,13 +723,13 @@ def test_unsanitized_input_name_during_prediction():
 
         mlmodel = ct.convert(prog)
 
-        expected_err_str = "Provided key \"x/0\", in the input dict, " \
-                           "does not match to any of the model input name\(s\), which are: .*"
-        with pytest.raises(KeyError, match=expected_err_str):
-            prediction = mlmodel.predict(
+        with pytest.raises(KeyError) as error_info:
+            mlmodel.predict(
                 {"x/0": np.random.rand(2, 3).astype(np.float32),
                  "y": np.random.rand(2, 3).astype(np.float32)}
             )
+        error_str = str(error_info.value)
+        assert "does not match any of the model input" in error_str
 
 class TestFlexibleShape:
     @staticmethod
diff --git a/coremltools/test/api/test_api_visibilities.py b/coremltools/test/api/test_api_visibilities.py
index 17181689e..d8376f009 100644
--- a/coremltools/test/api/test_api_visibilities.py
+++ b/coremltools/test/api/test_api_visibilities.py
@@ -87,6 +87,7 @@ def test_models_mlmodel(self):
             "short_description",
             "user_defined_metadata",
             "version",
+            "weights_dir",
         ]
         _check_visible_modules(_get_visible_items(ct.models.MLModel), expected)
 
diff --git a/coremltools/test/blob/test_weights.py b/coremltools/test/blob/test_weights.py
index c6f0c218f..96acb0b22 100644
--- a/coremltools/test/blob/test_weights.py
+++ b/coremltools/test/blob/test_weights.py
@@ -1,11 +1,20 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 import unittest
 import tempfile
 import os
 import shutil
+
 import numpy as np
 
-from coremltools.libmilstoragepython import _BlobStorageWriter as BlobWriter
-from coremltools.libmilstoragepython import _BlobStorageReader as BlobReader
+from coremltools.libmilstoragepython import (
+    _BlobStorageReader as BlobReader,
+    _BlobStorageWriter as BlobWriter
+)
+
 
 class WeightTest(unittest.TestCase):
     def setUp(self):
diff --git a/coremltools/test/modelpackage/test_modelpackage.py b/coremltools/test/modelpackage/test_modelpackage.py
index 6d1dbf5fc..83e361cbe 100644
--- a/coremltools/test/modelpackage/test_modelpackage.py
+++ b/coremltools/test/modelpackage/test_modelpackage.py
@@ -3,39 +3,30 @@
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-import coremltools
-from coremltools.libmodelpackage import ModelPackage
-from coremltools.proto import Model_pb2
-from coremltools import utils
-
 import numpy as np
 import os
 import pytest
 import shutil
 import tempfile
-import unittest
-
-from coremltools.models.utils import (
-    rename_feature,
-    save_spec,
-    _macos_version,
-    _convert_neural_network_spec_weights_to_fp16,
-    convert_double_to_float_multiarray_type,
-)
-from coremltools.models import MLModel, datatypes
-from coremltools.models.neural_network import NeuralNetworkBuilder
-
-
-class MLModelTest(unittest.TestCase):
-    @staticmethod
-    def _remove_path(path):
-        if os.path.isdir(path):
-            shutil.rmtree(path)
-        else:
-            os.remove(path)
 
-    @classmethod
-    def setUpClass(self):
+import coremltools
+from coremltools import utils
+from coremltools.converters.mil import Builder as mb
+from coremltools.libmodelpackage import ModelPackage
+from coremltools.models import MLModel
+from coremltools.models.utils import _MLPACKAGE_AUTHOR_NAME, _WEIGHTS_DIR_NAME, _WEIGHTS_FILE_NAME
+from coremltools.proto import Model_pb2
+
+
+def _remove_path(path):
+    if os.path.isdir(path):
+        shutil.rmtree(path)
+    else:
+        os.remove(path)
+
+class TestMLModel:
+
+    def setup_class(self):
 
         spec = Model_pb2.Model()
         spec.specificationVersion = coremltools.SPECIFICATION_VERSION
@@ -63,45 +54,43 @@ def setUpClass(self):
 
     def test_model_creation(self):
         model = MLModel(self.spec)
-        self.assertIsNotNone(model)
+        assert model is not None
 
         package = tempfile.TemporaryDirectory(suffix=".mlpackage")
         package.cleanup()
 
-        save_spec(self.spec, package.name)
+        utils.save_spec(self.spec, package.name)
         model = MLModel(package.name)
-        self.assertIsNotNone(model)
+        assert model is not None
 
         # cleanup
-        MLModelTest._remove_path(package.name)
+        _remove_path(package.name)
 
     def test_model_api(self):
         model = MLModel(self.spec)
-        self.assertIsNotNone(model)
+        assert model is not None
 
         model.author = "Test author"
-        self.assertEqual(model.author, "Test author")
-        self.assertEqual(model.get_spec().description.metadata.author, "Test author")
+        assert model.author == "Test author"
+        assert model.get_spec().description.metadata.author == "Test author"
 
         model.license = "Test license"
-        self.assertEqual(model.license, "Test license")
-        self.assertEqual(model.get_spec().description.metadata.license, "Test license")
+        assert model.license == "Test license"
+        assert model.get_spec().description.metadata.license == "Test license"
 
         model.short_description = "Test model"
-        self.assertEqual(model.short_description, "Test model")
-        self.assertEqual(
-            model.get_spec().description.metadata.shortDescription, "Test model"
-        )
+        assert model.short_description == "Test model"
+        assert model.get_spec().description.metadata.shortDescription == "Test model"
 
         model.version = "1.3"
-        self.assertEqual(model.version, "1.3")
-        self.assertEqual(model.get_spec().description.metadata.versionString, "1.3")
+        assert model.version == "1.3"
+        assert model.get_spec().description.metadata.versionString == "1.3"
 
         model.input_description["feature_1"] = "This is feature 1"
-        self.assertEqual(model.input_description["feature_1"], "This is feature 1")
+        assert model.input_description["feature_1"] == "This is feature 1"
 
         model.output_description["output"] = "This is output"
-        self.assertEqual(model.output_description["output"], "This is output")
+        assert model.output_description["output"] == "This is output"
 
         package = tempfile.TemporaryDirectory(suffix=".mlpackage")
         package.cleanup()
@@ -109,14 +98,14 @@ def test_model_api(self):
         model.save(package.name)
         loaded_model = MLModel(package.name)
 
-        self.assertEqual(model.author, "Test author")
-        self.assertEqual(model.license, "Test license")
-        self.assertEqual(model.short_description, "Test model")
-        self.assertEqual(model.input_description["feature_1"], "This is feature 1")
-        self.assertEqual(model.output_description["output"], "This is output")
+        assert loaded_model.author == "Test author"  # check the reloaded model, not the in-memory one
+        assert loaded_model.license == "Test license"
+        assert loaded_model.short_description == "Test model"
+        assert loaded_model.input_description["feature_1"] == "This is feature 1"
+        assert loaded_model.output_description["output"] == "This is output"
 
         # cleanup
-        MLModelTest._remove_path(package.name)
+        _remove_path(package.name)
 
     def test_predict_api(self):
         model = MLModel(self.spec)
@@ -131,18 +120,18 @@ def test_predict_api(self):
                 loaded_model = MLModel(package.name, compute_units=compute_units)
 
                 preds = loaded_model.predict({"feature_1": 1.0, "feature_2": 1.0})
-                self.assertIsNotNone(preds)
-                self.assertEqual(preds["output"], 3.1)
-                self.assertEqual(loaded_model.compute_unit, compute_units)
+                assert preds is not None
+                assert preds["output"] == 3.1
+                assert loaded_model.compute_unit == compute_units
         else:
             # just check if we can load it
             loaded_model = MLModel(package.name)
 
         # cleanup
-        MLModelTest._remove_path(package.name)
+        _remove_path(package.name)
 
     def test_rename_input(self):
-        rename_feature(self.spec, "feature_1", "renamed_feature", rename_inputs=True)
+        utils.rename_feature(self.spec, "feature_1", "renamed_feature", rename_inputs=True)
         model = MLModel(self.spec)
 
         package = tempfile.TemporaryDirectory(suffix=".mlpackage")
@@ -153,17 +142,17 @@ def test_rename_input(self):
 
         if utils._macos_version() >= (12, 0):
             preds = loaded_model.predict({"renamed_feature": 1.0, "feature_2": 1.0})
-            self.assertIsNotNone(preds)
-            self.assertEqual(preds["output"], 3.1)
+            assert preds is not None
+            assert preds["output"] == 3.1
 
         # reset the spec for next run
-        rename_feature(self.spec, "renamed_feature", "feature_1", rename_inputs=True)
+        utils.rename_feature(self.spec, "renamed_feature", "feature_1", rename_inputs=True)
 
         # cleanup
-        MLModelTest._remove_path(package.name)
+        _remove_path(package.name)
 
     def test_rename_input_bad(self):
-        rename_feature(self.spec, "blah", "bad_name", rename_inputs=True)
+        utils.rename_feature(self.spec, "blah", "bad_name", rename_inputs=True)
         model = MLModel(self.spec)
 
         package = tempfile.TemporaryDirectory(suffix=".mlpackage")
@@ -174,11 +163,11 @@ def test_rename_input_bad(self):
 
         if utils._macos_version() >= (12, 0):
             preds = loaded_model.predict({"feature_1": 1.0, "feature_2": 1.0})
-            self.assertIsNotNone(preds)
-            self.assertEqual(preds["output"], 3.1)
+            assert preds is not None
+            assert preds["output"] == 3.1
 
         # cleanup
-        MLModelTest._remove_path(package.name)
+        _remove_path(package.name)
 
     def test_save(self):
         model = MLModel(self.spec)
@@ -194,10 +183,10 @@ def test_save(self):
 
             if utils._macos_version() >= (12, 0):
                 preds = loaded_model.predict({"feature_1": 1.0, "feature_2": 1.0})
-                self.assertIsNotNone(preds)
-                self.assertEqual(preds["output"], 3.1)
+                assert preds is not None
+                assert preds["output"] == 3.1
 
-            MLModelTest._remove_path(package.name)
+            _remove_path(package.name)
 
     def test_save_in_place(self):
         model = MLModel(self.spec)
@@ -214,10 +203,10 @@ def test_save_in_place(self):
 
             if utils._macos_version() >= (12, 0):
                 preds = loaded_model.predict({"feature_1": 1.0, "feature_2": 1.0})
-                self.assertIsNotNone(preds)
-                self.assertEqual(preds["output"], 3.1)
+                assert preds is not None
+                assert preds["output"] == 3.1
 
-        MLModelTest._remove_path(package.name)
+        _remove_path(package.name)
 
     def test_mil_as_package(self):
         import torch
@@ -336,5 +325,183 @@ def forward(self, x):
 
         shutil.rmtree(package_path)
 
-if __name__ == "__main__":
-    unittest.main()
+class TestSpecAndMLModelAPIs:
+
+    def setup_class(self):
+        # define an mlprogram, which has weights
+        @mb.program(input_specs=[mb.TensorSpec(shape=(4, 5000))])
+        def linear_prog(input):
+            W = mb.const(val=np.random.rand(100, 5000), name="const_W")
+            out = mb.linear(x=input, weight=W, name="output")
+            return out
+
+        # define another mlprogram, which does not have weights
+        @mb.program(input_specs=[mb.TensorSpec(shape=(4, 5, 2))])
+        def relu_prog(input):
+            out = mb.relu(x=input, name="output")
+            return out
+
+        # convert and save model on disk
+        self.mlmodel = coremltools.convert(linear_prog, convert_to="mlprogram")
+        self.mlpackage_path = tempfile.mkdtemp(suffix=utils._MLPACKAGE_EXTENSION)
+        self.mlmodel.save(self.mlpackage_path)
+        self.mlmodel_no_weights = coremltools.convert(relu_prog, convert_to="mlprogram")
+
+    def teardown_class(self):
+        _remove_path(self.mlpackage_path)
+        self.mlmodel = None
+        self.mlmodel_no_weights = None
+
+    def _test_mlmodel_correctness(self, mlmodel):
+        """
+        :param mlmodel: coremltools.models.MLModel
+        Test the following:
+        - calling .predict on mlmodel works correctly
+        - calling .save on mlmodel works correctly
+        """
+        # construct input dictionary
+        spec = mlmodel.get_spec()
+        inputs = spec.description.input
+        input_dict = {}
+        for input in inputs:
+            input_dict[input.name] = np.random.rand(*tuple(input.type.multiArrayType.shape))
+        # check prediction
+        preds = mlmodel.predict(input_dict)
+        assert preds is not None
+        # save, load and predict again to check that the saving and loading worked correctly
+        with tempfile.TemporaryDirectory(suffix=utils._MLPACKAGE_EXTENSION) as temp_path:
+            mlmodel.save(temp_path)
+            mlmodel_reloaded = MLModel(temp_path)
+            preds = mlmodel_reloaded.predict(input_dict)
+            assert preds is not None
+
+    @pytest.mark.skipif(utils._macos_version() < (12, 0), reason="prediction on mlprogram model "
+                                                                    "available only on macOS12+")
+    def test_mlmodel_to_spec_to_mlmodel(self):
+        """
+        convert mlmodel to spec, and then back to mlmodel and verify that it works
+        """
+        spec = self.mlmodel.get_spec()
+        # reload the model from the spec and verify it
+        weights_dir = self.mlmodel.weights_dir
+        mlmodel_from_spec = MLModel(spec, weights_dir=weights_dir)
+        self._test_mlmodel_correctness(mlmodel_from_spec)
+        # check that the original model still works
+        self._test_mlmodel_correctness(self.mlmodel)
+        # check that an error is raised when MLModel is initialized without the weights
+        with pytest.raises(Exception, match="MLModel of type mlProgram cannot be loaded just from the model "
+                                             "spec object. It also needs the path to the weights file. "
+                                             "Please provide that as well, using the 'weights_dir' argument."):
+            MLModel(spec)
+
+    @pytest.mark.skipif(utils._macos_version() < (12, 0), reason="prediction on mlprogram model "
+                                                                    "available only on macOS12+")
+    def test_path_to_mlmodel_to_spec_to_mlmodel(self):
+        """
+        load an mlmodel from disk, convert it to spec, and then convert the spec back to mlmodel
+        """
+        mlmodel_from_disk = MLModel(self.mlpackage_path)
+        spec = mlmodel_from_disk.get_spec()
+        mlmodel_from_spec = MLModel(spec, weights_dir=mlmodel_from_disk.weights_dir)
+        self._test_mlmodel_correctness(mlmodel_from_spec)
+
+    @pytest.mark.skipif(utils._macos_version() < (12, 0), reason="prediction on mlprogram model "
+                                                                    "available only on macOS12+")
+    def test_path_to_spec_to_mlmodel(self):
+        """
+        load a spec from disk, then convert it to mlmodel, and check that it works
+        """
+        spec = utils.load_spec(self.mlpackage_path)
+        weights_dir = self.mlpackage_path + "/Data/" + _MLPACKAGE_AUTHOR_NAME + "/weights"
+        mlmodel = MLModel(spec, weights_dir=weights_dir)
+        self._test_mlmodel_correctness(mlmodel)
+
+    @pytest.mark.skipif(utils._macos_version() < (12, 0), reason="prediction on mlprogram model "
+                                                                    "available only on macOS12+")
+    def test_save_spec_api(self):
+        """
+        save an mlpackage using the save_spec API. Reload the model from disk and verify it works
+        """
+        # get spec and use it to save .mlpackage
+        spec = self.mlmodel.get_spec()
+        with tempfile.TemporaryDirectory(suffix=utils._MLPACKAGE_EXTENSION) as model_path:
+            # this should raise error:
+            with pytest.raises(Exception, match="spec of type mlProgram cannot be saved without"
+                                                " the weights file. Please provide the path to "
+                                                "the weights file as well, using the 'weights_dir' argument."):
+                utils.save_spec(spec, model_path)
+
+            # provide weights dir path to save the spec correctly
+            utils.save_spec(spec, model_path, weights_dir=self.mlmodel.weights_dir)
+            # check the correctness of .mlpackage
+            model = MLModel(model_path)
+            self._test_mlmodel_correctness(model)
+
+    @pytest.mark.skipif(utils._macos_version() < (12, 0), reason="prediction on mlprogram model "
+                                                                    "available only on macOS12+")
+    def test_save_spec_api_model_with_no_weights(self):
+        """
+        save an mlprogram model with no weights, using the save_spec API and an empty weights directory.
+        Reload the model from disk and verify it works
+        """
+        spec = self.mlmodel_no_weights.get_spec()
+        with tempfile.TemporaryDirectory(suffix=utils._MLPACKAGE_EXTENSION) as model_path:
+            with tempfile.TemporaryDirectory() as empty_weight_dir:
+                utils.save_spec(spec, model_path, weights_dir=empty_weight_dir)
+                model = MLModel(model_path)
+                self._test_mlmodel_correctness(model)
+
+    @pytest.mark.skipif(utils._macos_version() < (12, 0), reason="prediction on mlprogram model "
+                                                                    "available only on macOS12+")
+    def test_mlmodel_to_spec_to_mlmodel_with_no_weights_model(self):
+        """
+        convert a weightless mlmodel to spec, then back to mlmodel (with its own and with an empty weights_dir)
+        """
+        spec = self.mlmodel_no_weights.get_spec()
+        # if no weights_dir is passed, an error will be raised, even for a model without weights
+        with pytest.raises(Exception, match="MLModel of type mlProgram cannot be loaded just from the model "
+                                             "spec object. It also needs the path to the weights file. "
+                                             "Please provide that as well, using the 'weights_dir' argument."):
+            MLModel(spec)
+
+        # weights_dir will still exist, even though the model has no weights,
+        # with a weights file that only has header and no data
+        weights_dir = self.mlmodel_no_weights.weights_dir
+        assert weights_dir is not None
+        mlmodel_from_spec = MLModel(spec, weights_dir=weights_dir)
+        self._test_mlmodel_correctness(mlmodel_from_spec)
+
+        # load mlmodel from spec using an empty weights_dir (must pass the empty directory itself)
+        with tempfile.TemporaryDirectory() as empty_weight_dir:
+            mlmodel_from_spec = MLModel(spec, weights_dir=empty_weight_dir)
+            self._test_mlmodel_correctness(mlmodel_from_spec)
+
+    def test_weights_path_correctness(self):
+        """
+        test that after reloading an mlmodel from the spec, the weights path is updated
+        """
+        spec = self.mlmodel.get_spec()
+        original_weight_dir_path = self.mlmodel.weights_dir
+        assert os.path.exists(original_weight_dir_path)
+        # load mlmodel from spec: this will create a new mlpackage in a temp location
+        # and copy over the weights
+        mlmodel_reloaded = MLModel(spec, weights_dir=original_weight_dir_path)
+        assert os.path.exists(mlmodel_reloaded.weights_dir)
+        assert mlmodel_reloaded.weights_dir != original_weight_dir_path
+        assert mlmodel_reloaded.weights_dir == mlmodel_reloaded.package_path + "/Data/" \
+                                                + _MLPACKAGE_AUTHOR_NAME + "/weights"
+
+    def test_weights_dir_discovery_method(self):
+        """
+        Test "coremltools.libmodelpackage.ModelPackage.findItemByNameAuthor" function
+        """
+        mlpackage = ModelPackage(self.mlpackage_path)
+        model_package_item_info = mlpackage.findItemByNameAuthor(_WEIGHTS_DIR_NAME, _MLPACKAGE_AUTHOR_NAME)
+        weights_dir_path = model_package_item_info.path()
+        assert weights_dir_path == self.mlpackage_path + "/Data/" + _MLPACKAGE_AUTHOR_NAME + "/weights"
+        # verify that findItemByNameAuthor returns None, when item not found
+        model_package_item_info = mlpackage.findItemByNameAuthor(_WEIGHTS_DIR_NAME, "inexistent_author_name")
+        assert model_package_item_info is None
+
+
+
diff --git a/coremltools/test/neural_network/test_custom_neural_nets.py b/coremltools/test/neural_network/test_custom_neural_nets.py
index fc8a58690..d240d0b8a 100644
--- a/coremltools/test/neural_network/test_custom_neural_nets.py
+++ b/coremltools/test/neural_network/test_custom_neural_nets.py
@@ -1,10 +1,14 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import numpy as np
 import os
-import shutil
 import tempfile
+import shutil
 import unittest
 
-import numpy as np
-
 import coremltools
 import coremltools.models.datatypes as datatypes
 from coremltools.models import neural_network as neural_network
diff --git a/coremltools/test/neural_network/test_keras2.py b/coremltools/test/neural_network/test_keras2.py
index eae6477ba..bbc99fcbe 100644
--- a/coremltools/test/neural_network/test_keras2.py
+++ b/coremltools/test/neural_network/test_keras2.py
@@ -1,11 +1,19 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 import unittest
 
-from coremltools._deps import _HAS_KERAS2_TF
-from coremltools.proto import Model_pb2
-from coremltools.proto import FeatureTypes_pb2
-from coremltools.proto import NeuralNetwork_pb2
 import pytest
 
+from coremltools._deps import _HAS_KERAS2_TF
+from coremltools.proto import (
+    FeatureTypes_pb2,
+    Model_pb2,
+    NeuralNetwork_pb2
+)
+
 if _HAS_KERAS2_TF:
     import tensorflow as tf
     from keras.models import Sequential, Model
diff --git a/coremltools/test/neural_network/test_keras2_numeric.py b/coremltools/test/neural_network/test_keras2_numeric.py
index ed88d0d4f..b5c4f9610 100644
--- a/coremltools/test/neural_network/test_keras2_numeric.py
+++ b/coremltools/test/neural_network/test_keras2_numeric.py
@@ -1,3 +1,8 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 import itertools
 import os
 import shutil
diff --git a/coremltools/test/neural_network/test_model.py b/coremltools/test/neural_network/test_model.py
index e4ccb0da6..c6ff6b821 100644
--- a/coremltools/test/neural_network/test_model.py
+++ b/coremltools/test/neural_network/test_model.py
@@ -11,7 +11,7 @@
 import unittest
 
 from coremltools._deps import _HAS_TORCH
-from coremltools.proto import Model_pb2
+from coremltools.converters.mil import Builder as mb
 from coremltools.models.utils import (
     rename_feature,
     save_spec,
@@ -23,6 +23,7 @@
 from coremltools.models import MLModel, datatypes
 from coremltools.models.neural_network import NeuralNetworkBuilder
 from coremltools.models.neural_network.utils import make_image_input, make_nn_classifier
+from coremltools.proto import Model_pb2
 
 if _HAS_TORCH:
     import torch as _torch
@@ -508,15 +509,20 @@ def test_rename_image_input(self):
         np.testing.assert_equal(out, np.array([8.0, 10.0, 12.0]).reshape(3, 1, 1))
 
     @unittest.skipUnless(
-        _is_macos() and _macos_version() >= (12, 0) and _HAS_TORCH, "Only supported on macOS 12+"
+        _is_macos() and _macos_version() >= (12, 0), "Only supported on macOS 12+"
     )
     def test_rename_feature_mlprogram(self):
-        torch_model = _torch.nn.ReLU().eval()
+        @mb.program(input_specs=[mb.TensorSpec(shape=(3,))])
+        def linear_prog(input):
+            W = np.ones((10, 3), dtype=np.float64)  # np.float alias was removed in NumPy 1.24
+            out = mb.linear(x=input, weight=W, name="output")
+            return out
+
         model = coremltools.convert(
-            _torch.jit.trace(torch_model, _torch.rand(3, )),
-            inputs=[coremltools.TensorType(shape=(3,))],
+            linear_prog,
             convert_to='mlprogram'
         )
+
         spec = model.get_spec()
         input_name = spec.description.input[0].name
         output_name = spec.description.output[0].name
@@ -524,16 +530,18 @@ def test_rename_feature_mlprogram(self):
         # rename input
         rename_feature(spec, input_name, "new_input_name")
         self.assertEqual(spec.description.input[0].name, "new_input_name")
-        model = coremltools.models.MLModel(spec)
+        model = coremltools.models.MLModel(spec, weights_dir=model.weights_dir)
         out = model.predict({"new_input_name": np.array([1.0, 2.0, 3.0])})[output_name]
-        self.assertEqual(out[0], 1.0)
+        self.assertEqual(out.shape, (10,))
+        self.assertEqual(out[0], 6.0)
 
         # rename output
         rename_feature(spec, output_name, "new_output_name")
         self.assertEqual(spec.description.output[0].name, "new_output_name")
-        model = coremltools.models.MLModel(spec)
+        model = coremltools.models.MLModel(spec, weights_dir=model.weights_dir)
         out = model.predict({"new_input_name": np.array([1.0, 2.0, 3.0])})["new_output_name"]
-        self.assertEqual(out[1], 2.0)
+        self.assertEqual(out.shape, (10,))
+        self.assertEqual(out[1], 6.0)
 
     @unittest.skipUnless(
         _is_macos() and _macos_version() >= (12, 0) and _HAS_TORCH, "Only supported on macOS 12+"
@@ -550,7 +558,7 @@ def test_rename_feature_classifier_mlprogram(self):
         input_name = spec.description.input[0].name
 
         rename_feature(spec, 'classLabel', 'highestProbClass')
-        model = coremltools.models.MLModel(spec)
+        model = coremltools.models.MLModel(spec, weights_dir=model.weights_dir)
         output_class = model.predict({input_name: np.array([1.0, 2.0, 3.0])})['highestProbClass']
         self.assertEqual(output_class, 'c')
 
diff --git a/coremltools/test/neural_network/test_neural_networks.py b/coremltools/test/neural_network/test_neural_networks.py
index 1743c9cb6..c65d25ca4 100644
--- a/coremltools/test/neural_network/test_neural_networks.py
+++ b/coremltools/test/neural_network/test_neural_networks.py
@@ -1,10 +1,15 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import os
+import shutil
+import tempfile
 import unittest
 
 import numpy as np
-import tempfile
 import pytest
-import shutil
-import os
 
 import coremltools
 from coremltools._deps import _HAS_KERAS_TF, MSG_KERAS1_NOT_FOUND
diff --git a/coremltools/test/neural_network/test_nn_builder.py b/coremltools/test/neural_network/test_nn_builder.py
index 006cb2b45..f53db22e8 100644
--- a/coremltools/test/neural_network/test_nn_builder.py
+++ b/coremltools/test/neural_network/test_nn_builder.py
@@ -1,7 +1,13 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 import unittest
 
 import numpy as np
 import pytest
+
 import coremltools
 from coremltools.models import datatypes, MLModel
 from coremltools.models.neural_network import NeuralNetworkBuilder
diff --git a/coremltools/test/neural_network/test_numpy_nn_layers.py b/coremltools/test/neural_network/test_numpy_nn_layers.py
index 8da48f511..e84db9939 100644
--- a/coremltools/test/neural_network/test_numpy_nn_layers.py
+++ b/coremltools/test/neural_network/test_numpy_nn_layers.py
@@ -2,6 +2,7 @@
 #
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 import itertools
 import math
 import os
diff --git a/coremltools/test/neural_network/test_quantization.py b/coremltools/test/neural_network/test_quantization.py
index a8939a3c5..7e5fcb02a 100644
--- a/coremltools/test/neural_network/test_quantization.py
+++ b/coremltools/test/neural_network/test_quantization.py
@@ -169,6 +169,7 @@ def test_quantized_large_input_length_conv1d_same_random(self):
         self.keras_tester.test_large_input_length_conv1d_same_random()
 
     def test_quantized_conv_dense(self):
+        pytest.xfail(reason="rdar://87349588 ([Rome gitlab CI] Re-enable time out tests)")
         self.keras_tester.test_conv_dense()
 
     def test_quantized_tiny_conv_crop_1d_random(self):
diff --git a/coremltools/test/neural_network/test_recurrent_stress_tests.py b/coremltools/test/neural_network/test_recurrent_stress_tests.py
index f472c7ad5..50e629e2e 100644
--- a/coremltools/test/neural_network/test_recurrent_stress_tests.py
+++ b/coremltools/test/neural_network/test_recurrent_stress_tests.py
@@ -1,13 +1,18 @@
-import itertools
-import unittest
-from copy import copy
+#  Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+#  Use of this source code is governed by a BSD-3-clause license that can be
+#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
+from copy import copy
+import itertools
 import numpy as np
 import pytest
+import unittest
 
 from coremltools._deps import _HAS_KERAS2_TF, _HAS_KERAS_TF
 from coremltools.models.utils import _macos_version, _is_macos
 
+
 np.random.seed(1377)
 
 if _HAS_KERAS2_TF or _HAS_KERAS_TF:
@@ -692,7 +697,7 @@ def _test_rnn_layer(self, keras_major_version, limit=None):
             rnn_params = dict(zip(self.simple_rnn_params_dict.keys(), rnn_params))
             model = Sequential()
             unroll = base_params["unroll"]
-            if base_params["input_dims"][1] == 1 and unroll == True:
+            if base_params["input_dims"][1] == 1 and unroll is True:
                 unroll = False
             if keras_major_version == 2:
                 model.add(
@@ -1117,7 +1122,6 @@ def _test_batched_lstm_layer(self):
         )
 
     def _test_lstm_layer(self, keras_major_version, limit=None):
-        params_keys = list(self.params_dict.keys())
         numerical_err_models = []
         shape_err_models = []
         numerical_failiure = 0
@@ -1137,7 +1141,7 @@ def _test_lstm_layer(self, keras_major_version, limit=None):
             lstm_params = dict(zip(self.lstm_params_dict.keys(), lstm_params))
             model = Sequential()
             unroll = base_params["unroll"]
-            if base_params["input_dims"][1] == 1 and unroll == True:
+            if base_params["input_dims"][1] == 1 and unroll is True:
                 unroll = False
             if lstm_params["bidirectional"] is True:
                 if keras_major_version == 2:
@@ -1357,7 +1361,7 @@ def _test_gru_layer(self, keras_major_version, limit=None):
             gru_params = dict(zip(self.gru_params_dict.keys(), gru_params))
             model = Sequential()
             unroll = base_params["unroll"]
-            if base_params["input_dims"][1] == 1 and unroll == True:
+            if base_params["input_dims"][1] == 1 and unroll is True:
                 unroll = False
             if keras_major_version == 2:
                 model.add(
@@ -1521,7 +1525,7 @@ def _test_lstm_stacked(self, keras_major_version, limit=None):
             base_params = dict(zip(self.params_dict.keys(), base_params))
             model = Sequential()
             unroll = base_params["unroll"]
-            if base_params["input_dims"][1] == 1 and unroll == True:
+            if base_params["input_dims"][1] == 1 and unroll is True:
                 unroll = False
             settings = dict(
                 activation=base_params["activation"],
diff --git a/coremltools/test/neural_network/test_simple_nn_inference.py b/coremltools/test/neural_network/test_simple_nn_inference.py
index b83efbf67..01ddd7c1e 100644
--- a/coremltools/test/neural_network/test_simple_nn_inference.py
+++ b/coremltools/test/neural_network/test_simple_nn_inference.py
@@ -1,10 +1,17 @@
+# Copyright (c) 2021, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import os
+
+import numpy as np
+
 import coremltools
 from coremltools import utils
-import coremltools.models.datatypes as datatypes
 from coremltools.models import neural_network as neural_network
+import coremltools.models.datatypes as datatypes
 
-import numpy as np
-import os
 
 class TestNeuralNetworkPrediction:
 
diff --git a/coremltools/version.py b/coremltools/version.py
index 2a886032f..8bfede6a4 100644
--- a/coremltools/version.py
+++ b/coremltools/version.py
@@ -4,4 +4,4 @@
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 
-__version__ = "5.1.0"  # VERSION_STRING
+__version__ = "5.2.0"  # VERSION_STRING
diff --git a/mlmodel/format/Model.proto b/mlmodel/format/Model.proto
index bc644d72a..ae1518153 100644
--- a/mlmodel/format/Model.proto
+++ b/mlmodel/format/Model.proto
@@ -249,6 +249,7 @@ message SerializedModel {
  *
  * 6 : iOS 15, macOS 12, tvOS 15, watchOS 8 (Core ML 5)
  * - Core ML Audio Feature Print
+ * - new type of model: mlprogram (MILSpec.Program)
  *
  */
 message Model {
diff --git a/mlmodel/src/MILBlob/Blob/MMapFileReader.cpp b/mlmodel/src/MILBlob/Blob/MMapFileReader.cpp
index adf347f50..58030cb1b 100644
--- a/mlmodel/src/MILBlob/Blob/MMapFileReader.cpp
+++ b/mlmodel/src/MILBlob/Blob/MMapFileReader.cpp
@@ -35,6 +35,7 @@ MMapFileReader::MMapFileReader(const std::string& filename)
         mmap(nullptr, fileLength, PROT_READ, MAP_PRIVATE, fileno(f.get()), 0 /*offset*/),
         [length = fileLength](void* ptr) { munmap(ptr, length); });
 
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) -- MAP_FAILED is (void*) -1.
     MILVerifyIsTrue(mmapPtr.get() != nullptr && mmapPtr.get() != MAP_FAILED,
                     std::runtime_error,
                     "Unable to mmap file " + filename);
diff --git a/mlmodel/src/MILBlob/Blob/StorageFormat.hpp b/mlmodel/src/MILBlob/Blob/StorageFormat.hpp
index 9aa7e8b6b..05ecf209c 100644
--- a/mlmodel/src/MILBlob/Blob/StorageFormat.hpp
+++ b/mlmodel/src/MILBlob/Blob/StorageFormat.hpp
@@ -52,6 +52,7 @@ enum BlobDataType : uint32_t
     Float16 = 1,
     Float32 = 2,
     UInt8 = 3,
+    Int8 = 4,
 };
 
 template <typename T>
@@ -72,6 +73,11 @@ struct BlobDataTypeTraits<uint8_t> {
     static constexpr BlobDataType DataType = BlobDataType::UInt8;
 };
 
+template <>
+struct BlobDataTypeTraits<int8_t> {
+    static constexpr BlobDataType DataType = BlobDataType::Int8;
+};
+
 /**
  * blob_metadata: stores information of blob present in weight file
  */
diff --git a/mlmodel/src/MILBlob/Blob/StorageReader.cpp b/mlmodel/src/MILBlob/Blob/StorageReader.cpp
index 2f0f99c83..f2139afe2 100644
--- a/mlmodel/src/MILBlob/Blob/StorageReader.cpp
+++ b/mlmodel/src/MILBlob/Blob/StorageReader.cpp
@@ -23,7 +23,7 @@ class StorageReader::Impl final {
     Impl& operator=(const Impl&) = delete;
     Impl& operator=(Impl&&) = delete;
 
-    Impl(std::string filename) : m_filePath(std::move(filename)) {}
+    explicit Impl(std::string filename) : m_filePath(std::move(filename)) {}
     ~Impl() = default;
 
     const std::string& GetFilename() const
@@ -60,12 +60,18 @@ class StorageReader::Impl final {
         return metadata.offset;
     }
 
+    uint64_t GetDataSize(uint64_t metadataOffset) const
+    {
+        auto metadata = GetMetadata(metadataOffset);
+        return metadata.sizeInBytes;
+    }
+
 private:
     void EnsureLoaded() const
     {
         auto load = [this]() {
             auto reader = MakeMMapFileReader(m_filePath);
-            const storage_header& header = reader->ReadStruct<storage_header>(0);
+            const auto& header = reader->ReadStruct<storage_header>(0);
             MILVerifyIsTrue(header.version == 2, std::runtime_error, "Storage Reader expects file format version 2.");
 
             // once we're good with the structure of the file, then set class state
@@ -103,6 +109,12 @@ const std::string& StorageReader::GetFilename() const
     return m_impl->GetFilename();
 }
 
+template <>
+Util::Span<const int8_t> StorageReader::GetDataView<int8_t>(uint64_t offset) const
+{
+    return m_impl->GetDataView<int8_t>(offset);
+}
+
 template <>
 Util::Span<const uint8_t> StorageReader::GetDataView<uint8_t>(uint64_t offset) const
 {
@@ -126,7 +138,12 @@ Util::Span<const uint8_t> StorageReader::GetRawDataView(uint64_t offset) const
     return m_impl->GetRawDataView(offset);
 }
 
-uint64_t StorageReader::GetDataOffset(uint64_t offset) const
+uint64_t StorageReader::GetDataOffset(uint64_t metadataOffset) const
+{
+    return m_impl->GetDataOffset(metadataOffset);
+}
+
+uint64_t StorageReader::GetDataSize(uint64_t metadataOffset) const
 {
-    return m_impl->GetDataOffset(offset);
+    return m_impl->GetDataSize(metadataOffset);
 }
diff --git a/mlmodel/src/MILBlob/Blob/StorageReader.hpp b/mlmodel/src/MILBlob/Blob/StorageReader.hpp
index 4698ae842..2f900ac66 100644
--- a/mlmodel/src/MILBlob/Blob/StorageReader.hpp
+++ b/mlmodel/src/MILBlob/Blob/StorageReader.hpp
@@ -55,15 +55,23 @@ class StorageReader final {
 
     /**
      * Returns file offset of data from given metadata offset
-     * @throws std::range_error if offset is not valid.
+     * @throws std::range_error if metadataOffset is not valid.
      */
-    uint64_t GetDataOffset(uint64_t offset) const;
+    uint64_t GetDataOffset(uint64_t metadataOffset) const;
+
+    /**
+     * Returns the size of the data blob for the given metadata offset
+     * @throws std::range_error if metadataOffset is not valid.
+     */
+    uint64_t GetDataSize(uint64_t metadataOffset) const;
 
 private:
     class Impl;
     const std::unique_ptr<Impl> m_impl;
 };
 
+template <>
+Util::Span<const int8_t> StorageReader::GetDataView<int8_t>(uint64_t) const;
 template <>
 Util::Span<const uint8_t> StorageReader::GetDataView<uint8_t>(uint64_t) const;
 template <>
diff --git a/mlmodel/src/MILBlob/Blob/StorageWriter.cpp b/mlmodel/src/MILBlob/Blob/StorageWriter.cpp
index 803060181..726a3ceee 100644
--- a/mlmodel/src/MILBlob/Blob/StorageWriter.cpp
+++ b/mlmodel/src/MILBlob/Blob/StorageWriter.cpp
@@ -107,6 +107,12 @@ StorageWriter::StorageWriter(const std::string& filePath, bool truncateFile)
     : m_impl(std::make_unique<Impl>(filePath, truncateFile))
 {}
 
+template <>
+uint64_t StorageWriter::WriteData<int8_t>(Util::Span<const int8_t> data)
+{
+    return m_impl->WriteData(data);
+}
+
 template <>
 uint64_t StorageWriter::WriteData<uint8_t>(Util::Span<const uint8_t> data)
 {
diff --git a/mlmodel/src/MILBlob/Blob/StorageWriter.hpp b/mlmodel/src/MILBlob/Blob/StorageWriter.hpp
index 944f0998b..77903a5b3 100644
--- a/mlmodel/src/MILBlob/Blob/StorageWriter.hpp
+++ b/mlmodel/src/MILBlob/Blob/StorageWriter.hpp
@@ -41,6 +41,8 @@ class StorageWriter final {
     const std::unique_ptr<Impl> m_impl;
 };
 
+template <>
+uint64_t StorageWriter::WriteData<int8_t>(Util::Span<const int8_t>);
 template <>
 uint64_t StorageWriter::WriteData<uint8_t>(Util::Span<const uint8_t>);
 template <>
diff --git a/mlmodel/src/MILBlob/Fp16.hpp b/mlmodel/src/MILBlob/Fp16.hpp
index 2f21a3b01..300e4566f 100644
--- a/mlmodel/src/MILBlob/Fp16.hpp
+++ b/mlmodel/src/MILBlob/Fp16.hpp
@@ -16,7 +16,7 @@ namespace MILBlob {
  *  (https://ieeexplore.ieee.org/document/8766229)
  */
 struct Fp16 {
-    explicit Fp16(uint16_t bytes) : bytes(bytes) {}
+    explicit Fp16(uint16_t bs) : bytes(bs) {}
     Fp16() : bytes(0) {}
 
     static Fp16 FromFloat(float f);
diff --git a/mlmodel/src/MILBlob/Util/Span.hpp b/mlmodel/src/MILBlob/Util/Span.hpp
index c3787db5a..71d58641a 100644
--- a/mlmodel/src/MILBlob/Util/Span.hpp
+++ b/mlmodel/src/MILBlob/Util/Span.hpp
@@ -52,12 +52,12 @@ class SpanSize final {
 public:
     SpanSize() = default;
     ~SpanSize() = default;
-    SpanSize(const SpanSize&) noexcept = default;
+    SpanSize(const SpanSize&) = default;
     SpanSize(SpanSize&&) noexcept = default;
-    SpanSize& operator=(const SpanSize&) noexcept = default;
+    SpanSize& operator=(const SpanSize&) = default;
     SpanSize& operator=(SpanSize&&) noexcept = default;
 
-    constexpr size_t Size() const noexcept
+    constexpr size_t Size() const
     {
         return m_size;
     }
@@ -71,14 +71,14 @@ class SpanSize<DynamicExtent> final {
 public:
     SpanSize() = delete;
     ~SpanSize() = default;
-    SpanSize(const SpanSize&) noexcept = default;
+    SpanSize(const SpanSize&) = default;
     SpanSize(SpanSize&&) noexcept = default;
-    SpanSize& operator=(const SpanSize&) noexcept = default;
+    SpanSize& operator=(const SpanSize&) = default;
     SpanSize& operator=(SpanSize&&) noexcept = default;
 
     explicit SpanSize(size_t size) : m_size(size) {}
 
-    size_t Size() const noexcept
+    size_t Size() const
     {
         return m_size;
     }
@@ -125,24 +125,24 @@ class Span final {
     public:
         SliceIterator(pointer p, size_t stride) : m_ptr(p), m_stride(stride) {}
 
-        bool operator==(const SliceIterator& other) const noexcept
+        bool operator==(const SliceIterator& other) const
         {
             return m_ptr == other.m_ptr && m_stride == other.m_stride;
         }
 
-        bool operator!=(const SliceIterator& other) const noexcept
+        bool operator!=(const SliceIterator& other) const
         {
             return !(*this == other);
         }
 
-        SliceIterator& operator++() noexcept
+        SliceIterator& operator++()
         {
             m_ptr += m_stride;
             return *this;
         }
 
         // NOLINTNEXTLINE(cert-dcl21-cpp)
-        SliceIterator operator++(int) const noexcept
+        SliceIterator operator++(int) const
         {
             return SliceIterator(m_ptr + m_stride, m_stride);
         }
@@ -162,24 +162,24 @@ class Span final {
     public:
         explicit StaticSliceIterator(pointer p) : m_ptr(p) {}
 
-        bool operator==(const StaticSliceIterator<Stride>& other) const noexcept
+        bool operator==(const StaticSliceIterator<Stride>& other) const
         {
             return m_ptr == other.m_ptr;
         }
 
-        bool operator!=(const StaticSliceIterator<Stride>& other) const noexcept
+        bool operator!=(const StaticSliceIterator<Stride>& other) const
         {
             return !(*this == other);
         }
 
-        StaticSliceIterator& operator++() noexcept
+        StaticSliceIterator& operator++()
         {
             m_ptr += Stride;
             return *this;
         }
 
         // NOLINTNEXTLINE(cert-dcl21-cpp)
-        StaticSliceIterator operator++(int) const noexcept
+        StaticSliceIterator operator++(int) const
         {
             return StaticSliceIterator<Stride>(m_ptr + Stride);
         }
@@ -215,10 +215,10 @@ class Span final {
 
     ~Span() = default;
 
-    Span(const Span<T, Extent>&) noexcept = default;
+    Span(const Span<T, Extent>&) = default;
     Span(Span<T, Extent>&&) noexcept = default;
 
-    Span<T, Extent>& operator=(const Span<T, Extent>&) noexcept = default;
+    Span<T, Extent>& operator=(const Span<T, Extent>&) = default;
     Span<T, Extent>& operator=(Span<T, Extent>&&) noexcept = default;
 
     /** Implicit copy constructor for converting a mutable span to a const span. Extent and type must be the same. */
@@ -226,8 +226,8 @@ class Span final {
               typename std::enable_if<!std::is_same<T, NonConstT>::value &&
                                           std::is_same<T, typename std::add_const<NonConstT>::type>::value,
                                       int>::type = 0>
-    Span(const Span<NonConstT, Extent>& other) noexcept : m_ptr(other.Data())
-                                                        , m_size(other.Size())
+    Span(const Span<NonConstT, Extent>& other) : m_ptr(other.Data())
+                                               , m_size(other.Size())
     {}
 
     /** Implicit move constructor for converting a mutable span to a const span. Extent and type must be the same. */
@@ -235,8 +235,8 @@ class Span final {
               typename std::enable_if<!std::is_same<T, NonConstT>::value &&
                                           std::is_same<T, typename std::add_const<NonConstT>::type>::value,
                                       int>::type = 0>
-    Span(Span<NonConstT, Extent>&& other) noexcept : m_ptr(other.Data())
-                                                   , m_size(other.Size())
+    Span(Span<NonConstT, Extent>&& other) : m_ptr(other.Data())
+                                          , m_size(other.Size())
     {}
 
     template <size_t Extent__ = Extent, typename std::enable_if<IsDynamicExtent<Extent__>::value, int>::type = 0>
@@ -257,17 +257,17 @@ class Span final {
     // properties
     //
 
-    pointer Data() const noexcept
+    pointer Data() const
     {
         return m_ptr;
     }
 
-    size_t Size() const noexcept
+    size_t Size() const
     {
         return m_size.Size();
     }
 
-    constexpr bool IsEmpty() const noexcept
+    constexpr bool IsEmpty() const
     {
         return Size() == 0;
     }
@@ -340,22 +340,22 @@ class Span final {
     // basic C++ iterators
     //
 
-    iterator begin() const noexcept
+    iterator begin() const
     {
         return Data();
     }
 
-    iterator end() const noexcept
+    iterator end() const
     {
         return Data() + Size();
     }
 
-    const_iterator cbegin() const noexcept
+    const_iterator cbegin() const
     {
         return Data();
     }
 
-    const_iterator cend() const noexcept
+    const_iterator cend() const
     {
         return Data() + Size();
     }
@@ -444,19 +444,19 @@ Span<TargetT> MakeSpan(const C<T, Args...>& c)
 //     auto span = MakeSpan(v); // span is Span<const int, 3>
 
 template <typename T, size_t N>
-Span<T, N> MakeSpan(std::array<T, N>& v) noexcept
+Span<T, N> MakeSpan(std::array<T, N>& v)
 {
     return Span<T, N>(v.data());
 }
 
 template <typename T, size_t N, typename MutableT = typename std::remove_const<T>::type>
-Span<T, N> MakeSpan(const std::array<MutableT, N>& v) noexcept
+Span<T, N> MakeSpan(const std::array<MutableT, N>& v)
 {
     return Span<T, N>(v.data());
 }
 
 template <typename T, size_t N, typename ConstT = typename std::add_const<T>::type>
-Span<ConstT, N> MakeSpan(const std::array<T, N>& v) noexcept
+Span<ConstT, N> MakeSpan(const std::array<T, N>& v)
 {
     return Span<ConstT, N>(v.data());
 }
diff --git a/mlmodel/tests/MILBlob/StorageIntegrationTests.cpp b/mlmodel/tests/MILBlob/StorageIntegrationTests.cpp
index 730233630..b63bc43ea 100644
--- a/mlmodel/tests/MILBlob/StorageIntegrationTests.cpp
+++ b/mlmodel/tests/MILBlob/StorageIntegrationTests.cpp
@@ -90,10 +90,9 @@ int testStorageIntegrationTestsReadDataWithIncorrectOffset()
     AutoDeleteTempFile tempfile;
 
     const std::vector<uint8_t> data = {0x02, 0x00, 0x40, 0x00, 0x07};
-    uint64_t offset;
     {
         StorageWriter writer(tempfile.GetFilename());
-        offset = writer.WriteData(Util::MakeSpan(data));
+        writer.WriteData(Util::MakeSpan(data));
     }
 
     StorageReader reader(tempfile.GetFilename());
diff --git a/mlmodel/tests/MILBlob/StorageReaderTests.cpp b/mlmodel/tests/MILBlob/StorageReaderTests.cpp
index bd376c6a7..1facaed36 100644
--- a/mlmodel/tests/MILBlob/StorageReaderTests.cpp
+++ b/mlmodel/tests/MILBlob/StorageReaderTests.cpp
@@ -4,6 +4,7 @@
 // found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 #include "MILBlob/Blob/StorageReader.hpp"
+#include "MILBlob/Blob/StorageWriter.hpp"
 #include "MILBlob/Fp16.hpp"
 #include "MILBlob/Util/SpanCast.hpp"
 #include "AutoDeleteTempFile.hpp"
@@ -293,16 +294,39 @@ int testStorageReaderTestsDataOffset()
 
     {  // read data offset for uint8_t weights from metadata 1
         ML_ASSERT_EQ(uint64_t(128), reader.GetDataOffset(64));
+        ML_ASSERT_EQ(uint64_t(5), reader.GetDataSize(64));
     }
 
     {  // read data offset for Fp16 weights from metadata 2
         ML_ASSERT_EQ(uint64_t(256), reader.GetDataOffset(192));
+        ML_ASSERT_EQ(uint64_t(8), reader.GetDataSize(192));
     }
 
     {  // read data offset for float weights from metadata 3
         ML_ASSERT_EQ(uint64_t(384), reader.GetDataOffset(320));
+        ML_ASSERT_EQ(uint64_t(16), reader.GetDataSize(320));
     }
 
     return 0;
 }
 
+int testStorageReaderTestsInt8Data()
+{
+    AutoDeleteTempFile tempfile;
+    const std::vector<int8_t> data{1, -1, -20, 25, 13};
+    uint64_t offset = 0;
+    {
+        StorageWriter writer(tempfile.GetFilename());
+        auto span = Util::MakeSpan(data);
+        offset = writer.WriteData(span);
+    }
+
+    StorageReader reader(tempfile.GetFilename());
+    const auto readData = reader.GetDataView<int8_t>(offset);
+    ML_ASSERT_EQ(readData.Size(), data.size());
+
+    ML_ASSERT_SPAN_EQ(readData, Util::MakeSpan(data));
+
+    return 0;
+}
+
diff --git a/mlmodel/tests/MILBlob/StorageWriterTests.cpp b/mlmodel/tests/MILBlob/StorageWriterTests.cpp
index a7ff90ad1..dde7c9326 100644
--- a/mlmodel/tests/MILBlob/StorageWriterTests.cpp
+++ b/mlmodel/tests/MILBlob/StorageWriterTests.cpp
@@ -109,6 +109,22 @@ int testStorageWriterTestsSupportedTypes()
         ML_ASSERT(IsCorrectData<float>(filePath, offset, expectedSpan));
     }
 
+    // Writing int8 values
+    {
+        const std::vector<int8_t> val = {1, -1, 10, -25};
+        auto expectedSpan = Util::MakeSpan(val);
+        uint64_t offset = 0;
+        {
+            StorageWriter writer(tempfile.GetFilename(), /* truncateFile */ false);
+            offset = writer.WriteData(expectedSpan);
+        }
+
+        ML_ASSERT_EQ(offset % DefaultStorageAlignment, uint64_t(0));
+        ML_ASSERT(IsCorrectHeader(filePath, 4 /*count*/));
+        ML_ASSERT(IsCorrectMetadata<int8_t>(filePath, offset, 4, BlobDataType::Int8));
+        ML_ASSERT(IsCorrectData<int8_t>(filePath, offset, expectedSpan));
+    }
+
     return 0;
 }
 
diff --git a/mlmodel/tests/MLModelTests.hpp b/mlmodel/tests/MLModelTests.hpp
index 23703b08d..78b4dc6b6 100644
--- a/mlmodel/tests/MLModelTests.hpp
+++ b/mlmodel/tests/MLModelTests.hpp
@@ -298,6 +298,7 @@ MLMODEL_TEST(testStorageReaderTestsBasicProperties)
 MLMODEL_TEST(testStorageReaderTestsDataOffset)
 MLMODEL_TEST(testStorageReaderTestsIncorrectDType)
 MLMODEL_TEST(testStorageReaderTestsIncorrectMetadata)
+MLMODEL_TEST(testStorageReaderTestsInt8Data)
 MLMODEL_TEST(testStorageReaderTestsRawData)
 MLMODEL_TEST(testStorageReaderTestsThreeRecords)
 MLMODEL_TEST(testStorageReaderTestsTruncatedData)
diff --git a/modelpackage/src/ModelPackage.cpp b/modelpackage/src/ModelPackage.cpp
index ff33ad086..1dc9a8289 100644
--- a/modelpackage/src/ModelPackage.cpp
+++ b/modelpackage/src/ModelPackage.cpp
@@ -149,6 +149,8 @@ class detail::ModelPackageImpl {
     
     std::unique_ptr<JsonMap> m_manifest;
     
+    bool m_readOnly;
+    
     void validate();
     
     std::unique_ptr<JsonMap> getItemInfoEntries() const;
@@ -163,7 +165,7 @@ class detail::ModelPackageImpl {
 
 public:
 
-    ModelPackageImpl(const std::filesystem::path& path, bool createIfNecessary = true);
+    ModelPackageImpl(const std::filesystem::path& path, bool createIfNecessary = true, bool readOnly = false);
     ~ModelPackageImpl();
     
     inline const std::filesystem::path& path() const {
@@ -172,7 +174,7 @@ class detail::ModelPackageImpl {
     
     std::string setRootModel(const std::filesystem::path& path, const std::string& name, const std::string& author, const std::string& description);
     std::string replaceRootModel(const std::filesystem::path& path, const std::string& name, const std::string& author, const std::string& description);
-    ModelPackageItemInfo getRootModel() const;
+    std::shared_ptr<ModelPackageItemInfo> getRootModel() const;
     
     std::string addItem(const std::filesystem::path& path, const std::string& name, const std::string& author, const std::string& description);
     std::shared_ptr<ModelPackageItemInfo> findItem(const std::string& identifier) const;
@@ -187,11 +189,12 @@ class detail::ModelPackageImpl {
 
 #pragma mark ModelPackageImpl
 
-ModelPackageImpl::ModelPackageImpl(const std::filesystem::path& path, bool createIfNecessary)
+ModelPackageImpl::ModelPackageImpl(const std::filesystem::path& path, bool createIfNecessary, bool readOnly)
 : m_packagePath(path),
   m_manifestPath(path / kModelPackageManifestFileName),
   m_packageDataDirPath(path / kModelPackageDataDir),
-  m_manifest(nullptr)
+  m_manifest(nullptr),
+  m_readOnly(readOnly)
 {
     if (std::filesystem::exists(m_packagePath)) {
         if (std::filesystem::exists(m_manifestPath)) {
@@ -227,9 +230,26 @@ ModelPackageImpl::ModelPackageImpl(const std::filesystem::path& path, bool creat
 
 ModelPackageImpl::~ModelPackageImpl()
 {
-    std::ofstream manifestStream(m_manifestPath, std::ios::binary);
-    m_manifest->serialize(manifestStream);
-    manifestStream.close();
+    if (m_readOnly) {
+        return;
+    }
+    
+    std::filesystem::path uniquedDestination(m_manifestPath);
+    std::filesystem::path suffix(generateIdentifier()); // std::filesystem::path from stringified UUID
+    uniquedDestination.replace_extension(suffix); // unique filename in the presumed writable directory where Manifest.json is sited
+    
+    std::ofstream uniquedStream(uniquedDestination, std::ios::binary);
+    m_manifest->serialize(uniquedStream);
+    uniquedStream.close();
+    if (uniquedStream.fail()) { // If any of the above fail do not go on to move uniquedDestination to m_manifestPath.
+        return;
+    }
+    
+    std::error_code ecode;
+    std::filesystem::rename(uniquedDestination, m_manifestPath, ecode); // On failure sets ecode and makes no changes. Does not throw.
+    if (ecode.value()) {
+        std::filesystem::remove(uniquedDestination, ecode); // Best-effort cleanup via the non-throwing overload: a destructor must not throw.
+    }
 }
 
 void ModelPackageImpl::validate()
@@ -426,14 +446,14 @@ std::string ModelPackageImpl::replaceRootModel(const std::filesystem::path& path
     return identifier;
 }
 
-ModelPackageItemInfo ModelPackageImpl::getRootModel() const
+std::shared_ptr<ModelPackageItemInfo> ModelPackageImpl::getRootModel() const
 {
     if (false == m_manifest->hasKey(kModelPackageRootModelKey)) {
         throw std::runtime_error("Failed to look up root model");
     }
     
     auto rootModelIdentifier = m_manifest->getString(kModelPackageRootModelKey);
-    return *(findItem(rootModelIdentifier));
+    return findItem(rootModelIdentifier);
 }
 
 std::shared_ptr<ModelPackageItemInfo> ModelPackageImpl::findItem(const std::string& identifier) const
@@ -513,8 +533,8 @@ bool ModelPackageImpl::isValid(const std::filesystem::path& path)
 
 #pragma mark ModelPackage
 
-ModelPackage::ModelPackage(const std::string& packagePath, bool createIfNecessary)
-: m_modelPackageImpl(std::make_shared<ModelPackageImpl>(packagePath, createIfNecessary))
+ModelPackage::ModelPackage(const std::string& packagePath, bool createIfNecessary, bool readOnly)
+: m_modelPackageImpl(std::make_shared<ModelPackageImpl>(packagePath, createIfNecessary, readOnly))
 {
 }
 
@@ -537,7 +557,7 @@ std::string ModelPackage::replaceRootModel(const std::string& path, const std::s
     return m_modelPackageImpl->replaceRootModel(path, name, author, description);
 }
 
-ModelPackageItemInfo ModelPackage::getRootModel() const
+std::shared_ptr<ModelPackageItemInfo> ModelPackage::getRootModel() const
 {
     return m_modelPackageImpl->getRootModel();
 }
diff --git a/modelpackage/src/ModelPackage.hpp b/modelpackage/src/ModelPackage.hpp
index 012d3d3d6..f9f50d18a 100644
--- a/modelpackage/src/ModelPackage.hpp
+++ b/modelpackage/src/ModelPackage.hpp
@@ -66,8 +66,9 @@ class ModelPackage {
     /** Creates an instance of model package that exists at the specified path.
         @param path Path of the model package (with extension .mlpackage).
         @param createIfNecessary Create a new model package if one does not exist at the specificed path. Defaults to true.
+        @param readOnly When true, the model package will not be mutated. Defaults to false.
         @throw Runtime exception if an invalid model package exists at the specified path. */
-    explicit ModelPackage(const std::string& path, bool createIfNecessary = true);
+    explicit ModelPackage(const std::string& path, bool createIfNecessary = true, bool readOnly = false);
     
     ~ModelPackage();
     
@@ -97,7 +98,7 @@ class ModelPackage {
      Retrieve previously set root model from the model package.
          @return ModelPackageItemInfo with information about the retrieved root model file.
          @throw Runtime exception if the model package does not contain a root model. */
-    ModelPackageItemInfo getRootModel() const;
+    std::shared_ptr<ModelPackageItemInfo> getRootModel() const;
     
     /**
      Add a file or directory in the model package using name and author as a uniqueing key.
diff --git a/modelpackage/src/ModelPackagePython.cpp b/modelpackage/src/ModelPackagePython.cpp
index 762579561..184753677 100644
--- a/modelpackage/src/ModelPackagePython.cpp
+++ b/modelpackage/src/ModelPackagePython.cpp
@@ -24,7 +24,7 @@ namespace py = pybind11;
 PYBIND11_PLUGIN(libmodelpackage) {
     py::module m("libmodelpackage", "Library to create, access and edit model packages");
     
-    py::class_<MPL::ModelPackageItemInfo>(m, "ModelPackageItemInfo")
+    py::class_<MPL::ModelPackageItemInfo, std::shared_ptr<MPL::ModelPackageItemInfo>>(m, "ModelPackageItemInfo")
         .def("identifier", &MPL::ModelPackageItemInfo::identifier)
         .def("path", &MPL::ModelPackageItemInfo::path)
         .def("name", &MPL::ModelPackageItemInfo::name)
@@ -38,7 +38,8 @@ PYBIND11_PLUGIN(libmodelpackage) {
         .def("replaceRootModel", &MPL::ModelPackage::replaceRootModel)
         .def("addItem", &MPL::ModelPackage::addItem)
         .def("getRootModel", &MPL::ModelPackage::getRootModel)
-        .def("isValid", &MPL::ModelPackage::isValid);
+        .def("isValid", &MPL::ModelPackage::isValid)
+        .def("findItemByNameAuthor", py::overload_cast<const std::string&, const std::string&>(&MPL::ModelPackage::findItem, py::const_));
     
     return m.ptr();
 }
diff --git a/reqs/test.pip b/reqs/test.pip
index d1c7bf0f4..c5dbd1c60 100644
--- a/reqs/test.pip
+++ b/reqs/test.pip
@@ -21,12 +21,11 @@ six
 sympy > 1.6
 tensorflow==1.14.0; python_version < '3.8'
 torch==1.5.0; python_version == '3.5'
-torch==1.9.1; python_version > '3.5'
+torch==1.10.2; python_version > '3.5'
 torchvision==0.6.1; python_version == '3.5'
-torchvision==0.10.1; python_version > '3.5'
+torchvision==0.11.3; python_version > '3.5'
 xgboost==1.4.2
 mock
 wrapt
-pyyaml > 5.3
 tqdm
 pytest-timeout
diff --git a/reqs/test_tf2.pip b/reqs/test_tf2.pip
index 116e30bda..08efa4647 100644
--- a/reqs/test_tf2.pip
+++ b/reqs/test_tf2.pip
@@ -1,5 +1,8 @@
 tensorflow==2.1.0; python_version <= '2.7'
-tensorflow==2.5.0; python_version >= '3.5'
+
+tensorflow==2.6.2; python_version >= '3.5'
+tensorflow-estimator==2.6.0; python_version >= '3.5'
+keras==2.6.0; python_version >= '3.5'
 
 tensorflow-addons==0.7.1; python_version <= '2.7'
 tensorflow-addons==0.12.1; python_version >= '3.5'
diff --git a/scripts/release_wheel.sh b/scripts/release_wheel.sh
index 81ef00b6d..ae4af4c8b 100644
--- a/scripts/release_wheel.sh
+++ b/scripts/release_wheel.sh
@@ -62,6 +62,3 @@ python setup.py sdist
 cp $WHEEL_DIR/*.whl dist/.
 twine check dist/*
 
-# Disabled. For now, we treat "release" as a collection job.
-# Upload the wheel
-# twine upload --config-file ~/.pypirc --repository $PYPI $1/dist/*
diff --git a/scripts/test.sh b/scripts/test.sh
index 77d5f77b3..d2415d2d3 100755
--- a/scripts/test.sh
+++ b/scripts/test.sh
@@ -8,6 +8,7 @@ set -x
 COREMLTOOLS_HOME=$( cd "$( dirname "$0" )/.." && pwd )
 COREMLTOOLS_NAME=$(basename $COREMLTOOLS_HOME)
 BUILD_DIR="${COREMLTOOLS_HOME}/build"
+XML_PATH="${BUILD_DIR}/py-test-report.xml"
 WHEEL_PATH=""
 FAST=0
 SLOW=0
@@ -29,6 +30,7 @@ print_help() {
   echo "Usage: zsh -i test.sh"
   echo
   echo "  --wheel-path=*          Specify which wheel to test. Otherwise, test the current coremltools dir."
+  echo "  --xml-path=*            Path to test xml file."
   echo "  --test-package=*        Test package to run."
   echo "  --python=*              Python to use for configuration."
   echo "  --requirements=*        [Optional] Path to the requirements.txt file."
@@ -49,6 +51,7 @@ while [ $# -gt 0 ]
     --python=*)          PYTHON=${1##--python=} ;;
     --test-package=*)    TEST_PACKAGE=${1##--test-package=} ;;
     --wheel-path=*)      WHEEL_PATH=${1##--wheel-path=} ;;
+    --xml-path=*)        XML_PATH=${1##--xml-path=} ;;
     --cov=*)             COV=${1##--cov=} ;;
     --fast)              FAST=1;;
     --slow)              SLOW=1;;
@@ -94,7 +97,7 @@ fi
 # Now run the tests
 echo "Running tests"
 
-TEST_CMD=($PYTEST_EXECUTABLE -v -ra -W "ignore::UserWarning" -W "ignore::FutureWarning" -W "ignore::DeprecationWarning" --durations=100 --pyargs ${TEST_PACKAGE} --junitxml=${BUILD_DIR}/py-test-report.xml --timeout=${TIME_OUT})
+TEST_CMD=($PYTEST_EXECUTABLE -v -ra -W "ignore::UserWarning" -W "ignore::FutureWarning" -W "ignore::DeprecationWarning" --durations=100 --pyargs ${TEST_PACKAGE} --junitxml=${XML_PATH} --timeout=${TIME_OUT})
 
 if [[ $SLOW != 1 || $FAST != 1 ]]; then
     if [[ $SLOW == 1 ]]; then