Merge branch 'develop' into bf16_verify_tests

ROCm · Jan 28, 2025 · f97da3c · f97da3c
2 parents 17dacdb + 879f306
commit f97da3c
Show file tree

Hide file tree

Showing 13 changed files with 245 additions and 272 deletions.
diff --git a/examples/nlp/python_bert_squad/requirements_bertsquad.txt b/examples/nlp/python_bert_squad/requirements_bertsquad.txt
@@ -1,7 +1,7 @@
 #####################################################################################
 # The MIT License (MIT)
 #
-# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+# Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -24,4 +24,5 @@
 tensorflow
 onnxruntime
 tokenizers==0.11.1; python_version == "3.6"
-tokenizers; python_version > "3.6"
+tokenizers==0.20.3; python_version == "3.8"
+tokenizers; python_version > "3.8"
diff --git a/requirements.txt b/requirements.txt
@@ -28,4 +28,4 @@ pybind/pybind11@3e9dfa2866941655c56877882565e7577de6fc7b --build
 msgpack/msgpack-c@cpp-3.3.0 -DMSGPACK_BUILD_TESTS=Off
 sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
 ROCm/composable_kernel@b7775add2d28251674d81e220cd4a857b90b997a -DCK_BUILD_JIT_LIB=On -DCMAKE_POSITION_INDEPENDENT_CODE=On
-ROCm/rocMLIR@9bffae0f4929e14d4d269af4af19e7d0482dd67a -DBUILD_FAT_LIBROCKCOMPILER=On
+ROCm/rocMLIR@112f8f46b38e4356cea1e44c6be373dbd8804a6d -DBUILD_FAT_LIBROCKCOMPILER=On
diff --git a/src/netron_output.cpp b/src/netron_output.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -63,12 +63,19 @@ int get_onnx_type(shape::type_t s_type)
 
 auto make_attribute(const migraphx::value& val)
 {
-    value attribute;
+    value attribute     = value(std::unordered_map<std::string, value>());
     attribute["name"] = val.get_key();
     auto val_string   = val.to<std::string>();
-    val_string        = val_string.substr(val_string.find(":") + 1);
-    attribute["s"]    = base64_encode(val_string);
-    attribute["type"] = "STRING";
+    std::string sub_str = val.get_key() + ":";
+    auto find_key       = val_string.find(sub_str);
+    if(find_key != std::string::npos)
+    {
+        val_string = val_string.substr(find_key + sub_str.length() + 1);
+    }
+    // TODO: doesn't work for some reason with Netron now
+    // attribute["s"]    = base64_encode(val_string);
+    // attribute["type"] = "STRING";
+    attribute["docString"] = val_string;
     return attribute;
 }
 
@@ -78,7 +85,7 @@ auto make_onnx_json_node(instruction_ref ins,
 {
     value node;
     // TODO add support for module inputs
-    value input_arr;
+    value input_arr = value({});
     for(instruction_ref input_ins : ins->inputs())
     {
         auto name = input_ins->name();
@@ -96,7 +103,7 @@ auto make_onnx_json_node(instruction_ref ins,
             input_arr.push_back(ins_uids.at(input_ins) + "->" + ins_uids.at(ins));
         }
     }
-    value output_arr;
+    value output_arr = value({});
     for(instruction_ref output_ins : ins->outputs())
     {
         if(output_ins->name() == "@return")
@@ -108,15 +115,15 @@ auto make_onnx_json_node(instruction_ref ins,
             output_arr.push_back(ins_uids.at(ins) + "->" + ins_uids.at(output_ins));
         }
     }
-    node["input"]  = input_arr;
+    node["input"]          = input_arr;
     node["output"] = output_arr;
     node["name"]   = ins_uids.at(ins);
     node["opType"] = ins->name();
-    value op_attribute_arr;
+    value op_attribute_arr = value({});
     auto op_value = ins->get_operator().to_value();
     std::for_each(op_value.begin(), op_value.end(), [&](auto v) {
         const std::string& attr_key = v.get_key();
-        if(v.is_binary())
+        if(v.is_binary() or attr_key == "code_object")
         {
             return;
         }
@@ -151,17 +158,17 @@ auto make_onnx_json_literal(instruction_ref ins,
 auto make_onnx_json_shape(const shape& s)
 {
     value ret;
-    value dim;
-    auto shape_lens = s.lens();
-    std::transform(shape_lens.begin(),
-                   shape_lens.end(),
-                   std::back_inserter(dim),
-                   [](std::size_t len) { return len; });
+    value dim = value({});
+    for(std::size_t len : s.lens())
+    {
+        // cppcheck-suppress useStlAlgorithm
+        dim.push_back({{"dimValue", len}});
+    }
     ret["dim"] = dim;
     return ret;
 }
 
-// ONNX graph edges called "valuetype"
+// ONNX graph edges called "valueType"
 auto make_onnx_json_edge(instruction_ref ins,
                          instruction_ref out_ins,
                          std::unordered_map<instruction_ref, std::string> ins_uids)
@@ -207,8 +214,11 @@ std::unordered_map<instruction_ref, std::string> make_ins_uids(const module& mod
 
 value make_graph(const module* mod)
 {
-    value graph = {
-        {"node", {}}, {"initializer", {}}, {"input", {}}, {"output", {}}, {"valueInfo", {}}};
+    value graph   = {{"node", value({})},
+                     {"initializer", value({})},
+                     {"input", value({})},
+                     {"output", value({})},
+                     {"valueInfo", value({})}};
     auto ins_uids = make_ins_uids(*mod);
     for(auto ins = mod->begin(); ins != mod->end(); ++ins)
     {
@@ -251,15 +261,17 @@ std::string make_netron_output(const program& prog)
 {
     value output;
     auto prog_value           = prog.to_value();
-    output["irVersion"]       = prog_value.at("version").to<std::string>();
+    // ONNX IR version 6
+    // TODO: investigate how this affects things
+    output["irVersion"]       = 6;
     output["producerName"]    = "AMDMIGraphX";
     output["producerVersion"] = prog_value.at("migraphx_version").to<std::string>();
     for(auto& mod : prog.get_modules())
     {
         auto graph      = make_graph(mod);
         output["graph"] = graph;
     }
-    return to_json_string(output);
+    return to_pretty_json_string(output, 4);
 }
 
 } // namespace MIGRAPHX_INLINE_NS

diff --git a/src/onnx/parse_resize.cpp b/src/onnx/parse_resize.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -21,61 +21,82 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+
 #include <migraphx/onnx/op_parser.hpp>
 #include <migraphx/onnx/checks.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/op/resize.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/make_op.hpp>
+#include <vector>
+#include <map>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace onnx {
 
+/*
+ * Algorithm of calc_neighbor_points():
+ * Input: vvv_ind, a collection of neighbors per resized dimension as:
+ *               layer-1: (# resized dimensions, vector)
+ *               layer-2: (A vector of 2: [0] = lo, [1] = hi)
+ *               layer-3: Neighbor index of every pixel in that output dimension (vector)
+ *        in_s,  the original input tensor shape (vector)
+ *        out_s, the output tensor shape (vector)
+ *    resized_m, lens indices that have to be resized (map)
+ *
+ * Output: per resized pixel, its neighboring hi/lo indexes (vector): all permutations.
+ * This api stitches all the neighbors (for every dimension) for a resized pixel,
+ * to yield its neighbor index w.r.t. the input shape, in_s.
+ */
+
 static std::vector<int>
 calc_neighbor_points(const std::vector<std::vector<std::vector<std::size_t>>>& vvv_ind,
-                     int i_dim,
-                     std::vector<std::vector<std::size_t>> vec_dims,
-                     const shape& in_s)
+                     const shape& in_s,
+                     const shape& out_s,
+                     const std::map<size_t, size_t>& resized_m)
 {
-    if(i_dim == vvv_ind.size())
-    {
-        std::vector<int> vec_ind(vec_dims.size());
-        std::transform(vec_dims.begin(), vec_dims.end(), vec_ind.begin(), [&](auto idx) {
-            return static_cast<int>(in_s.index(idx));
-        });
-        return vec_ind;
-    }
+    std::size_t ndims       = out_s.ndim();
+    const auto& strides     = out_s.strides();
+    std::size_t elements_ct = vvv_ind[0][0].size();
 
-    const auto& vv_lo = vvv_ind[i_dim][0];
-    std::vector<std::vector<std::size_t>> vec_dims1;
-    for(std::size_t start = 0; start < vec_dims.size(); start += vv_lo.size())
-    {
-        std::transform(vv_lo.begin(),
-                       vv_lo.end(),
-                       vec_dims.begin() + start,
-                       std::back_inserter(vec_dims1),
-                       [](auto i, auto dim) {
-                           dim.push_back(i);
-                           return dim;
-                       });
-    }
+    // This function computes, for each element, all permutations of its neighbor indices into a
+    // perm block in one go. (Instead of computing each permutation in isolation per element)
+    size_t permutations = 1u << resized_m.size();
+    std::vector<std::vector<std::size_t>> perm_blk(permutations, std::vector<size_t>(strides));
+
+    // final outputted vector: permutations of neighbors.
+    std::vector<int> out_idx_vec(permutations * elements_ct);
 
-    const auto& vv_hi = vvv_ind[i_dim][1];
-    for(std::size_t start = 0; start < vec_dims.size(); start += vv_hi.size())
+    for(size_t e_idx = 0; e_idx < elements_ct; ++e_idx)
     {
-        std::transform(vv_hi.begin(),
-                       vv_hi.end(),
-                       vec_dims.begin() + start,
-                       std::back_inserter(vec_dims1),
-                       [](auto i, auto dim) {
-                           dim.push_back(i);
-                           return dim;
-                       });
+        size_t t_idx = e_idx;
+        for(size_t l_idx = 0; l_idx != ndims; ++l_idx)
+        {
+            auto entry = resized_m.find(l_idx);
+            if(entry != resized_m.end())
+            {
+                size_t hi_cmp_bit = 1u << entry->second;
+                auto lo           = vvv_ind[entry->second][0][e_idx];
+                auto hi           = vvv_ind[entry->second][1][e_idx];
+                for(size_t i = 0; i < permutations; i++)
+                    perm_blk[i][l_idx] = ((i & hi_cmp_bit) != 0) ? hi : lo;
+            }
+            else
+            {
+                size_t idx = t_idx / strides[l_idx];
+                // no permutations in an unmodified lens index, so idx is copied over:
+                for(size_t i = 0; i < permutations; i++)
+                    perm_blk[i][l_idx] = idx;
+            }
+            t_idx %= strides[l_idx];
+        }
+        // write out the permuted indices, calculated off the perm_blk:
+        for(size_t i = 0; i < permutations; i++)
+            out_idx_vec[e_idx + elements_ct * i] = in_s.index(perm_blk[i]);
     }
-    vec_dims.clear();
-    return calc_neighbor_points(vvv_ind, i_dim + 1, std::move(vec_dims1), in_s);
+    return out_idx_vec;
 }
 
 static std::string get_coord_trans_mode(const onnx_parser::attribute_map& attr)
@@ -350,7 +371,6 @@ struct parse_resize : op_parser<parse_resize>
                                ": linear mode not supported for non-constant inputs");
 
             shape out_s{in_s.type(), out_lens};
-            std::size_t out_elements = out_s.elements();
 
             // reshape input to one-dimension
             std::vector<int64_t> rsp_lens = {static_cast<int64_t>(in_s.elements())};
@@ -359,41 +379,55 @@ struct parse_resize : op_parser<parse_resize>
             auto nearest_floor = op::resize::get_nearest_op("floor");
             auto nearest_ceil  = op::resize::get_nearest_op("ceil");
 
-            // get the number of dimensions
-            std::size_t n_dim = out_lens.size();
+            std::vector<size_t> resized_axes; // vector of dimensions to be resized
+            std::size_t out_elements = 1;     // total number of elements to be resized
+            size_t resized_ct        = 0;
+            std::map<size_t, size_t> resized_m; // modified indices --> vvv_ind index below
+            for(std::size_t axis = 0; axis != out_lens.size(); ++axis)
+            {
+                out_elements *= out_lens[axis];
+                if(in_lens[axis] == out_lens[axis])
+                    continue;
+                resized_axes.push_back(axis);
+                resized_m[axis] = resized_ct++;
+            }
+
+            // Neighbor indices. For an axis. Two sets (floor/ceil) per element:
             std::vector<std::vector<std::size_t>> vv_ind(2, std::vector<std::size_t>(out_elements));
-            std::vector<std::vector<std::vector<std::size_t>>> vvv_ind(n_dim, vv_ind);
-            std::vector<std::vector<float>> delta(n_dim, std::vector<float>(out_elements));
+            // Neighbor indices. For all resized axes:
+            std::vector<std::vector<std::vector<std::size_t>>> vvv_ind(resized_ct, vv_ind);
+            // Delta list. For each resized axis - per element.
+            std::vector<std::vector<float>> delta(resized_ct, std::vector<float>(out_elements));
 
-            shape_for_each(out_s, [&](const auto& out_idx_v, size_t out_idx) {
-                for(auto ii = 0; ii < in_lens.size(); ++ii)
+            shape_for_each(out_s, [&](const auto& out_idx_v, std::size_t out_idx) {
+                for(size_t ii = 0; ii != resized_ct; ++ii)
                 {
-                    auto idx_val = idx_op(in_lens[ii], out_lens[ii], out_idx_v[ii], vec_scale[ii]);
-                    vvv_ind[ii][0][out_idx] = nearest_floor(in_lens[ii], idx_val);
-                    vvv_ind[ii][1][out_idx] = nearest_ceil(in_lens[ii], idx_val);
+                    auto idx = resized_axes[ii];
+                    auto idx_val =
+                        idx_op(in_lens[idx], out_lens[idx], out_idx_v[idx], vec_scale[idx]);
+                    vvv_ind[ii][0][out_idx] = nearest_floor(in_lens[idx], idx_val);
+                    vvv_ind[ii][1][out_idx] = nearest_ceil(in_lens[idx], idx_val);
                     delta[ii][out_idx]      = idx_val - vvv_ind[ii][0][out_idx];
                 }
             });
 
-            auto ind = calc_neighbor_points(
-                vvv_ind, 0, std::vector<std::vector<std::size_t>>(out_elements), in_s);
-            auto ind_lens = out_lens;
-            ind_lens[0] *= (std::size_t{1} << n_dim);
-            shape ind_s{shape::int32_type, ind_lens};
+            auto ind = calc_neighbor_points(vvv_ind, in_s, out_s, resized_m);
+
+            auto dim_lens = out_lens;
+            // indices matrix size grows 2x per resized-axis:
+            dim_lens[0] *= (1u << resized_ct);
+            shape ind_s{shape::int32_type, dim_lens};
             auto ins_ind = info.add_literal(literal(ind_s, ind));
             auto data    = info.add_instruction(make_op("gather", {{"axis", 0}}), rsp, ins_ind);
 
-            auto dim_lens = out_lens;
-            dim_lens[0] *= (std::size_t{1} << (n_dim - 1));
-            for(std::size_t i = 0; i < n_dim; ++i)
+            for(auto idx = resized_ct; idx != 0u; --idx)
             {
+                dim_lens[0] /= 2; // halved for 2 slices of data (hi & low below)
                 shape dim_s{shape::float_type, dim_lens};
-                const auto& dim_delta = delta[n_dim - i - 1];
+                const auto& dim_delta = delta[idx - 1];
                 std::vector<float> delta_data;
                 for(std::size_t j = 0; j < dim_lens[0] / out_lens[0]; ++j)
-                {
                     delta_data.insert(delta_data.begin(), dim_delta.begin(), dim_delta.end());
-                }
                 auto ins_delta = info.add_literal(dim_s, delta_data);
 
                 // slice the data
@@ -408,9 +442,7 @@ struct parse_resize : op_parser<parse_resize>
                 auto diff = info.add_instruction(make_op("sub"), hi, low);
                 auto ddf  = info.add_instruction(make_op("mul"), diff, ins_delta);
                 data      = info.add_instruction(make_op("add"), ddf, low);
-                dim_lens[0] /= 2;
             }
-
             return data;
         }
     }

diff --git a/src/propagate_constant.cpp b/src/propagate_constant.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -48,6 +48,8 @@ bool skip_propagate(instruction_ref ins)
     auto alias = instruction::get_output_alias(ins, true);
     if(alias != ins)
         return skip_propagate(alias);
+    if(ins->is_undefined())
+        return true;
     return false;
 }