[AutoBump] Merge with fixes of aca33f17 (Oct 22, needs LLVM Oct 19) (92) #476

Merged 33 commits on Jan 30, 2025
Commits
aca33f1
[TorchToLinalg] Use Op with native channel order for quantized conv2d…
ubfx Oct 22, 2024
55ff110
[MLIR][TORCH] Only unroll prim loop-like ops within a `torch.shape.ca…
zjgarvey Oct 23, 2024
2f9a68c
Add canonicalization pattern for maxpool3d with indices op (#3704)
lingzhiz1998 Oct 23, 2024
d6feb21
Added support for Maxpool (Autopad) (#3774)
sriram-siloai Oct 23, 2024
1259e8a
Add Some Folders For Small Reshape Ops (#3813)
zjgarvey Oct 24, 2024
76209db
Update quantized matmul tests to DQ/Q format supported by fx_importer…
ubfx Oct 24, 2024
ad9dfe9
Fix clang warning about printf format (#3814)
dbabokin Oct 25, 2024
54d9e24
[TorchToLinalg] Implement lowering of torch.aten.rrelu_with_noise and…
bosko-syrmia Oct 25, 2024
2b01f8b
[Tosa] : Add support for negative indices in index.tensor and index.T…
sahas3 Oct 25, 2024
9ab2a15
[Torch] emit upsample_bilinear2d(.vec) ops (#3834)
qingyunqu Oct 30, 2024
16b3bd6
build: manually update PyTorch version and fix CI failure (#3830)
vivekkhandelwal1 Oct 30, 2024
6b58c89
Remove variable used for only assertion (#3837)
Max191 Oct 30, 2024
8b0bf2e
Bump LLVM to llvm/llvm-project@6c64c8a6f3f7 (#3818)
Max191 Oct 30, 2024
f3d07ce
[AutoBump] Merge with fixes of aca33f17 (Oct 22)
mgehre-amd Jan 24, 2025
191404b
[AutoBump] Merge with ad9dfe97 (Oct 25)
mgehre-amd Jan 27, 2025
442bc9e
[AutoBump] Merge with fixes of 54d9e240 (Oct 25)
mgehre-amd Jan 27, 2025
8ea73b7
Update xfail
mgehre-amd Jan 27, 2025
79d99ff
Bump llvm
mgehre-amd Jan 28, 2025
95486f8
Merge branch 'bump_to_aca33f17' into bump_to_ad9dfe97
mgehre-amd Jan 28, 2025
e454904
Merge branch 'bump_to_ad9dfe97' into bump_to_54d9e240
mgehre-amd Jan 28, 2025
6c213d7
Update xfail
mgehre-amd Jan 28, 2025
8e6a9e0
xfail
mgehre-amd Jan 28, 2025
5cbaae6
[AutoBump] Merge with fixes of 2b01f8b7 (Oct 26)
mgehre-amd Jan 28, 2025
38b2e89
Merge pull request #477 from Xilinx/bump_to_ad9dfe97
mgehre-amd Jan 29, 2025
157ed54
Merge pull request #478 from Xilinx/bump_to_54d9e240
mgehre-amd Jan 29, 2025
bb8a5fe
[AutoBump] Merge with fixes of 9ab2a150 (Oct 30)
mgehre-amd Jan 29, 2025
24fcff4
Merge commit '8b0bf2e2' into matthias.bump_to_8b0bf2e2
mgehre-amd Jan 29, 2025
9529dcc
Merge pull request #480 from Xilinx/bump_to_2b01f8b7
mgehre-amd Jan 29, 2025
d5ca144
Merge pull request #481 from Xilinx/bump_to_9ab2a150
mgehre-amd Jan 29, 2025
d55d7b9
Update xfail
mgehre-amd Jan 29, 2025
11e22b6
Merge pull request #482 from Xilinx/matthias.bump_to_8b0bf2e2
mgehre-amd Jan 29, 2025
2928f2e
Merge remote-tracking branch 'origin/feature/backport_ea1_ops' into b…
mgehre-amd Jan 29, 2025
0038abc
Update xfail
mgehre-amd Jan 29, 2025
2 changes: 1 addition & 1 deletion externals/llvm-project
Submodule llvm-project updated 3644 files
2 changes: 1 addition & 1 deletion externals/stablehlo
Submodule stablehlo updated 44 files
+16 −0 BUILD.bazel
+4 −0 CMakeLists.txt
+2 −2 WORKSPACE.bazel
+1 −1 build_tools/llvm_version.txt
+1 −0 docs/generated/stablehlo_linalg_passes.md
+7 −0 docs/generated/stablehlo_passes.md
+1 −0 docs/generated/stablehlo_tosa_passes.md
+6 −2 docs/spec.md
+199 −0 rfcs/20241001-microscaling-formats.md
+19 −0 stablehlo/conversions/linalg/tests/miscellaneous.mlir
+9 −10 stablehlo/conversions/linalg/transforms/TypeConversion.cpp
+2 −19 stablehlo/dialect/Base.cpp
+3 −2 stablehlo/dialect/Base.td
+44 −4 stablehlo/dialect/StablehloOps.cpp
+5 −2 stablehlo/dialect/Version.cpp
+1 −1 stablehlo/dialect/Version.h
+49 −1 stablehlo/dialect/VhloBytecode.cpp
+1 −0 stablehlo/dialect/VhloDialect.td
+24 −0 stablehlo/dialect/VhloTypes.cpp
+12 −0 stablehlo/dialect/VhloTypes.td
+15 −43 stablehlo/reference/Tensor.cpp
+6 −4 stablehlo/reference/Types.cpp
+1 −1 stablehlo/testdata/igamma_float64_20_20_float64_20_20_chlo.mlir
+1 −1 stablehlo/testdata/igammac_float64_20_20_float64_20_20_chlo.mlir
+32 −0 stablehlo/tests/interpret/constant.mlir
+40 −8 stablehlo/tests/ops_stablehlo.mlir
+53 −53 stablehlo/tests/ops_stablehlo_quantized.mlir
+4 −0 stablehlo/tests/ops_stablehlo_roundtrip.mlir
+220 −0 stablehlo/tests/transforms/stablehlo_aggressive_folder.mlir
+550 −526 stablehlo/tests/transforms/stablehlo_aggressive_simplification.mlir
+2,936 −0 stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.1_8_0.mlir
+ stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.1_8_0.mlir.bc
+32 −0 stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.mlir
+35 −0 stablehlo/tests/vhlo/vhlo_to_version_downgrade_invalid.1_7_0.mlir
+15 −0 stablehlo/tests/vhlo/vhlo_to_version_downgrade_patch.mlir
+7 −2 stablehlo/transforms/CMakeLists.txt
+31 −2 stablehlo/transforms/PassUtils.cpp
+27 −12 stablehlo/transforms/PassUtils.h
+5 −0 stablehlo/transforms/Passes.h
+2 −0 stablehlo/transforms/Passes.td
+245 −7 stablehlo/transforms/StablehloAggressiveFolder.cpp
+98 −492 stablehlo/transforms/StablehloAggressiveSimplification.cpp
+281 −0 stablehlo/transforms/StablehloAggressiveSimplificationPatterns.td
+7 −0 stablehlo/transforms/VhloToVersion.cpp
141 changes: 141 additions & 0 deletions include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
@@ -309,6 +309,61 @@ def Torch_AtenRrelu_Op : Torch_Op<"aten.rrelu_", [
}];
}

def Torch_AtenRreluWithNoiseOp : Torch_Op<"aten.rrelu_with_noise", [
AllowsTypeRefinement,
HasValueSemantics,
ReadOnly
]> {
let summary = "Generated op for `aten::rrelu_with_noise : (Tensor, Tensor, Scalar, Scalar, bool, Generator?) -> (Tensor)`";
let arguments = (ins
AnyTorchTensorType:$self,
AnyTorchTensorType:$noise,
AnyTorchScalarType:$lower,
AnyTorchScalarType:$upper,
Torch_BoolType:$training,
AnyTorchOptionalGeneratorType:$generator
);
let results = (outs
AnyTorchOptionalTensorType:$result
);
let hasCustomAssemblyFormat = 1;
let extraClassDefinition = [{
ParseResult AtenRreluWithNoiseOp::parse(OpAsmParser &parser, OperationState &result) {
return parseDefaultTorchOp(parser, result, 6, 1);
}
void AtenRreluWithNoiseOp::print(OpAsmPrinter &printer) {
printDefaultTorchOp(printer, *this, 6, 1);
}
}];
}

def Torch_AtenRreluWithNoise_Op : Torch_Op<"aten.rrelu_with_noise_", [
IsTrailingUnderscoreInplaceVariant,
AllowsTypeRefinement
]> {
let summary = "Generated op for `aten::rrelu_with_noise_ : (Tensor, Tensor, Scalar, Scalar, bool, Generator?) -> (Tensor)`";
let arguments = (ins
Torch_NonValueTensorType:$self,
Torch_NonValueTensorType:$noise,
AnyTorchScalarType:$lower,
AnyTorchScalarType:$upper,
Torch_BoolType:$training,
AnyTorchOptionalGeneratorType:$generator
);
let results = (outs
AnyTorchOptionalNonValueTensorType:$result
);
let hasCustomAssemblyFormat = 1;
let extraClassDefinition = [{
ParseResult AtenRreluWithNoise_Op::parse(OpAsmParser &parser, OperationState &result) {
return parseDefaultTorchOp(parser, result, 6, 1);
}
void AtenRreluWithNoise_Op::print(OpAsmPrinter &printer) {
printDefaultTorchOp(printer, *this, 6, 1);
}
}];
}

def Torch_AtenCeluOp : Torch_Op<"aten.celu", [
AllowsTypeRefinement,
HasValueSemantics,
@@ -7352,6 +7407,7 @@ def Torch_AtenMaxPool3dWithIndicesOp : Torch_Op<"aten.max_pool3d_with_indices",
printDefaultTorchOp(printer, *this, 6, 2);
}
}];
let hasCanonicalizer = 1;
}

def Torch_AtenMaxPool3dWithIndicesBackwardOp : Torch_Op<"aten.max_pool3d_with_indices_backward", [
@@ -8079,6 +8135,7 @@ def Torch_AtenTransposeIntOp : Torch_Op<"aten.transpose.int", [
printDefaultTorchOp(printer, *this, 3, 1);
}
}];
let hasFolder = 1;
}

def Torch_AtenPixelShuffleOp : Torch_Op<"aten.pixel_shuffle", [
@@ -9671,6 +9728,7 @@ def Torch_AtenFlattenUsingIntsOp : Torch_Op<"aten.flatten.using_ints", [
printDefaultTorchOp(printer, *this, 3, 1);
}
}];
let hasFolder = 1;
}

def Torch_AtenUnflattenIntOp : Torch_Op<"aten.unflatten.int", [
@@ -9695,6 +9753,7 @@ def Torch_AtenUnflattenIntOp : Torch_Op<"aten.unflatten.int", [
printDefaultTorchOp(printer, *this, 3, 1);
}
}];
let hasFolder = 1;
let hasCanonicalizer = 1;
}

@@ -14085,6 +14144,59 @@ def Torch_AtenUpsampleNearest2dVecOp : Torch_Op<"aten.upsample_nearest2d.vec", [
}];
}

def Torch_AtenUpsampleBilinear2dOp : Torch_Op<"aten.upsample_bilinear2d", [
AllowsTypeRefinement,
HasValueSemantics,
ReadOnly
]> {
let summary = "Generated op for `aten::upsample_bilinear2d : (Tensor, int[], bool, float?, float?) -> (Tensor)`";
let arguments = (ins
AnyTorchTensorType:$self,
AnyTorchListOfTorchIntType:$output_size,
Torch_BoolType:$align_corners,
AnyTorchOptionalFloatType:$scales_h,
AnyTorchOptionalFloatType:$scales_w
);
let results = (outs
AnyTorchOptionalTensorType:$result
);
let hasCustomAssemblyFormat = 1;
let extraClassDefinition = [{
ParseResult AtenUpsampleBilinear2dOp::parse(OpAsmParser &parser, OperationState &result) {
return parseDefaultTorchOp(parser, result, 5, 1);
}
void AtenUpsampleBilinear2dOp::print(OpAsmPrinter &printer) {
printDefaultTorchOp(printer, *this, 5, 1);
}
}];
}

def Torch_AtenUpsampleBilinear2dVecOp : Torch_Op<"aten.upsample_bilinear2d.vec", [
AllowsTypeRefinement,
HasValueSemantics,
ReadOnly
]> {
let summary = "Generated op for `aten::upsample_bilinear2d.vec : (Tensor, int[]?, bool, float[]?) -> (Tensor)`";
let arguments = (ins
AnyTorchTensorType:$input,
AnyTorchOptionalListOfTorchIntType:$output_size,
Torch_BoolType:$align_corners,
AnyTorchOptionalListOfTorchFloatType:$scale_factors
);
let results = (outs
AnyTorchOptionalTensorType:$result
);
let hasCustomAssemblyFormat = 1;
let extraClassDefinition = [{
ParseResult AtenUpsampleBilinear2dVecOp::parse(OpAsmParser &parser, OperationState &result) {
return parseDefaultTorchOp(parser, result, 4, 1);
}
void AtenUpsampleBilinear2dVecOp::print(OpAsmPrinter &printer) {
printDefaultTorchOp(printer, *this, 4, 1);
}
}];
}

def Torch_AtenScaledDotProductAttentionOp : Torch_Op<"aten.scaled_dot_product_attention", [
AllowsTypeRefinement,
HasValueSemantics,
@@ -16861,6 +16973,35 @@ def Torch_AtenLeakyReluBackwardOp : Torch_Op<"aten.leaky_relu_backward", [
}];
}

def Torch_AtenRreluWithNoiseBackwardOp : Torch_Op<"aten.rrelu_with_noise_backward", [
AllowsTypeRefinement,
HasValueSemantics,
ReadOnly
]> {
let summary = "Generated op for `aten::rrelu_with_noise_backward : (Tensor, Tensor, Tensor, Scalar, Scalar, bool, bool) -> (Tensor)`";
let arguments = (ins
AnyTorchTensorType:$grad_output,
AnyTorchTensorType:$self,
AnyTorchTensorType:$noise,
AnyTorchScalarType:$lower,
AnyTorchScalarType:$upper,
Torch_BoolType:$training,
Torch_BoolType:$self_is_result
);
let results = (outs
AnyTorchOptionalTensorType:$result
);
let hasCustomAssemblyFormat = 1;
let extraClassDefinition = [{
ParseResult AtenRreluWithNoiseBackwardOp::parse(OpAsmParser &parser, OperationState &result) {
return parseDefaultTorchOp(parser, result, 7, 1);
}
void AtenRreluWithNoiseBackwardOp::print(OpAsmPrinter &printer) {
printDefaultTorchOp(printer, *this, 7, 1);
}
}];
}

def Torch_AtenQuantizePerChannelOp : Torch_Op<"aten.quantize_per_channel", [
AllowsTypeRefinement,
HasValueSemantics,
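
As context for the ODS additions above: ops generated this way get a standard builder, so a lowering or decomposition pattern can construct the new `aten.rrelu_with_noise` op directly. The sketch below is not part of this diff; the function and value names are hypothetical, and it only illustrates the operand order declared in the `arguments` list.

```cpp
// Illustrative sketch (not from this PR): constructing the newly added
// aten.rrelu_with_noise op from C++ pattern code. Operand order follows the
// ODS declaration: self, noise, lower, upper, training, generator.
#include "mlir/IR/PatternMatch.h"
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"

using namespace mlir;
using namespace mlir::torch::Torch;

static Value createRreluWithNoise(PatternRewriter &rewriter, Location loc,
                                  Type resultType, Value self, Value noise,
                                  Value lower, Value upper, Value training) {
  // Pass the !torch.none value for the optional generator operand.
  Value generator = rewriter.create<ConstantNoneOp>(loc);
  return rewriter.create<AtenRreluWithNoiseOp>(
      loc, resultType, self, noise, lower, upper, training, generator);
}
```
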
32 changes: 29 additions & 3 deletions lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp
@@ -1087,9 +1087,6 @@ void mlir::torch::onnx_c::populateDefaultDomainGtoP(
if (binder.customOpNameStringAttr(autoPad, "auto_pad", "NOTSET"))
return rewriter.notifyMatchFailure(binder.op,
"auto_pad bind failure");
if (autoPad != "NOTSET")
return rewriter.notifyMatchFailure(
binder.op, "unsupported conversion: auto_pad != NOTSET");

Torch::ValueTensorType resultTypeOut;
Value operand;
@@ -1136,13 +1133,42 @@ void mlir::torch::onnx_c::populateDefaultDomainGtoP(
return rewriter.notifyMatchFailure(binder.op,
"dilations bind failure");

// set default padding
if (padding.empty())
padding.resize(spatial, 0);
if (strides.empty())
strides.resize(spatial, 1);
if (dilations.empty())
dilations.resize(spatial, 1);

auto inputTensorType = cast<Torch::ValueTensorType>(operand.getType());

// Padding for the beginning and end along each spatial axis; it can
// take any value greater than or equal to 0. The values represent the
// number of pixels added to the beginning and end of the corresponding
// axis. The pads format is [x1_begin, x2_begin, ..., x1_end, x2_end, ...],
// where xi_begin is the number of pixels added at the beginning of axis i
// and xi_end the number of pixels added at the end of axis i.
if (autoPad != "NOTSET" && autoPad != "VALID") {
const bool isSameLower = autoPad == "SAME_LOWER";
ArrayRef<int64_t> inputShape = inputTensorType.getSizes();
padding.resize_for_overwrite(2 * spatial);
for (unsigned dimIdx = 0; dimIdx < spatial; dimIdx++) {
const int64_t dilatedKernelSize =
dilations[dimIdx] * (kernel[dimIdx] - 1) + 1;
int64_t totalPad = ((inputShape[dimIdx + 2] + strides[dimIdx] - 1) /
strides[dimIdx] -
1) *
strides[dimIdx] +
dilatedKernelSize - inputShape[dimIdx + 2];
totalPad = totalPad >= 0 ? totalPad : 0;
padding[dimIdx] =
isSameLower ? ((totalPad + 1) / 2) : (totalPad / 2);
padding[spatial + dimIdx] = totalPad - padding[dimIdx];
}
}

// If the padding is symmetric we can push the padding operation to the
// torch operator.
if (padding.size() == static_cast<size_t>(2 * spatial)) {
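
The new `auto_pad` handling above computes ONNX SAME_UPPER/SAME_LOWER padding per spatial dimension. As a standalone sanity check of that arithmetic (not code from this PR, with made-up sizes), a minimal sketch:

```cpp
// Standalone illustration of the SAME_UPPER / SAME_LOWER padding arithmetic
// used by the MaxPool auto_pad support above. Example sizes are hypothetical.
#include <cstdint>
#include <iostream>

int main() {
  // One spatial dimension: input extent 10, kernel 4, stride 3, dilation 1.
  const int64_t in = 10, kernel = 4, stride = 3, dilation = 1;
  const bool isSameLower = false; // SAME_UPPER puts the extra pixel at the end.

  const int64_t dilatedKernel = dilation * (kernel - 1) + 1;
  // Output positions = ceil(in / stride); total padding is whatever is needed
  // so the last window still fits inside the padded input.
  int64_t totalPad =
      ((in + stride - 1) / stride - 1) * stride + dilatedKernel - in;
  totalPad = totalPad < 0 ? 0 : totalPad;

  const int64_t padBegin = isSameLower ? (totalPad + 1) / 2 : totalPad / 2;
  const int64_t padEnd = totalPad - padBegin;

  // Prints: totalPad=3 begin=1 end=2
  std::cout << "totalPad=" << totalPad << " begin=" << padBegin
            << " end=" << padEnd << "\n";
  return 0;
}
```
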
59 changes: 31 additions & 28 deletions lib/Conversion/TorchToLinalg/Linear.cpp
@@ -1125,54 +1125,57 @@ class ConvertAtenConvolutionOp : public OpConversionPattern<AtenConvolutionOp> {
}

if (numGroups == 1 && inputZp) {
// The quantized version uses a different channel ordering so we need to
// permute the tensors in order to use the existing path. We should
// eventually directly support this channel ordering.
llvm::SmallVector<int64_t> inPerms, weightPerms;
inPerms.push_back(0); // N stays at the front for input.
// Then we expect the spatial dimensions
for (size_t i = 0; i < numSpatialDims; ++i) {
inPerms.push_back(i + 2);
weightPerms.push_back(i + 2);
}
inPerms.push_back(1);
weightPerms.append({1, 0});

paddedInput = transposeValue(op.getLoc(), paddedInput, inPerms, rewriter);
weight = transposeValue(op.getLoc(), weight, weightPerms, rewriter);
outputTensor =
transposeValue(op.getLoc(), outputTensor, inPerms, rewriter);

switch (numSpatialDims) {
case 2:
conv = rewriter
.create<linalg::Conv2DNhwcHwcfQOp>(
.create<linalg::Conv2DNchwFchwQOp>(
loc, outputTensor.getType(),
ValueRange{paddedInput, weight, inputZp, weightZp},
outputTensor, stridesAttr, dilationAttr)
.getResult(0);
break;
case 3:
case 3: {
// The quantized version uses a different channel ordering so we need to
// permute the tensors in order to use the existing path. We should
// eventually directly support this channel ordering.
llvm::SmallVector<int64_t> inPerms, weightPerms;
inPerms.push_back(0); // N stays at the front for input.
// Then we expect the spatial dimensions
for (size_t i = 0; i < numSpatialDims; ++i) {
inPerms.push_back(i + 2);
weightPerms.push_back(i + 2);
}
inPerms.push_back(1);
weightPerms.append({1, 0});

paddedInput =
transposeValue(op.getLoc(), paddedInput, inPerms, rewriter);
weight = transposeValue(op.getLoc(), weight, weightPerms, rewriter);
outputTensor =
transposeValue(op.getLoc(), outputTensor, inPerms, rewriter);

conv = rewriter
.create<linalg::Conv3DNdhwcDhwcfQOp>(
loc, outputTensor.getType(),
ValueRange{paddedInput, weight, inputZp, weightZp},
outputTensor, stridesAttr, dilationAttr)
.getResult(0);

llvm::SmallVector<int64_t> outPerms;
outPerms.push_back(0);
outPerms.push_back(inPerms.size() - 1);
for (size_t i = 0; i < numSpatialDims; ++i) {
outPerms.push_back(i + 1);
}
conv = transposeValue(op.getLoc(), conv, outPerms, rewriter);

break;
}
default:
return rewriter.notifyMatchFailure(
op, "unimplemented: only 1D, 2D, and 3D convolution supported");
};

llvm::SmallVector<int64_t> outPerms;
outPerms.push_back(0);
outPerms.push_back(inPerms.size() - 1);
for (size_t i = 0; i < numSpatialDims; ++i) {
outPerms.push_back(i + 1);
}
conv = transposeValue(op.getLoc(), conv, outPerms, rewriter);

Type newResultType = getTypeConverter()->convertType(op.getType());
if (accumulatorDType != resultDTy) {
Type resultElementType =
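
With this change the quantized 2-D path emits `linalg.conv_2d_nchw_fchw_q` in the native NCHW order, so only the 3-D case still permutes to channels-last and back. A standalone sketch (not from this PR) of the permutation vectors that branch builds when `numSpatialDims == 3`:

```cpp
// Standalone illustration (not part of the PR) of the layout permutations the
// quantized 3-D convolution branch applies: NCDHW -> NDHWC for input/output,
// FCDHW -> DHWCF for the weight, and back to NCDHW for the result.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const size_t numSpatialDims = 3;

  std::vector<int64_t> inPerms, weightPerms;
  inPerms.push_back(0); // N stays at the front for the input.
  for (size_t i = 0; i < numSpatialDims; ++i) {
    inPerms.push_back(i + 2);
    weightPerms.push_back(i + 2);
  }
  inPerms.push_back(1);
  weightPerms.insert(weightPerms.end(), {1, 0});

  // Permutation that moves the NDHWC result back to NCDHW.
  std::vector<int64_t> outPerms{0, static_cast<int64_t>(inPerms.size()) - 1};
  for (size_t i = 0; i < numSpatialDims; ++i)
    outPerms.push_back(i + 1);

  auto dump = [](const char *name, const std::vector<int64_t> &perm) {
    std::cout << name << ":";
    for (int64_t d : perm)
      std::cout << " " << d;
    std::cout << "\n";
  };
  // Prints: inPerms: 0 2 3 4 1, weightPerms: 2 3 4 1 0, outPerms: 0 4 1 2 3
  dump("inPerms", inPerms);
  dump("weightPerms", weightPerms);
  dump("outPerms", outPerms);
  return 0;
}
```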