From 4290572d388e1aeb1827f6fc892fb87e9e847e1c Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Tue, 23 Jan 2024 20:41:10 -0800 Subject: [PATCH] A number of fixups to enable lowering more complex data layouts in DMA BDs (#392) * Fix up index issues for airrt dma offset, wrap and stride lists * Have air-renumber-dma renumber both air.dma and air.channel * Make default dma renumber mode 'global' * Remove memref shape check when linking to shimdmaallocs * Utilize repeat count dimension in aiex.ipu.dma whenever possible --- mlir/include/air/Transform/Passes.td | 2 +- mlir/lib/Conversion/AIRLoweringPass.cpp | 9 +- mlir/lib/Conversion/AIRRtToIpuPass.cpp | 27 +++-- mlir/lib/Conversion/AIRToAIEPass.cpp | 16 +-- mlir/lib/Util/Util.cpp | 4 +- .../Conversion/AIRLowering/air_to_ipu.mlir | 114 ++++++++++++++++-- .../Conversion/AIRRtToIpu/airrt_to_ipu.mlir | 51 ++++++++ .../AIRMiscPasses/air_renumber_dma.mlir | 2 +- 8 files changed, 182 insertions(+), 43 deletions(-) diff --git a/mlir/include/air/Transform/Passes.td b/mlir/include/air/Transform/Passes.td index 9defecf87..7f41bc627 100644 --- a/mlir/include/air/Transform/Passes.td +++ b/mlir/include/air/Transform/Passes.td @@ -1113,7 +1113,7 @@ def AIRRenumberDmaIdPass : Pass<"air-renumber-dma", "func::FuncOp"> { let summary = "Renumber air dma op ids"; let constructor = "xilinx::air::createAIRRenumberDmaIdPass()"; let options = [ - Option<"clMode", "mode", "std::string", /*default=*/"\"herd\"", + Option<"clMode", "mode", "std::string", /*default=*/"\"global\"", "In which hierarchy level to renumber the dma ops">, ]; } diff --git a/mlir/lib/Conversion/AIRLoweringPass.cpp b/mlir/lib/Conversion/AIRLoweringPass.cpp index 2a3287f69..1a34460cb 100644 --- a/mlir/lib/Conversion/AIRLoweringPass.cpp +++ b/mlir/lib/Conversion/AIRLoweringPass.cpp @@ -463,8 +463,6 @@ AIRChannelInterfaceToAIRRtConversionImpl(OpBuilder builder, auto ctx = thisOp->getContext(); MemRefType thisMemrefType = thisOp.getMemref().getType().cast(); - MemRefType theOtherMemrefType = - theOtherOp.getMemref().getType().cast(); bool thisOpIsInShim = thisMemrefType.getMemorySpaceAsInt() == (int)xilinx::air::MemorySpace::L3; @@ -517,19 +515,20 @@ AIRChannelInterfaceToAIRRtConversionImpl(OpBuilder builder, SmallVector lengths(4, one); SmallVector strides(3, zero); - int idx = 4 - thisMemrefType.getRank(); + int idx = 4 - thisOp.getOffsets().size(); for (auto o : thisOp.getOffsets()) { offsets[idx++] = builder.create(loc, IntegerType::get(ctx, 64), o); } - idx = 4 - theOtherMemrefType.getRank(); + idx = 4 - thisOp.getStrides().size(); auto op_strides = thisOp.getStrides(); if (op_strides.size()) for (auto o : op_strides.drop_back()) strides[idx++] = builder.create( loc, IntegerType::get(ctx, 64), o); - idx = 4 - thisMemrefType.getRank(); + idx = 4 - std::max(thisOp.getSizes().size(), + (unsigned long)thisMemrefType.getRank()); // If sizes field is empty, then infer sizes from memref shape if (thisOp.getSizes().empty()) for (auto d : air::getTensorShape(thisMemrefType)) diff --git a/mlir/lib/Conversion/AIRRtToIpuPass.cpp b/mlir/lib/Conversion/AIRRtToIpuPass.cpp index 8d7f94cc5..c0fae50a4 100644 --- a/mlir/lib/Conversion/AIRRtToIpuPass.cpp +++ b/mlir/lib/Conversion/AIRRtToIpuPass.cpp @@ -407,14 +407,25 @@ specializeAffineForInAIRRtDmaWrapAndStride(OpBuilder builder, assert(wraps.size() == 4); assert(strides.size() == 3); - // Temporary hack: stride currently cannot implement repeat with stride = 0. - // This is to be removed when that constraint is gone. - for (unsigned i = 0; i < strides.size() - 1; i++) { - if (mlir::getConstantIntValue(strides[i]) && - *mlir::getConstantIntValue(strides[i])) { - for (unsigned j = i + 1; j < strides.size(); j++) { - if (mlir::getConstantIntValue(strides[j]) && - !*mlir::getConstantIntValue(strides[j])) + // Stride = 0 means repeat that dimension. If highest dimension (dim 0) is not + // used, then move the repeat dimension to dim 0, which is the only dim with + // repeat capability. Else, NYI. Fall back to unrolling BDs. + for (unsigned i = 1; i < strides.size(); i++) { + if (mlir::getConstantIntValue(wraps[i]) && + mlir::getConstantIntValue(strides[i])) { + if (*mlir::getConstantIntValue(wraps[i]) > 1 && + !*mlir::getConstantIntValue(strides[i])) { + // This is a repeat dimension. + if (mlir::getConstantIntValue(wraps[0]) && + *mlir::getConstantIntValue(wraps[0]) == 1) { + // Move the repeat dimension i to dimension 0. + auto tmp = wraps[0]; + wraps[0] = wraps[i]; + wraps[i] = tmp; + tmp = strides[0]; + strides[0] = strides[i]; + strides[i] = tmp; + } else return failure(); } } diff --git a/mlir/lib/Conversion/AIRToAIEPass.cpp b/mlir/lib/Conversion/AIRToAIEPass.cpp index c749b4ad7..381a550a9 100644 --- a/mlir/lib/Conversion/AIRToAIEPass.cpp +++ b/mlir/lib/Conversion/AIRToAIEPass.cpp @@ -1889,19 +1889,9 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase { MemRefType memref_ty, StringAttr dma_name_attr) { for (auto the_other_chan_o : getTheOtherChannelOpThroughSymbol(chan_o)) { - bool areEqualVecs = true; - if (getTensorShape(memref_ty).size() != - the_other_chan_o.getSizes().size()) - areEqualVecs = false; - else - for (unsigned i = 0; i < getTensorShape(memref_ty).size(); i++) - if (getTensorShape(memref_ty)[i] != - mlir::getConstantIntValue(the_other_chan_o.getSizes()[i])) - areEqualVecs = false; - if (areEqualVecs) - the_other_chan_o->setAttr( - "metadata", FlatSymbolRefAttr::get(the_other_chan_o->getContext(), - dma_name_attr)); + the_other_chan_o->setAttr( + "metadata", FlatSymbolRefAttr::get(the_other_chan_o->getContext(), + dma_name_attr)); } } diff --git a/mlir/lib/Util/Util.cpp b/mlir/lib/Util/Util.cpp index 906cee419..dc76caca7 100644 --- a/mlir/lib/Util/Util.cpp +++ b/mlir/lib/Util/Util.cpp @@ -285,7 +285,7 @@ void air::renumberDmaOps(func::FuncOp func, std::string mode) { if (mode == "global") { // Renumber DMA ops per entire module func->walk([&](Operation *func_dma) { - if (isa(func_dma)) { + if (isa(func_dma)) { func_dma->setAttr( "id", mlir::IntegerAttr::get( @@ -297,7 +297,7 @@ void air::renumberDmaOps(func::FuncOp func, std::string mode) { id = 0; // Renumber DMA ops per air herd herd->walk([&](Operation *herd_dma) { - if (isa(herd_dma)) { + if (isa(herd_dma)) { herd_dma->setAttr( "id", mlir::IntegerAttr::get( diff --git a/mlir/test/Conversion/AIRLowering/air_to_ipu.mlir b/mlir/test/Conversion/AIRLowering/air_to_ipu.mlir index 8cbabda21..196eebe7b 100644 --- a/mlir/test/Conversion/AIRLowering/air_to_ipu.mlir +++ b/mlir/test/Conversion/AIRLowering/air_to_ipu.mlir @@ -5,18 +5,18 @@ // //===----------------------------------------------------------------------===// -// RUN: air-opt %s -air-to-std -cse --split-input-file | FileCheck %s +// RUN: air-opt %s -air-to-std -canonicalize -cse --split-input-file | FileCheck %s // CHECK-LABEL: aie.device(ipu) // CHECK: {sym_name = "segment0"} // CHECK: func.func @func0(%[[VAL_0:.*]]: memref<64xi32>, %[[VAL_1:.*]]: memref<64xi32>) -// CHECK: %[[CST_0:.*]] = arith.constant 0 : i64 -// CHECK: %[[CST_1:.*]] = arith.constant 1 : i64 -// CHECK: %[[CST_2:.*]] = arith.constant 2 : i32 -// CHECK: %[[CST_64:.*]] = arith.constant 64 : i64 +// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0 : i64 +// CHECK-DAG: %[[CST_1:.*]] = arith.constant 1 : i64 +// CHECK-DAG: %[[CST_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[CST_7:.*]] = arith.constant 7 : i32 +// CHECK-DAG: %[[CST_64:.*]] = arith.constant 64 : i64 // CHECK: airrt.dma_memcpy_nd(%[[CST_2]], %[[CST_0]], %[[CST_0]], %[[VAL_0]][%[[CST_0]], %[[CST_0]], %[[CST_0]], %[[CST_0]]], [%[[CST_1]], %[[CST_1]], %[[CST_1]], %[[CST_64]]], [%[[CST_0]], %[[CST_0]], %[[CST_0]]]) {metadata = @airMemcpyId2} : (i32, i64, i64, memref<64xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) // CHECK: %[[VAL_2:.*]] = airrt.segment_load "segment0" : i64 -// CHECK: %[[CST_7:.*]] = arith.constant 7 : i32 // CHECK: airrt.dma_memcpy_nd(%[[CST_7]], %[[CST_0]], %[[CST_0]], %[[VAL_1]][%[[CST_0]], %[[CST_0]], %[[CST_0]], %[[CST_0]]], [%[[CST_1]], %[[CST_1]], %[[CST_1]], %[[CST_64]]], [%[[CST_0]], %[[CST_0]], %[[CST_0]]]) {metadata = @airMemcpyId7} : (i32, i64, i64, memref<64xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) module { @@ -70,14 +70,14 @@ module { // CHECK-LABEL: aie.device(ipu) // CHECK: {sym_name = "segment0"} -// CHECK: func.func @func0(%[[VAL_0:.*]]: memref<64xi32>, %[[VAL_1:.*]]: memref<64xi32>) -// CHECK: %[[CST_0:.*]] = arith.constant 0 : i64 -// CHECK: %[[CST_1:.*]] = arith.constant 1 : i64 -// CHECK: %[[CST_2:.*]] = arith.constant 2 : i32 -// CHECK: %[[CST_64:.*]] = arith.constant 64 : i64 +// CHECK: func.func @func1(%[[VAL_0:.*]]: memref<64xi32>, %[[VAL_1:.*]]: memref<64xi32>) +// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0 : i64 +// CHECK-DAG: %[[CST_1:.*]] = arith.constant 1 : i64 +// CHECK-DAG: %[[CST_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[CST_7:.*]] = arith.constant 7 : i32 +// CHECK-DAG: %[[CST_64:.*]] = arith.constant 64 : i64 // CHECK: airrt.dma_memcpy_nd(%[[CST_2]], %[[CST_0]], %[[CST_0]], %[[VAL_0]][%[[CST_0]], %[[CST_0]], %[[CST_0]], %[[CST_0]]], [%[[CST_1]], %[[CST_1]], %[[CST_1]], %[[CST_64]]], [%[[CST_0]], %[[CST_0]], %[[CST_0]]]) {metadata = @airMemcpyId2} : (i32, i64, i64, memref<64xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) // CHECK: %[[VAL_2:.*]] = airrt.segment_load "segment0" : i64 -// CHECK: %[[CST_7:.*]] = arith.constant 7 : i32 // CHECK: airrt.dma_memcpy_nd(%[[CST_7]], %[[CST_0]], %[[CST_0]], %[[VAL_1]][%[[CST_0]], %[[CST_0]], %[[CST_0]], %[[CST_0]]], [%[[CST_1]], %[[CST_1]], %[[CST_1]], %[[CST_64]]], [%[[CST_0]], %[[CST_0]], %[[CST_0]]]) {metadata = @airMemcpyId7} : (i32, i64, i64, memref<64xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) module { @@ -93,7 +93,7 @@ module { air.channel @channel_1 [1, 1] air.channel @channel_2 [1, 1] air.channel @channel_3 [1, 1] - func.func @func0(%arg0: memref<64xi32>, %arg1: memref<64xi32>) { + func.func @func1(%arg0: memref<64xi32>, %arg1: memref<64xi32>) { %0 = air.channel.put async @channel_0[] (%arg0[] [] []) {id = 1 : i32, metadata = @airMemcpyId2} : (memref<64xi32>) %1 = air.segment @segment0 async attributes {id = 2 : i32} { %c1 = arith.constant 1 : index @@ -148,3 +148,91 @@ module { return } } + +// ----- + +// Wrap shape differs from memref shape. + +// CHECK: func.func @func2(%[[VAL_0:.*]]: memref<8x16xi32>, %[[VAL_1:.*]]: memref<16x32xi32>, %[[VAL_2:.*]]: memref<8x32xi32>) +// CHECK-DAG: %[[CST_32:.*]] = arith.constant 32 : i64 +// CHECK-DAG: %[[CST_8:.*]] = arith.constant 8 : i64 +// CHECK-DAG: %[[CST_16:.*]] = arith.constant 16 : i64 +// CHECK-DAG: %[[CST_6:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[CST_5:.*]] = arith.constant 5 : i32 +// CHECK-DAG: %[[CST_4:.*]] = arith.constant 4 : i32 +// CHECK-DAG: %[[CST_1:.*]] = arith.constant 1 : i64 +// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0 : i64 +// CHECK: airrt.dma_memcpy_nd(%[[CST_4]], %{{.*}}, %{{.*}}, %[[VAL_0]][%[[CST_0]], %[[CST_0]], %{{.*}}, %[[CST_0]]], [%[[CST_1]], %[[CST_1]], %[[CST_8]], %[[CST_16]]], [%[[CST_0]], %[[CST_0]], %[[CST_16]]]) : (i32, i64, i64, memref<8x16xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) +// CHECK: airrt.dma_memcpy_nd(%[[CST_5]], %{{.*}}, %{{.*}}, %[[VAL_1]][%[[CST_0]], %[[CST_0]], %[[CST_0]], %{{.*}}], [%[[CST_1]], %[[CST_1]], %[[CST_16]], %[[CST_16]]], [%[[CST_0]], %[[CST_0]], %[[CST_32]]]) : (i32, i64, i64, memref<16x32xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) +// CHECK: airrt.dma_memcpy_nd(%[[CST_6]], %{{.*}}, %{{.*}}, %[[VAL_2]][%[[CST_0]], %[[CST_0]], %{{.*}}, %{{.*}}], [%[[CST_1]], %[[CST_1]], %[[CST_8]], %[[CST_16]]], [%[[CST_0]], %[[CST_0]], %[[CST_32]]]) : (i32, i64, i64, memref<8x32xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) + +#map = affine_map<()[s0] -> (s0 * 8)> +#map1 = affine_map<()[s0] -> (s0 * 16)> +module { + air.channel @channel_2 [1, 1] + air.channel @channel_1 [1, 1] + air.channel @channel_0 [1, 1] + func.func @func2(%arg0: memref<8x16xi32>, %arg1: memref<16x32xi32>, %arg2: memref<8x32xi32>) { + %c2 = arith.constant 2 : index + %c1 = arith.constant 1 : index + %0 = air.launch async (%arg3, %arg4) in (%arg5=%c1, %arg6=%c2) args(%arg7=%arg0, %arg8=%arg1, %arg9=%arg2) : memref<8x16xi32>, memref<16x32xi32>, memref<8x32xi32> attributes {id = 1 : i32} { + %c32 = arith.constant 32 : index + %c8 = arith.constant 8 : index + %c1_0 = arith.constant 1 : index + %c16 = arith.constant 16 : index + %c0 = arith.constant 0 : index + %async_token, %results = air.execute -> (index) { + %5 = affine.apply #map()[%arg3] + air.execute_terminator %5 : index + } + %1 = air.channel.put async [%async_token] @channel_0[] (%arg7[%results, %c0] [%c8, %c16] [%c16, %c1_0]) {id = 1 : i32} : (memref<8x16xi32>) + %async_token_1, %results_2 = air.execute -> (index) { + %5 = affine.apply #map1()[%arg4] + air.execute_terminator %5 : index + } + %2 = air.channel.put async [%async_token_1] @channel_1[] (%arg8[%c0, %results_2] [%c16, %c16] [%c32, %c1_0]) {id = 2 : i32} : (memref<16x32xi32>) + %async_token_3, %results_4 = air.execute -> (index) { + %5 = affine.apply #map()[%arg3] + air.execute_terminator %5 : index + } + %async_token_5, %results_6 = air.execute -> (index) { + %5 = affine.apply #map1()[%arg4] + air.execute_terminator %5 : index + } + %3 = air.channel.get async [%async_token_3, %async_token_5] @channel_2[] (%arg9[%results_4, %results_6] [%c8, %c16] [%c32, %c1_0]) {id = 3 : i32} : (memref<8x32xi32>) + %4 = air.segment @segment_0 async attributes {id = 2 : i32, x_loc = 0 : i64, x_size = 1 : i64, y_loc = 2 : i64, y_size = 1 : i64} { + %c8_7 = arith.constant 8 : index + %c1_8 = arith.constant 1 : index + %c16_9 = arith.constant 16 : index + %c0_10 = arith.constant 0 : index + %async_token_11, %results_12 = air.execute -> (memref<1x1x8x16xi32, 1>) { + %alloc = memref.alloc() : memref<1x1x8x16xi32, 1> + air.execute_terminator %alloc : memref<1x1x8x16xi32, 1> + } + %5 = air.channel.get async [%async_token_11] @channel_0[] (%results_12[] [] []) {id = 4 : i32} : (memref<1x1x8x16xi32, 1>) + %async_token_13, %results_14 = air.execute -> (memref<1x1x16x16xi32, 1>) { + %alloc = memref.alloc() : memref<1x1x16x16xi32, 1> + air.execute_terminator %alloc : memref<1x1x16x16xi32, 1> + } + %6 = air.channel.get async [%async_token_13] @channel_1[] (%results_14[] [] []) {id = 5 : i32} : (memref<1x1x16x16xi32, 1>) + %async_token_15, %results_16 = air.execute -> (memref<1x1x8x16xi32, 1>) { + %alloc = memref.alloc() : memref<1x1x8x16xi32, 1> + air.execute_terminator %alloc : memref<1x1x8x16xi32, 1> + } + %7 = air.channel.put async [%async_token_15] @channel_2[] (%results_16[%c0_10, %c0_10] [%c8_7, %c16_9] [%c16_9, %c1_8]) {id = 6 : i32} : (memref<1x1x8x16xi32, 1>) + %async_token_17 = air.execute [%7] { + memref.dealloc %results_14 : memref<1x1x16x16xi32, 1> + } + %async_token_18 = air.execute [%7] { + memref.dealloc %results_12 : memref<1x1x8x16xi32, 1> + } + %async_token_19 = air.execute [%7] { + memref.dealloc %results_16 : memref<1x1x8x16xi32, 1> + } + air.segment_terminator + } + air.launch_terminator + } + return + } +} diff --git a/mlir/test/Conversion/AIRRtToIpu/airrt_to_ipu.mlir b/mlir/test/Conversion/AIRRtToIpu/airrt_to_ipu.mlir index ffb06701d..7b06e95db 100644 --- a/mlir/test/Conversion/AIRRtToIpu/airrt_to_ipu.mlir +++ b/mlir/test/Conversion/AIRRtToIpu/airrt_to_ipu.mlir @@ -308,3 +308,54 @@ module { return } } + +// ----- + +// Populate repeat dimension (highest dimension) + +// CHECK-LABEL: aie.device(ipu) +// CHECK: func.func @func6(%[[ARG0:.*]]: memref<8x16xi32>, %[[ARG1:.*]]: memref<16x32xi32>, %[[ARG2:.*]]: memref<8x32xi32>) +// CHECK: aiex.ipu.dma_memcpy_nd(0, 0, %[[ARG0]][0, 0, 0, 0][2, 1, 8, 16][0, 0, 16]) {id = 1 : i64, metadata = @airMemcpyId4} : memref<8x16xi32> +// CHECK: aiex.ipu.dma_memcpy_nd(0, 0, %[[ARG1]][0, 0, 0, 0][1, 2, 16, 16][0, 16, 32]) {id = 2 : i64, metadata = @airMemcpyId5} : memref<16x32xi32> +// CHECK: aiex.ipu.dma_memcpy_nd(0, 0, %[[ARG2]][0, 0, 0, 0][1, 2, 8, 16][0, 16, 32]) {id = 3 : i64, metadata = @airMemcpyId12} : memref<8x32xi32> + +#map = affine_map<()[s0] -> (s0 * 8)> +#map1 = affine_map<()[s0] -> (s0 * 16)> +module { + aie.device(ipu) { + %tile_0_0 = aie.tile(0, 0) + aie.shim_dma_allocation @airMemcpyId12(S2MM, 0, 0) + memref.global "public" @airMemcpyId12 : memref<1x1x8x16xi32, 1> + aie.shim_dma_allocation @airMemcpyId4(MM2S, 0, 0) + memref.global "public" @airMemcpyId4 : memref<1x1x8x16xi32, 1> + aie.shim_dma_allocation @airMemcpyId5(MM2S, 1, 0) + memref.global "public" @airMemcpyId5 : memref<1x1x16x16xi32, 1> + } {sym_name = "segment_0"} + airrt.module_metadata{ + } + func.func @func6(%arg0: memref<8x16xi32>, %arg1: memref<16x32xi32>, %arg2: memref<8x32xi32>) { + %c32_i64 = arith.constant 32 : i64 + %c8_i64 = arith.constant 8 : i64 + %c16_i64 = arith.constant 16 : i64 + %c12_i32 = arith.constant 12 : i32 + %c5_i32 = arith.constant 5 : i32 + %c4_i32 = arith.constant 4 : i32 + %c1_i64 = arith.constant 1 : i64 + %c0_i64 = arith.constant 0 : i64 + affine.for %arg3 = 0 to 1 { + affine.for %arg4 = 0 to 2 { + %0 = affine.apply #map()[%arg3] + %1 = arith.index_cast %arg3 : index to i64 + %2 = arith.index_cast %arg4 : index to i64 + %3 = arith.index_cast %0 : index to i64 + %4 = airrt.dma_memcpy_nd(%c4_i32, %1, %2, %arg0[%c0_i64, %c0_i64, %3, %c0_i64], [%c1_i64, %c1_i64, %c8_i64, %c16_i64], [%c0_i64, %c0_i64, %c16_i64]) {metadata = @airMemcpyId4} : (i32, i64, i64, memref<8x16xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) : !airrt.event + %5 = affine.apply #map1()[%arg4] + %6 = arith.index_cast %5 : index to i64 + %7 = airrt.dma_memcpy_nd(%c5_i32, %1, %2, %arg1[%c0_i64, %c0_i64, %c0_i64, %6], [%c1_i64, %c1_i64, %c16_i64, %c16_i64], [%c0_i64, %c0_i64, %c32_i64]) {metadata = @airMemcpyId5} : (i32, i64, i64, memref<16x32xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) : !airrt.event + %8 = airrt.dma_memcpy_nd(%c12_i32, %1, %2, %arg2[%c0_i64, %c0_i64, %3, %6], [%c1_i64, %c1_i64, %c8_i64, %c16_i64], [%c0_i64, %c0_i64, %c32_i64]) {metadata = @airMemcpyId12} : (i32, i64, i64, memref<8x32xi32>, [i64, i64, i64, i64], [i64, i64, i64, i64], [i64, i64, i64]) : !airrt.event + %p = airrt.segment_load "segment_0" : i64 + } + } + return + } +} diff --git a/mlir/test/Transform/AIRMiscPasses/air_renumber_dma.mlir b/mlir/test/Transform/AIRMiscPasses/air_renumber_dma.mlir index 249adbd90..a7715e1cb 100644 --- a/mlir/test/Transform/AIRMiscPasses/air_renumber_dma.mlir +++ b/mlir/test/Transform/AIRMiscPasses/air_renumber_dma.mlir @@ -5,7 +5,7 @@ // //===----------------------------------------------------------------------===// -// RUN: air-opt %s -air-renumber-dma | FileCheck %s +// RUN: air-opt %s -air-renumber-dma="mode=herd" | FileCheck %s // CHECK: id = 1 // CHECK: id = 2 // CHECK: id = 3