From 907b52aad6d0fdf3f1ef7e7eeb9c3f7235a839c0 Mon Sep 17 00:00:00 2001 From: Jorn Tuyls Date: Wed, 14 Aug 2024 22:52:31 -0700 Subject: [PATCH] Add logical objectFifo hoisting pass --- .../AMD-AIE/iree-amd-aie/IR/AMDAIEOps.h | 7 +- .../Transforms/AMDAIEHoistLogicalObjFifo.cpp | 60 +++++ .../iree-amd-aie/Transforms/CMakeLists.txt | 1 + .../iree-amd-aie/Transforms/PassDetail.h | 1 + .../iree-amd-aie/Transforms/Passes.cpp | 1 + .../AMD-AIE/iree-amd-aie/Transforms/Passes.h | 4 + .../AMD-AIE/iree-amd-aie/Transforms/Passes.td | 5 + .../Transforms/test/CMakeLists.txt | 1 + .../test/hoist_logical_obj_fifo.mlir | 205 ++++++++++++++++++ tests/samples/matmul_peeled_objectfifo.mlir | 2 +- 10 files changed, 282 insertions(+), 5 deletions(-) create mode 100644 compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEHoistLogicalObjFifo.cpp create mode 100644 compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/hoist_logical_obj_fifo.mlir diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.h index 5a587ef0e..7a292470f 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.h @@ -7,6 +7,9 @@ #ifndef IREE_COMPILER_AMDAIE_OPS_H_ #define IREE_COMPILER_AMDAIE_OPS_H_ +#include "iree-amd-aie/IR/AMDAIEAttrs.h" +#include "iree-amd-aie/IR/AMDAIEDmaOpInterface.h" +#include "iree-amd-aie/IR/AMDAIETypes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinOps.h" @@ -14,10 +17,6 @@ #include "mlir/Interfaces/CopyOpInterface.h" #include "mlir/Interfaces/ViewLikeInterface.h" -#include "iree-amd-aie/IR/AMDAIEAttrs.h" -#include "iree-amd-aie/IR/AMDAIEDmaOpInterface.h" -#include "iree-amd-aie/IR/AMDAIETypes.h" - // clang-format off #include "iree-amd-aie/IR/AMDAIEAttrs.h" #include "iree-amd-aie/IR/AMDAIEDialect.h" diff --git 
a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEHoistLogicalObjFifo.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEHoistLogicalObjFifo.cpp
new file mode 100644
index 000000000..58bc5ec66
--- /dev/null
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEHoistLogicalObjFifo.cpp
@@ -0,0 +1,74 @@
+// Copyright 2024 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree-amd-aie/IR/AMDAIEOps.h"
+#include "iree-amd-aie/Transforms/Passes.h"
+#include "iree-amd-aie/Transforms/Transforms.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+
+#define DEBUG_TYPE "iree-amdaie-hoist-logical-objectfifo"
+
+namespace mlir::iree_compiler::AMDAIE {
+
+/// Hoist a logical objectFifo operation out of its enclosing operations until
+/// one of its operands is defined within the enclosing scope, or until a
+/// workgroup, controlcode or function boundary is reached.
+///
+/// Returns success once the walk up the ancestor chain has stopped at such a
+/// scope, whether or not `op` had to be moved. Returns failure, leaving `op`
+/// untouched, if the top of the IR is reached without finding one.
+LogicalResult hoistLogicalObjFifoOp(RewriterBase &rewriter,
+                                    AMDAIE::LogicalObjectFifoFromMemrefOp op) {
+  // Ancestor of `op` (or `op` itself) that `op` will be placed in front of.
+  Operation *parentOp = op;
+  while (true) {
+    Operation *newParentOp = parentOp->getParentOp();
+    // Ran out of ancestors: there is no block left to hoist into.
+    if (!newParentOp) return failure();
+    // Stop when an operand is defined inside `newParentOp`: hoisting further
+    // would place `op` outside the operation holding that definition.
+    if (llvm::any_of(op->getOperands(), [&](Value operand) {
+          Operation *definingOp = operand.getDefiningOp();
+          return definingOp && newParentOp->isProperAncestor(definingOp);
+        })) {
+      break;
+    }
+    // Never hoist across a workgroup, controlcode or function boundary.
+    if (isa<AMDAIE::WorkgroupOp, AMDAIE::ControlCodeOp, func::FuncOp>(
+            newParentOp)) {
+      break;
+    }
+    parentOp = newParentOp;
+  }
+  if (parentOp != op) rewriter.moveOpBefore(op, parentOp);
+  return success();
+}
+
+namespace {
+/// Pass that applies `hoistLogicalObjFifoOp` to every logical objectFifo
+/// `from_memref` operation nested under the operation the pass runs on.
+struct AMDAIEHoistLogicalObjFifoPass
+    : public impl::AMDAIEHoistLogicalObjFifoBase<
+          AMDAIEHoistLogicalObjFifoPass> {
+  void runOnOperation() override;
+};
+
+void AMDAIEHoistLogicalObjFifoPass::runOnOperation() {
+  Operation *parentOp = getOperation();
+  IRRewriter rewriter(parentOp->getContext());
+  parentOp->walk([&](AMDAIE::LogicalObjectFifoFromMemrefOp op) {
+    // Hoisting is best-effort: the result is deliberately ignored.
+    (void)hoistLogicalObjFifoOp(rewriter, op);
+  });
+}
+
+}  // namespace
+
+std::unique_ptr<Pass> createAMDAIEHoistLogicalObjFifoPass() {
+  return std::make_unique<AMDAIEHoistLogicalObjFifoPass>();
+}
+
+}  // namespace mlir::iree_compiler::AMDAIE
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt index 171525b89..938171a48 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt @@ -67,6 +67,7 @@ iree_cc_library( "AMDAIEFuseFillIntoForall.cpp" "AMDAIEFusePackIntoLoop.cpp" "AMDAIEHoistForAffineApply.cpp" + "AMDAIEHoistLogicalObjFifo.cpp" "AMDAIEInsertCores.cpp" "AMDAIEInsertLoopsForVectorization.cpp" "AMDAIELinkExecutables.cpp" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h index 54ede7e21..5bd44c2e7 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h @@ -45,6 +45,7 @@ namespace mlir::iree_compiler::AMDAIE { #define GEN_PASS_DEF_AMDAIEFUSEFILLINTOFORALL #define GEN_PASS_DEF_AMDAIEFUSEPACKINTOLOOP #define GEN_PASS_DEF_AMDAIEHOISTFORLOOPAFFINEAPPLY +#define GEN_PASS_DEF_AMDAIEHOISTLOGICALOBJFIFO #define GEN_PASS_DEF_AMDAIEINSERTAIEWORKGROUP #define GEN_PASS_DEF_AMDAIEINSERTCORES #define GEN_PASS_DEF_AMDAIEINSERTLOOPSFORVECTORIZATION diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index b0c8f799c..186c125ee 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -584,6 +584,7 @@ void addAMDAIEObjectFifoLoweringPasses(OpPassManager &passManager) { passManager.addNestedPass(createAMDAIECreateAIEWorkgroupPass()); passManager.addPass(createCSEPass()); +
passManager.addPass(createAMDAIEHoistLogicalObjFifoPass()); passManager.addPass(createAMDAIECanonicalizeDoublyStridedOpPass()); passManager.addPass(createAMDAIEFlattenLogicalObjectFifoPass()); passManager.addPass(createAMDAIEAssignLogicalObjectFifoDepthPass()); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h index 2975fb035..e01890f90 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h @@ -140,6 +140,10 @@ std::unique_ptr createAMDAIEFuseFillIntoForallPass(); /// Hoist an affine.apply op on a scf.for op's induction variable. std::unique_ptr createAMDAIEHoistForLoopAffineApplyPass(); +/// Create a pass to hoist logical objectFifo operations to the scope of its +/// operands. +std::unique_ptr createAMDAIEHoistLogicalObjFifoPass(); + /// Create a pass to transform linalg.generics into a form which benefits later /// vectorization passes (to vector and aievec dialects). 
std::unique_ptr createAMDAIEInsertLoopsForVectorizationPass(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td index ee06e34d6..46118ff68 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td @@ -213,6 +213,11 @@ def AMDAIEHoistForLoopAffineApply : Pass<"iree-amdaie-hoist-for-affine-apply"> { let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIEHoistForLoopAffineApplyPass()"; } +def AMDAIEHoistLogicalObjFifo : Pass<"iree-amdaie-hoist-logical-objectfifo"> { + let summary = "Hoist logical objectFifo operations to the scope of its operands."; + let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIEHoistLogicalObjFifoPass()"; +} + def AMDAIEInsertCores : Pass<"iree-amdaie-insert-cores", "ModuleOp"> { let summary = "Insert `amdaie.core` operations inside the innermost " diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt index abba1ca33..cf96a5383 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt @@ -34,6 +34,7 @@ iree_lit_test_suite( "fuse_fill_into_forall.mlir" "fuse_pack_into_loop.mlir" "hoist_for_affine_apply.mlir" + "hoist_logical_obj_fifo.mlir" "insert_cores.mlir" "insert_loops_for_vectorization.mlir" "localize_logical_objectfifo.mlir" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/hoist_logical_obj_fifo.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/hoist_logical_obj_fifo.mlir new file mode 100644 index 000000000..518213533 --- /dev/null +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/hoist_logical_obj_fifo.mlir @@ -0,0 +1,205 @@ +// RUN: iree-opt 
--split-input-file --pass-pipeline="builtin.module(iree-amdaie-hoist-logical-objectfifo)" %s | FileCheck %s + +// CHECK-LABEL: @func_hoist +// CHECK-SAME: %[[ARG0:.+]]: memref<32x64xi32> +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: amdaie.logicalobjectfifo.from_memref %[[ARG0]], {%[[TILE_0_0]]} +// CHECK: scf.forall +// CHECK-NOT: amdaie.logicalobjectfifo.from_memref +module { + func.func @func_hoist(%arg0: memref<32x64xi32>) { + %c0 = arith.constant 0 : index + %tile_0_0 = amdaie.tile(%c0, %c0) + scf.forall (%arg1, %arg2) in (1, 2) { + %obj0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : memref<32x64xi32> -> !amdaie.logicalobjectfifo> + } + return + } +} + + +// ----- + +// CHECK-LABEL: @func_no_hoist +// CHECK-SAME: %[[ARG0:.+]]: memref<32x64xi32> +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: scf.forall +// CHECK: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: amdaie.logicalobjectfifo.from_memref %[[ARG0]], {%[[TILE_0_0]]} +module { + func.func @func_no_hoist(%arg0: memref<32x64xi32>) { + %c0 = arith.constant 0 : index + scf.forall (%arg1, %arg2) in (1, 2) { + %tile_0_0 = amdaie.tile(%c0, %c0) + %obj0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : memref<32x64xi32> -> !amdaie.logicalobjectfifo> + } + return + } +} + +// ----- + +// CHECK-LABEL: @workgroup_hoist +// CHECK-SAME: %[[ARG0:.+]]: memref<32x64xi32> +// CHECK: amdaie.workgroup +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: amdaie.logicalobjectfifo.from_memref %[[ARG0]], {%[[TILE_0_0]]} +// CHECK: scf.forall +// CHECK-NOT: amdaie.logicalobjectfifo.from_memref +func.func @workgroup_hoist(%arg0: memref<32x64xi32>) { + amdaie.workgroup { + %c0 = arith.constant 0 : index + %tile_0_0 = amdaie.tile(%c0, %c0) + scf.forall (%arg1, %arg2) in (1, 2) { + %obj0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : 
memref<32x64xi32> -> !amdaie.logicalobjectfifo> + } + amdaie.controlcode { + amdaie.end + } + } + return +} + +// ----- + +// CHECK-LABEL: @workgroup_no_hoist +// CHECK-SAME: %[[ARG0:.+]]: memref<32x64xi32> +// CHECK: amdaie.workgroup +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: scf.forall +// CHECK: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: amdaie.logicalobjectfifo.from_memref %[[ARG0]], {%[[TILE_0_0]]} +func.func @workgroup_no_hoist(%arg0: memref<32x64xi32>) { + amdaie.workgroup { + %c0 = arith.constant 0 : index + scf.forall (%arg1, %arg2) in (1, 2) { + %tile_0_0 = amdaie.tile(%c0, %c0) + %obj0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : memref<32x64xi32> -> !amdaie.logicalobjectfifo> + } + amdaie.controlcode { + amdaie.end + } + } + return +} +// ----- + +// CHECK-LABEL: @workgroup_no_hoist_outside +// CHECK-SAME: %[[ARG0:.+]]: memref<32x64xi32> +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: amdaie.workgroup +// CHECK: amdaie.logicalobjectfifo.from_memref %[[ARG0]], {%[[TILE_0_0]]} +func.func @workgroup_no_hoist_outside(%arg0: memref<32x64xi32>) { + %c0 = arith.constant 0 : index + %tile_0_0 = amdaie.tile(%c0, %c0) + amdaie.workgroup { + %obj0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : memref<32x64xi32> -> !amdaie.logicalobjectfifo> + amdaie.controlcode { + amdaie.end + } + } + return +} + +// ----- + +// CHECK-LABEL: @controlcode_hoist +// CHECK-SAME: %[[ARG0:.+]]: memref<32x64xi32> +// CHECK: amdaie.controlcode +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: amdaie.logicalobjectfifo.from_memref %[[ARG0]], {%[[TILE_0_0]]} +// CHECK: scf.forall +// CHECK: scf.forall +// CHECK-NOT: amdaie.logicalobjectfifo.from_memref +func.func @controlcode_hoist(%arg0: memref<32x64xi32>) { + amdaie.workgroup { + amdaie.controlcode { + %c0 = arith.constant 0 : index + 
%tile_0_0 = amdaie.tile(%c0, %c0) + scf.forall (%arg1, %arg2) in (1, 2) { + scf.forall (%arg3, %arg4) in (1, 2) { + %obj0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : memref<32x64xi32> -> !amdaie.logicalobjectfifo> + } + } + amdaie.end + } + } + return +} + +// ----- + +// CHECK-LABEL: @controlcode_partial_hoist +// CHECK-SAME: %[[ARG0:.+]]: memref<32x64xi32> +// CHECK: amdaie.controlcode +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: scf.forall +// CHECK: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: amdaie.logicalobjectfifo.from_memref %[[ARG0]], {%[[TILE_0_0]]} +// CHECK: scf.forall +// CHECK-NOT: amdaie.logicalobjectfifo.from_memref +func.func @controlcode_partial_hoist(%arg0: memref<32x64xi32>) { + amdaie.workgroup { + amdaie.controlcode { + %c0 = arith.constant 0 : index + scf.forall (%arg1, %arg2) in (1, 2) { + %tile_0_0 = amdaie.tile(%c0, %c0) + scf.forall (%arg3, %arg4) in (1, 2) { + %obj0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : memref<32x64xi32> -> !amdaie.logicalobjectfifo> + } + } + amdaie.end + } + } + return +} + +// ----- + +// CHECK-LABEL: @controlcode_no_hoist +// CHECK-SAME: %[[ARG0:.+]]: memref<32x64xi32> +// CHECK: amdaie.controlcode +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: scf.forall +// CHECK: scf.forall +// CHECK: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: amdaie.logicalobjectfifo.from_memref %[[ARG0]], {%[[TILE_0_0]]} +func.func @controlcode_no_hoist(%arg0: memref<32x64xi32>) { + amdaie.workgroup { + amdaie.controlcode { + %c0 = arith.constant 0 : index + scf.forall (%arg1, %arg2) in (1, 2) { + scf.forall (%arg3, %arg4) in (1, 2) { + %tile_0_0 = amdaie.tile(%c0, %c0) + %obj0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : memref<32x64xi32> -> !amdaie.logicalobjectfifo> + } + } + amdaie.end + } + } + return +} + +// ----- + +// CHECK-LABEL: @controlcode_no_hoist_outside +// CHECK-SAME: %[[ARG0:.+]]: memref<32x64xi32> +// CHECK: 
%[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: amdaie.controlcode +// CHECK: amdaie.logicalobjectfifo.from_memref %[[ARG0]], {%[[TILE_0_0]]} +func.func @controlcode_no_hoist_outside(%arg0: memref<32x64xi32>) { + amdaie.workgroup { + %c0 = arith.constant 0 : index + %tile_0_0 = amdaie.tile(%c0, %c0) + amdaie.controlcode { + %obj0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : memref<32x64xi32> -> !amdaie.logicalobjectfifo> + amdaie.end + } + } + return +} diff --git a/tests/samples/matmul_peeled_objectfifo.mlir b/tests/samples/matmul_peeled_objectfifo.mlir index b5a1f13b8..013bc863d 100644 --- a/tests/samples/matmul_peeled_objectfifo.mlir +++ b/tests/samples/matmul_peeled_objectfifo.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt --pass-pipeline="builtin.module(fold-memref-alias-ops,iree-amdaie-pack-to-dma,air-copy-to-dma,iree-amdaie-air-dma-to-amdaie-dma,iree-amdaie-insert-cores,cse,iree-amdaie-localize-logicalobjectfifo,iree-amdaie-distribute-cores-and-objectfifos,cse,canonicalize,iree-amdaie-dma-to-circular-dma,func.func(iree-amdaie-create-aie-workgroup),cse,iree-amdaie-canonicalize-doubly-strided-op,iree-amdaie-flatten-logicalobjectfifo,iree-amdaie-access-to-acquire-release,cse,canonicalize,iree-amdaie-dma-loop-subsumption,cse,canonicalize,iree-amdaie-assign-npu-dma-bd-ids,iree-amdaie-controlcode-loop-unroll,cse,canonicalize,iree-amdaie-create-logical-objectfifo-link,iree-amdaie-canonicalize-doubly-strided-op,iree-amdaie-lower-to-aie,canonicalize)" --split-input-file %s | FileCheck %s +// RUN: iree-opt 
--pass-pipeline="builtin.module(fold-memref-alias-ops,iree-amdaie-pack-to-dma,air-copy-to-dma,iree-amdaie-air-dma-to-amdaie-dma,iree-amdaie-insert-cores,cse,iree-amdaie-localize-logicalobjectfifo,iree-amdaie-distribute-cores-and-objectfifos,cse,canonicalize,iree-amdaie-dma-to-circular-dma,func.func(iree-amdaie-create-aie-workgroup),cse,iree-amdaie-hoist-logical-objectfifo,iree-amdaie-canonicalize-doubly-strided-op,iree-amdaie-flatten-logicalobjectfifo,iree-amdaie-access-to-acquire-release,cse,canonicalize,iree-amdaie-dma-loop-subsumption,cse,canonicalize,iree-amdaie-assign-npu-dma-bd-ids,iree-amdaie-controlcode-loop-unroll,cse,canonicalize,iree-amdaie-create-logical-objectfifo-link,iree-amdaie-canonicalize-doubly-strided-op,iree-amdaie-lower-to-aie,canonicalize)" --split-input-file %s | FileCheck %s // CHECK: aie.device(npu1_4col) // CHECK-DAG: %[[TILE_0_2:.+]] = aie.tile(0, 2)