From e56871b22fb8b75296592e9506f6a0adb277d267 Mon Sep 17 00:00:00 2001 From: Martien de Jong Date: Wed, 29 Jan 2025 14:43:45 +0100 Subject: [PATCH] [AIE2P] add end-to-end pipeliner test --- .../schedule/postpipeliner/end-to-end.ll | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/end-to-end.ll diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/end-to-end.ll b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/end-to-end.ll new file mode 100644 index 000000000000..514da9cef0e7 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/end-to-end.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates + +; This test checks postpipelining for aie2p end-to-end: +; loop rotation, hwloop generation, legalization, codeselection, hwloop lowering, +; pipelining, loop distance padding and assembly generation. 
+; The result is an eight-stage pipeline: seven fully expanded prologue bundles, a one-bundle loop body and seven epilogue bundles
+
+; RUN: llc -O2 -mtriple=aie2p %s -o - | FileCheck %s
+
+define <32 x i16> @zol(i32 %n, ptr %p) {
+; CHECK-LABEL: zol:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: add.nc lc, r0, #-7
+; CHECK-NEXT: movxm ls, #.LBB0_1
+; CHECK-NEXT: movxm le, #.L_LEnd0
+; CHECK-NEXT: vlda x2, [p0], #64; nopb ; nops ; nopxm ; nopv
+; CHECK-NEXT: vlda x2, [p0], #64; nopb ; nops ; nopxm ; nopv
+; CHECK-NEXT: vlda x2, [p0], #64; nopb ; nops ; nopxm ; nopv
+; CHECK-NEXT: vlda x2, [p0], #64; nopb ; nops ; nopxm ; nopv
+; CHECK-NEXT: vlda x2, [p0], #64; nopb ; nops ; nopxm ; nopv
+; CHECK-NEXT: vlda x2, [p0], #64; nopb ; nops ; nopxm ; nopv
+; CHECK-NEXT: vlda x2, [p0], #64; nopb ; nops ; nopxm ; nopv
+; CHECK-NEXT: // implicit-def: $x0
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: .L_LEnd0:
+; CHECK-NEXT: vlda x2, [p0], #64; nopb ; nops ; nopx ; vadd.16 x0, x2, x0; nopv
+; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
+; CHECK-NEXT: nopa ; nopx ; vadd.16 x0, x2, x0
+; CHECK-NEXT: vadd.16 x0, x2, x0
+; CHECK-NEXT: vadd.16 x0, x2, x0
+; CHECK-NEXT: vadd.16 x0, x2, x0
+; CHECK-NEXT: vadd.16 x0, x2, x0
+; CHECK-NEXT: vadd.16 x0, x2, x0
+; CHECK-NEXT: vadd.16 x0, x2, x0
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: nop // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; iteration counter, starts at 0
+  %s.04 = phi <32 x i16> [ undef, %entry ], [ %add, %for.body ] ; running vector sum; its initial value is undef
+  %p.addr.03 = phi ptr [ %p, %entry ], [ %incdec.ptr, %for.body ] ; address loaded from in this iteration
+  %incdec.ptr = getelementptr inbounds i8, ptr %p.addr.03, i20 64 ; next address: 64 bytes further on
+  %0 = load <32 x i16>, ptr %p.addr.03, align 64 ; one 32 x i16 vector per iteration
+  %add = add <32 x i16> %0, %s.04 ; accumulate the loaded vector into the sum
+  %inc = add nuw nsw i32 %i.05, 1
+  %exitcond.not = icmp eq i32 %inc, %n ; true once the incremented counter equals %n
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 ; back edge carries the loop metadata in !0
+
+for.cond.cleanup:
+  ret <32 x i16> %add ; return the sum computed by the last iteration
+}
+
+!0 = distinct !{!0, !1, !2, !3} ; self-referential loop ID grouping the hints below
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.itercount.range", i64 10} ; review: presumably a lower bound of 10 iterations - confirm
+!3 = !{!"llvm.loop.unroll.disable"} ; asks the unroller to leave this loop alone