[AIE] Legalize G_AND for <8 x s64> vector. #262

Open
wants to merge 3 commits into base: aie-public
11 changes: 10 additions & 1 deletion llvm/lib/Target/AIE/AIE2InstrPatterns.td
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
@@ -1092,6 +1092,15 @@ def : PatInaccessibleMem<(int_aie2_v16bf16_to_v16i32 VEC256:$src, mSs:$shft),
def : PatInaccessibleMem<(int_aie2_clr16f_conf),
(VCLR_vclr_BF)>;

// AIE does not have a native xor instruction for vector operands.
// Res = A xor B --> Res = AB' || A'B
foreach Ty = [v64i8, v32i16, v16i32] in {
def : Pat<(xor Ty:$src1, Ty:$src2),
(VBOR
(VBAND (VBNEG_LTZ_S32 Ty:$src1), Ty:$src2),
(VBAND (VBNEG_LTZ_S32 Ty:$src2), Ty:$src1))>;
}

// DIVS
def : Pat<(int_aie2_divs eR31:$sd_in, eR:$src0, eR:$src1),
(DIVS eR31:$sd_in, eR:$src0, eR:$src1)>;
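
The xor pattern added above leans on the Boolean identity A xor B = AB' + A'B. Below is a minimal standalone C++ check (not part of the patch) of that identity per 32-bit lane, assuming VBNEG_LTZ_S32 / VBAND / VBOR act as lane-wise NOT / AND / OR:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 0xFFFFFFFFu, 0x12345678u, 0xDEADBEEFu};
  for (uint32_t A : Vals)
    for (uint32_t B : Vals)
      // XOR rebuilt from NOT/AND/OR, mirroring the VBOR(VBAND, VBAND) pattern.
      assert((A ^ B) == ((A & ~B) | (~A & B)));
  return 0;
}
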
53 changes: 39 additions & 14 deletions llvm/lib/Target/AIE/AIE2LegalizerInfo.cpp
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
/// \file
@@ -73,6 +73,13 @@ static LegalityPredicate isValidVectorAIE2(const unsigned TypeIdx) {
};
}

static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isVector() && QueryTy.getSizeInBits() < Size;
};
}

LegalityPredicate
negatePredicate(const std::function<bool(const LegalityQuery &)> &Func) {
return [=](const LegalityQuery &Query) { return !Func(Query); };
@@ -213,17 +220,6 @@ AIE2LegalizerInfo::AIE2LegalizerInfo(const AIE2Subtarget &ST) : AIEHelper(ST) {
.clampScalar(0, S32, S32);
// FIXME: (s|z|any)ext s20 to s64 is broken.

getActionDefinitionsBuilder({G_AND, G_OR})
.legalFor({S32})
.legalFor(AIE2VectorTypes)
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32);

getActionDefinitionsBuilder(G_XOR)
.legalFor({S32})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32);

getActionDefinitionsBuilder(G_SEXT_INREG).custom();

getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL})
@@ -250,11 +246,34 @@ AIE2LegalizerInfo::AIE2LegalizerInfo(const AIE2Subtarget &ST) : AIEHelper(ST) {
// patterns.
.bitcastIf(typeInSet(0, AIE2AccumulatorTypes), bitcastAccToVectorType(0));

getActionDefinitionsBuilder({G_ADD, G_SUB})
getActionDefinitionsBuilder({G_ADD, G_SUB, G_XOR})
.legalFor({S32})
.legalFor({V16S32, V32S16, V64S8})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32);
.clampScalar(0, S32, S32)
// AIE ISA supports only 512-bit vector add/sub/xor
.clampMaxNumElements(0, S8, 64)
.clampMaxNumElements(0, S16, 32)
.clampMaxNumElements(0, S32, 16)
// The moreElements action could have been used here, but it generates code
// closer to scalarization. We can use G_CONCAT_VECTORS and unmerge to do
// this more optimally.
.customIf(vectorSmallerThan(0, 512));

getActionDefinitionsBuilder({G_AND, G_OR})
.legalFor({S32})
.legalFor({V16S32, V32S16, V64S8})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32)
// AIE ISA supports only 512-bit vector and/or
.clampMaxNumElements(0, S8, 64)
.clampMaxNumElements(0, S16, 32)
.clampMaxNumElements(0, S32, 16)
// The moreElements action could have been used here, but it generates code
// closer to scalarization. We can use G_CONCAT_VECTORS and unmerge to do
// this more optimally.
.customIf(vectorSmallerThan(0, 512))
.bitcastIf(typeInSet(0, AIE2AccumulatorTypes), bitcastAccToVectorType(0));

// FIXME: G_SADDE/G_SSUBE doesn't support lowering. To support this properly,
// the action needs to be implemented
@@ -546,6 +565,12 @@ bool AIE2LegalizerInfo::legalizeCustom(
return AIEHelper.legalizeG_SEXT_INREG(Helper, MI);
case TargetOpcode::G_BITCAST:
return AIEHelper.legalizeG_BITCAST(Helper, MI);
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_XOR:
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
return AIEHelper.legalizeBinOp(Helper, MI);
}

llvm_unreachable("Un-expected custom legalization");
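
Taken together, the G_ADD/G_SUB/G_XOR and G_AND/G_OR rules above handle a vector operand in one of three ways: 512-bit vectors with s8/s16/s32 elements are legal as-is, wider vectors are clamped down into 512-bit pieces, and narrower ones hit the custom rule that pads them up to 512 bits. A rough standalone sketch of that decision in plain C++ (not LLVM code; accumulator types such as <8 x s64> take the separate bitcastIf path and are not modelled here):

#include <cstdio>

// Rough classification of a <NumElts x sEltBits> operand for these ops,
// assuming 512 bits is the only native vector width.
static const char *classify(unsigned NumElts, unsigned EltBits) {
  const unsigned Bits = NumElts * EltBits;
  if (Bits == 512 && EltBits <= 32)
    return "legal as-is (<64 x s8>, <32 x s16>, <16 x s32>)";
  if (Bits > 512)
    return "clampMaxNumElements: split into 512-bit pieces";
  return "customIf(vectorSmallerThan(0, 512)): pad with undef up to 512 bits";
}

int main() {
  std::printf("<16 x s32>: %s\n", classify(16, 32));
  std::printf("<64 x s16>: %s\n", classify(64, 16));
  std::printf("<4 x s32>:  %s\n", classify(4, 32));
  return 0;
}
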
32 changes: 2 additions & 30 deletions llvm/lib/Target/AIE/AIEBaseInstrPatterns.td
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
@@ -36,36 +36,8 @@ def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameIndex">,
GISDNodeXFormEquiv<frameindex_to_targetframeindex>;

// VBOR / VBAND
foreach vec256Ty = [v32i8, v16i16, v8i32] in {
def : Pat<(vec256Ty (or VEC256:$src1, VEC256:$src2)),
(vec256Ty (EXTRACT_SUBREG (VBOR
(v32i16 (REG_SEQUENCE VEC512, VEC256:$src1, sub_256_lo)),
(v32i16 (REG_SEQUENCE VEC512, VEC256:$src2, sub_256_lo))),
sub_256_lo))>;
def : Pat<(vec256Ty (and VEC256:$src1, VEC256:$src2)),
(vec256Ty (EXTRACT_SUBREG (VBAND
(v32i16 (REG_SEQUENCE VEC512, VEC256:$src1, sub_256_lo)),
(v32i16 (REG_SEQUENCE VEC512, VEC256:$src2, sub_256_lo))),
sub_256_lo))>;
}
// VBOR / VBAND / XOR
foreach vec512Ty = [v64i8, v32i16, v16i32] in {
def : Pat<(vec512Ty (or VEC512:$src1, VEC512:$src2)), (VBOR VEC512:$src1, VEC512:$src2)>;
def : Pat<(vec512Ty (and VEC512:$src1, VEC512:$src2)), (VBAND VEC512:$src1, VEC512:$src2)>;
}
foreach vec1024Ty = [v128i8, v64i16, v32i32] in {
def : Pat<(vec1024Ty (or VEC1024:$src1, VEC1024:$src2)),
(REG_SEQUENCE VEC1024,
(VBOR (v16i32 (EXTRACT_SUBREG VEC1024:$src1, sub_512_lo)),
(v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_lo))), sub_512_lo,
(VBOR (v16i32 (EXTRACT_SUBREG VEC1024:$src1, sub_512_hi)),
(v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_hi))), sub_512_hi
)>;
def : Pat<(vec1024Ty (and VEC1024:$src1, VEC1024:$src2)),
(REG_SEQUENCE VEC1024,
(VBAND (v16i32 (EXTRACT_SUBREG VEC1024:$src1, sub_512_lo)),
(v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_lo))), sub_512_lo,
(VBAND (v16i32 (EXTRACT_SUBREG VEC1024:$src1, sub_512_hi)),
(v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_hi))), sub_512_hi
)>;
}
41 changes: 22 additions & 19 deletions llvm/lib/Target/AIE/AIELegalizerHelper.cpp
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
/// \file
@@ -1306,48 +1306,51 @@ bool AIELegalizerHelper::legalizeBinOp(LegalizerHelper &Helper,
MachineInstr &MI) const {
assert(MI.getOpcode() == TargetOpcode::G_ADD ||
MI.getOpcode() == TargetOpcode::G_SUB ||
MI.getOpcode() == TargetOpcode::G_XOR);
MI.getOpcode() == TargetOpcode::G_XOR ||
MI.getOpcode() == TargetOpcode::G_AND ||
MI.getOpcode() == TargetOpcode::G_OR);

MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

const Register DstReg = MI.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const auto VectorSize = DstTy.getSizeInBits();

assert(DstTy.isVector() && VectorSize < 512 &&
"Expected vector size less than 512 bits");
assert(!(512 % VectorSize) && "Vector size should evenly divide 512");

const Register Src1Reg = MI.getOperand(1).getReg();
const Register Src2Reg = MI.getOperand(2).getReg();

assert(DstTy == MRI.getType(Src1Reg));
Register UndefReg = MRI.createGenericVirtualRegister(DstTy);
MIRBuilder.buildUndef(UndefReg);

auto NewVecTy = LLT::fixed_vector(
512 / DstTy.getElementType().getSizeInBits(), DstTy.getElementType());
Register NewDstReg = MRI.createGenericVirtualRegister(NewVecTy);
Register NewSrc1Reg = MRI.createGenericVirtualRegister(NewVecTy);
Register NewSrc2Reg = MRI.createGenericVirtualRegister(NewVecTy);

unsigned NumberOfPadElts = (512 / VectorSize) - 1;
SmallVector<Register, 8> Regs;
const Register UndefReg = MRI.createGenericVirtualRegister(DstTy);
MIRBuilder.buildUndef(UndefReg);

Regs.push_back(Src1Reg);
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(UndefReg);
MIRBuilder.buildMergeLikeInstr(NewSrc1Reg, Regs);
const unsigned NumberOfPadElts = (512 / VectorSize) - 1;
auto buildMergeInstr = [&](const Register SrcReg) -> Register {
SmallVector<Register, 4> Regs;
Regs.push_back(SrcReg);
for (unsigned i = 0; i < NumberOfPadElts; i++)
Regs.push_back(UndefReg);
const Register NewSrcReg = MRI.createGenericVirtualRegister(NewVecTy);
MIRBuilder.buildMergeLikeInstr(NewSrcReg, Regs);
return NewSrcReg;
};

Regs.clear();
Regs.push_back(Src2Reg);
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(UndefReg);
MIRBuilder.buildMergeLikeInstr(NewSrc2Reg, Regs);
const Register NewSrc1Reg = buildMergeInstr(Src1Reg);
const Register NewSrc2Reg = buildMergeInstr(Src2Reg);

const Register NewDstReg = MRI.createGenericVirtualRegister(NewVecTy);
MIRBuilder.buildInstr(MI.getOpcode(), {NewDstReg}, {NewSrc1Reg, NewSrc2Reg},
MI.getFlags());

Regs.clear();
SmallVector<Register, 4> Regs;
Regs.push_back(DstReg);
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(MRI.createGenericVirtualRegister(DstTy));
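
A minimal standalone sketch (plain C++, mirroring but not reusing the code above) of the quantities legalizeBinOp computes when padding a 128-bit <4 x s32> operand up to the assumed 512-bit native width:

#include <cassert>
#include <cstdio>

int main() {
  const unsigned VectorSize = 4 * 32; // DstTy.getSizeInBits() for <4 x s32>
  const unsigned EltSize = 32;        // element size in bits
  assert(VectorSize < 512 && !(512 % VectorSize));

  const unsigned WideNumElts = 512 / EltSize;              // NewVecTy: <16 x s32>
  const unsigned NumberOfPadElts = (512 / VectorSize) - 1; // undef pieces to append

  // Prints: widen to <16 x s32>, appending 3 undef <4 x s32> pieces
  std::printf("widen to <%u x s%u>, appending %u undef <4 x s32> pieces\n",
              WideNumElts, EltSize, NumberOfPadElts);
  return 0;
}
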
28 changes: 20 additions & 8 deletions llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
/// \file
@@ -252,12 +252,6 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
.clampScalar(0, S32, S32);
// FIXME: (s|z|any)ext s20 to s64 is broken.

getActionDefinitionsBuilder({G_AND, G_OR})
.legalFor({S32})
.legalFor(AIE2PVectorTypes)
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32);

getActionDefinitionsBuilder(G_SEXT_INREG).custom();

getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL})
@@ -305,7 +299,7 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
.legalFor({V16S32, V32S16, V64S8})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32)
// AIE ISA supports only 512-bit vector add/sub/xor
// AIE ISA supports only 512-bit vector add/sub/xor/and/or
.clampMaxNumElements(0, S8, 64)
.clampMaxNumElements(0, S16, 32)
.clampMaxNumElements(0, S32, 16)
@@ -314,6 +308,22 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
// more optimally.
.customIf(vectorSmallerThan(0, 512));

getActionDefinitionsBuilder({G_AND, G_OR})
.legalFor({S32})
.legalFor({V16S32, V32S16, V64S8})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32)
// AIE ISA supports only 512-bit vector and/or
.clampMaxNumElements(0, S8, 64)
.clampMaxNumElements(0, S16, 32)
.clampMaxNumElements(0, S32, 16)
// The moreElements action could have been used here, but it generates code
// closer to scalarization. We can use G_CONCAT_VECTORS and unmerge to do
// this more optimally.
.customIf(vectorSmallerThan(0, 512))
.bitcastIf(typeInSet(0, {AccV4S64, AccV8S64, AccV16S64}),
bitcastAccToVectorType(0));

// FIXME: G_SADDE/G_SSUBE doesn't support lowering. To support this properly,
// the action needs to be implemented
// FIXME: AIE2 has ADC and SBC operations to read the carry.
@@ -668,6 +678,8 @@ bool AIE2PLegalizerInfo::legalizeCustom(
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_XOR:
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
return AIEHelper.legalizeBinOp(Helper, MI);
}
llvm_unreachable("Un-expected custom legalization");
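
For the <8 x s64> case in the PR title, the operand is an accumulator type that is already 512 bits wide, so the bitcastIf rule above can rewrite it to an equivalent plain vector type on which G_AND/G_OR are legal. A trivial standalone width check (plain C++; the target type of bitcastAccToVectorType is assumed here to be <16 x s32>):

#include <cstdio>

int main() {
  constexpr unsigned AccBits = 8 * 64;  // <8 x s64> accumulator operand
  constexpr unsigned VecBits = 16 * 32; // assumed 512-bit vector it is bitcast to
  static_assert(AccBits == 512 && AccBits == VecBits,
                "bitcast requires identical bit widths");
  std::printf("acc = %u bits, vec = %u bits\n", AccBits, VecBits);
  return 0;
}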