[AIE] Legalize G_AND for <8 x s64> vector. #262

Open
wants to merge 3 commits into base: aie-public
11 changes: 10 additions & 1 deletion llvm/lib/Target/AIE/AIE2InstrPatterns.td
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
@@ -1092,6 +1092,15 @@ def : PatInaccessibleMem<(int_aie2_v16bf16_to_v16i32 VEC256:$src, mSs:$shft),
def : PatInaccessibleMem<(int_aie2_clr16f_conf),
(VCLR_vclr_BF)>;

// AIE does not have a native xor instruction for vector operands.
// Res = A xor B --> Res = AB' || A'B
foreach Ty = [v64i8, v32i16, v16i32] in {
def : Pat<(xor Ty:$src1, Ty:$src2),
(VBOR
(VBAND (VBNEG_LTZ_S32 Ty:$src1), Ty:$src2),
(VBAND (VBNEG_LTZ_S32 Ty:$src2), Ty:$src1))>;
}

// DIVS
def : Pat<(int_aie2_divs eR31:$sd_in, eR:$src0, eR:$src1),
(DIVS eR31:$sd_in, eR:$src0, eR:$src1)>;
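
The xor pattern added above leans on the Boolean identity A xor B = AB' + A'B. Below is a minimal standalone C++ check (not part of the patch) of that identity per 32-bit lane, assuming VBNEG_LTZ_S32 / VBAND / VBOR act as lane-wise NOT / AND / OR:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 0xFFFFFFFFu, 0x12345678u, 0xDEADBEEFu};
  for (uint32_t A : Vals)
    for (uint32_t B : Vals)
      // XOR rebuilt from NOT/AND/OR, mirroring the VBOR(VBAND, VBAND) pattern.
      assert((A ^ B) == ((A & ~B) | (~A & B)));
  return 0;
}
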
53 changes: 39 additions & 14 deletions llvm/lib/Target/AIE/AIE2LegalizerInfo.cpp
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
/// \file
@@ -73,6 +73,13 @@ static LegalityPredicate isValidVectorAIE2(const unsigned TypeIdx) {
};
}

static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isVector() && QueryTy.getSizeInBits() < Size;
};
}

LegalityPredicate
negatePredicate(const std::function<bool(const LegalityQuery &)> &Func) {
return [=](const LegalityQuery &Query) { return !Func(Query); };
@@ -213,17 +220,6 @@ AIE2LegalizerInfo::AIE2LegalizerInfo(const AIE2Subtarget &ST) : AIEHelper(ST) {
.clampScalar(0, S32, S32);
// FIXME: (s|z|any)ext s20 to s64 is broken.

getActionDefinitionsBuilder({G_AND, G_OR})
.legalFor({S32})
.legalFor(AIE2VectorTypes)
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32);

getActionDefinitionsBuilder(G_XOR)
.legalFor({S32})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32);

getActionDefinitionsBuilder(G_SEXT_INREG).custom();

getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL})
@@ -250,11 +246,34 @@ AIE2LegalizerInfo::AIE2LegalizerInfo(const AIE2Subtarget &ST) : AIEHelper(ST) {
// patterns.
.bitcastIf(typeInSet(0, AIE2AccumulatorTypes), bitcastAccToVectorType(0));

getActionDefinitionsBuilder({G_ADD, G_SUB})
getActionDefinitionsBuilder({G_ADD, G_SUB, G_XOR})
.legalFor({S32})
.legalFor({V16S32, V32S16, V64S8})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32);
.clampScalar(0, S32, S32)
// AIE ISA supports only 512-bit vector add/sub/xor
.clampMaxNumElements(0, S8, 64)
.clampMaxNumElements(0, S16, 32)
.clampMaxNumElements(0, S32, 16)
// The moreElements action could have been used here, but it generates code
// closer to scalarization. We can use G_CONCAT_VECTORS and unmerge to do
// this more optimally.
.customIf(vectorSmallerThan(0, 512));

getActionDefinitionsBuilder({G_AND, G_OR})
.legalFor({S32})
.legalFor({V16S32, V32S16, V64S8})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32)
// AIE ISA supports only 512-bit vector and/or
.clampMaxNumElements(0, S8, 64)
.clampMaxNumElements(0, S16, 32)
.clampMaxNumElements(0, S32, 16)
// The moreElements action could have been used here, but it generates code
// closer to scalarization. We can use G_CONCAT_VECTORS and unmerge to do
// this more optimally.
.customIf(vectorSmallerThan(0, 512))
.bitcastIf(typeInSet(0, AIE2AccumulatorTypes), bitcastAccToVectorType(0));

// FIXME: G_SADDE/G_SSUBE doesn't support lowering. To support this properly,
// the action needs to be implemented
@@ -546,6 +565,12 @@ bool AIE2LegalizerInfo::legalizeCustom(
return AIEHelper.legalizeG_SEXT_INREG(Helper, MI);
case TargetOpcode::G_BITCAST:
return AIEHelper.legalizeG_BITCAST(Helper, MI);
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_XOR:
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
return AIEHelper.legalizeBinOp(Helper, MI);
}

llvm_unreachable("Un-expected custom legalization");
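
Taken together, the G_ADD/G_SUB/G_XOR and G_AND/G_OR rules above handle a vector operand in one of three ways: 512-bit vectors with s8/s16/s32 elements are legal as-is, wider vectors are clamped down into 512-bit pieces, and narrower ones hit the custom rule that pads them up to 512 bits. A rough standalone sketch of that decision in plain C++ (not LLVM code; accumulator types such as <8 x s64> take the separate bitcastIf path and are not modelled here):

#include <cstdio>

// Rough classification of a <NumElts x sEltBits> operand for these ops,
// assuming 512 bits is the only native vector width.
static const char *classify(unsigned NumElts, unsigned EltBits) {
  const unsigned Bits = NumElts * EltBits;
  if (Bits == 512 && EltBits <= 32)
    return "legal as-is (<64 x s8>, <32 x s16>, <16 x s32>)";
  if (Bits > 512)
    return "clampMaxNumElements: split into 512-bit pieces";
  return "customIf(vectorSmallerThan(0, 512)): pad with undef up to 512 bits";
}

int main() {
  std::printf("<16 x s32>: %s\n", classify(16, 32));
  std::printf("<64 x s16>: %s\n", classify(64, 16));
  std::printf("<4 x s32>:  %s\n", classify(4, 32));
  return 0;
}
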
32 changes: 2 additions & 30 deletions llvm/lib/Target/AIE/AIEBaseInstrPatterns.td
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
@@ -36,36 +36,8 @@ def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameIndex">,
GISDNodeXFormEquiv<frameindex_to_targetframeindex>;

// VBOR / VBAND
foreach vec256Ty = [v32i8, v16i16, v8i32] in {
def : Pat<(vec256Ty (or VEC256:$src1, VEC256:$src2)),
(vec256Ty (EXTRACT_SUBREG (VBOR
(v32i16 (REG_SEQUENCE VEC512, VEC256:$src1, sub_256_lo)),
(v32i16 (REG_SEQUENCE VEC512, VEC256:$src2, sub_256_lo))),
sub_256_lo))>;
def : Pat<(vec256Ty (and VEC256:$src1, VEC256:$src2)),
(vec256Ty (EXTRACT_SUBREG (VBAND
(v32i16 (REG_SEQUENCE VEC512, VEC256:$src1, sub_256_lo)),
(v32i16 (REG_SEQUENCE VEC512, VEC256:$src2, sub_256_lo))),
sub_256_lo))>;
}
// VBOR / VBAND / XOR
foreach vec512Ty = [v64i8, v32i16, v16i32] in {
def : Pat<(vec512Ty (or VEC512:$src1, VEC512:$src2)), (VBOR VEC512:$src1, VEC512:$src2)>;
def : Pat<(vec512Ty (and VEC512:$src1, VEC512:$src2)), (VBAND VEC512:$src1, VEC512:$src2)>;
}
foreach vec1024Ty = [v128i8, v64i16, v32i32] in {
def : Pat<(vec1024Ty (or VEC1024:$src1, VEC1024:$src2)),
(REG_SEQUENCE VEC1024,
(VBOR (v16i32 (EXTRACT_SUBREG VEC1024:$src1, sub_512_lo)),
(v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_lo))), sub_512_lo,
(VBOR (v16i32 (EXTRACT_SUBREG VEC1024:$src1, sub_512_hi)),
(v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_hi))), sub_512_hi
)>;
def : Pat<(vec1024Ty (and VEC1024:$src1, VEC1024:$src2)),
(REG_SEQUENCE VEC1024,
(VBAND (v16i32 (EXTRACT_SUBREG VEC1024:$src1, sub_512_lo)),
(v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_lo))), sub_512_lo,
(VBAND (v16i32 (EXTRACT_SUBREG VEC1024:$src1, sub_512_hi)),
(v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_hi))), sub_512_hi
)>;
}
41 changes: 22 additions & 19 deletions llvm/lib/Target/AIE/AIELegalizerHelper.cpp
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
/// \file
@@ -1306,48 +1306,51 @@ bool AIELegalizerHelper::legalizeBinOp(LegalizerHelper &Helper,
MachineInstr &MI) const {
assert(MI.getOpcode() == TargetOpcode::G_ADD ||
MI.getOpcode() == TargetOpcode::G_SUB ||
MI.getOpcode() == TargetOpcode::G_XOR);
MI.getOpcode() == TargetOpcode::G_XOR ||
MI.getOpcode() == TargetOpcode::G_AND ||
MI.getOpcode() == TargetOpcode::G_OR);

MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

const Register DstReg = MI.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const auto VectorSize = DstTy.getSizeInBits();

assert(DstTy.isVector() && VectorSize < 512 &&
"Expected vector size less than 512 bits");
assert(!(512 % VectorSize) && "Vector size should evenly divide 512");

const Register Src1Reg = MI.getOperand(1).getReg();
const Register Src2Reg = MI.getOperand(2).getReg();

assert(DstTy == MRI.getType(Src1Reg));
Register UndefReg = MRI.createGenericVirtualRegister(DstTy);
MIRBuilder.buildUndef(UndefReg);

auto NewVecTy = LLT::fixed_vector(
512 / DstTy.getElementType().getSizeInBits(), DstTy.getElementType());
Register NewDstReg = MRI.createGenericVirtualRegister(NewVecTy);
Register NewSrc1Reg = MRI.createGenericVirtualRegister(NewVecTy);
Register NewSrc2Reg = MRI.createGenericVirtualRegister(NewVecTy);

unsigned NumberOfPadElts = (512 / VectorSize) - 1;
SmallVector<Register, 8> Regs;
const Register UndefReg = MRI.createGenericVirtualRegister(DstTy);
MIRBuilder.buildUndef(UndefReg);

Regs.push_back(Src1Reg);
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(UndefReg);
MIRBuilder.buildMergeLikeInstr(NewSrc1Reg, Regs);
const unsigned NumberOfPadElts = (512 / VectorSize) - 1;
auto buildMergeInstr = [&](const Register SrcReg) -> Register {
SmallVector<Register, 4> Regs;
Regs.push_back(SrcReg);
for (unsigned i = 0; i < NumberOfPadElts; i++)
Regs.push_back(UndefReg);
const Register NewSrcReg = MRI.createGenericVirtualRegister(NewVecTy);
MIRBuilder.buildMergeLikeInstr(NewSrcReg, Regs);
return NewSrcReg;
};

Regs.clear();
Regs.push_back(Src2Reg);
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(UndefReg);
MIRBuilder.buildMergeLikeInstr(NewSrc2Reg, Regs);
const Register NewSrc1Reg = buildMergeInstr(Src1Reg);
const Register NewSrc2Reg = buildMergeInstr(Src2Reg);

const Register NewDstReg = MRI.createGenericVirtualRegister(NewVecTy);
MIRBuilder.buildInstr(MI.getOpcode(), {NewDstReg}, {NewSrc1Reg, NewSrc2Reg},
MI.getFlags());

Regs.clear();
SmallVector<Register, 4> Regs;
Regs.push_back(DstReg);
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(MRI.createGenericVirtualRegister(DstTy));
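
A minimal standalone sketch (plain C++, mirroring but not reusing the code above) of the quantities legalizeBinOp computes when padding a 128-bit <4 x s32> operand up to the assumed 512-bit native width:

#include <cassert>
#include <cstdio>

int main() {
  const unsigned VectorSize = 4 * 32; // DstTy.getSizeInBits() for <4 x s32>
  const unsigned EltSize = 32;        // element size in bits
  assert(VectorSize < 512 && !(512 % VectorSize));

  const unsigned WideNumElts = 512 / EltSize;              // NewVecTy: <16 x s32>
  const unsigned NumberOfPadElts = (512 / VectorSize) - 1; // undef pieces to append

  // Prints: widen to <16 x s32>, appending 3 undef <4 x s32> pieces
  std::printf("widen to <%u x s%u>, appending %u undef <4 x s32> pieces\n",
              WideNumElts, EltSize, NumberOfPadElts);
  return 0;
}
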
28 changes: 20 additions & 8 deletions llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp
@@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
/// \file
@@ -252,12 +252,6 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
.clampScalar(0, S32, S32);
// FIXME: (s|z|any)ext s20 to s64 is broken.

getActionDefinitionsBuilder({G_AND, G_OR})
.legalFor({S32})
.legalFor(AIE2PVectorTypes)
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32);

getActionDefinitionsBuilder(G_SEXT_INREG).custom();

getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL})
@@ -305,7 +299,7 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
.legalFor({V16S32, V32S16, V64S8})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32)
// AIE ISA supports only 512-bit vector add/sub/xor
// AIE ISA supports only 512-bit vector add/sub/xor/and/or
.clampMaxNumElements(0, S8, 64)
.clampMaxNumElements(0, S16, 32)
.clampMaxNumElements(0, S32, 16)
@@ -314,6 +308,22 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
// more optimally.
.customIf(vectorSmallerThan(0, 512));

getActionDefinitionsBuilder({G_AND, G_OR})
.legalFor({S32})
.legalFor({V16S32, V32S16, V64S8})
.widenScalarToNextPow2(0)
.clampScalar(0, S32, S32)
// AIE ISA supports only 512-bit vector and/or
.clampMaxNumElements(0, S8, 64)
.clampMaxNumElements(0, S16, 32)
.clampMaxNumElements(0, S32, 16)
// The moreElements action could have been used here, but it generates code
// closer to scalarization. We can use G_CONCAT_VECTORS and unmerge to do
// this more optimally.
.customIf(vectorSmallerThan(0, 512))
.bitcastIf(typeInSet(0, {AccV4S64, AccV8S64, AccV16S64}),
bitcastAccToVectorType(0));

// FIXME: G_SADDE/G_SSUBE doesn't support lowering. To support this properly,
// the action needs to be implemented
// FIXME: AIE2 has ADC and SBC operations to read the carry.
@@ -668,6 +678,8 @@ bool AIE2PLegalizerInfo::legalizeCustom(
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_XOR:
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
return AIEHelper.legalizeBinOp(Helper, MI);
}
llvm_unreachable("Un-expected custom legalization");
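
For the <8 x s64> case in the PR title, the operand is an accumulator type that is already 512 bits wide, so the bitcastIf rule above can rewrite it to an equivalent plain vector type on which G_AND/G_OR are legal. A trivial standalone width check (plain C++; the target type of bitcastAccToVectorType is assumed here to be <16 x s32>):

#include <cstdio>

int main() {
  constexpr unsigned AccBits = 8 * 64;  // <8 x s64> accumulator operand
  constexpr unsigned VecBits = 16 * 32; // assumed 512-bit vector it is bitcast to
  static_assert(AccBits == 512 && AccBits == VecBits,
                "bitcast requires identical bit widths");
  std::printf("acc = %u bits, vec = %u bits\n", AccBits, VecBits);
  return 0;
}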