Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU][True16][CodeGen] true16 codegen for icmp and is_fpclass #124757

Merged
merged 1 commit into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1207,9 +1207,8 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
unsigned FakeS16Opc, unsigned S32Opc,
unsigned S64Opc) {
if (Size == 16)
// FIXME-TRUE16 use TrueS16Opc when realtrue16 is supported for CMP code
return ST.hasTrue16BitInsts()
? ST.useRealTrue16Insts() ? FakeS16Opc : FakeS16Opc
? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
: S16Opc;
if (Size == 32)
return S32Opc;
Expand Down
57 changes: 53 additions & 4 deletions llvm/lib/Target/AMDGPU/VOPCInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1035,6 +1035,20 @@ multiclass VOPCClassPat64<string inst_name> {
>;
}

multiclass VOPCClassPat64_t16<string inst_name> {
defvar inst = !cast<VOP_Pseudo>(inst_name#"_t16_e64");
defvar P = inst.Pfl;
def : GCNPat <
(i1:$sdst
(AMDGPUfp_class
(P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
i32:$src1)),
(inst i32:$src0_modifiers, VSrcT_f16:$src0,
0 /* src1_modifiers */, (f16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
0) /* op_sel */
>;
}

multiclass VOPCClassPat64_fake16<string inst_name> {
defvar inst = !cast<VOP_Pseudo>(inst_name#"_fake16_e64");
defvar P = inst.Pfl;
Expand Down Expand Up @@ -1158,6 +1172,7 @@ multiclass VOPC_CLASS_F16 <string opName> {
}
let True16Predicate = UseRealTrue16Insts in {
defm _t16 : VOPC_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, 0>;
defm : VOPCClassPat64_t16<NAME>;
}
let True16Predicate = UseFakeTrue16Insts in {
defm _fake16 : VOPC_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, 0>;
Expand Down Expand Up @@ -1207,27 +1222,30 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;

// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
// complaints it cannot replace i1 <-> i64/i32 if node was not morphed in place.
multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt, dag dstInst = (inst $src0, $src1)> {
let WaveSizePredicate = isWave64 in
def : GCNPat <
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
(i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
(i64 (COPY_TO_REGCLASS dstInst, SReg_64))
>;

let WaveSizePredicate = isWave32 in {
def : GCNPat <
(i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
(i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
(i32 (COPY_TO_REGCLASS dstInst, SReg_32))
>;

// Support codegen of i64 setcc in wave32 mode.
def : GCNPat <
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
(i64 (REG_SEQUENCE SReg_64, (inst $src0, $src1), sub0, (S_MOV_B32 (i32 0)), sub1))
(i64 (REG_SEQUENCE SReg_64, dstInst, sub0, (S_MOV_B32 (i32 0)), sub1))
>;
}
}

multiclass ICMP_Pattern_t16<PatFrags cond, Instruction inst, ValueType vt>
: ICMP_Pattern<cond, inst, vt, (inst 0, $src0, 0, $src1)>;

defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
Expand All @@ -1250,6 +1268,19 @@ defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;

let True16Predicate = UseRealTrue16Insts in {
defm : ICMP_Pattern_t16 <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
defm : ICMP_Pattern_t16 <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
defm : ICMP_Pattern_t16 <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
defm : ICMP_Pattern_t16 <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
defm : ICMP_Pattern_t16 <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
defm : ICMP_Pattern_t16 <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
defm : ICMP_Pattern_t16 <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
defm : ICMP_Pattern_t16 <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
defm : ICMP_Pattern_t16 <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
defm : ICMP_Pattern_t16 <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
} // End True16Predicate = UseRealTrue16Insts

let True16Predicate = UseFakeTrue16Insts in {
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
Expand Down Expand Up @@ -1335,6 +1366,24 @@ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;

let True16Predicate = UseRealTrue16Insts in {
defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;

defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
} // End True16Predicate = UseRealTrue16Insts

let True16Predicate = UseFakeTrue16Insts in {
defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2012,6 +2012,10 @@ def : ClassPat<V_CMP_CLASS_F16_e64, f16> {
let True16Predicate = NotHasTrue16BitInsts;
}

def : ClassPat_t16<V_CMP_CLASS_F16_t16_e64, f16> {
let True16Predicate = UseRealTrue16Insts;
}

def : ClassPat_t16<V_CMP_CLASS_F16_fake16_e64, f16> {
let True16Predicate = UseFakeTrue16Insts;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,9 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
Expand Down Expand Up @@ -55,11 +53,9 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,9 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
Expand Down Expand Up @@ -55,11 +53,9 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
Expand Down
Loading