forked from OSchip/llvm-project
[AMDGPU] Divergence-driven instruction selection: shift operations.
Summary: This change enables VOP3 shifts to be selected explicitly,
depending on the divergence.
Differential Revision: https://reviews.llvm.org/D52559
Reviewers: rampitec
llvm-svn: 343455
This commit is contained in:
parent
06e65cae4a
commit
b048fa3344
|
|
@ -442,19 +442,19 @@ def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
|
|||
[(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))]
|
||||
>;
|
||||
def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
|
||||
[(set i64:$sdst, (shl i64:$src0, i32:$src1))]
|
||||
[(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))]
|
||||
>;
|
||||
def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
|
||||
[(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))]
|
||||
>;
|
||||
def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
|
||||
[(set i64:$sdst, (srl i64:$src0, i32:$src1))]
|
||||
[(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))]
|
||||
>;
|
||||
def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
|
||||
[(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))]
|
||||
>;
|
||||
def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
|
||||
[(set i64:$sdst, (sra i64:$src0, i32:$src1))]
|
||||
[(set i64:$sdst, (UniformBinFrag<sra> i64:$src0, i32:$src1))]
|
||||
>;
|
||||
} // End Defs = [SCC]
|
||||
|
||||
|
|
|
|||
|
|
@ -17,16 +17,16 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
|
|||
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
|
||||
|
||||
list<dag> ret3 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT src0),
|
||||
(DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
|
||||
|
||||
list<dag> ret2 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT src0),
|
||||
(DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
|
||||
|
||||
list<dag> ret1 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT src0)))];
|
||||
(DivergentFragOrOp<node, P>.ret (P.Src0VT src0)))];
|
||||
|
||||
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
|
||||
!if(!eq(P.NumSrcArgs, 2), ret2,
|
||||
|
|
@ -35,18 +35,18 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
|
|||
|
||||
class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
|
||||
list<dag> ret3 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
|
||||
(DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
|
||||
(VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
|
||||
|
||||
list<dag> ret2 = [(set P.DstVT:$vdst,
|
||||
(node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
|
||||
(DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
|
||||
(P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
|
||||
|
||||
list<dag> ret1 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
|
||||
(DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
|
||||
|
||||
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
|
||||
!if(!eq(P.NumSrcArgs, 2), ret2,
|
||||
|
|
@ -55,18 +55,18 @@ class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
|
|||
|
||||
class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
|
||||
list<dag> ret3 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
|
||||
(DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
|
||||
(VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
|
||||
|
||||
list<dag> ret2 = [(set P.DstVT:$vdst,
|
||||
(node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
|
||||
(DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
|
||||
(P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
|
||||
|
||||
list<dag> ret1 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
|
||||
(DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
|
||||
|
||||
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
|
||||
!if(!eq(P.NumSrcArgs, 2), ret2,
|
||||
|
|
@ -75,18 +75,18 @@ class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
|
|||
|
||||
class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
|
||||
list<dag> ret3 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
|
||||
(DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
|
||||
(VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
|
||||
|
||||
list<dag> ret2 = [(set P.DstVT:$vdst,
|
||||
(node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
|
||||
(DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
|
||||
(P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
|
||||
|
||||
list<dag> ret1 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
|
||||
(DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
|
||||
|
||||
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
|
||||
!if(!eq(P.NumSrcArgs, 2), ret2,
|
||||
|
|
@ -94,9 +94,9 @@ class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
|
|||
}
|
||||
|
||||
class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
|
||||
list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
|
||||
list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
|
||||
list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0))];
|
||||
list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
|
||||
list<dag> ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1))];
|
||||
list<dag> ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0))];
|
||||
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
|
||||
!if(!eq(P.NumSrcArgs, 2), ret2,
|
||||
ret1));
|
||||
|
|
@ -185,6 +185,7 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
|
|||
getAsm64<HasDst, NumSrcArgs, HasIntClamp,
|
||||
HasModifiers, HasOMod, DstVT>.ret,
|
||||
P.Asm64));
|
||||
let NeedPatGen = P.NeedPatGen;
|
||||
}
|
||||
|
||||
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
|
||||
|
|
@ -381,12 +382,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3
|
|||
|
||||
let SchedRW = [Write64Bit] in {
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>>;
|
||||
def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>>;
|
||||
def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>>;
|
||||
let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
|
||||
def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>;
|
||||
def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>;
|
||||
def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>;
|
||||
def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
|
||||
} // End SubtargetPredicate = isSICI
|
||||
} // End SubtargetPredicate = isSICI, Predicates = [isSICI]
|
||||
|
||||
let SubtargetPredicate = isVI in {
|
||||
def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
|
||||
|
|
@ -395,6 +396,22 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
|
|||
} // End SubtargetPredicate = isVI
|
||||
} // End SchedRW = [Write64Bit]
|
||||
|
||||
// Selection patterns, guarded by the isVI predicate, for the 64-bit shifts
// shl / srl / sra wrapped in getDivergentFrag<...>.ret. Each pattern matches
// (op i64:$x, i32:$y) and emits the corresponding *REV instruction with the
// operands swapped, i.e. ($y, $x).
// NOTE(review): the first pattern is declared with GCNPat while the other two
// use AMDGPUPat -- confirm whether this difference is intentional.
let Predicates = [isVI] in {
|
||||
def : GCNPat <
|
||||
(getDivergentFrag<shl>.ret i64:$x, i32:$y),
|
||||
(V_LSHLREV_B64 $y, $x)
|
||||
>;
|
||||
def : AMDGPUPat <
|
||||
(getDivergentFrag<srl>.ret i64:$x, i32:$y),
|
||||
(V_LSHRREV_B64 $y, $x)
|
||||
>;
|
||||
def : AMDGPUPat <
|
||||
(getDivergentFrag<sra>.ret i64:$x, i32:$y),
|
||||
(V_ASHRREV_I64 $y, $x)
|
||||
>;
|
||||
}
|
||||
|
||||
|
||||
let SubtargetPredicate = isCIVI in {
|
||||
|
||||
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
|
||||
|
|
|
|||
|
|
@ -572,6 +572,11 @@ class VOPPatOrNull<SDPatternOperator Op, VOPProfile P> {
|
|||
list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen<Op, P>.ret, []);
|
||||
}
|
||||
|
||||
// Chooses the pattern operator to use for profile P.
//   ret = getDivergentFrag<Op>.ret  when P.NeedPatGen equals
//                                   PatGenMode.Pattern and Op is an SDNode;
//   ret = Op                        in every other case (NeedPatGen is not
//                                   Pattern, or Op is not an SDNode).
class DivergentFragOrOp<SDPatternOperator Op, VOPProfile P> {
|
||||
SDPatternOperator ret = !if(!eq(P.NeedPatGen,PatGenMode.Pattern),
|
||||
!if(!isa<SDNode>(Op), getDivergentFrag<Op>.ret, Op), Op);
|
||||
}
|
||||
|
||||
include "VOPCInstructions.td"
|
||||
include "VOP1Instructions.td"
|
||||
include "VOP2Instructions.td"
|
||||
|
|
|
|||
Loading…
Reference in New Issue