forked from OSchip/llvm-project
[X86] Merge ISD::ADD/SUB nodes into X86ISD::ADD/SUB equivalents (PR40483)
Avoid ADD/SUB instruction duplication by reusing the X86ISD::ADD/SUB results. Includes ADD commutation - I tried to include NEG+SUB SUB commutation as well but this causes regressions as we don't have good combine coverage to simplify X86ISD::SUB. Differential Revision: https://reviews.llvm.org/D58597 llvm-svn: 354771
This commit is contained in:
parent
fd99780c09
commit
c61f1e8e6c
|
@@ -40972,20 +40972,38 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
|
|||
return Op.getValue(1);
|
||||
}
|
||||
|
||||
// Combine X86ISD::ADD / X86ISD::SUB nodes (PR40483).
//
// Two transforms:
//  1. If the EFLAGS result (value #1) is unused, lower back to a generic
//     ISD::ADD/ISD::SUB so the usual target-independent combines apply.
//  2. Otherwise, find any existing generic ADD/SUB over the same operands
//     and fold it into this node's arithmetic result (value #0), avoiding
//     a duplicated add/sub instruction. ADD also matches the commuted
//     operand order.
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI) {
  assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
         "Expected X86ISD::ADD or X86ISD::SUB");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  MVT VT = LHS.getSimpleValueType();
  unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;

  // If we don't use the flag result, simplify back to a generic ADD/SUB.
  if (!N->hasAnyUseOfValue(1)) {
    SDLoc DL(N);
    SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
    // Keep the value count intact: pair the arithmetic result with a dead
    // zero flag value so users of value #0 are rewired correctly.
    return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
  }

  // Fold any similar generic ADD/SUB opcodes to reuse this node.
  auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
    // TODO: Add SUB(RHS, LHS) -> SUB(0, SUB(LHS, RHS)) negation support, this
    // currently causes regressions as we don't have broad x86sub combines.
    if (Negate)
      return;
    SDValue Ops[] = {N0, N1};
    SDVTList VTs = DAG.getVTList(N->getValueType(0));
    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops))
      DCI.CombineTo(GenericAddSub, SDValue(N, 0));
  };
  MatchGeneric(LHS, RHS, false);
  // Commuted match: legal for ADD; for SUB it would need the (disabled)
  // negation support above, so Negate suppresses it.
  MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());

  return SDValue();
}
|
||||
|
||||
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
|
||||
|
@@ -42198,7 +42216,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
|
||||
case ISD::SUB: return combineSub(N, DAG, Subtarget);
|
||||
case X86ISD::ADD:
|
||||
case X86ISD::SUB: return combineX86AddSub(N, DAG);
|
||||
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
|
||||
case X86ISD::SBB: return combineSBB(N, DAG);
|
||||
case X86ISD::ADC: return combineADC(N, DAG, DCI);
|
||||
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
|
||||
|
|
|
@@ -5,33 +5,22 @@
|
|||
define i32 @PR40483_add1(i32*, i32) nounwind {
|
||||
; X86-LABEL: PR40483_add1:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl (%edx), %esi
|
||||
; X86-NEXT: leal (%esi,%ecx), %eax
|
||||
; X86-NEXT: addl %ecx, %esi
|
||||
; X86-NEXT: movl %esi, (%edx)
|
||||
; X86-NEXT: jae .LBB0_1
|
||||
; X86-NEXT: # %bb.2:
|
||||
; X86-NEXT: movl (%ecx), %eax
|
||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %eax, (%ecx)
|
||||
; X86-NEXT: jae .LBB0_2
|
||||
; X86-NEXT: # %bb.1:
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: .LBB0_1:
|
||||
; X86-NEXT: orl %eax, %eax
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: .LBB0_2:
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: PR40483_add1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; X64-NEXT: movl (%rdi), %ecx
|
||||
; X64-NEXT: leal (%rcx,%rsi), %edx
|
||||
; X64-NEXT: orl %edx, %edx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: addl %esi, %ecx
|
||||
; X64-NEXT: movl %ecx, (%rdi)
|
||||
; X64-NEXT: cmovael %edx, %eax
|
||||
; X64-NEXT: addl (%rdi), %esi
|
||||
; X64-NEXT: movl %esi, (%rdi)
|
||||
; X64-NEXT: cmovael %esi, %eax
|
||||
; X64-NEXT: retq
|
||||
%3 = load i32, i32* %0, align 8
|
||||
%4 = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %3, i32 %1)
|
||||
|
@@ -48,34 +37,23 @@ define i32 @PR40483_add1(i32*, i32) nounwind {
|
|||
define i32 @PR40483_add2(i32*, i32) nounwind {
|
||||
; X86-LABEL: PR40483_add2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl (%esi), %edi
|
||||
; X86-NEXT: leal (%edi,%edx), %ecx
|
||||
; X86-NEXT: movl (%edx), %ecx
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: addl %edx, %edi
|
||||
; X86-NEXT: movl %edi, (%esi)
|
||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, (%edx)
|
||||
; X86-NEXT: jae .LBB1_2
|
||||
; X86-NEXT: # %bb.1:
|
||||
; X86-NEXT: orl %ecx, %ecx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: .LBB1_2:
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: PR40483_add2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; X64-NEXT: movl (%rdi), %ecx
|
||||
; X64-NEXT: leal (%rcx,%rsi), %edx
|
||||
; X64-NEXT: orl %edx, %edx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: addl %esi, %ecx
|
||||
; X64-NEXT: movl %ecx, (%rdi)
|
||||
; X64-NEXT: cmovbl %edx, %eax
|
||||
; X64-NEXT: addl (%rdi), %esi
|
||||
; X64-NEXT: movl %esi, (%rdi)
|
||||
; X64-NEXT: cmovbl %esi, %eax
|
||||
; X64-NEXT: retq
|
||||
%3 = load i32, i32* %0, align 8
|
||||
%4 = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %3, i32 %1)
|
||||
|
|
|
@@ -11,11 +11,11 @@ define void @PR25858_i32(%WideUInt32* sret, %WideUInt32*, %WideUInt32*) nounwind
|
|||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl (%ecx), %esi
|
||||
; X86-NEXT: movl 4(%ecx), %ecx
|
||||
; X86-NEXT: subl (%edx), %esi
|
||||
; X86-NEXT: sbbl 4(%edx), %ecx
|
||||
; X86-NEXT: movl %ecx, 4(%eax)
|
||||
; X86-NEXT: movl (%edx), %esi
|
||||
; X86-NEXT: movl 4(%edx), %edx
|
||||
; X86-NEXT: subl (%ecx), %esi
|
||||
; X86-NEXT: sbbl 4(%ecx), %edx
|
||||
; X86-NEXT: movl %edx, 4(%eax)
|
||||
; X86-NEXT: movl %esi, (%eax)
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: retl $4
|
||||
|
@@ -249,35 +249,24 @@ define i32 @PR40483_sub3(i32*, i32) nounwind {
|
|||
define i32 @PR40483_sub4(i32*, i32) nounwind {
|
||||
; X86-LABEL: PR40483_sub4:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl (%esi), %edi
|
||||
; X86-NEXT: movl %edi, %ecx
|
||||
; X86-NEXT: subl %edx, %ecx
|
||||
; X86-NEXT: movl (%edx), %ecx
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: subl %edx, %edi
|
||||
; X86-NEXT: movl %edi, (%esi)
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, (%edx)
|
||||
; X86-NEXT: jae .LBB6_2
|
||||
; X86-NEXT: # %bb.1:
|
||||
; X86-NEXT: orl %ecx, %ecx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: .LBB6_2:
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: PR40483_sub4:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl (%rdi), %ecx
|
||||
; X64-NEXT: movl %ecx, %edx
|
||||
; X64-NEXT: subl %esi, %edx
|
||||
; X64-NEXT: orl %edx, %edx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: subl %esi, %ecx
|
||||
; X64-NEXT: movl %ecx, (%rdi)
|
||||
; X64-NEXT: cmovbl %edx, %eax
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: xorl %ecx, %ecx
|
||||
; X64-NEXT: subl %esi, %eax
|
||||
; X64-NEXT: movl %eax, (%rdi)
|
||||
; X64-NEXT: cmovael %ecx, %eax
|
||||
; X64-NEXT: retq
|
||||
%3 = load i32, i32* %0, align 8
|
||||
%4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
|
||||
|
|
Loading…
Reference in New Issue