[X86] Merge ISD::ADD/SUB nodes into X86ISD::ADD/SUB equivalents (PR40483)
Avoid ADD/SUB instruction duplication by reusing the X86ISD::ADD/SUB results. Includes ADD commutation - I tried to include NEG+SUB SUB commutation as well, but this causes regressions as we don't have good combine coverage to simplify X86ISD::SUB.

Differential Revision: https://reviews.llvm.org/D58597

llvm-svn: 354771
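For context, a minimal C++ reproducer sketch of the pattern this combine targets (the function name and constants are made up for illustration, not taken from the PR40483 report; it assumes an x86 target and a compiler that exposes _addcarry_u32 via <immintrin.h>). The plain '+' and the carry intrinsic share the same operands, so the DAG ends up with a generic ISD::ADD alongside the flag-producing X86ISD::ADD; before this patch both could be emitted (e.g. an LEA for the value plus an ADD for the flags, as the old test checks below show), and afterwards the generic node is folded into the X86ISD::ADD result.

#include <immintrin.h>
#include <cstdio>

unsigned add_and_check_carry(unsigned *p, unsigned v) {
  unsigned sum;
  // The intrinsic lowers to an X86ISD::ADD whose flag result is consumed below.
  unsigned char carry = _addcarry_u32(0, *p, v, &sum);
  // Same operands (*p and v) recomputed with a plain add -> a generic ISD::ADD node.
  unsigned plain_sum = *p + v;
  *p = sum;
  return carry ? 0u : plain_sum; // return the sum only when no carry occurred
}

int main() {
  unsigned x = 0xFFFFFFF0u;
  std::printf("%u\n", add_and_check_carry(&x, 0x10u)); // wraps: carry set, prints 0
  return 0;
}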
@@ -40972,20 +40972,38 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
   return Op.getValue(1);
 }
 
-static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
+                                TargetLowering::DAGCombinerInfo &DCI) {
   assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
          "Expected X86ISD::ADD or X86ISD::SUB");
 
-  // If we don't use the flag result, simplify back to a simple ADD/SUB.
-  if (N->hasAnyUseOfValue(1))
-    return SDValue();
-
-  SDLoc DL(N);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
-  SDValue Res = DAG.getNode(X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB,
-                            DL, LHS.getSimpleValueType(), LHS, RHS);
-  return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
+  MVT VT = LHS.getSimpleValueType();
+  unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;
+
+  // If we don't use the flag result, simplify back to a generic ADD/SUB.
+  if (!N->hasAnyUseOfValue(1)) {
+    SDLoc DL(N);
+    SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
+    return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
+  }
+
+  // Fold any similar generic ADD/SUB opcodes to reuse this node.
+  auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
+    // TODO: Add SUB(RHS, LHS) -> SUB(0, SUB(LHS, RHS)) negation support, this
+    // currently causes regressions as we don't have broad x86sub combines.
+    if (Negate)
+      return;
+    SDValue Ops[] = {N0, N1};
+    SDVTList VTs = DAG.getVTList(N->getValueType(0));
+    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops))
+      DCI.CombineTo(GenericAddSub, SDValue(N, 0));
+  };
+  MatchGeneric(LHS, RHS, false);
+  MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
+
+  return SDValue();
 }
 
 static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
@@ -42198,7 +42216,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ADD:            return combineAdd(N, DAG, Subtarget);
   case ISD::SUB:            return combineSub(N, DAG, Subtarget);
   case X86ISD::ADD:
-  case X86ISD::SUB:         return combineX86AddSub(N, DAG);
+  case X86ISD::SUB:         return combineX86AddSub(N, DAG, DCI);
   case X86ISD::SBB:         return combineSBB(N, DAG);
   case X86ISD::ADC:         return combineADC(N, DAG, DCI);
   case ISD::MUL:            return combineMul(N, DAG, DCI, Subtarget);
@@ -5,33 +5,22 @@
 define i32 @PR40483_add1(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_add1:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl (%edx), %esi
-; X86-NEXT:    leal (%esi,%ecx), %eax
-; X86-NEXT:    addl %ecx, %esi
-; X86-NEXT:    movl %esi, (%edx)
-; X86-NEXT:    jae .LBB0_1
-; X86-NEXT:  # %bb.2:
+; X86-NEXT:    movl (%ecx), %eax
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%ecx)
+; X86-NEXT:    jae .LBB0_2
+; X86-NEXT:  # %bb.1:
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_1:
-; X86-NEXT:    orl %eax, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:  .LBB0_2:
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    leal (%rcx,%rsi), %edx
-; X64-NEXT:    orl %edx, %edx
 ; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    addl %esi, %ecx
-; X64-NEXT:    movl %ecx, (%rdi)
-; X64-NEXT:    cmovael %edx, %eax
+; X64-NEXT:    addl (%rdi), %esi
+; X64-NEXT:    movl %esi, (%rdi)
+; X64-NEXT:    cmovael %esi, %eax
 ; X64-NEXT:    retq
   %3 = load i32, i32* %0, align 8
   %4 = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %3, i32 %1)
@@ -48,34 +37,23 @@ define i32 @PR40483_add1(i32*, i32) nounwind {
 define i32 @PR40483_add2(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_add2:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl (%esi), %edi
-; X86-NEXT:    leal (%edi,%edx), %ecx
+; X86-NEXT:    movl (%edx), %ecx
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    addl %edx, %edi
-; X86-NEXT:    movl %edi, (%esi)
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, (%edx)
 ; X86-NEXT:    jae .LBB1_2
 ; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl %ecx, %ecx
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:  .LBB1_2:
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_add2:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    leal (%rcx,%rsi), %edx
-; X64-NEXT:    orl %edx, %edx
 ; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    addl %esi, %ecx
-; X64-NEXT:    movl %ecx, (%rdi)
-; X64-NEXT:    cmovbl %edx, %eax
+; X64-NEXT:    addl (%rdi), %esi
+; X64-NEXT:    movl %esi, (%rdi)
+; X64-NEXT:    cmovbl %esi, %eax
 ; X64-NEXT:    retq
   %3 = load i32, i32* %0, align 8
   %4 = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %3, i32 %1)
@@ -11,11 +11,11 @@ define void @PR25858_i32(%WideUInt32* sret, %WideUInt32*, %WideUInt32*) nounwind
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl (%ecx), %esi
-; X86-NEXT:    movl 4(%ecx), %ecx
-; X86-NEXT:    subl (%edx), %esi
-; X86-NEXT:    sbbl 4(%edx), %ecx
-; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl (%edx), %esi
+; X86-NEXT:    movl 4(%edx), %edx
+; X86-NEXT:    subl (%ecx), %esi
+; X86-NEXT:    sbbl 4(%ecx), %edx
+; X86-NEXT:    movl %edx, 4(%eax)
 ; X86-NEXT:    movl %esi, (%eax)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
@@ -249,35 +249,24 @@ define i32 @PR40483_sub3(i32*, i32) nounwind {
 define i32 @PR40483_sub4(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_sub4:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl (%esi), %edi
-; X86-NEXT:    movl %edi, %ecx
-; X86-NEXT:    subl %edx, %ecx
+; X86-NEXT:    movl (%edx), %ecx
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    subl %edx, %edi
-; X86-NEXT:    movl %edi, (%esi)
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, (%edx)
 ; X86-NEXT:    jae .LBB6_2
 ; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl %ecx, %ecx
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:  .LBB6_2:
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_sub4:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    movl %ecx, %edx
-; X64-NEXT:    subl %esi, %edx
-; X64-NEXT:    orl %edx, %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    movl %ecx, (%rdi)
-; X64-NEXT:    cmovbl %edx, %eax
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %eax, (%rdi)
+; X64-NEXT:    cmovael %ecx, %eax
 ; X64-NEXT:    retq
   %3 = load i32, i32* %0, align 8
   %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)