[X86] Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y)) (RECOMMITTED)
As noticed on PR39174, if we're extracting a single bit at a non-constant bit index, then try to use BT+SETCC instead; this avoids having to move the shift amount into the ECX register, using slow x86 variable shift ops, etc. Recommitted with a fix to ensure we zext/trunc the SETCC result to the original type. Differential Revision: https://reviews.llvm.org/D122891
This commit is contained in:
		
							parent
							
								
									ea624e697b
								
							
						
					
					
						commit
						b8652fbcbb
					
				| 
						 | 
					@ -47329,6 +47329,19 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
 | 
				
			||||||
  if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
 | 
					  if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
 | 
				
			||||||
    return R;
 | 
					    return R;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y), COND_B) iff Y is not a constant
 | 
				
			||||||
 | 
					  // avoids slow variable shift (moving shift amount to ECX etc.)
 | 
				
			||||||
 | 
					  if (isOneConstant(N1) && N0->hasOneUse()) {
 | 
				
			||||||
 | 
					    SDValue Src = N0;
 | 
				
			||||||
 | 
					    while ((Src.getOpcode() == ISD::ZERO_EXTEND ||
 | 
				
			||||||
 | 
					            Src.getOpcode() == ISD::TRUNCATE) &&
 | 
				
			||||||
 | 
					           Src.getOperand(0)->hasOneUse())
 | 
				
			||||||
 | 
					      Src = Src.getOperand(0);
 | 
				
			||||||
 | 
					    if (Src.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(Src.getOperand(1)))
 | 
				
			||||||
 | 
					      if (SDValue BT = getBT(Src.getOperand(0), Src.getOperand(1), dl, DAG))
 | 
				
			||||||
 | 
					        return DAG.getZExtOrTrunc(getSETCC(X86::COND_B, BT, dl, DAG), dl, VT);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
 | 
					  if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
 | 
				
			||||||
    // Attempt to recursively combine a bitmask AND with shuffles.
 | 
					    // Attempt to recursively combine a bitmask AND with shuffles.
 | 
				
			||||||
    SDValue Op(N, 0);
 | 
					    SDValue Op(N, 0);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -139,19 +139,17 @@ define zeroext i1 @t6(i32 %a) #0 {
 | 
				
			||||||
define zeroext i1 @t7(i32 %0) {
 | 
					define zeroext i1 @t7(i32 %0) {
 | 
				
			||||||
; X86-LABEL: t7:
 | 
					; X86-LABEL: t7:
 | 
				
			||||||
; X86:       ## %bb.0:
 | 
					; X86:       ## %bb.0:
 | 
				
			||||||
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 | 
					; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 | 
				
			||||||
; X86-NEXT:    movb $19, %al
 | 
					; X86-NEXT:    movl $19, %ecx
 | 
				
			||||||
; X86-NEXT:    shrb %cl, %al
 | 
					; X86-NEXT:    btl %eax, %ecx
 | 
				
			||||||
; X86-NEXT:    andb $1, %al
 | 
					; X86-NEXT:    setb %al
 | 
				
			||||||
; X86-NEXT:    retl
 | 
					; X86-NEXT:    retl
 | 
				
			||||||
;
 | 
					;
 | 
				
			||||||
; X64-LABEL: t7:
 | 
					; X64-LABEL: t7:
 | 
				
			||||||
; X64:       ## %bb.0:
 | 
					; X64:       ## %bb.0:
 | 
				
			||||||
; X64-NEXT:    movl %edi, %ecx
 | 
					; X64-NEXT:    movl $19, %eax
 | 
				
			||||||
; X64-NEXT:    movb $19, %al
 | 
					; X64-NEXT:    btl %edi, %eax
 | 
				
			||||||
; X64-NEXT:    ## kill: def $cl killed $cl killed $ecx
 | 
					; X64-NEXT:    setb %al
 | 
				
			||||||
; X64-NEXT:    shrb %cl, %al
 | 
					 | 
				
			||||||
; X64-NEXT:    andb $1, %al
 | 
					 | 
				
			||||||
; X64-NEXT:    retq
 | 
					; X64-NEXT:    retq
 | 
				
			||||||
  %2 = trunc i32 %0 to i5
 | 
					  %2 = trunc i32 %0 to i5
 | 
				
			||||||
  %3 = lshr i5 -13, %2
 | 
					  %3 = lshr i5 -13, %2
 | 
				
			||||||
| 
						 | 
					@ -163,20 +161,16 @@ define zeroext i1 @t7(i32 %0) {
 | 
				
			||||||
define zeroext i1 @t8(i8 %0, i8 %1) {
 | 
					define zeroext i1 @t8(i8 %0, i8 %1) {
 | 
				
			||||||
; X86-LABEL: t8:
 | 
					; X86-LABEL: t8:
 | 
				
			||||||
; X86:       ## %bb.0:
 | 
					; X86:       ## %bb.0:
 | 
				
			||||||
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 | 
					; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 | 
				
			||||||
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 | 
					; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 | 
				
			||||||
; X86-NEXT:    shrb %cl, %al
 | 
					; X86-NEXT:    btl %eax, %ecx
 | 
				
			||||||
; X86-NEXT:    andb $1, %al
 | 
					; X86-NEXT:    setb %al
 | 
				
			||||||
; X86-NEXT:    retl
 | 
					; X86-NEXT:    retl
 | 
				
			||||||
;
 | 
					;
 | 
				
			||||||
; X64-LABEL: t8:
 | 
					; X64-LABEL: t8:
 | 
				
			||||||
; X64:       ## %bb.0:
 | 
					; X64:       ## %bb.0:
 | 
				
			||||||
; X64-NEXT:    movl %esi, %ecx
 | 
					; X64-NEXT:    btl %esi, %edi
 | 
				
			||||||
; X64-NEXT:    movl %edi, %eax
 | 
					; X64-NEXT:    setb %al
 | 
				
			||||||
; X64-NEXT:    ## kill: def $cl killed $cl killed $ecx
 | 
					 | 
				
			||||||
; X64-NEXT:    shrb %cl, %al
 | 
					 | 
				
			||||||
; X64-NEXT:    andb $1, %al
 | 
					 | 
				
			||||||
; X64-NEXT:    ## kill: def $al killed $al killed $eax
 | 
					 | 
				
			||||||
; X64-NEXT:    retq
 | 
					; X64-NEXT:    retq
 | 
				
			||||||
  %3 = lshr i8 %0, %1
 | 
					  %3 = lshr i8 %0, %1
 | 
				
			||||||
  %4 = and i8 %3, 1
 | 
					  %4 = and i8 %3, 1
 | 
				
			||||||
| 
						 | 
					@ -184,6 +178,30 @@ define zeroext i1 @t8(i8 %0, i8 %1) {
 | 
				
			||||||
  ret i1 %5
 | 
					  ret i1 %5
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define i64 @t9(i32 %0, i32 %1) {
 | 
				
			||||||
 | 
					; X86-LABEL: t9:
 | 
				
			||||||
 | 
					; X86:       ## %bb.0:
 | 
				
			||||||
 | 
					; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 | 
				
			||||||
 | 
					; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 | 
				
			||||||
 | 
					; X86-NEXT:    xorl %eax, %eax
 | 
				
			||||||
 | 
					; X86-NEXT:    btl %edx, %ecx
 | 
				
			||||||
 | 
					; X86-NEXT:    setb %al
 | 
				
			||||||
 | 
					; X86-NEXT:    xorl %edx, %edx
 | 
				
			||||||
 | 
					; X86-NEXT:    retl
 | 
				
			||||||
 | 
					;
 | 
				
			||||||
 | 
					; X64-LABEL: t9:
 | 
				
			||||||
 | 
					; X64:       ## %bb.0:
 | 
				
			||||||
 | 
					; X64-NEXT:    xorl %eax, %eax
 | 
				
			||||||
 | 
					; X64-NEXT:    btl %esi, %edi
 | 
				
			||||||
 | 
					; X64-NEXT:    setb %al
 | 
				
			||||||
 | 
					; X64-NEXT:    retq
 | 
				
			||||||
 | 
					  %3 = lshr i32 %0, %1
 | 
				
			||||||
 | 
					  %4 = and i32 %3, 1
 | 
				
			||||||
 | 
					  %5 = icmp ne i32 %4, 0
 | 
				
			||||||
 | 
					  %6 = zext i1 %5 to i64
 | 
				
			||||||
 | 
					  ret i64 %6
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define i16 @shift_and(i16 %a) {
 | 
					define i16 @shift_and(i16 %a) {
 | 
				
			||||||
; X86-LABEL: shift_and:
 | 
					; X86-LABEL: shift_and:
 | 
				
			||||||
; X86:       ## %bb.0:
 | 
					; X86:       ## %bb.0:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue