[X86] Allow zextload/extload i1->i8 to be folded into instructions during isel
Previously we just emitted this as a MOV8rm which would likely get folded during
the peephole pass anyway. This just makes it explicit earlier.

The gpr-to-mask.ll test changed because the kaddb instruction has no memory form.

llvm-svn: 324860
commit dfc322ddf4 (parent d061dd06e8)
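As a rough illustration (not part of the commit; the function below is hypothetical and the exact output depends on the subtarget), IR of this shape exercises the new behavior. The zext combines with the i1 load into a zextload i1 -> i8 node, which now matches the loadi8 fragment, so instruction selection can fold the load straight into its user instead of first materializing it with MOV8rm:

define i8 @i1_load_fold(i1* %p, i8 %x) {
  %b = load i1, i1* %p   ; together with the zext below this becomes a zextload i1 -> i8
  %z = zext i1 %b to i8
  %r = add i8 %z, %x     ; the load can now be folded into the add during isel
  ret i8 %r
}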
@@ -1028,6 +1028,17 @@ def i64immZExt32SExt8 : ImmLeaf<i64, [{
 }]>;
 
 // Helper fragments for loads.
+
+// It's safe to fold a zextload/extload from i1 as a regular i8 load. The
+// upper bits are guaranteed to be zero and we were going to emit a MOV8rm
+// which might get folded during peephole anyway.
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (unindexedload node:$ptr)), [{
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  return ExtType == ISD::NON_EXTLOAD || ExtType == ISD::EXTLOAD ||
+         ExtType == ISD::ZEXTLOAD;
+}]>;
+
 // It's always safe to treat a anyext i16 load as a i32 load if the i16 is
 // known to be 32-bit aligned or better. Ditto for i8 to i16.
 def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
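For context, memory-form byte instructions match their load operand through loadi8, so widening the fragment to also accept zextload/extload from i1 is what lets those loads fold at isel time. A minimal sketch of such a pattern follows (illustrative only; the real ADD8rm pattern is generated through the arithmetic multiclasses and also models EFLAGS):

// Not part of this patch: a simplified pattern showing loadi8 as the
// memory operand of a folded-load instruction form.
def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
          (ADD8rm GR8:$src1, addr:$src2)>;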
@@ -1058,7 +1069,6 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   return false;
 }]>;
 
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
 def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
 def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
 def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
gpr-to-mask.ll:

@@ -109,13 +109,13 @@ define void @test_load_add(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float
 ; X86-64-NEXT:    testb $1, %dil
 ; X86-64-NEXT:    je .LBB2_2
 ; X86-64-NEXT:  # %bb.1: # %if
-; X86-64-NEXT:    kmovb (%rdx), %k0
-; X86-64-NEXT:    kmovb (%rcx), %k1
-; X86-64-NEXT:    kaddb %k1, %k0, %k1
+; X86-64-NEXT:    movb (%rdx), %al
+; X86-64-NEXT:    addb (%rcx), %al
 ; X86-64-NEXT:    jmp .LBB2_3
 ; X86-64-NEXT:  .LBB2_2: # %else
-; X86-64-NEXT:    kmovb (%rcx), %k1
+; X86-64-NEXT:    movb (%rcx), %al
 ; X86-64-NEXT:  .LBB2_3: # %exit
+; X86-64-NEXT:    kmovd %eax, %k1
 ; X86-64-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1}
 ; X86-64-NEXT:    vmovss %xmm1, (%rsi)
 ; X86-64-NEXT:    retq
@@ -130,13 +130,13 @@ define void @test_load_add(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float
 ; X86-32-NEXT:    je .LBB2_2
 ; X86-32-NEXT:  # %bb.1: # %if
 ; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-32-NEXT:    kmovb (%edx), %k0
-; X86-32-NEXT:    kmovb (%ecx), %k1
-; X86-32-NEXT:    kaddb %k1, %k0, %k1
+; X86-32-NEXT:    movb (%edx), %dl
+; X86-32-NEXT:    addb (%ecx), %dl
 ; X86-32-NEXT:    jmp .LBB2_3
 ; X86-32-NEXT:  .LBB2_2: # %else
-; X86-32-NEXT:    kmovb (%ecx), %k1
+; X86-32-NEXT:    movb (%ecx), %dl
 ; X86-32-NEXT:  .LBB2_3: # %exit
+; X86-32-NEXT:    kmovd %edx, %k1
 ; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; X86-32-NEXT:    vmovss %xmm0, (%eax)
 ; X86-32-NEXT:    retl