[X86] Allow zextload/extload i1->i8 to be folded into instructions during isel

Previously we just emitted this as a MOV8rm, which would likely get folded during the peephole pass anyway. This just makes the fold explicit earlier, during instruction selection.

The gpr-to-mask.ll test changed because the kaddb instruction has no memory form: the i1 loads are now done with movb/addb on GPRs, and the result is moved into a mask register with kmovd afterwards.

llvm-svn: 324860
Author: Craig Topper, 2018-02-12 01:33:36 +00:00
Commit: dfc322ddf4 (parent d061dd06e8)
2 changed files with 19 additions and 9 deletions
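
For illustration, here is a minimal LLVM IR sketch of the kind of pattern this change targets (a hypothetical example; the function name and body are assumptions, not taken from the LLVM test suite). An i1 load whose zero-extension is consumed as i8 reaches isel as a zextload/extload i1->i8; before this patch that load was selected as a standalone MOV8rm, and with the widened loadi8 fragment it can instead be folded into the memory operand of the consuming instruction during isel.

; Hypothetical example: the second i1 load should now be selectable as the
; memory operand of addb (ADD8rm) instead of going through a separate MOV8rm.
define i8 @fold_i1_load(i1* %p, i1* %q) {
  %a = load i1, i1* %p
  %b = load i1, i1* %q
  %za = zext i1 %a to i8
  %zb = zext i1 %b to i8
  %sum = add i8 %za, %zb
  ret i8 %sum
}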


@@ -1028,6 +1028,17 @@ def i64immZExt32SExt8 : ImmLeaf<i64, [{
 }]>;
 
 // Helper fragments for loads.
+
+// It's safe to fold a zextload/extload from i1 as a regular i8 load. The
+// upper bits are guaranteed to be zero and we were going to emit a MOV8rm
+// which might get folded during peephole anyway.
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (unindexedload node:$ptr)), [{
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  return ExtType == ISD::NON_EXTLOAD || ExtType == ISD::EXTLOAD ||
+         ExtType == ISD::ZEXTLOAD;
+}]>;
+
 // It's always safe to treat a anyext i16 load as a i32 load if the i16 is
 // known to be 32-bit aligned or better. Ditto for i8 to i16.
 def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
@@ -1058,7 +1069,6 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   return false;
 }]>;
 
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
 def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
 def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
 def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
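
As an illustrative aside (an assumption for exposition, not part of this patch): the rm-form instruction patterns consume loadi8 as their memory operand, roughly in the shape sketched below, which is why letting loadi8 also match zextload/extload from i1 is enough to get these loads folded during isel. The real ADD8rm pattern is produced by the arithmetic multiclasses and also models EFLAGS.

// Rough shape of a loadi8 consumer (illustration only, not the actual pattern):
def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
          (ADD8rm GR8:$src1, addr:$src2)>;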


@@ -109,13 +109,13 @@ define void @test_load_add(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float
 ; X86-64-NEXT: testb $1, %dil
 ; X86-64-NEXT: je .LBB2_2
 ; X86-64-NEXT: # %bb.1: # %if
-; X86-64-NEXT: kmovb (%rdx), %k0
-; X86-64-NEXT: kmovb (%rcx), %k1
-; X86-64-NEXT: kaddb %k1, %k0, %k1
+; X86-64-NEXT: movb (%rdx), %al
+; X86-64-NEXT: addb (%rcx), %al
 ; X86-64-NEXT: jmp .LBB2_3
 ; X86-64-NEXT: .LBB2_2: # %else
-; X86-64-NEXT: kmovb (%rcx), %k1
+; X86-64-NEXT: movb (%rcx), %al
 ; X86-64-NEXT: .LBB2_3: # %exit
+; X86-64-NEXT: kmovd %eax, %k1
 ; X86-64-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
 ; X86-64-NEXT: vmovss %xmm1, (%rsi)
 ; X86-64-NEXT: retq
@@ -130,13 +130,13 @@ define void @test_load_add(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float
 ; X86-32-NEXT: je .LBB2_2
 ; X86-32-NEXT: # %bb.1: # %if
 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-32-NEXT: kmovb (%edx), %k0
-; X86-32-NEXT: kmovb (%ecx), %k1
-; X86-32-NEXT: kaddb %k1, %k0, %k1
+; X86-32-NEXT: movb (%edx), %dl
+; X86-32-NEXT: addb (%ecx), %dl
 ; X86-32-NEXT: jmp .LBB2_3
 ; X86-32-NEXT: .LBB2_2: # %else
-; X86-32-NEXT: kmovb (%ecx), %k1
+; X86-32-NEXT: movb (%ecx), %dl
 ; X86-32-NEXT: .LBB2_3: # %exit
+; X86-32-NEXT: kmovd %edx, %k1
 ; X86-32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; X86-32-NEXT: vmovss %xmm0, (%eax)
 ; X86-32-NEXT: retl