[x86/SLH] Fix a bug where we would try to post-load harden non-GPRs.

Found cases that hit the assert I added. This patch factors the validity
checking into a nice helper routine and calls it when deciding to harden
post-load, and asserts it when doing so later.

I've added tests for the various ways of loading a floating point type,
as well as loading all vector permutations. Even though many of these go
to identical instructions, it seems good to somewhat comprehensively
test them.

I'm confident there will be more fixes needed here; I'll try to add tests each
time as I adjust this predicate.

llvm-svn: 337160
Chandler Carruth 2018-07-16 11:38:48 +00:00
parent d1a381b17a
commit e66a6f48e3
2 changed files with 297 additions and 13 deletions
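For orientation before the diff: post-load hardening hides the loaded bits with a GPR `or` against the all-ones misspeculation poison, so it can only apply when the load defines an 8/16/32/64-bit general-purpose register. The new predicate checks exactly that. Below is a minimal sketch of its shape, simplified from the diff that follows; the free-standing signature and explicit MRI/TRI parameters are illustrative, not the pass's actual member function.

```cpp
// Sketch only: the shape of the new validity check, with the pass's member
// state (MRI/TRI) passed in explicitly for illustration.
bool canHardenPostLoad(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                       const TargetRegisterInfo &TRI) {
  // The real predicate first requires isDataInvariantLoad(MI); elided here.
  const TargetRegisterClass *DefRC = MRI.getRegClass(MI.getOperand(0).getReg());
  unsigned DefRegBytes = TRI.getRegSizeInBits(*DefRC) / 8;
  if (DefRegBytes > 8)
    return false; // Too wide for a single GPR `or` (e.g. vector loads).

  // Only destinations that an OR8rr/OR16rr/OR32rr/OR64rr can define qualify;
  // this is what rules out FP loads like movss even though they fit in 8 bytes.
  static const TargetRegisterClass *const GPRClasses[] = {
      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
      &X86::GR64RegClass};
  return DefRC->hasSuperClassEq(GPRClasses[Log2_32(DefRegBytes)]);
}
```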


@@ -173,6 +173,7 @@ private:
   MachineInstr *
   sinkPostLoadHardenedInst(MachineInstr &MI,
                            SmallPtrSetImpl<MachineInstr *> &HardenedLoads);
+  bool canHardenPostLoad(MachineInstr &MI);
   void hardenPostLoad(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
   void checkReturnInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
   void checkCallInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
@@ -1115,12 +1116,12 @@ void X86SpeculativeLoadHardeningPass::checkAllLoads(
           (IndexReg && LoadDepRegs.test(IndexReg)))
         continue;
 
-      // If post-load hardening is enabled, this load is known to be
-      // data-invariant, and we aren't already going to harden one of the
+      // If post-load hardening is enabled, this load is compatible with
+      // post-load hardening, and we aren't already going to harden one of the
       // address registers, queue it up to be hardened post-load. Notably, even
       // once hardened this won't introduce a useful dependency that could prune
       // out subsequent loads.
-      if (EnablePostLoadHardening && isDataInvariantLoad(MI) &&
+      if (EnablePostLoadHardening && canHardenPostLoad(MI) &&
           !HardenedAddrRegs.count(BaseReg) &&
           !HardenedAddrRegs.count(IndexReg)) {
         HardenPostLoad.insert(&MI);
@@ -1602,6 +1603,25 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
   return MI;
 }
 
+bool X86SpeculativeLoadHardeningPass::canHardenPostLoad(MachineInstr &MI) {
+  if (!isDataInvariantLoad(MI))
+    return false;
+
+  auto &DefOp = MI.getOperand(0);
+  unsigned OldDefReg = DefOp.getReg();
+
+  auto *DefRC = MRI->getRegClass(OldDefReg);
+  int DefRegBytes = TRI->getRegSizeInBits(*DefRC) / 8;
+  if (DefRegBytes > 8)
+    // We don't support post-load hardening of vectors.
+    return false;
+
+  const TargetRegisterClass *GPRRegClasses[] = {
+      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
+      &X86::GR64RegClass};
+  return DefRC->hasSuperClassEq(GPRRegClasses[Log2_32(DefRegBytes)]);
+}
+
 // We can harden non-leaking loads into register without touching the address
 // by just hiding all of the loaded bits. We use an `or` instruction to do
 // this because having the poison value be all ones allows us to use the same
@@ -1609,8 +1629,8 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
 // execution and coercing them to one is sufficient.
 void X86SpeculativeLoadHardeningPass::hardenPostLoad(
     MachineInstr &MI, MachineSSAUpdater &PredStateSSA) {
-  assert(isDataInvariantLoad(MI) &&
-         "Cannot get here with a non-invariant load!");
+  assert(canHardenPostLoad(MI) &&
+         "Invalid instruction for post-load hardening!");
 
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc Loc = MI.getDebugLoc();
@@ -1625,14 +1645,6 @@ void X86SpeculativeLoadHardeningPass::hardenPostLoad(
 
   unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
   unsigned OrOpCode = OrOpCodes[Log2_32(DefRegBytes)];
 
-#ifndef NDEBUG
-  const TargetRegisterClass *OrRegClasses[] = {
-      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
-      &X86::GR64RegClass};
-  assert(DefRC->hasSuperClassEq(OrRegClasses[Log2_32(DefRegBytes)]) &&
-         "Cannot define this register with OR instruction!");
-#endif
-
   unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
   auto GetStateRegInRC = [&](const TargetRegisterClass &RC) {
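The comment in the hunk above summarizes the mechanism: the predicate state is zero on the architectural path and all-ones under misspeculation, so OR-ing it into a just-loaded value hides the loaded bits exactly when they might be speculation-controlled. Below is a rough sketch of that rewrite for a load defining a 64-bit GPR; the free function, register names, and fresh-vreg handling are illustrative only, and the real hardenPostLoad additionally rewrites uses and handles narrower GPRs via the sub-register indices above.

```cpp
// Illustrative sketch only: OR the speculation predicate state into a loaded
// GR64 value. StateReg is all-ones when mis-speculating and zero otherwise,
// so the OR is a no-op on the correct path and forces the value to -1 when
// executing down a mispredicted branch.
static unsigned hardenLoadedGR64(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator InsertPt,
                                 const DebugLoc &Loc, const X86InstrInfo &TII,
                                 MachineRegisterInfo &MRI, unsigned LoadedReg,
                                 unsigned StateReg) {
  // Define a fresh virtual register holding LoadedReg | StateReg; callers
  // would rewrite subsequent uses of LoadedReg to read this value instead.
  unsigned HardenedReg = MRI.createVirtualRegister(&X86::GR64RegClass);
  BuildMI(MBB, InsertPt, Loc, TII.get(X86::OR64rr), HardenedReg)
      .addReg(StateReg)
      .addReg(LoadedReg);
  return HardenedReg;
}
```

The same zero-or-all-ones state is what the test CHECK lines below recover with `sarq $63` and fold back into `%rsp` with `shlq $47` around each call.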


@@ -590,3 +590,275 @@ lpad:
call void @sink(i32 %leak)
unreachable
}
declare void @sink_float(float)
declare void @sink_double(double)
; Test direct and converting loads of floating point values.
define void @test_fp_loads(float* %fptr, double* %dptr, i32* %i32ptr, i64* %i64ptr) nounwind {
; X64-LABEL: test_fp_loads:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq %rcx, %r15
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: movq %rdi, %r12
; X64-NEXT: movq $-1, %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %r12
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_float
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %rbx
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_double
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: cvtsd2ss %xmm0, %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_float
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: cvtss2sd %xmm0, %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_double
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %r14
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cvtsi2ssl (%r14), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_float
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %r15
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cvtsi2sdq (%r15), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_double
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cvtsi2ssq (%r15), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_float
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cvtsi2sdl (%r14), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_double
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: retq
;
; X64-LFENCE-LABEL: test_fp_loads:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %r15
; X64-LFENCE-NEXT: pushq %r14
; X64-LFENCE-NEXT: pushq %r12
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: pushq %rax
; X64-LFENCE-NEXT: movq %rcx, %r15
; X64-LFENCE-NEXT: movq %rdx, %r14
; X64-LFENCE-NEXT: movq %rsi, %rbx
; X64-LFENCE-NEXT: movq %rdi, %r12
; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-LFENCE-NEXT: callq sink_float
; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-LFENCE-NEXT: callq sink_double
; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-LFENCE-NEXT: cvtsd2ss %xmm0, %xmm0
; X64-LFENCE-NEXT: callq sink_float
; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-LFENCE-NEXT: cvtss2sd %xmm0, %xmm0
; X64-LFENCE-NEXT: callq sink_double
; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
; X64-LFENCE-NEXT: cvtsi2ssl (%r14), %xmm0
; X64-LFENCE-NEXT: callq sink_float
; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
; X64-LFENCE-NEXT: cvtsi2sdq (%r15), %xmm0
; X64-LFENCE-NEXT: callq sink_double
; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
; X64-LFENCE-NEXT: cvtsi2ssq (%r15), %xmm0
; X64-LFENCE-NEXT: callq sink_float
; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
; X64-LFENCE-NEXT: cvtsi2sdl (%r14), %xmm0
; X64-LFENCE-NEXT: callq sink_double
; X64-LFENCE-NEXT: addq $8, %rsp
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: popq %r12
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: popq %r15
; X64-LFENCE-NEXT: retq
entry:
%f1 = load float, float* %fptr
call void @sink_float(float %f1)
%d1 = load double, double* %dptr
call void @sink_double(double %d1)
%f2.d = load double, double* %dptr
%f2 = fptrunc double %f2.d to float
call void @sink_float(float %f2)
%d2.f = load float, float* %fptr
%d2 = fpext float %d2.f to double
call void @sink_double(double %d2)
%f3.i = load i32, i32* %i32ptr
%f3 = sitofp i32 %f3.i to float
call void @sink_float(float %f3)
%d3.i = load i64, i64* %i64ptr
%d3 = sitofp i64 %d3.i to double
call void @sink_double(double %d3)
%f4.i = load i64, i64* %i64ptr
%f4 = sitofp i64 %f4.i to float
call void @sink_float(float %f4)
%d4.i = load i32, i32* %i32ptr
%d4 = sitofp i32 %d4.i to double
call void @sink_double(double %d4)
ret void
}
declare void @sink_v4f32(<4 x float>)
declare void @sink_v2f64(<2 x double>)
declare void @sink_v16i8(<16 x i8>)
declare void @sink_v8i16(<8 x i16>)
declare void @sink_v4i32(<4 x i32>)
declare void @sink_v2i64(<2 x i64>)
; Test loads of vectors.
define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind {
; X64-LABEL: test_vec_loads:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq %r9, %r14
; X64-NEXT: movq %r8, %r15
; X64-NEXT: movq %rcx, %r12
; X64-NEXT: movq %rdx, %r13
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: movq $-1, %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %rdi
; X64-NEXT: movaps (%rdi), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v4f32
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %rbx
; X64-NEXT: movaps (%rbx), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v2f64
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %r13
; X64-NEXT: movaps (%r13), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v16i8
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %r12
; X64-NEXT: movaps (%r12), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v8i16
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %r15
; X64-NEXT: movaps (%r15), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v4i32
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %r14
; X64-NEXT: movaps (%r14), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v2i64
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: retq
;
; X64-LFENCE-LABEL: test_vec_loads:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %r15
; X64-LFENCE-NEXT: pushq %r14
; X64-LFENCE-NEXT: pushq %r13
; X64-LFENCE-NEXT: pushq %r12
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: movq %r9, %r14
; X64-LFENCE-NEXT: movq %r8, %r15
; X64-LFENCE-NEXT: movq %rcx, %r12
; X64-LFENCE-NEXT: movq %rdx, %r13
; X64-LFENCE-NEXT: movq %rsi, %rbx
; X64-LFENCE-NEXT: movaps (%rdi), %xmm0
; X64-LFENCE-NEXT: callq sink_v4f32
; X64-LFENCE-NEXT: movaps (%rbx), %xmm0
; X64-LFENCE-NEXT: callq sink_v2f64
; X64-LFENCE-NEXT: movaps (%r13), %xmm0
; X64-LFENCE-NEXT: callq sink_v16i8
; X64-LFENCE-NEXT: movaps (%r12), %xmm0
; X64-LFENCE-NEXT: callq sink_v8i16
; X64-LFENCE-NEXT: movaps (%r15), %xmm0
; X64-LFENCE-NEXT: callq sink_v4i32
; X64-LFENCE-NEXT: movaps (%r14), %xmm0
; X64-LFENCE-NEXT: callq sink_v2i64
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: popq %r12
; X64-LFENCE-NEXT: popq %r13
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: popq %r15
; X64-LFENCE-NEXT: retq
entry:
%x1 = load <4 x float>, <4 x float>* %v4f32ptr
call void @sink_v4f32(<4 x float> %x1)
%x2 = load <2 x double>, <2 x double>* %v2f64ptr
call void @sink_v2f64(<2 x double> %x2)
%x3 = load <16 x i8>, <16 x i8>* %v16i8ptr
call void @sink_v16i8(<16 x i8> %x3)
%x4 = load <8 x i16>, <8 x i16>* %v8i16ptr
call void @sink_v8i16(<8 x i16> %x4)
%x5 = load <4 x i32>, <4 x i32>* %v4i32ptr
call void @sink_v4i32(<4 x i32> %x5)
%x6 = load <2 x i64>, <2 x i64>* %v2i64ptr
call void @sink_v2i64(<2 x i64> %x6)
ret void
}