[X86] Cleanup type conversion of 64-bit load-store pairs.

Summary:
Simplify and generalize chain handling and the search for 64-bit load-store pairs.
The nontemporal test now converts a 64-bit integer load-store pair directly into f64, which is lowered as-is instead of being split into two i32 pairs.
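
A minimal sketch of the pattern this combine targets, assuming a 32-bit x86 target with SSE2 (the function below is a hypothetical example, not from the commit):

    // Hypothetical example, not from the commit. Build with e.g.
    // clang++ -m32 -msse2 -O2 to see the lowering this combine affects.
    #include <cstdint>

    // The i64 load feeding an i64 store is the 64-bit load-store pair that
    // combineStore rewrites: it is retyped to f64 and emitted as a single
    // movsd load/store instead of two i32 mov pairs.
    void copy64(const std::int64_t *src, std::int64_t *dst) { *dst = *src; }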

Reviewers: craig.topper, spatel

Reviewed By: craig.topper

Subscribers: hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D40918

llvm-svn: 320505
commit 674d053d18
parent 757026dbe6
Nirav Dave, 2017-12-12 18:25:48 +00:00
2 changed files with 69 additions and 102 deletions

lib/Target/X86/X86ISelLowering.cpp

@@ -34474,28 +34474,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
       isa<LoadSDNode>(St->getValue()) &&
       !cast<LoadSDNode>(St->getValue())->isVolatile() &&
       St->getChain().hasOneUse() && !St->isVolatile()) {
-    SDNode* LdVal = St->getValue().getNode();
-    LoadSDNode *Ld = nullptr;
-    int TokenFactorIndex = -1;
+    LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());
     SmallVector<SDValue, 8> Ops;
-    SDNode* ChainVal = St->getChain().getNode();
-    // Must be a store of a load. We currently handle two cases: the load
-    // is a direct child, and it's under an intervening TokenFactor. It is
-    // possible to dig deeper under nested TokenFactors.
-    if (ChainVal == LdVal)
-      Ld = cast<LoadSDNode>(St->getChain());
-    else if (St->getValue().hasOneUse() &&
-             ChainVal->getOpcode() == ISD::TokenFactor) {
-      for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
-        if (ChainVal->getOperand(i).getNode() == LdVal) {
-          TokenFactorIndex = i;
-          Ld = cast<LoadSDNode>(St->getValue());
-        } else
-          Ops.push_back(ChainVal->getOperand(i));
-      }
-    }
 
-    if (!Ld || !ISD::isNormalLoad(Ld))
+    if (!ISD::isNormalLoad(Ld))
       return SDValue();
 
     // If this is not the MMX case, i.e. we are just turning i64 load/store
@@ -34512,17 +34494,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     if (Subtarget.is64Bit() || F64IsLegal) {
       MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
       SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
-                                  Ld->getPointerInfo(), Ld->getAlignment(),
-                                  Ld->getMemOperand()->getFlags());
+                                  Ld->getMemOperand());
+
       // Make sure new load is placed in same chain order.
-      SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
-      if (TokenFactorIndex >= 0) {
-        Ops.push_back(NewChain);
-        NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
-      }
-      return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
-                          St->getPointerInfo(), St->getAlignment(),
-                          St->getMemOperand()->getFlags());
+      DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
+      return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
+                          St->getMemOperand());
     }
 
     // Otherwise, lower to two pairs of 32-bit loads / stores.
@@ -34537,23 +34514,19 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                                MinAlign(Ld->getAlignment(), 4),
                                Ld->getMemOperand()->getFlags());
 
     // Make sure new loads are placed in same chain order.
-    SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
-    NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
-    if (TokenFactorIndex >= 0) {
-      Ops.push_back(NewChain);
-      NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
-    }
+    DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
+    DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
 
     LoAddr = St->getBasePtr();
     HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, StDL);
 
     SDValue LoSt =
-        DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getPointerInfo(),
+        DAG.getStore(St->getChain(), StDL, LoLd, LoAddr, St->getPointerInfo(),
                      St->getAlignment(), St->getMemOperand()->getFlags());
-    SDValue HiSt = DAG.getStore(
-        NewChain, StDL, HiLd, HiAddr, St->getPointerInfo().getWithOffset(4),
-        MinAlign(St->getAlignment(), 4), St->getMemOperand()->getFlags());
+    SDValue HiSt = DAG.getStore(St->getChain(), StDL, HiLd, HiAddr,
+                                St->getPointerInfo().getWithOffset(4),
+                                MinAlign(St->getAlignment(), 4),
+                                St->getMemOperand()->getFlags());
     return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
   }
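
The TokenFactor bookkeeping deleted above is subsumed by SelectionDAG::makeEquivalentMemoryOrdering, which splices the new memory operation into the old load's chain by itself. A rough sketch of what that helper does (a paraphrase, not the verbatim implementation from SelectionDAG.cpp):

    // Sketch of SelectionDAG::makeEquivalentMemoryOrdering(OldLoad, NewMemOp):
    // everything ordered after the old load's chain becomes ordered after both
    // the old load and the new memory op, so callers no longer rebuild
    // TokenFactors by hand.
    SDValue OldChain = SDValue(OldLoad, 1);
    SDValue TF = getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other,
                         OldChain, NewMemOp.getValue(1));
    ReplaceAllUsesOfValueWith(OldChain, TF);
    // The replacement above also rewrote TF's own operand; restore it.
    UpdateNodeOperands(TF.getNode(), OldChain, NewMemOp.getValue(1));
    return TF;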

test/CodeGen/X86/nontemporal.ll

@@ -9,45 +9,42 @@ define i32 @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E, <4
 ; X32-SSE: # %bb.0:
 ; X32-SSE-NEXT: pushl %ebp
 ; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: pushl %edi
 ; X32-SSE-NEXT: pushl %esi
 ; X32-SSE-NEXT: andl $-16, %esp
-; X32-SSE-NEXT: movl 76(%ebp), %ecx
+; X32-SSE-NEXT: subl $16, %esp
+; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
 ; X32-SSE-NEXT: movl 12(%ebp), %eax
-; X32-SSE-NEXT: movdqa 56(%ebp), %xmm3
-; X32-SSE-NEXT: movdqa 40(%ebp), %xmm4
-; X32-SSE-NEXT: movdqa 24(%ebp), %xmm5
-; X32-SSE-NEXT: movl 8(%ebp), %esi
-; X32-SSE-NEXT: movl 80(%ebp), %edx
-; X32-SSE-NEXT: movl (%edx), %edi
+; X32-SSE-NEXT: movdqa 56(%ebp), %xmm4
+; X32-SSE-NEXT: movdqa 40(%ebp), %xmm5
+; X32-SSE-NEXT: movdqa 24(%ebp), %xmm6
+; X32-SSE-NEXT: movl 8(%ebp), %edx
+; X32-SSE-NEXT: movl 80(%ebp), %ecx
+; X32-SSE-NEXT: movl (%ecx), %esi
 ; X32-SSE-NEXT: addps {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: movntps %xmm0, (%esi)
+; X32-SSE-NEXT: movntps %xmm0, (%edx)
 ; X32-SSE-NEXT: paddq {{\.LCPI.*}}, %xmm2
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntdq %xmm2, (%esi)
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntdq %xmm2, (%edx)
 ; X32-SSE-NEXT: addpd {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntpd %xmm1, (%esi)
-; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm5
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntdq %xmm5, (%esi)
-; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm4
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntdq %xmm4, (%esi)
-; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm3
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntdq %xmm3, (%esi)
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntil %eax, (%esi)
-; X32-SSE-NEXT: movl (%edx), %eax
-; X32-SSE-NEXT: movntil %ecx, 4(%esi)
-; X32-SSE-NEXT: movl 72(%ebp), %ecx
-; X32-SSE-NEXT: movntil %ecx, (%esi)
-; X32-SSE-NEXT: addl %edi, %eax
-; X32-SSE-NEXT: addl (%edx), %eax
-; X32-SSE-NEXT: leal -8(%ebp), %esp
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntpd %xmm1, (%edx)
+; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm6
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntdq %xmm6, (%edx)
+; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm5
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntdq %xmm5, (%edx)
+; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm4
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntdq %xmm4, (%edx)
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntil %eax, (%edx)
+; X32-SSE-NEXT: movl (%ecx), %eax
+; X32-SSE-NEXT: addl %esi, %eax
+; X32-SSE-NEXT: movsd %xmm3, (%edx)
+; X32-SSE-NEXT: addl (%ecx), %eax
+; X32-SSE-NEXT: leal -4(%ebp), %esp
 ; X32-SSE-NEXT: popl %esi
-; X32-SSE-NEXT: popl %edi
 ; X32-SSE-NEXT: popl %ebp
 ; X32-SSE-NEXT: retl
 ;
@@ -55,45 +52,42 @@ define i32 @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E, <4
 ; X32-AVX: # %bb.0:
 ; X32-AVX-NEXT: pushl %ebp
 ; X32-AVX-NEXT: movl %esp, %ebp
-; X32-AVX-NEXT: pushl %edi
 ; X32-AVX-NEXT: pushl %esi
 ; X32-AVX-NEXT: andl $-16, %esp
-; X32-AVX-NEXT: movl 76(%ebp), %ecx
+; X32-AVX-NEXT: subl $16, %esp
+; X32-AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
 ; X32-AVX-NEXT: movl 12(%ebp), %eax
-; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm3
-; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm4
-; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm5
-; X32-AVX-NEXT: movl 8(%ebp), %esi
+; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm4
+; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm5
+; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm6
+; X32-AVX-NEXT: movl 8(%ebp), %ecx
 ; X32-AVX-NEXT: movl 80(%ebp), %edx
-; X32-AVX-NEXT: movl (%edx), %edi
+; X32-AVX-NEXT: movl (%edx), %esi
 ; X32-AVX-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT: vmovntps %xmm0, (%esi)
+; X32-AVX-NEXT: vmovntps %xmm0, (%ecx)
 ; X32-AVX-NEXT: vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
 ; X32-AVX-NEXT: vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntpd %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm5, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm4, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm3, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: movntil %eax, (%esi)
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntpd %xmm0, (%ecx)
+; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm6, %xmm0
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm5, %xmm0
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm4, %xmm0
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: movntil %eax, (%ecx)
 ; X32-AVX-NEXT: movl (%edx), %eax
-; X32-AVX-NEXT: movntil %ecx, 4(%esi)
-; X32-AVX-NEXT: movl 72(%ebp), %ecx
-; X32-AVX-NEXT: movntil %ecx, (%esi)
-; X32-AVX-NEXT: addl %edi, %eax
+; X32-AVX-NEXT: addl %esi, %eax
+; X32-AVX-NEXT: vmovsd %xmm3, (%ecx)
 ; X32-AVX-NEXT: addl (%edx), %eax
-; X32-AVX-NEXT: leal -8(%ebp), %esp
+; X32-AVX-NEXT: leal -4(%ebp), %esp
 ; X32-AVX-NEXT: popl %esi
-; X32-AVX-NEXT: popl %edi
 ; X32-AVX-NEXT: popl %ebp
 ; X32-AVX-NEXT: retl
 ;