forked from OSchip/llvm-project
[X86] Add DAG combine to fold any_extend_vector_inreg+truncstore to an extractelement+store
We have custom code that ignores the normal promoting type legalization on less than 128-bit vector types like v4i8 to emit pavgb, paddusb, psubusb since we don't have the equivalent instruction on a larger element type like v4i32. If this operation appears before a store, we can be left with an any_extend_vector_inreg followed by a truncstore after type legalization. When truncstore isn't legal, this will normally be decomposed into shuffles and a non-truncating store. This will then combine away the any_extend_vector_inreg and shuffle leaving just the store. On avx512, truncstore is legal so we don't decompose it and we had no combines to fix it. This patch adds a new DAG combine to detect this case and emit either an extract_store for 64-bit stores or an extractelement+store for 32 and 16 bit stores. This makes the avx512 codegen match the avx2 codegen for these situations. I'm restricting to only when -x86-experimental-vector-widening-legalization is false. When we're widening we're not likely to create this any_extend_inreg+truncstore combination. This means we should be able to remove this code when we flip the default. I would like to flip the default soon, but I need to investigate some performance regressions it's causing in our branch that I wasn't seeing on trunk. Differential Revision: https://reviews.llvm.org/D65538 llvm-svn: 367488
This commit is contained in:
parent
c724215a70
commit
b51dc64063
|
|
@ -40179,6 +40179,41 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
|||
MVT::v16i8, St->getMemOperand());
|
||||
}
|
||||
|
||||
// Look for a truncating store to a less than 128 bit vector that has been
|
||||
// truncated from an any_extend_inreg from a 128 bit vector with the same
|
||||
// element size. We can use a 64/32/16-bit extractelement and store that.
|
||||
// Disabling this when widening legalization is in effect since the trunc
|
||||
// store would have been unlikely to be created in that case. Only doing this
|
||||
// when truncstore is legal since it would otherwise be decomposed below and
|
||||
// then combined away.
|
||||
if (St->isTruncatingStore() && TLI.isTruncStoreLegal(VT, StVT) &&
|
||||
StoredVal.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
|
||||
StoredVal.getValueType().is128BitVector() &&
|
||||
!ExperimentalVectorWideningLegalization) {
|
||||
EVT OrigVT = StoredVal.getOperand(0).getValueType();
|
||||
if (OrigVT.is128BitVector() &&
|
||||
OrigVT.getVectorElementType() == StVT.getVectorElementType()) {
|
||||
unsigned StoreSize = StVT.getSizeInBits();
|
||||
assert((128 % StoreSize == 0) && "Unexpected store size!");
|
||||
MVT IntVT = MVT::getIntegerVT(StoreSize);
|
||||
MVT CastVT = MVT::getVectorVT(IntVT, 128 / StoreSize);
|
||||
StoredVal = DAG.getBitcast(CastVT, StoredVal.getOperand(0));
|
||||
// Use extract_store for the 64-bit case to support 32-bit targets.
|
||||
if (IntVT == MVT::i64) {
|
||||
SDVTList Tys = DAG.getVTList(MVT::Other);
|
||||
SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
|
||||
return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
|
||||
IntVT, St->getMemOperand());
|
||||
}
|
||||
|
||||
// Otherwise just use an extract and store.
|
||||
StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, IntVT, StoredVal,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
|
||||
St->getMemOperand());
|
||||
}
|
||||
}
|
||||
|
||||
// Optimize trunc store (of multiple scalars) to shuffle and store.
|
||||
// First, pack all of the elements in one place. Next, store to memory
|
||||
// in fewer chunks.
|
||||
|
|
|
|||
|
|
@ -269,17 +269,13 @@ define void @test_x86_vcvtps2ph_128_m(<4 x i16>* nocapture %d, <4 x float> %a) n
|
|||
; X32-AVX512VL: # %bb.0: # %entry
|
||||
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
|
||||
; X32-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
|
||||
; X32-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X32-AVX512VL-NEXT: vpmovdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x00]
|
||||
; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
|
||||
; X64-AVX512VL: # %bb.0: # %entry
|
||||
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
|
||||
; X64-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
|
||||
; X64-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X64-AVX512VL-NEXT: vpmovdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07]
|
||||
; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
entry:
|
||||
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a, i32 3)
|
||||
|
|
|
|||
|
|
@ -1527,30 +1527,13 @@ define void @addus_v8i8(<8 x i8>* %p1, <8 x i8>* %p2) {
|
|||
; SSE-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: addus_v8i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: addus_v8i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: addus_v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512-NEXT: vpmovwb %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: addus_v8i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <8 x i8>, <8 x i8>* %p1, align 8
|
||||
%ld2 = load <8 x i8>, <8 x i8>* %p2, align 8
|
||||
%1 = add <8 x i8> %ld2, %ld1
|
||||
|
|
@ -1569,30 +1552,13 @@ define void @addus_v4i8(<4 x i8>* %p1, <4 x i8>* %p2) {
|
|||
; SSE-NEXT: movd %xmm1, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: addus_v4i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: addus_v4i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: addus_v4i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovdb %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: addus_v4i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <4 x i8>, <4 x i8>* %p1, align 4
|
||||
%ld2 = load <4 x i8>, <4 x i8>* %p2, align 4
|
||||
%1 = add <4 x i8> %ld2, %ld1
|
||||
|
|
@ -1635,36 +1601,15 @@ define void @addus_v2i8(<2 x i8>* %p1, <2 x i8>* %p2) {
|
|||
; SSE41-NEXT: pextrw $0, %xmm1, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: addus_v2i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movzwl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: movzwl (%rsi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: addus_v2i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: movzwl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: movzwl (%rsi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: addus_v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movzwl (%rdi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512-NEXT: movzwl (%rsi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqb %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: addus_v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: movzwl (%rdi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: movzwl (%rsi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm1
|
||||
; AVX-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <2 x i8>, <2 x i8>* %p1, align 2
|
||||
%ld2 = load <2 x i8>, <2 x i8>* %p2, align 2
|
||||
%1 = add <2 x i8> %ld2, %ld1
|
||||
|
|
@ -1683,30 +1628,13 @@ define void @addus_v4i16(<4 x i16>* %p1, <4 x i16>* %p2) {
|
|||
; SSE-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: addus_v4i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: addus_v4i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: addus_v4i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512-NEXT: vpmovdw %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: addus_v4i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <4 x i16>, <4 x i16>* %p1, align 4
|
||||
%ld2 = load <4 x i16>, <4 x i16>* %p2, align 4
|
||||
%1 = add <4 x i16> %ld2, %ld1
|
||||
|
|
@ -1725,30 +1653,13 @@ define void @addus_v2i16(<2 x i16>* %p1, <2 x i16>* %p2) {
|
|||
; SSE-NEXT: movd %xmm1, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: addus_v2i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: addus_v2i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: addus_v2i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqw %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: addus_v2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <2 x i16>, <2 x i16>* %p1, align 2
|
||||
%ld2 = load <2 x i16>, <2 x i16>* %p2, align 2
|
||||
%1 = add <2 x i16> %ld2, %ld1
|
||||
|
|
|
|||
|
|
@ -2184,30 +2184,13 @@ define void @subus_v8i8(<8 x i8>* %p1, <8 x i8>* %p2) {
|
|||
; SSE-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: subus_v8i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: subus_v8i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: subus_v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512-NEXT: vpmovwb %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: subus_v8i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <8 x i8>, <8 x i8>* %p1, align 8
|
||||
%ld2 = load <8 x i8>, <8 x i8>* %p2, align 8
|
||||
%1 = sub <8 x i8> %ld1, %ld2
|
||||
|
|
@ -2226,30 +2209,13 @@ define void @subus_v4i8(<4 x i8>* %p1, <4 x i8>* %p2) {
|
|||
; SSE-NEXT: movd %xmm0, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: subus_v4i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: subus_v4i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: subus_v4i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovdb %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: subus_v4i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <4 x i8>, <4 x i8>* %p1, align 8
|
||||
%ld2 = load <4 x i8>, <4 x i8>* %p2, align 8
|
||||
%1 = sub <4 x i8> %ld1, %ld2
|
||||
|
|
@ -2286,30 +2252,13 @@ define void @subus_v2i8(<2 x i8>* %p1, <2 x i8>* %p2) {
|
|||
; SSE41-NEXT: pextrw $0, %xmm0, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: subus_v2i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: subus_v2i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: subus_v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqb %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: subus_v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <2 x i8>, <2 x i8>* %p1, align 8
|
||||
%ld2 = load <2 x i8>, <2 x i8>* %p2, align 8
|
||||
%1 = sub <2 x i8> %ld1, %ld2
|
||||
|
|
@ -2328,30 +2277,13 @@ define void @subus_v4i16(<4 x i16>* %p1, <4 x i16>* %p2) {
|
|||
; SSE-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: subus_v4i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: subus_v4i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: subus_v4i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512-NEXT: vpmovdw %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: subus_v4i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <4 x i16>, <4 x i16>* %p1, align 8
|
||||
%ld2 = load <4 x i16>, <4 x i16>* %p2, align 8
|
||||
%1 = sub <4 x i16> %ld1, %ld2
|
||||
|
|
@ -2370,30 +2302,13 @@ define void @subus_v2i16(<2 x i16>* %p1, <2 x i16>* %p2) {
|
|||
; SSE-NEXT: movd %xmm0, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: subus_v2i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: subus_v2i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: subus_v2i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqw %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: subus_v2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%ld1 = load <2 x i16>, <2 x i16>* %p1, align 8
|
||||
%ld2 = load <2 x i16>, <2 x i16>* %p2, align 8
|
||||
%1 = sub <2 x i16> %ld1, %ld2
|
||||
|
|
|
|||
|
|
@ -210,30 +210,13 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
|
|||
; SSE-NEXT: movq %xmm1, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v8i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v8i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512-NEXT: vpmovwb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v8i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <8 x i8>, <8 x i8>* %px
|
||||
%y = load <8 x i8>, <8 x i8>* %py
|
||||
%z = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %x, <8 x i8> %y)
|
||||
|
|
@ -250,30 +233,13 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
|
|||
; SSE-NEXT: movd %xmm1, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v4i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v4i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovdb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v4i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <4 x i8>, <4 x i8>* %px
|
||||
%y = load <4 x i8>, <4 x i8>* %py
|
||||
%z = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
|
||||
|
|
@ -314,36 +280,15 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
|
|||
; SSE41-NEXT: pextrw $0, %xmm1, (%rdx)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v2i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movzwl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: movzwl (%rsi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v2i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: movzwl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: movzwl (%rsi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movzwl (%rdi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512-NEXT: movzwl (%rsi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: movzwl (%rdi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: movzwl (%rsi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm1
|
||||
; AVX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <2 x i8>, <2 x i8>* %px
|
||||
%y = load <2 x i8>, <2 x i8>* %py
|
||||
%z = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y)
|
||||
|
|
@ -360,30 +305,13 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
|
|||
; SSE-NEXT: movq %xmm1, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v4i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v4i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512-NEXT: vpmovdw %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v4i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <4 x i16>, <4 x i16>* %px
|
||||
%y = load <4 x i16>, <4 x i16>* %py
|
||||
%z = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %x, <4 x i16> %y)
|
||||
|
|
@ -400,30 +328,13 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
|
|||
; SSE-NEXT: movd %xmm1, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v2i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v2i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqw %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <2 x i16>, <2 x i16>* %px
|
||||
%y = load <2 x i16>, <2 x i16>* %py
|
||||
%z = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
|
||||
|
|
|
|||
|
|
@ -210,30 +210,13 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
|
|||
; SSE-NEXT: movq %xmm0, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v8i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v8i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512-NEXT: vpmovwb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v8i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <8 x i8>, <8 x i8>* %px
|
||||
%y = load <8 x i8>, <8 x i8>* %py
|
||||
%z = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %x, <8 x i8> %y)
|
||||
|
|
@ -250,30 +233,13 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
|
|||
; SSE-NEXT: movd %xmm0, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v4i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v4i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovdb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v4i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <4 x i8>, <4 x i8>* %px
|
||||
%y = load <4 x i8>, <4 x i8>* %py
|
||||
%z = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
|
||||
|
|
@ -314,36 +280,15 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
|
|||
; SSE41-NEXT: pextrw $0, %xmm0, (%rdx)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v2i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movzwl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: movzwl (%rsi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v2i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: movzwl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: movzwl (%rsi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movzwl (%rdi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512-NEXT: movzwl (%rsi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: movzwl (%rdi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: movzwl (%rsi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm1
|
||||
; AVX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <2 x i8>, <2 x i8>* %px
|
||||
%y = load <2 x i8>, <2 x i8>* %py
|
||||
%z = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %x, <2 x i8> %y)
|
||||
|
|
@ -360,30 +305,13 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
|
|||
; SSE-NEXT: movq %xmm0, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v4i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v4i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512-NEXT: vpmovdw %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v4i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <4 x i16>, <4 x i16>* %px
|
||||
%y = load <4 x i16>, <4 x i16>* %py
|
||||
%z = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %x, <4 x i16> %y)
|
||||
|
|
@ -400,30 +328,13 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
|
|||
; SSE-NEXT: movd %xmm0, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v2i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v2i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqw %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <2 x i16>, <2 x i16>* %px
|
||||
%y = load <2 x i16>, <2 x i16>* %py
|
||||
%z = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
|
||||
|
|
|
|||
|
|
@ -210,30 +210,13 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
|
|||
; SSE-NEXT: movq %xmm1, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v8i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v8i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512-NEXT: vpmovwb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v8i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <8 x i8>, <8 x i8>* %px
|
||||
%y = load <8 x i8>, <8 x i8>* %py
|
||||
%z = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %x, <8 x i8> %y)
|
||||
|
|
@ -250,30 +233,13 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
|
|||
; SSE-NEXT: movd %xmm1, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v4i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v4i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovdb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v4i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <4 x i8>, <4 x i8>* %px
|
||||
%y = load <4 x i8>, <4 x i8>* %py
|
||||
%z = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
|
||||
|
|
@ -314,36 +280,15 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
|
|||
; SSE41-NEXT: pextrw $0, %xmm1, (%rdx)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v2i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movzwl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: movzwl (%rsi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v2i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: movzwl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: movzwl (%rsi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movzwl (%rdi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512-NEXT: movzwl (%rsi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: movzwl (%rdi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: movzwl (%rsi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm1
|
||||
; AVX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <2 x i8>, <2 x i8>* %px
|
||||
%y = load <2 x i8>, <2 x i8>* %py
|
||||
%z = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y)
|
||||
|
|
@ -360,30 +305,13 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
|
|||
; SSE-NEXT: movq %xmm1, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v4i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v4i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512-NEXT: vpmovdw %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v4i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <4 x i16>, <4 x i16>* %px
|
||||
%y = load <4 x i16>, <4 x i16>* %py
|
||||
%z = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %x, <4 x i16> %y)
|
||||
|
|
@ -400,30 +328,13 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
|
|||
; SSE-NEXT: movd %xmm1, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v2i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v2i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqw %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <2 x i16>, <2 x i16>* %px
|
||||
%y = load <2 x i16>, <2 x i16>* %py
|
||||
%z = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
|
||||
|
|
|
|||
|
|
@ -210,30 +210,13 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
|
|||
; SSE-NEXT: movq %xmm0, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v8i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v8i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512-NEXT: vpmovwb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v8i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <8 x i8>, <8 x i8>* %px
|
||||
%y = load <8 x i8>, <8 x i8>* %py
|
||||
%z = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %x, <8 x i8> %y)
|
||||
|
|
@ -250,30 +233,13 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
|
|||
; SSE-NEXT: movd %xmm0, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v4i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v4i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovdb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v4i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <4 x i8>, <4 x i8>* %px
|
||||
%y = load <4 x i8>, <4 x i8>* %py
|
||||
%z = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
|
||||
|
|
@ -314,36 +280,15 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
|
|||
; SSE41-NEXT: pextrw $0, %xmm0, (%rdx)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v2i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movzwl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: movzwl (%rsi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v2i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: movzwl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: movzwl (%rsi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movzwl (%rdi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512-NEXT: movzwl (%rsi), %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqb %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: movzwl (%rdi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: movzwl (%rsi), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm1
|
||||
; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpextrw $0, %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <2 x i8>, <2 x i8>* %px
|
||||
%y = load <2 x i8>, <2 x i8>* %py
|
||||
%z = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x, <2 x i8> %y)
|
||||
|
|
@ -360,30 +305,13 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
|
|||
; SSE-NEXT: movq %xmm0, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v4i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v4i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512-NEXT: vpmovdw %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v4i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <4 x i16>, <4 x i16>* %px
|
||||
%y = load <4 x i16>, <4 x i16>* %py
|
||||
%z = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %x, <4 x i16> %y)
|
||||
|
|
@ -400,30 +328,13 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
|
|||
; SSE-NEXT: movd %xmm0, (%rdx)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v2i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v2i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovqw %xmm0, (%rdx)
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: v2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, (%rdx)
|
||||
; AVX-NEXT: retq
|
||||
%x = load <2 x i16>, <2 x i16>* %px
|
||||
%y = load <2 x i16>, <2 x i16>* %py
|
||||
%z = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
|
||||
|
|
|
|||
|
|
@ -1055,67 +1055,20 @@ define <4 x double> @cvt_4i16_to_4f64(<4 x i16> %a0) nounwind {
|
|||
}
|
||||
|
||||
define <2 x double> @cvt_8i16_to_2f64(<8 x i16> %a0) nounwind {
|
||||
; AVX1-LABEL: cvt_8i16_to_2f64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
||||
; AVX1-NEXT: movswl %ax, %ecx
|
||||
; AVX1-NEXT: shrl $16, %eax
|
||||
; AVX1-NEXT: cwtl
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %ecx, %xmm1
|
||||
; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: cvt_8i16_to_2f64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd %xmm0, %eax
|
||||
; AVX2-NEXT: movswl %ax, %ecx
|
||||
; AVX2-NEXT: shrl $16, %eax
|
||||
; AVX2-NEXT: cwtl
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %ecx, %xmm1
|
||||
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: cvt_8i16_to_2f64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512F-NEXT: movswl %ax, %ecx
|
||||
; AVX512F-NEXT: shrl $16, %eax
|
||||
; AVX512F-NEXT: cwtl
|
||||
; AVX512F-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vmovd %ecx, %xmm1
|
||||
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: cvt_8i16_to_2f64:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpmovqw %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VL-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512VL-NEXT: movswl %ax, %ecx
|
||||
; AVX512VL-NEXT: shrl $16, %eax
|
||||
; AVX512VL-NEXT: cwtl
|
||||
; AVX512VL-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovd %ecx, %xmm1
|
||||
; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512VL-NEXT: retq
|
||||
; ALL-LABEL: cvt_8i16_to_2f64:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vmovd %xmm0, %eax
|
||||
; ALL-NEXT: movswl %ax, %ecx
|
||||
; ALL-NEXT: shrl $16, %eax
|
||||
; ALL-NEXT: cwtl
|
||||
; ALL-NEXT: vmovd %eax, %xmm0
|
||||
; ALL-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; ALL-NEXT: vmovd %ecx, %xmm1
|
||||
; ALL-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; ALL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
||||
; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; ALL-NEXT: retq
|
||||
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
|
||||
%2 = bitcast <2 x i16> %1 to <2 x half>
|
||||
%3 = fpext <2 x half> %2 to <2 x double>
|
||||
|
|
|
|||
Loading…
Reference in New Issue